Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/configs/am200epdkit_defconfig | 22
-rw-r--r--  arch/arm/mach-at91/at91cap9_devices.c | 2
-rw-r--r--  arch/arm/mach-at91/at91sam9263_devices.c | 2
-rw-r--r--  arch/arm/mach-at91/pm.c | 14
-rw-r--r--  arch/arm/mach-pxa/Makefile | 6
-rw-r--r--  arch/arm/mach-pxa/gumstix.c | 1
-rw-r--r--  arch/arm/mach-pxa/magician.c | 61
-rw-r--r--  arch/arm/mach-pxa/pm.c | 4
-rw-r--r--  arch/arm/mach-pxa/pxa3xx.c | 2
-rw-r--r--  arch/arm/oprofile/op_model_mpcore.c | 44
-rw-r--r--  arch/ia64/Kconfig | 3
-rw-r--r--  arch/ia64/Makefile | 1
-rw-r--r--  arch/ia64/kvm/Kconfig | 49
-rw-r--r--  arch/ia64/kvm/Makefile | 61
-rw-r--r--  arch/ia64/kvm/asm-offsets.c | 251
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 1806
-rw-r--r--  arch/ia64/kvm/kvm_fw.c | 500
-rw-r--r--  arch/ia64/kvm/kvm_minstate.h | 273
-rw-r--r--  arch/ia64/kvm/lapic.h | 25
-rw-r--r--  arch/ia64/kvm/misc.h | 93
-rw-r--r--  arch/ia64/kvm/mmio.c | 341
-rw-r--r--  arch/ia64/kvm/optvfault.S | 918
-rw-r--r--  arch/ia64/kvm/process.c | 970
-rw-r--r--  arch/ia64/kvm/trampoline.S | 1038
-rw-r--r--  arch/ia64/kvm/vcpu.c | 2163
-rw-r--r--  arch/ia64/kvm/vcpu.h | 740
-rw-r--r--  arch/ia64/kvm/vmm.c | 66
-rw-r--r--  arch/ia64/kvm/vmm_ivt.S | 1424
-rw-r--r--  arch/ia64/kvm/vti.h | 290
-rw-r--r--  arch/ia64/kvm/vtlb.c | 636
-rw-r--r--  arch/powerpc/Kconfig | 84
-rw-r--r--  arch/powerpc/Kconfig.debug | 3
-rw-r--r--  arch/powerpc/Makefile | 1
-rw-r--r--  arch/powerpc/boot/.gitignore | 1
-rw-r--r--  arch/powerpc/boot/Makefile | 2
-rw-r--r--  arch/powerpc/boot/dts/canyonlands.dts | 37
-rw-r--r--  arch/powerpc/boot/dts/glacier.dts | 37
-rw-r--r--  arch/powerpc/boot/ns16550.c | 5
-rw-r--r--  arch/powerpc/kernel/Makefile | 9
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 39
-rw-r--r--  arch/powerpc/kernel/cpu_setup_44x.S | 1
-rw-r--r--  arch/powerpc/kernel/cpu_setup_6xx.S | 8
-rw-r--r--  arch/powerpc/kernel/cputable.c | 4
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S | 11
-rw-r--r--  arch/powerpc/kernel/misc_32.S | 6
-rw-r--r--  arch/powerpc/kernel/misc_64.S | 20
-rw-r--r--  arch/powerpc/kernel/of_platform.c | 2
-rw-r--r--  arch/powerpc/kernel/paca.c | 87
-rw-r--r--  arch/powerpc/kernel/ppc32.h | 2
-rw-r--r--  arch/powerpc/kernel/process.c | 31
-rw-r--r--  arch/powerpc/kernel/prom.c | 4
-rw-r--r--  arch/powerpc/kernel/prom_init_check.sh | 58
-rw-r--r--  arch/powerpc/kernel/ptrace32.c | 27
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 5
-rw-r--r--  arch/powerpc/kernel/stacktrace.c | 1
-rw-r--r--  arch/powerpc/kernel/udbg.c | 4
-rw-r--r--  arch/powerpc/kvm/44x_tlb.c | 224
-rw-r--r--  arch/powerpc/kvm/44x_tlb.h | 91
-rw-r--r--  arch/powerpc/kvm/Kconfig | 42
-rw-r--r--  arch/powerpc/kvm/Makefile | 15
-rw-r--r--  arch/powerpc/kvm/booke_guest.c | 615
-rw-r--r--  arch/powerpc/kvm/booke_host.c | 83
-rw-r--r--  arch/powerpc/kvm/booke_interrupts.S | 436
-rw-r--r--  arch/powerpc/kvm/emulate.c | 760
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 436
-rw-r--r--  arch/powerpc/mm/fsl_booke_mmu.c | 2
-rw-r--r--  arch/powerpc/mm/hash_low_32.S | 4
-rw-r--r--  arch/powerpc/mm/init_32.c | 13
-rw-r--r--  arch/powerpc/mm/init_64.c | 3
-rw-r--r--  arch/powerpc/mm/mem.c | 37
-rw-r--r--  arch/powerpc/mm/numa.c | 1
-rw-r--r--  arch/powerpc/mm/pgtable_32.c | 23
-rw-r--r--  arch/powerpc/platforms/Kconfig | 1
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 4
-rw-r--r--  arch/powerpc/platforms/Makefile | 1
-rw-r--r--  arch/powerpc/platforms/cell/Kconfig | 13
-rw-r--r--  arch/powerpc/platforms/cell/Makefile | 20
-rw-r--r--  arch/powerpc/platforms/cell/axon_msi.c | 6
-rw-r--r--  arch/powerpc/platforms/cell/beat.c (renamed from arch/powerpc/platforms/celleb/beat.c) | 2
-rw-r--r--  arch/powerpc/platforms/cell/beat.h (renamed from arch/powerpc/platforms/celleb/beat.h) | 0
-rw-r--r--  arch/powerpc/platforms/cell/beat_htab.c (renamed from arch/powerpc/platforms/celleb/htab.c) | 0
-rw-r--r--  arch/powerpc/platforms/cell/beat_hvCall.S (renamed from arch/powerpc/platforms/celleb/hvCall.S) | 0
-rw-r--r--  arch/powerpc/platforms/cell/beat_interrupt.c (renamed from arch/powerpc/platforms/celleb/interrupt.c) | 2
-rw-r--r--  arch/powerpc/platforms/cell/beat_interrupt.h (renamed from arch/powerpc/platforms/celleb/interrupt.h) | 0
-rw-r--r--  arch/powerpc/platforms/cell/beat_iommu.c (renamed from arch/powerpc/platforms/celleb/iommu.c) | 0
-rw-r--r--  arch/powerpc/platforms/cell/beat_smp.c (renamed from arch/powerpc/platforms/celleb/smp.c) | 2
-rw-r--r--  arch/powerpc/platforms/cell/beat_spu_priv1.c (renamed from arch/powerpc/platforms/celleb/spu_priv1.c) | 0
-rw-r--r--  arch/powerpc/platforms/cell/beat_syscall.h (renamed from arch/powerpc/platforms/celleb/beat_syscall.h) | 0
-rw-r--r--  arch/powerpc/platforms/cell/beat_udbg.c (renamed from arch/powerpc/platforms/celleb/udbg_beat.c) | 0
-rw-r--r--  arch/powerpc/platforms/cell/beat_wrapper.h (renamed from arch/powerpc/platforms/celleb/beat_wrapper.h) | 0
-rw-r--r--  arch/powerpc/platforms/cell/celleb_pci.c (renamed from arch/powerpc/platforms/celleb/pci.c) | 50
-rw-r--r--  arch/powerpc/platforms/cell/celleb_pci.h (renamed from arch/powerpc/platforms/celleb/pci.h) | 19
-rw-r--r--  arch/powerpc/platforms/cell/celleb_scc.h (renamed from arch/powerpc/platforms/celleb/scc.h) | 87
-rw-r--r--  arch/powerpc/platforms/cell/celleb_scc_epci.c (renamed from arch/powerpc/platforms/celleb/scc_epci.c) | 77
-rw-r--r--  arch/powerpc/platforms/cell/celleb_scc_pciex.c | 547
-rw-r--r--  arch/powerpc/platforms/cell/celleb_scc_sio.c (renamed from arch/powerpc/platforms/celleb/scc_sio.c) | 0
-rw-r--r--  arch/powerpc/platforms/cell/celleb_scc_uhc.c (renamed from arch/powerpc/platforms/celleb/scc_uhc.c) | 2
-rw-r--r--  arch/powerpc/platforms/cell/celleb_setup.c (renamed from arch/powerpc/platforms/celleb/setup.c) | 12
-rw-r--r--  arch/powerpc/platforms/cell/io-workarounds.c | 358
-rw-r--r--  arch/powerpc/platforms/cell/io-workarounds.h | 49
-rw-r--r--  arch/powerpc/platforms/cell/setup.c | 43
-rw-r--r--  arch/powerpc/platforms/cell/spider-pci.c | 184
-rw-r--r--  arch/powerpc/platforms/celleb/Kconfig | 12
-rw-r--r--  arch/powerpc/platforms/celleb/Makefile | 9
-rw-r--r--  arch/powerpc/platforms/celleb/io-workarounds.c | 280
-rw-r--r--  arch/powerpc/platforms/iseries/exception.S | 27
-rw-r--r--  arch/powerpc/platforms/ps3/os-area.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig | 5
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile | 4
-rw-r--r--  arch/powerpc/platforms/pseries/eeh.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_cache.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/firmware.c | 10
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 39
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 36
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c | 4
-rw-r--r--  arch/powerpc/platforms/pseries/rtasd.c | 14
-rw-r--r--  arch/powerpc/platforms/pseries/scanlog.c | 23
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 17
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c | 11
-rw-r--r--  arch/powerpc/platforms/pseries/xics.c | 1
-rw-r--r--  arch/powerpc/sysdev/mv64x60_dev.c | 52
-rw-r--r--  arch/powerpc/sysdev/mv64x60_udbg.c | 2
-rw-r--r--  arch/ppc/8260_io/fcc_enet.c | 19
-rw-r--r--  arch/ppc/8xx_io/enet.c | 23
-rw-r--r--  arch/ppc/Kconfig | 82
-rw-r--r--  arch/ppc/configs/ads8272_defconfig | 930
-rw-r--r--  arch/ppc/configs/mpc86x_ads_defconfig | 633
-rw-r--r--  arch/ppc/configs/mpc885ads_defconfig | 622
-rw-r--r--  arch/ppc/kernel/ppc_ksyms.c | 1
-rw-r--r--  arch/ppc/lib/string.S | 14
-rw-r--r--  arch/ppc/platforms/Makefile | 4
-rw-r--r--  arch/ppc/platforms/fads.h | 25
-rw-r--r--  arch/ppc/platforms/mpc8272ads_setup.c | 367
-rw-r--r--  arch/ppc/platforms/mpc885ads.h | 93
-rw-r--r--  arch/ppc/platforms/mpc885ads_setup.c | 476
-rw-r--r--  arch/ppc/platforms/pq2ads.c | 53
-rw-r--r--  arch/ppc/platforms/pq2ads.h | 94
-rw-r--r--  arch/ppc/platforms/pq2ads_pd.h | 32
-rw-r--r--  arch/ppc/syslib/m8260_setup.c | 6
-rw-r--r--  arch/ppc/syslib/m82xx_pci.c | 38
-rw-r--r--  arch/ppc/syslib/m8xx_setup.c | 10
-rw-r--r--  arch/s390/Kconfig | 14
-rw-r--r--  arch/s390/Makefile | 2
-rw-r--r--  arch/s390/kernel/early.c | 4
-rw-r--r--  arch/s390/kernel/setup.c | 14
-rw-r--r--  arch/s390/kernel/vtime.c | 1
-rw-r--r--  arch/s390/kvm/Kconfig | 46
-rw-r--r--  arch/s390/kvm/Makefile | 14
-rw-r--r--  arch/s390/kvm/diag.c | 67
-rw-r--r--  arch/s390/kvm/gaccess.h | 274
-rw-r--r--  arch/s390/kvm/intercept.c | 216
-rw-r--r--  arch/s390/kvm/interrupt.c | 592
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 685
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 64
-rw-r--r--  arch/s390/kvm/priv.c | 323
-rw-r--r--  arch/s390/kvm/sie64a.S | 47
-rw-r--r--  arch/s390/kvm/sigp.c | 288
-rw-r--r--  arch/s390/mm/pgtable.c | 65
-rw-r--r--  arch/sparc64/kernel/smp.c | 27
-rw-r--r--  arch/sparc64/kernel/sys_sparc.c | 4
-rw-r--r--  arch/um/Kconfig.x86_64 | 7
-rw-r--r--  arch/um/os-Linux/helper.c | 1
-rw-r--r--  arch/um/sys-i386/Makefile | 2
-rw-r--r--  arch/um/sys-x86_64/Makefile | 2
-rw-r--r--  arch/x86/Kconfig | 28
-rw-r--r--  arch/x86/Kconfig.cpu | 11
-rw-r--r--  arch/x86/Kconfig.debug | 24
-rw-r--r--  arch/x86/boot/.gitignore | 5
-rw-r--r--  arch/x86/boot/header.S | 6
-rw-r--r--  arch/x86/configs/i386_defconfig | 1
-rw-r--r--  arch/x86/configs/x86_64_defconfig | 1
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 10
-rw-r--r--  arch/x86/ia32/ia32entry.S | 2
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 4
-rw-r--r--  arch/x86/kernel/acpi/realmode/.gitignore | 3
-rw-r--r--  arch/x86/kernel/alternative.c | 39
-rw-r--r--  arch/x86/kernel/apic_32.c | 5
-rw-r--r--  arch/x86/kernel/apic_64.c | 7
-rw-r--r--  arch/x86/kernel/apm_32.c | 3
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 1
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 6
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c | 18
-rw-r--r--  arch/x86/kernel/cpu/nexgen.c | 59
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 14
-rw-r--r--  arch/x86/kernel/crash.c | 3
-rw-r--r--  arch/x86/kernel/e820_64.c | 35
-rw-r--r--  arch/x86/kernel/entry_32.S | 12
-rw-r--r--  arch/x86/kernel/genapic_64.c | 2
-rw-r--r--  arch/x86/kernel/head64.c | 25
-rw-r--r--  arch/x86/kernel/hpet.c | 2
-rw-r--r--  arch/x86/kernel/i8253.c | 6
-rw-r--r--  arch/x86/kernel/io_apic_32.c | 2
-rw-r--r--  arch/x86/kernel/io_apic_64.c | 2
-rw-r--r--  arch/x86/kernel/irq_32.c | 2
-rw-r--r--  arch/x86/kernel/kdebugfs.c | 163
-rw-r--r--  arch/x86/kernel/kvm.c | 248
-rw-r--r--  arch/x86/kernel/kvmclock.c | 187
-rw-r--r--  arch/x86/kernel/mfgpt_32.c | 3
-rw-r--r--  arch/x86/kernel/mpparse.c | 39
-rw-r--r--  arch/x86/kernel/paravirt.c | 12
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 1
-rw-r--r--  arch/x86/kernel/process.c | 117
-rw-r--r--  arch/x86/kernel/process_32.c | 120
-rw-r--r--  arch/x86/kernel/process_64.c | 125
-rw-r--r--  arch/x86/kernel/ptrace.c | 95
-rw-r--r--  arch/x86/kernel/reboot.c | 17
-rw-r--r--  arch/x86/kernel/setup.c | 2
-rw-r--r--  arch/x86/kernel/setup_32.c | 14
-rw-r--r--  arch/x86/kernel/setup_64.c | 36
-rw-r--r--  arch/x86/kernel/signal_32.c | 35
-rw-r--r--  arch/x86/kernel/signal_64.c | 30
-rw-r--r--  arch/x86/kernel/smpboot.c | 20
-rw-r--r--  arch/x86/kernel/summit_32.c | 5
-rw-r--r--  arch/x86/kernel/tlb_64.c | 6
-rw-r--r--  arch/x86/kernel/trampoline_32.S | 2
-rw-r--r--  arch/x86/kernel/traps_32.c | 2
-rw-r--r--  arch/x86/kernel/vmi_32.c | 22
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 2
-rw-r--r--  arch/x86/kvm/Kconfig | 13
-rw-r--r--  arch/x86/kvm/Makefile | 6
-rw-r--r--  arch/x86/kvm/i8254.c | 611
-rw-r--r--  arch/x86/kvm/i8254.h | 63
-rw-r--r--  arch/x86/kvm/irq.c | 18
-rw-r--r--  arch/x86/kvm/irq.h | 3
-rw-r--r--  arch/x86/kvm/kvm_svm.h | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 35
-rw-r--r--  arch/x86/kvm/mmu.c | 672
-rw-r--r--  arch/x86/kvm/mmu.h | 6
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 86
-rw-r--r--  arch/x86/kvm/segment_descriptor.h | 29
-rw-r--r--  arch/x86/kvm/svm.c | 352
-rw-r--r--  arch/x86/kvm/svm.h | 3
-rw-r--r--  arch/x86/kvm/tss.h | 59
-rw-r--r--  arch/x86/kvm/vmx.c | 278
-rw-r--r--  arch/x86/kvm/vmx.h | 10
-rw-r--r--  arch/x86/kvm/x86.c | 897
-rw-r--r--  arch/x86/kvm/x86_emulate.c | 285
-rw-r--r--  arch/x86/lib/Makefile | 3
-rw-r--r--  arch/x86/lib/bitops_32.c | 70
-rw-r--r--  arch/x86/lib/bitops_64.c | 175
-rw-r--r--  arch/x86/mach-visws/mpparse.c | 15
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c | 31
-rw-r--r--  arch/x86/mm/Makefile | 2
-rw-r--r--  arch/x86/mm/dump_pagetables.c | 2
-rw-r--r--  arch/x86/mm/init_32.c | 33
-rw-r--r--  arch/x86/mm/init_64.c | 65
-rw-r--r--  arch/x86/mm/ioremap.c | 31
-rw-r--r--  arch/x86/mm/numa_64.c | 42
-rw-r--r--  arch/x86/mm/pageattr.c | 6
-rw-r--r--  arch/x86/mm/pat.c | 208
-rw-r--r--  arch/x86/mm/pgtable.c | 276
-rw-r--r--  arch/x86/mm/pgtable_32.c | 204
-rw-r--r--  arch/x86/mm/srat_64.c | 2
-rw-r--r--  arch/x86/xen/Kconfig | 2
-rw-r--r--  arch/x86/xen/Makefile | 4
-rw-r--r--  arch/x86/xen/enlighten.c | 54
-rw-r--r--  arch/x86/xen/events.c | 591
-rw-r--r--  arch/x86/xen/features.c | 29
-rw-r--r--  arch/x86/xen/grant-table.c | 91
-rw-r--r--  arch/x86/xen/mmu.c | 143
-rw-r--r--  arch/x86/xen/setup.c | 21
-rw-r--r--  arch/x86/xen/smp.c | 22
-rw-r--r--  arch/x86/xen/xen-asm.S | 42
-rw-r--r--  arch/x86/xen/xen-ops.h | 8
265 files changed, 23683 insertions(+), 6949 deletions(-)
diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig
index dc030cfe5009..5e68420f4680 100644
--- a/arch/arm/configs/am200epdkit_defconfig
+++ b/arch/arm/configs/am200epdkit_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.25-rc3 3# Linux kernel version: 2.6.25
4# Sun Mar 9 06:33:33 2008 4# Sun Apr 20 00:29:49 2008
5# 5#
6CONFIG_ARM=y 6CONFIG_ARM=y
7CONFIG_SYS_SUPPORTS_APM_EMULATION=y 7CONFIG_SYS_SUPPORTS_APM_EMULATION=y
@@ -51,7 +51,8 @@ CONFIG_FAIR_GROUP_SCHED=y
51# CONFIG_RT_GROUP_SCHED is not set 51# CONFIG_RT_GROUP_SCHED is not set
52CONFIG_USER_SCHED=y 52CONFIG_USER_SCHED=y
53# CONFIG_CGROUP_SCHED is not set 53# CONFIG_CGROUP_SCHED is not set
54# CONFIG_SYSFS_DEPRECATED is not set 54CONFIG_SYSFS_DEPRECATED=y
55CONFIG_SYSFS_DEPRECATED_V2=y
55# CONFIG_RELAY is not set 56# CONFIG_RELAY is not set
56# CONFIG_NAMESPACES is not set 57# CONFIG_NAMESPACES is not set
57# CONFIG_BLK_DEV_INITRD is not set 58# CONFIG_BLK_DEV_INITRD is not set
@@ -85,6 +86,7 @@ CONFIG_SLAB=y
85CONFIG_HAVE_OPROFILE=y 86CONFIG_HAVE_OPROFILE=y
86# CONFIG_KPROBES is not set 87# CONFIG_KPROBES is not set
87CONFIG_HAVE_KPROBES=y 88CONFIG_HAVE_KPROBES=y
89CONFIG_HAVE_KRETPROBES=y
88CONFIG_PROC_PAGE_MONITOR=y 90CONFIG_PROC_PAGE_MONITOR=y
89CONFIG_SLABINFO=y 91CONFIG_SLABINFO=y
90CONFIG_RT_MUTEXES=y 92CONFIG_RT_MUTEXES=y
@@ -115,7 +117,6 @@ CONFIG_IOSCHED_NOOP=y
115CONFIG_DEFAULT_NOOP=y 117CONFIG_DEFAULT_NOOP=y
116CONFIG_DEFAULT_IOSCHED="noop" 118CONFIG_DEFAULT_IOSCHED="noop"
117CONFIG_CLASSIC_RCU=y 119CONFIG_CLASSIC_RCU=y
118# CONFIG_PREEMPT_RCU is not set
119 120
120# 121#
121# System Type 122# System Type
@@ -320,8 +321,6 @@ CONFIG_TCP_CONG_CUBIC=y
320CONFIG_DEFAULT_TCP_CONG="cubic" 321CONFIG_DEFAULT_TCP_CONG="cubic"
321# CONFIG_TCP_MD5SIG is not set 322# CONFIG_TCP_MD5SIG is not set
322# CONFIG_IPV6 is not set 323# CONFIG_IPV6 is not set
323# CONFIG_INET6_XFRM_TUNNEL is not set
324# CONFIG_INET6_TUNNEL is not set
325# CONFIG_NETWORK_SECMARK is not set 324# CONFIG_NETWORK_SECMARK is not set
326# CONFIG_NETFILTER is not set 325# CONFIG_NETFILTER is not set
327# CONFIG_IP_DCCP is not set 326# CONFIG_IP_DCCP is not set
@@ -383,7 +382,6 @@ CONFIG_IEEE80211=m
383CONFIG_IEEE80211_CRYPT_WEP=m 382CONFIG_IEEE80211_CRYPT_WEP=m
384# CONFIG_IEEE80211_CRYPT_CCMP is not set 383# CONFIG_IEEE80211_CRYPT_CCMP is not set
385# CONFIG_IEEE80211_CRYPT_TKIP is not set 384# CONFIG_IEEE80211_CRYPT_TKIP is not set
386# CONFIG_IEEE80211_SOFTMAC is not set
387# CONFIG_RFKILL is not set 385# CONFIG_RFKILL is not set
388# CONFIG_NET_9P is not set 386# CONFIG_NET_9P is not set
389 387
@@ -503,7 +501,7 @@ CONFIG_IDE_MAX_HWIFS=2
503CONFIG_BLK_DEV_IDE=m 501CONFIG_BLK_DEV_IDE=m
504 502
505# 503#
506# Please see Documentation/ide.txt for help/info on IDE drives 504# Please see Documentation/ide/ide.txt for help/info on IDE drives
507# 505#
508# CONFIG_BLK_DEV_IDE_SATA is not set 506# CONFIG_BLK_DEV_IDE_SATA is not set
509CONFIG_BLK_DEV_IDEDISK=m 507CONFIG_BLK_DEV_IDEDISK=m
@@ -518,10 +516,9 @@ CONFIG_IDE_PROC_FS=y
518# 516#
519# IDE chipset support/bugfixes 517# IDE chipset support/bugfixes
520# 518#
521CONFIG_IDE_GENERIC=m
522# CONFIG_BLK_DEV_PLATFORM is not set 519# CONFIG_BLK_DEV_PLATFORM is not set
523# CONFIG_BLK_DEV_IDEDMA is not set 520# CONFIG_BLK_DEV_IDEDMA is not set
524CONFIG_IDE_ARCH_OBSOLETE_INIT=y 521# CONFIG_BLK_DEV_HD_ONLY is not set
525# CONFIG_BLK_DEV_HD is not set 522# CONFIG_BLK_DEV_HD is not set
526 523
527# 524#
@@ -562,6 +559,7 @@ CONFIG_NETDEV_10000=y
562# 559#
563# CONFIG_WLAN_PRE80211 is not set 560# CONFIG_WLAN_PRE80211 is not set
564# CONFIG_WLAN_80211 is not set 561# CONFIG_WLAN_80211 is not set
562# CONFIG_IWLWIFI_LEDS is not set
565# CONFIG_NET_PCMCIA is not set 563# CONFIG_NET_PCMCIA is not set
566# CONFIG_WAN is not set 564# CONFIG_WAN is not set
567# CONFIG_PPP is not set 565# CONFIG_PPP is not set
@@ -707,6 +705,8 @@ CONFIG_SSB_POSSIBLE=y
707# 705#
708# CONFIG_MFD_SM501 is not set 706# CONFIG_MFD_SM501 is not set
709# CONFIG_MFD_ASIC3 is not set 707# CONFIG_MFD_ASIC3 is not set
708# CONFIG_HTC_EGPIO is not set
709# CONFIG_HTC_PASIC3 is not set
710 710
711# 711#
712# Multimedia devices 712# Multimedia devices
@@ -745,6 +745,7 @@ CONFIG_FB_TILEBLITTING=y
745CONFIG_FB_PXA=y 745CONFIG_FB_PXA=y
746CONFIG_FB_PXA_PARAMETERS=y 746CONFIG_FB_PXA_PARAMETERS=y
747CONFIG_FB_MBX=m 747CONFIG_FB_MBX=m
748# CONFIG_FB_METRONOME is not set
748CONFIG_FB_VIRTUAL=m 749CONFIG_FB_VIRTUAL=m
749# CONFIG_BACKLIGHT_LCD_SUPPORT is not set 750# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
750 751
@@ -891,7 +892,6 @@ CONFIG_RTC_LIB=y
891# CONFIG_JFS_FS is not set 892# CONFIG_JFS_FS is not set
892# CONFIG_FS_POSIX_ACL is not set 893# CONFIG_FS_POSIX_ACL is not set
893# CONFIG_XFS_FS is not set 894# CONFIG_XFS_FS is not set
894# CONFIG_GFS2_FS is not set
895# CONFIG_OCFS2_FS is not set 895# CONFIG_OCFS2_FS is not set
896# CONFIG_DNOTIFY is not set 896# CONFIG_DNOTIFY is not set
897CONFIG_INOTIFY=y 897CONFIG_INOTIFY=y
diff --git a/arch/arm/mach-at91/at91cap9_devices.c b/arch/arm/mach-at91/at91cap9_devices.c
index f1a80d74a4b6..be526746e01e 100644
--- a/arch/arm/mach-at91/at91cap9_devices.c
+++ b/arch/arm/mach-at91/at91cap9_devices.c
@@ -246,7 +246,7 @@ void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data)
246 } 246 }
247 247
248 mmc0_data = *data; 248 mmc0_data = *data;
249 at91_clock_associate("mci0_clk", &at91cap9_mmc1_device.dev, "mci_clk"); 249 at91_clock_associate("mci0_clk", &at91cap9_mmc0_device.dev, "mci_clk");
250 platform_device_register(&at91cap9_mmc0_device); 250 platform_device_register(&at91cap9_mmc0_device);
251 } else { /* MCI1 */ 251 } else { /* MCI1 */
252 /* CLK */ 252 /* CLK */
diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c
index b6454c525962..719667e25c98 100644
--- a/arch/arm/mach-at91/at91sam9263_devices.c
+++ b/arch/arm/mach-at91/at91sam9263_devices.c
@@ -308,7 +308,7 @@ void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data)
308 } 308 }
309 309
310 mmc0_data = *data; 310 mmc0_data = *data;
311 at91_clock_associate("mci0_clk", &at91sam9263_mmc1_device.dev, "mci_clk"); 311 at91_clock_associate("mci0_clk", &at91sam9263_mmc0_device.dev, "mci_clk");
312 platform_device_register(&at91sam9263_mmc0_device); 312 platform_device_register(&at91sam9263_mmc0_device);
313 } else { /* MCI1 */ 313 } else { /* MCI1 */
314 /* CLK */ 314 /* CLK */
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 39733b6992aa..aa863c157708 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -61,6 +61,15 @@ static inline void sdram_selfrefresh_enable(void)
61#else 61#else
62#include <asm/arch/at91sam9_sdramc.h> 62#include <asm/arch/at91sam9_sdramc.h>
63 63
64#ifdef CONFIG_ARCH_AT91SAM9263
65/*
66 * FIXME either or both the SDRAM controllers (EB0, EB1) might be in use;
67 * handle those cases both here and in the Suspend-To-RAM support.
68 */
69#define AT91_SDRAMC AT91_SDRAMC0
70#warning Assuming EB1 SDRAM controller is *NOT* used
71#endif
72
64static u32 saved_lpr; 73static u32 saved_lpr;
65 74
66static inline void sdram_selfrefresh_enable(void) 75static inline void sdram_selfrefresh_enable(void)
@@ -75,11 +84,6 @@ static inline void sdram_selfrefresh_enable(void)
75 84
76#define sdram_selfrefresh_disable() at91_sys_write(AT91_SDRAMC_LPR, saved_lpr) 85#define sdram_selfrefresh_disable() at91_sys_write(AT91_SDRAMC_LPR, saved_lpr)
77 86
78/*
79 * FIXME: The AT91SAM9263 has a second EBI controller which may have
80 * additional SDRAM. pm_slowclock.S will require a similar fix.
81 */
82
83#endif 87#endif
84 88
85 89
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index 7cdcb459ea9d..6a830853aa6a 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -5,9 +5,9 @@
5# Common support (must be linked before board specific support) 5# Common support (must be linked before board specific support)
6obj-y += clock.o devices.o generic.o irq.o dma.o \ 6obj-y += clock.o devices.o generic.o irq.o dma.o \
7 time.o gpio.o 7 time.o gpio.o
8obj-$(CONFIG_PXA25x) += pxa25x.o mfp-pxa2xx.o 8obj-$(CONFIG_PXA25x) += mfp-pxa2xx.o pxa25x.o
9obj-$(CONFIG_PXA27x) += pxa27x.o mfp-pxa2xx.o 9obj-$(CONFIG_PXA27x) += mfp-pxa2xx.o pxa27x.o
10obj-$(CONFIG_PXA3xx) += pxa3xx.o mfp-pxa3xx.o smemc.o 10obj-$(CONFIG_PXA3xx) += mfp-pxa3xx.o pxa3xx.o smemc.o
11obj-$(CONFIG_CPU_PXA300) += pxa300.o 11obj-$(CONFIG_CPU_PXA300) += pxa300.o
12obj-$(CONFIG_CPU_PXA320) += pxa320.o 12obj-$(CONFIG_CPU_PXA320) += pxa320.o
13 13
diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c
index f01d18544133..bdf239754037 100644
--- a/arch/arm/mach-pxa/gumstix.c
+++ b/arch/arm/mach-pxa/gumstix.c
@@ -40,6 +40,7 @@
40 40
41#include <asm/arch/pxa-regs.h> 41#include <asm/arch/pxa-regs.h>
42#include <asm/arch/pxa2xx-regs.h> 42#include <asm/arch/pxa2xx-regs.h>
43#include <asm/arch/pxa2xx-gpio.h>
43 44
44#include "generic.h" 45#include "generic.h"
45 46
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index d70be75bd199..badba064dc04 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -114,6 +114,14 @@ static unsigned long magician_pin_config[] = {
114 GPIO82_CIF_DD_5, 114 GPIO82_CIF_DD_5,
115 GPIO84_CIF_FV, 115 GPIO84_CIF_FV,
116 GPIO85_CIF_LV, 116 GPIO85_CIF_LV,
117
118 /* Magician specific input GPIOs */
119 GPIO9_GPIO, /* unknown */
120 GPIO10_GPIO, /* GSM_IRQ */
121 GPIO13_GPIO, /* CPLD_IRQ */
122 GPIO107_GPIO, /* DS1WM_IRQ */
123 GPIO108_GPIO, /* GSM_READY */
124 GPIO115_GPIO, /* nPEN_IRQ */
117}; 125};
118 126
119/* 127/*
@@ -438,7 +446,7 @@ static struct pasic3_led pasic3_leds[] = {
438 446
439static struct platform_device pasic3; 447static struct platform_device pasic3;
440 448
441static struct pasic3_leds_machinfo __devinit pasic3_leds_info = { 449static struct pasic3_leds_machinfo pasic3_leds_info = {
442 .num_leds = ARRAY_SIZE(pasic3_leds), 450 .num_leds = ARRAY_SIZE(pasic3_leds),
443 .power_gpio = EGPIO_MAGICIAN_LED_POWER, 451 .power_gpio = EGPIO_MAGICIAN_LED_POWER,
444 .leds = pasic3_leds, 452 .leds = pasic3_leds,
@@ -543,9 +551,28 @@ static struct platform_device power_supply = {
543static int magician_mci_init(struct device *dev, 551static int magician_mci_init(struct device *dev,
544 irq_handler_t detect_irq, void *data) 552 irq_handler_t detect_irq, void *data)
545{ 553{
546 return request_irq(IRQ_MAGICIAN_SD, detect_irq, 554 int err;
555
556 err = request_irq(IRQ_MAGICIAN_SD, detect_irq,
547 IRQF_DISABLED | IRQF_SAMPLE_RANDOM, 557 IRQF_DISABLED | IRQF_SAMPLE_RANDOM,
548 "MMC card detect", data); 558 "MMC card detect", data);
559 if (err)
560 goto err_request_irq;
561 err = gpio_request(EGPIO_MAGICIAN_SD_POWER, "SD_POWER");
562 if (err)
563 goto err_request_power;
564 err = gpio_request(EGPIO_MAGICIAN_nSD_READONLY, "nSD_READONLY");
565 if (err)
566 goto err_request_readonly;
567
568 return 0;
569
570err_request_readonly:
571 gpio_free(EGPIO_MAGICIAN_SD_POWER);
572err_request_power:
573 free_irq(IRQ_MAGICIAN_SD, data);
574err_request_irq:
575 return err;
549} 576}
550 577
551static void magician_mci_setpower(struct device *dev, unsigned int vdd) 578static void magician_mci_setpower(struct device *dev, unsigned int vdd)
@@ -562,6 +589,8 @@ static int magician_mci_get_ro(struct device *dev)
562 589
563static void magician_mci_exit(struct device *dev, void *data) 590static void magician_mci_exit(struct device *dev, void *data)
564{ 591{
592 gpio_free(EGPIO_MAGICIAN_nSD_READONLY);
593 gpio_free(EGPIO_MAGICIAN_SD_POWER);
565 free_irq(IRQ_MAGICIAN_SD, data); 594 free_irq(IRQ_MAGICIAN_SD, data);
566} 595}
567 596
@@ -643,28 +672,42 @@ static void __init magician_init(void)
643{ 672{
644 void __iomem *cpld; 673 void __iomem *cpld;
645 int lcd_select; 674 int lcd_select;
675 int err;
676
677 gpio_request(GPIO13_MAGICIAN_CPLD_IRQ, "CPLD_IRQ");
678 gpio_request(GPIO107_MAGICIAN_DS1WM_IRQ, "DS1WM_IRQ");
646 679
647 pxa2xx_mfp_config(ARRAY_AND_SIZE(magician_pin_config)); 680 pxa2xx_mfp_config(ARRAY_AND_SIZE(magician_pin_config));
648 681
649 platform_add_devices(devices, ARRAY_SIZE(devices)); 682 platform_add_devices(devices, ARRAY_SIZE(devices));
683
684 err = gpio_request(GPIO83_MAGICIAN_nIR_EN, "nIR_EN");
685 if (!err) {
686 gpio_direction_output(GPIO83_MAGICIAN_nIR_EN, 1);
687 pxa_set_ficp_info(&magician_ficp_info);
688 }
650 pxa_set_i2c_info(NULL); 689 pxa_set_i2c_info(NULL);
651 pxa_set_mci_info(&magician_mci_info); 690 pxa_set_mci_info(&magician_mci_info);
652 pxa_set_ohci_info(&magician_ohci_info); 691 pxa_set_ohci_info(&magician_ohci_info);
653 pxa_set_ficp_info(&magician_ficp_info);
654 692
655 /* Check LCD type we have */ 693 /* Check LCD type we have */
656 cpld = ioremap_nocache(PXA_CS3_PHYS, 0x1000); 694 cpld = ioremap_nocache(PXA_CS3_PHYS, 0x1000);
657 if (cpld) { 695 if (cpld) {
658 u8 board_id = __raw_readb(cpld+0x14); 696 u8 board_id = __raw_readb(cpld+0x14);
697 iounmap(cpld);
659 system_rev = board_id & 0x7; 698 system_rev = board_id & 0x7;
660 lcd_select = board_id & 0x8; 699 lcd_select = board_id & 0x8;
661 iounmap(cpld);
662 pr_info("LCD type: %s\n", lcd_select ? "Samsung" : "Toppoly"); 700 pr_info("LCD type: %s\n", lcd_select ? "Samsung" : "Toppoly");
663 if (lcd_select && (system_rev < 3)) 701 if (lcd_select && (system_rev < 3)) {
664 pxa_gpio_mode(GPIO75_MAGICIAN_SAMSUNG_POWER_MD); 702 gpio_request(GPIO75_MAGICIAN_SAMSUNG_POWER, "SAMSUNG_POWER");
665 pxa_gpio_mode(GPIO104_MAGICIAN_LCD_POWER_1_MD); 703 gpio_direction_output(GPIO75_MAGICIAN_SAMSUNG_POWER, 0);
666 pxa_gpio_mode(GPIO105_MAGICIAN_LCD_POWER_2_MD); 704 }
667 pxa_gpio_mode(GPIO106_MAGICIAN_LCD_POWER_3_MD); 705 gpio_request(GPIO104_MAGICIAN_LCD_POWER_1, "LCD_POWER_1");
706 gpio_request(GPIO105_MAGICIAN_LCD_POWER_2, "LCD_POWER_2");
707 gpio_request(GPIO106_MAGICIAN_LCD_POWER_3, "LCD_POWER_3");
708 gpio_direction_output(GPIO104_MAGICIAN_LCD_POWER_1, 0);
709 gpio_direction_output(GPIO105_MAGICIAN_LCD_POWER_2, 0);
710 gpio_direction_output(GPIO106_MAGICIAN_LCD_POWER_3, 0);
668 set_pxa_fb_info(lcd_select ? &samsung_info : &toppoly_info); 711 set_pxa_fb_info(lcd_select ? &samsung_info : &toppoly_info);
669 } else 712 } else
670 pr_err("LCD detection: CPLD mapping failed\n"); 713 pr_err("LCD detection: CPLD mapping failed\n");
diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c
index 039194cbe477..ec1bbf333a3a 100644
--- a/arch/arm/mach-pxa/pm.c
+++ b/arch/arm/mach-pxa/pm.c
@@ -46,8 +46,8 @@ int pxa_pm_enter(suspend_state_t state)
46 sleep_save_checksum += sleep_save[i]; 46 sleep_save_checksum += sleep_save[i];
47 } 47 }
48 48
49 /* Clear sleep reset status */ 49 /* Clear reset status */
50 RCSR = RCSR_SMR; 50 RCSR = RCSR_HWR | RCSR_WDR | RCSR_SMR | RCSR_GPR;
51 51
52 /* *** go zzz *** */ 52 /* *** go zzz *** */
53 pxa_cpu_pm_fns->enter(state); 53 pxa_cpu_pm_fns->enter(state);
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index dde355e88fa1..b6a6f5fcc77a 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -486,6 +486,8 @@ static int pxa3xx_set_wake(unsigned int irq, unsigned int on)
486 case IRQ_MMC3: 486 case IRQ_MMC3:
487 mask = ADXER_MFP_GEN12; 487 mask = ADXER_MFP_GEN12;
488 break; 488 break;
489 default:
490 return -EINVAL;
489 } 491 }
490 492
491 local_irq_save(flags); 493 local_irq_save(flags);
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 75bae067922d..74fae6045650 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -51,7 +51,7 @@
51/* 51/*
52 * MPCore SCU event monitor support 52 * MPCore SCU event monitor support
53 */ 53 */
54#define SCU_EVENTMONITORS_VA_BASE __io_address(REALVIEW_MPCORE_SCU_BASE + 0x10) 54#define SCU_EVENTMONITORS_VA_BASE __io_address(REALVIEW_EB11MP_SCU_BASE + 0x10)
55 55
56/* 56/*
57 * Bitmask of used SCU counters 57 * Bitmask of used SCU counters
@@ -80,7 +80,7 @@ static irqreturn_t scu_em_interrupt(int irq, void *arg)
80 struct eventmonitor __iomem *emc = SCU_EVENTMONITORS_VA_BASE; 80 struct eventmonitor __iomem *emc = SCU_EVENTMONITORS_VA_BASE;
81 unsigned int cnt; 81 unsigned int cnt;
82 82
83 cnt = irq - IRQ_PMU_SCU0; 83 cnt = irq - IRQ_EB11MP_PMU_SCU0;
84 oprofile_add_sample(get_irq_regs(), SCU_COUNTER(cnt)); 84 oprofile_add_sample(get_irq_regs(), SCU_COUNTER(cnt));
85 scu_reset_counter(emc, cnt); 85 scu_reset_counter(emc, cnt);
86 86
@@ -119,10 +119,10 @@ static int scu_start(void)
119 */ 119 */
120 for (i = 0; i < NUM_SCU_COUNTERS; i++) { 120 for (i = 0; i < NUM_SCU_COUNTERS; i++) {
121 if (scu_em_used & (1 << i)) { 121 if (scu_em_used & (1 << i)) {
122 ret = request_irq(IRQ_PMU_SCU0 + i, scu_em_interrupt, IRQF_DISABLED, "SCU PMU", NULL); 122 ret = request_irq(IRQ_EB11MP_PMU_SCU0 + i, scu_em_interrupt, IRQF_DISABLED, "SCU PMU", NULL);
123 if (ret) { 123 if (ret) {
124 printk(KERN_ERR "oprofile: unable to request IRQ%u for SCU Event Monitor\n", 124 printk(KERN_ERR "oprofile: unable to request IRQ%u for SCU Event Monitor\n",
125 IRQ_PMU_SCU0 + i); 125 IRQ_EB11MP_PMU_SCU0 + i);
126 goto err_free_scu; 126 goto err_free_scu;
127 } 127 }
128 } 128 }
@@ -153,7 +153,7 @@ static int scu_start(void)
153 153
154 err_free_scu: 154 err_free_scu:
155 while (i--) 155 while (i--)
156 free_irq(IRQ_PMU_SCU0 + i, NULL); 156 free_irq(IRQ_EB11MP_PMU_SCU0 + i, NULL);
157 return ret; 157 return ret;
158} 158}
159 159
@@ -175,7 +175,7 @@ static void scu_stop(void)
175 for (i = 0; i < NUM_SCU_COUNTERS; i++) { 175 for (i = 0; i < NUM_SCU_COUNTERS; i++) {
176 if (scu_em_used & (1 << i)) { 176 if (scu_em_used & (1 << i)) {
177 scu_reset_counter(emc, i); 177 scu_reset_counter(emc, i);
178 free_irq(IRQ_PMU_SCU0 + i, NULL); 178 free_irq(IRQ_EB11MP_PMU_SCU0 + i, NULL);
179 } 179 }
180 } 180 }
181} 181}
@@ -225,10 +225,10 @@ static int em_setup_ctrs(void)
225} 225}
226 226
227static int arm11_irqs[] = { 227static int arm11_irqs[] = {
228 [0] = IRQ_PMU_CPU0, 228 [0] = IRQ_EB11MP_PMU_CPU0,
229 [1] = IRQ_PMU_CPU1, 229 [1] = IRQ_EB11MP_PMU_CPU1,
230 [2] = IRQ_PMU_CPU2, 230 [2] = IRQ_EB11MP_PMU_CPU2,
231 [3] = IRQ_PMU_CPU3 231 [3] = IRQ_EB11MP_PMU_CPU3
232}; 232};
233 233
234static int em_start(void) 234static int em_start(void)
@@ -273,22 +273,22 @@ static int em_setup(void)
273 /* 273 /*
274 * Send SCU PMU interrupts to the "owner" CPU. 274 * Send SCU PMU interrupts to the "owner" CPU.
275 */ 275 */
276 em_route_irq(IRQ_PMU_SCU0, 0); 276 em_route_irq(IRQ_EB11MP_PMU_SCU0, 0);
277 em_route_irq(IRQ_PMU_SCU1, 0); 277 em_route_irq(IRQ_EB11MP_PMU_SCU1, 0);
278 em_route_irq(IRQ_PMU_SCU2, 1); 278 em_route_irq(IRQ_EB11MP_PMU_SCU2, 1);
279 em_route_irq(IRQ_PMU_SCU3, 1); 279 em_route_irq(IRQ_EB11MP_PMU_SCU3, 1);
280 em_route_irq(IRQ_PMU_SCU4, 2); 280 em_route_irq(IRQ_EB11MP_PMU_SCU4, 2);
281 em_route_irq(IRQ_PMU_SCU5, 2); 281 em_route_irq(IRQ_EB11MP_PMU_SCU5, 2);
282 em_route_irq(IRQ_PMU_SCU6, 3); 282 em_route_irq(IRQ_EB11MP_PMU_SCU6, 3);
283 em_route_irq(IRQ_PMU_SCU7, 3); 283 em_route_irq(IRQ_EB11MP_PMU_SCU7, 3);
284 284
285 /* 285 /*
286 * Send CP15 PMU interrupts to the owner CPU. 286 * Send CP15 PMU interrupts to the owner CPU.
287 */ 287 */
288 em_route_irq(IRQ_PMU_CPU0, 0); 288 em_route_irq(IRQ_EB11MP_PMU_CPU0, 0);
289 em_route_irq(IRQ_PMU_CPU1, 1); 289 em_route_irq(IRQ_EB11MP_PMU_CPU1, 1);
290 em_route_irq(IRQ_PMU_CPU2, 2); 290 em_route_irq(IRQ_EB11MP_PMU_CPU2, 2);
291 em_route_irq(IRQ_PMU_CPU3, 3); 291 em_route_irq(IRQ_EB11MP_PMU_CPU3, 3);
292 292
293 return 0; 293 return 0;
294} 294}
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index cd13e138bd03..3aa6c821449a 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -19,6 +19,7 @@ config IA64
19 select HAVE_OPROFILE 19 select HAVE_OPROFILE
20 select HAVE_KPROBES 20 select HAVE_KPROBES
21 select HAVE_KRETPROBES 21 select HAVE_KRETPROBES
22 select HAVE_KVM
22 default y 23 default y
23 help 24 help
24 The Itanium Processor Family is Intel's 64-bit successor to 25 The Itanium Processor Family is Intel's 64-bit successor to
@@ -589,6 +590,8 @@ config MSPEC
589 590
590source "fs/Kconfig" 591source "fs/Kconfig"
591 592
593source "arch/ia64/kvm/Kconfig"
594
592source "lib/Kconfig" 595source "lib/Kconfig"
593 596
594# 597#
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f1645c4f7039..ec4cca477f49 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
57core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ 57core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
58core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ 58core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
59core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ 59core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
60core-$(CONFIG_KVM) += arch/ia64/kvm/
60 61
61drivers-$(CONFIG_PCI) += arch/ia64/pci/ 62drivers-$(CONFIG_PCI) += arch/ia64/pci/
62drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ 63drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
new file mode 100644
index 000000000000..7914e4828504
--- /dev/null
+++ b/arch/ia64/kvm/Kconfig
@@ -0,0 +1,49 @@
1#
2# KVM configuration
3#
4config HAVE_KVM
5 bool
6
7menuconfig VIRTUALIZATION
8 bool "Virtualization"
9 depends on HAVE_KVM || IA64
10 default y
11 ---help---
12 Say Y here to get to see options for using your Linux host to run other
13 operating systems inside virtual machines (guests).
14 This option alone does not add any kernel code.
15
16 If you say N, all options in this submenu will be skipped and disabled.
17
18if VIRTUALIZATION
19
20config KVM
21 tristate "Kernel-based Virtual Machine (KVM) support"
22 depends on HAVE_KVM && EXPERIMENTAL
23 select PREEMPT_NOTIFIERS
24 select ANON_INODES
25 ---help---
26 Support hosting fully virtualized guest machines using hardware
27 virtualization extensions. You will need a fairly recent
28 processor equipped with virtualization extensions. You will also
29 need to select one or more of the processor modules below.
30
31 This module provides access to the hardware capabilities through
32 a character device node named /dev/kvm.
33
34 To compile this as a module, choose M here: the module
35 will be called kvm.
36
37 If unsure, say N.
38
39config KVM_INTEL
40 tristate "KVM for Intel Itanium 2 processors support"
41 depends on KVM && m
42 ---help---
43 Provides support for KVM on Itanium 2 processors equipped with the VT
44 extensions.
45
46config KVM_TRACE
47 bool
48
49endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
new file mode 100644
index 000000000000..41b034ffa73b
--- /dev/null
+++ b/arch/ia64/kvm/Makefile
@@ -0,0 +1,61 @@
1#This Make file is to generate asm-offsets.h and build source.
2#
3
4#Generate asm-offsets.h for vmm module build
5offsets-file := asm-offsets.h
6
7always := $(offsets-file)
8targets := $(offsets-file)
9targets += arch/ia64/kvm/asm-offsets.s
10clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S)
11
12# Default sed regexp - multiline due to syntax constraints
13define sed-y
14 "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
15endef
16
17quiet_cmd_offsets = GEN $@
18define cmd_offsets
19 (set -e; \
20 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
21 echo "#define __ASM_KVM_OFFSETS_H__"; \
22 echo "/*"; \
23 echo " * DO NOT MODIFY."; \
24 echo " *"; \
25 echo " * This file was generated by Makefile"; \
26 echo " *"; \
27 echo " */"; \
28 echo ""; \
29 sed -ne $(sed-y) $<; \
30 echo ""; \
31 echo "#endif" ) > $@
32endef
33# We use internal rules to avoid the "is up to date" message from make
34arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c
35 $(call if_changed_dep,cc_s_c)
36
37$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
38 $(call cmd,offsets)
39
40#
41# Makefile for Kernel-based Virtual Machine module
42#
43
44EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
45
46$(addprefix $(objtree)/,$(obj)/memcpy.S $(obj)/memset.S):
47 $(shell ln -snf ../lib/memcpy.S $(src)/memcpy.S)
48 $(shell ln -snf ../lib/memset.S $(src)/memset.S)
49
50common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
51
52kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
53obj-$(CONFIG_KVM) += kvm.o
54
55FORCE : $(obj)/$(offsets-file)
56EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
57kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
58 vtlb.o process.o
59#Add link memcpy and memset to avoid possible structure assignment error
60kvm-intel-objs += memset.o memcpy.o
61obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
new file mode 100644
index 000000000000..4e3dc13a619c
--- /dev/null
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -0,0 +1,251 @@
1/*
2 * asm-offsets.c Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed
4 * to extract and format the required data.
5 *
6 * Anthony Xu <anthony.xu@intel.com>
7 * Xiantao Zhang <xiantao.zhang@intel.com>
8 * Copyright (c) 2007 Intel Corporation KVM support.
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
21 * Place - Suite 330, Boston, MA 02111-1307 USA.
22 *
23 */
24
25#include <linux/autoconf.h>
26#include <linux/kvm_host.h>
27
28#include "vcpu.h"
29
30#define task_struct kvm_vcpu
31
32#define DEFINE(sym, val) \
33 asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
34
35#define BLANK() asm volatile("\n->" : :)
36
37#define OFFSET(_sym, _str, _mem) \
38 DEFINE(_sym, offsetof(_str, _mem));
39
40void foo(void)
41{
42 DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
43 DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
44
45 BLANK();
46
47 DEFINE(VMM_VCPU_META_RR0_OFFSET,
48 offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
49 DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
50 offsetof(struct kvm_vcpu,
51 arch.metaphysical_saved_rr0));
52 DEFINE(VMM_VCPU_VRR0_OFFSET,
53 offsetof(struct kvm_vcpu, arch.vrr[0]));
54 DEFINE(VMM_VPD_IRR0_OFFSET,
55 offsetof(struct vpd, irr[0]));
56 DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
57 offsetof(struct kvm_vcpu, arch.itc_check));
58 DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
59 offsetof(struct kvm_vcpu, arch.irq_check));
60 DEFINE(VMM_VPD_VHPI_OFFSET,
61 offsetof(struct vpd, vhpi));
62 DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
63 offsetof(struct kvm_vcpu, arch.vsa_base));
64 DEFINE(VMM_VCPU_VPD_OFFSET,
65 offsetof(struct kvm_vcpu, arch.vpd));
66 DEFINE(VMM_VCPU_IRQ_CHECK,
67 offsetof(struct kvm_vcpu, arch.irq_check));
68 DEFINE(VMM_VCPU_TIMER_PENDING,
69 offsetof(struct kvm_vcpu, arch.timer_pending));
70 DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
71 offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
72 DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
73 offsetof(struct kvm_vcpu, arch.mode_flags));
74 DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
75 offsetof(struct kvm_vcpu, arch.itc_offset));
76 DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
77 offsetof(struct kvm_vcpu, arch.last_itc));
78 DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
79 offsetof(struct kvm_vcpu, arch.saved_gp));
80
81 BLANK();
82
83 DEFINE(VMM_PT_REGS_B6_OFFSET,
84 offsetof(struct kvm_pt_regs, b6));
85 DEFINE(VMM_PT_REGS_B7_OFFSET,
86 offsetof(struct kvm_pt_regs, b7));
87 DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
88 offsetof(struct kvm_pt_regs, ar_csd));
89 DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
90 offsetof(struct kvm_pt_regs, ar_ssd));
91 DEFINE(VMM_PT_REGS_R8_OFFSET,
92 offsetof(struct kvm_pt_regs, r8));
93 DEFINE(VMM_PT_REGS_R9_OFFSET,
94 offsetof(struct kvm_pt_regs, r9));
95 DEFINE(VMM_PT_REGS_R10_OFFSET,
96 offsetof(struct kvm_pt_regs, r10));
97 DEFINE(VMM_PT_REGS_R11_OFFSET,
98 offsetof(struct kvm_pt_regs, r11));
99 DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
100 offsetof(struct kvm_pt_regs, cr_ipsr));
101 DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
102 offsetof(struct kvm_pt_regs, cr_iip));
103 DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
104 offsetof(struct kvm_pt_regs, cr_ifs));
105 DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
106 offsetof(struct kvm_pt_regs, ar_unat));
107 DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
108 offsetof(struct kvm_pt_regs, ar_pfs));
109 DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
110 offsetof(struct kvm_pt_regs, ar_rsc));
111 DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
112 offsetof(struct kvm_pt_regs, ar_rnat));
113
114 DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
115 offsetof(struct kvm_pt_regs, ar_bspstore));
116 DEFINE(VMM_PT_REGS_PR_OFFSET,
117 offsetof(struct kvm_pt_regs, pr));
118 DEFINE(VMM_PT_REGS_B0_OFFSET,
119 offsetof(struct kvm_pt_regs, b0));
120 DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
121 offsetof(struct kvm_pt_regs, loadrs));
122 DEFINE(VMM_PT_REGS_R1_OFFSET,
123 offsetof(struct kvm_pt_regs, r1));
124 DEFINE(VMM_PT_REGS_R12_OFFSET,
125 offsetof(struct kvm_pt_regs, r12));
126 DEFINE(VMM_PT_REGS_R13_OFFSET,
127 offsetof(struct kvm_pt_regs, r13));
128 DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
129 offsetof(struct kvm_pt_regs, ar_fpsr));
130 DEFINE(VMM_PT_REGS_R15_OFFSET,
131 offsetof(struct kvm_pt_regs, r15));
132 DEFINE(VMM_PT_REGS_R14_OFFSET,
133 offsetof(struct kvm_pt_regs, r14));
134 DEFINE(VMM_PT_REGS_R2_OFFSET,
135 offsetof(struct kvm_pt_regs, r2));
136 DEFINE(VMM_PT_REGS_R3_OFFSET,
137 offsetof(struct kvm_pt_regs, r3));
138 DEFINE(VMM_PT_REGS_R16_OFFSET,
139 offsetof(struct kvm_pt_regs, r16));
140 DEFINE(VMM_PT_REGS_R17_OFFSET,
141 offsetof(struct kvm_pt_regs, r17));
142 DEFINE(VMM_PT_REGS_R18_OFFSET,
143 offsetof(struct kvm_pt_regs, r18));
144 DEFINE(VMM_PT_REGS_R19_OFFSET,
145 offsetof(struct kvm_pt_regs, r19));
146 DEFINE(VMM_PT_REGS_R20_OFFSET,
147 offsetof(struct kvm_pt_regs, r20));
148 DEFINE(VMM_PT_REGS_R21_OFFSET,
149 offsetof(struct kvm_pt_regs, r21));
150 DEFINE(VMM_PT_REGS_R22_OFFSET,
151 offsetof(struct kvm_pt_regs, r22));
152 DEFINE(VMM_PT_REGS_R23_OFFSET,
153 offsetof(struct kvm_pt_regs, r23));
154 DEFINE(VMM_PT_REGS_R24_OFFSET,
155 offsetof(struct kvm_pt_regs, r24));
156 DEFINE(VMM_PT_REGS_R25_OFFSET,
157 offsetof(struct kvm_pt_regs, r25));
158 DEFINE(VMM_PT_REGS_R26_OFFSET,
159 offsetof(struct kvm_pt_regs, r26));
160 DEFINE(VMM_PT_REGS_R27_OFFSET,
161 offsetof(struct kvm_pt_regs, r27));
162 DEFINE(VMM_PT_REGS_R28_OFFSET,
163 offsetof(struct kvm_pt_regs, r28));
164 DEFINE(VMM_PT_REGS_R29_OFFSET,
165 offsetof(struct kvm_pt_regs, r29));
166 DEFINE(VMM_PT_REGS_R30_OFFSET,
167 offsetof(struct kvm_pt_regs, r30));
168 DEFINE(VMM_PT_REGS_R31_OFFSET,
169 offsetof(struct kvm_pt_regs, r31));
170 DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
171 offsetof(struct kvm_pt_regs, ar_ccv));
172 DEFINE(VMM_PT_REGS_F6_OFFSET,
173 offsetof(struct kvm_pt_regs, f6));
174 DEFINE(VMM_PT_REGS_F7_OFFSET,
175 offsetof(struct kvm_pt_regs, f7));
176 DEFINE(VMM_PT_REGS_F8_OFFSET,
177 offsetof(struct kvm_pt_regs, f8));
178 DEFINE(VMM_PT_REGS_F9_OFFSET,
179 offsetof(struct kvm_pt_regs, f9));
180 DEFINE(VMM_PT_REGS_F10_OFFSET,
181 offsetof(struct kvm_pt_regs, f10));
182 DEFINE(VMM_PT_REGS_F11_OFFSET,
183 offsetof(struct kvm_pt_regs, f11));
184 DEFINE(VMM_PT_REGS_R4_OFFSET,
185 offsetof(struct kvm_pt_regs, r4));
186 DEFINE(VMM_PT_REGS_R5_OFFSET,
187 offsetof(struct kvm_pt_regs, r5));
188 DEFINE(VMM_PT_REGS_R6_OFFSET,
189 offsetof(struct kvm_pt_regs, r6));
190 DEFINE(VMM_PT_REGS_R7_OFFSET,
191 offsetof(struct kvm_pt_regs, r7));
192 DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
193 offsetof(struct kvm_pt_regs, eml_unat));
194 DEFINE(VMM_VCPU_IIPA_OFFSET,
195 offsetof(struct kvm_vcpu, arch.cr_iipa));
196 DEFINE(VMM_VCPU_OPCODE_OFFSET,
197 offsetof(struct kvm_vcpu, arch.opcode));
198 DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
199 DEFINE(VMM_VCPU_ISR_OFFSET,
200 offsetof(struct kvm_vcpu, arch.cr_isr));
201 DEFINE(VMM_PT_REGS_R16_SLOT,
202 (((offsetof(struct kvm_pt_regs, r16)
203 - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
204 DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
205 offsetof(struct kvm_vcpu, arch.mode_flags));
206 DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
207 BLANK();
208
209 DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
210 DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
211 DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
212 offsetof(struct kvm_vcpu, arch.insvc[0]));
213 DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
214 DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
215
216 DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
217 DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
218 DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
219 DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
220 DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
221 DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
222 DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
223 DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
224 DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
225 DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
226 DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
227 DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
228 DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
229 DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
230 DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
231 DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
232 DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
233 DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
234 DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
235 DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
236 DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
237 DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
238 DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
239 DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
240 DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
241 DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
242 DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
243 DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
244 DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
245 DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
246 DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
247 DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
248 DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
249 DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
250 BLANK();
251}
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
new file mode 100644
index 000000000000..6df073240135
--- /dev/null
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -0,0 +1,1806 @@
1
2/*
3 * kvm_ia64.c: Basic KVM suppport On Itanium series processors
4 *
5 *
6 * Copyright (C) 2007, Intel Corporation.
7 * Xiantao Zhang (xiantao.zhang@intel.com)
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
20 * Place - Suite 330, Boston, MA 02111-1307 USA.
21 *
22 */
23
24#include <linux/module.h>
25#include <linux/errno.h>
26#include <linux/percpu.h>
27#include <linux/gfp.h>
28#include <linux/fs.h>
29#include <linux/smp.h>
30#include <linux/kvm_host.h>
31#include <linux/kvm.h>
32#include <linux/bitops.h>
33#include <linux/hrtimer.h>
34#include <linux/uaccess.h>
35
36#include <asm/pgtable.h>
37#include <asm/gcc_intrin.h>
38#include <asm/pal.h>
39#include <asm/cacheflush.h>
40#include <asm/div64.h>
41#include <asm/tlb.h>
42
43#include "misc.h"
44#include "vti.h"
45#include "iodev.h"
46#include "ioapic.h"
47#include "lapic.h"
48
49static unsigned long kvm_vmm_base;
50static unsigned long kvm_vsa_base;
51static unsigned long kvm_vm_buffer;
52static unsigned long kvm_vm_buffer_size;
53unsigned long kvm_vmm_gp;
54
55static long vp_env_info;
56
57static struct kvm_vmm_info *kvm_vmm_info;
58
59static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
60
61struct kvm_stats_debugfs_item debugfs_entries[] = {
62 { NULL }
63};
64
65
66struct fdesc{
67 unsigned long ip;
68 unsigned long gp;
69};
70
71static void kvm_flush_icache(unsigned long start, unsigned long len)
72{
73 int l;
74
75 for (l = 0; l < (len + 32); l += 32)
76 ia64_fc(start + l);
77
78 ia64_sync_i();
79 ia64_srlz_i();
80}
81
82static void kvm_flush_tlb_all(void)
83{
84 unsigned long i, j, count0, count1, stride0, stride1, addr;
85 long flags;
86
87 addr = local_cpu_data->ptce_base;
88 count0 = local_cpu_data->ptce_count[0];
89 count1 = local_cpu_data->ptce_count[1];
90 stride0 = local_cpu_data->ptce_stride[0];
91 stride1 = local_cpu_data->ptce_stride[1];
92
93 local_irq_save(flags);
94 for (i = 0; i < count0; ++i) {
95 for (j = 0; j < count1; ++j) {
96 ia64_ptce(addr);
97 addr += stride1;
98 }
99 addr += stride0;
100 }
101 local_irq_restore(flags);
102 ia64_srlz_i(); /* srlz.i implies srlz.d */
103}
104
105long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
106{
107 struct ia64_pal_retval iprv;
108
109 PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
110 (u64)opt_handler);
111
112 return iprv.status;
113}
114
115static DEFINE_SPINLOCK(vp_lock);
116
117void kvm_arch_hardware_enable(void *garbage)
118{
119 long status;
120 long tmp_base;
121 unsigned long pte;
122 unsigned long saved_psr;
123 int slot;
124
125 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
126 PAGE_KERNEL));
127 local_irq_save(saved_psr);
128 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
129 if (slot < 0)
130 return;
131 local_irq_restore(saved_psr);
132
133 spin_lock(&vp_lock);
134 status = ia64_pal_vp_init_env(kvm_vsa_base ?
135 VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
136 __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
137 if (status != 0) {
138 printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
139 return ;
140 }
141
142 if (!kvm_vsa_base) {
143 kvm_vsa_base = tmp_base;
144 printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
145 }
146 spin_unlock(&vp_lock);
147 ia64_ptr_entry(0x3, slot);
148}
149
150void kvm_arch_hardware_disable(void *garbage)
151{
152
153 long status;
154 int slot;
155 unsigned long pte;
156 unsigned long saved_psr;
157 unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
158
159 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
160 PAGE_KERNEL));
161
162 local_irq_save(saved_psr);
163 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
164 if (slot < 0)
165 return;
166 local_irq_restore(saved_psr);
167
168 status = ia64_pal_vp_exit_env(host_iva);
169 if (status)
170 printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
171 status);
172 ia64_ptr_entry(0x3, slot);
173}
174
175void kvm_arch_check_processor_compat(void *rtn)
176{
177 *(int *)rtn = 0;
178}
179
180int kvm_dev_ioctl_check_extension(long ext)
181{
182
183 int r;
184
185 switch (ext) {
186 case KVM_CAP_IRQCHIP:
187 case KVM_CAP_USER_MEMORY:
188
189 r = 1;
190 break;
191 default:
192 r = 0;
193 }
194 return r;
195
196}
197
198static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
199 gpa_t addr)
200{
201 struct kvm_io_device *dev;
202
203 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
204
205 return dev;
206}
207
208static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
209{
210 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
211 kvm_run->hw.hardware_exit_reason = 1;
212 return 0;
213}
214
215static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
216{
217 struct kvm_mmio_req *p;
218 struct kvm_io_device *mmio_dev;
219
220 p = kvm_get_vcpu_ioreq(vcpu);
221
222 if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
223 goto mmio;
224 vcpu->mmio_needed = 1;
225 vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
226 vcpu->mmio_size = kvm_run->mmio.len = p->size;
227 vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
228
229 if (vcpu->mmio_is_write)
230 memcpy(vcpu->mmio_data, &p->data, p->size);
231 memcpy(kvm_run->mmio.data, &p->data, p->size);
232 kvm_run->exit_reason = KVM_EXIT_MMIO;
233 return 0;
234mmio:
235 mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr);
236 if (mmio_dev) {
237 if (!p->dir)
238 kvm_iodevice_write(mmio_dev, p->addr, p->size,
239 &p->data);
240 else
241 kvm_iodevice_read(mmio_dev, p->addr, p->size,
242 &p->data);
243
244 } else
245 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
246 p->state = STATE_IORESP_READY;
247
248 return 1;
249}
250
251static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
252{
253 struct exit_ctl_data *p;
254
255 p = kvm_get_exit_data(vcpu);
256
257 if (p->exit_reason == EXIT_REASON_PAL_CALL)
258 return kvm_pal_emul(vcpu, kvm_run);
259 else {
260 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
261 kvm_run->hw.hardware_exit_reason = 2;
262 return 0;
263 }
264}
265
266static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
267{
268 struct exit_ctl_data *p;
269
270 p = kvm_get_exit_data(vcpu);
271
272 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
273 kvm_sal_emul(vcpu);
274 return 1;
275 } else {
276 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
277 kvm_run->hw.hardware_exit_reason = 3;
278 return 0;
279 }
280
281}
282
283/*
284 * offset: address offset to IPI space.
285 * value: deliver value.
286 */
287static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
288 uint64_t vector)
289{
290 switch (dm) {
291 case SAPIC_FIXED:
292 kvm_apic_set_irq(vcpu, vector, 0);
293 break;
294 case SAPIC_NMI:
295 kvm_apic_set_irq(vcpu, 2, 0);
296 break;
297 case SAPIC_EXTINT:
298 kvm_apic_set_irq(vcpu, 0, 0);
299 break;
300 case SAPIC_INIT:
301 case SAPIC_PMI:
302 default:
303 printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
304 break;
305 }
306}
307
308static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
309 unsigned long eid)
310{
311 union ia64_lid lid;
312 int i;
313
314 for (i = 0; i < KVM_MAX_VCPUS; i++) {
315 if (kvm->vcpus[i]) {
316 lid.val = VCPU_LID(kvm->vcpus[i]);
317 if (lid.id == id && lid.eid == eid)
318 return kvm->vcpus[i];
319 }
320 }
321
322 return NULL;
323}
324
325static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
326{
327 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
328 struct kvm_vcpu *target_vcpu;
329 struct kvm_pt_regs *regs;
330 union ia64_ipi_a addr = p->u.ipi_data.addr;
331 union ia64_ipi_d data = p->u.ipi_data.data;
332
333 target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
334 if (!target_vcpu)
335 return handle_vm_error(vcpu, kvm_run);
336
337 if (!target_vcpu->arch.launched) {
338 regs = vcpu_regs(target_vcpu);
339
340 regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
341 regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
342
343 target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
344 if (waitqueue_active(&target_vcpu->wq))
345 wake_up_interruptible(&target_vcpu->wq);
346 } else {
347 vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
348 if (target_vcpu != vcpu)
349 kvm_vcpu_kick(target_vcpu);
350 }
351
352 return 1;
353}
354
355struct call_data {
356 struct kvm_ptc_g ptc_g_data;
357 struct kvm_vcpu *vcpu;
358};
359
360static void vcpu_global_purge(void *info)
361{
362 struct call_data *p = (struct call_data *)info;
363 struct kvm_vcpu *vcpu = p->vcpu;
364
365 if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
366 return;
367
368 set_bit(KVM_REQ_PTC_G, &vcpu->requests);
369 if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
370 vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
371 p->ptc_g_data;
372 } else {
373 clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
374 vcpu->arch.ptc_g_count = 0;
375 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
376 }
377}
378
379static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
380{
381 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
382 struct kvm *kvm = vcpu->kvm;
383 struct call_data call_data;
384 int i;
385 call_data.ptc_g_data = p->u.ptc_g_data;
386
387 for (i = 0; i < KVM_MAX_VCPUS; i++) {
388 if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
389 KVM_MP_STATE_UNINITIALIZED ||
390 vcpu == kvm->vcpus[i])
391 continue;
392
393 if (waitqueue_active(&kvm->vcpus[i]->wq))
394 wake_up_interruptible(&kvm->vcpus[i]->wq);
395
396 if (kvm->vcpus[i]->cpu != -1) {
397 call_data.vcpu = kvm->vcpus[i];
398 smp_call_function_single(kvm->vcpus[i]->cpu,
399 vcpu_global_purge, &call_data, 0, 1);
400 } else
401 printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
402
403 }
404 return 1;
405}
406
407static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
408{
409 return 1;
410}
411
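/*
 * Emulate a guest halt: program the per-vcpu hrtimer to fire when the guest's
 * next timer (vpd->itm) is due, then block the vcpu until it expires or an
 * interrupt arrives. The remaining ITC cycles are converted to an hrtimer
 * interval as
 *	usecs = cycles / cyc_per_usec;  nsecs = usecs * 1000;
 * e.g. (illustrative numbers only) 4,000,000 cycles at 400 cycles/usec give
 * a 10,000 usec (10 ms) sleep.
 */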
412int kvm_emulate_halt(struct kvm_vcpu *vcpu)
413{
414
415 ktime_t kt;
416 long itc_diff;
417 unsigned long vcpu_now_itc;
418
419 unsigned long expires;
420 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
421 unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
422 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
423
424 vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
425
426 if (time_after(vcpu_now_itc, vpd->itm)) {
427 vcpu->arch.timer_check = 1;
428 return 1;
429 }
430 itc_diff = vpd->itm - vcpu_now_itc;
431 if (itc_diff < 0)
432 itc_diff = -itc_diff;
433
434 expires = div64_64(itc_diff, cyc_per_usec);
435 kt = ktime_set(0, 1000 * expires);
436 vcpu->arch.ht_active = 1;
437 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
438
439 if (irqchip_in_kernel(vcpu->kvm)) {
440 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
441 kvm_vcpu_block(vcpu);
442 hrtimer_cancel(p_ht);
443 vcpu->arch.ht_active = 0;
444
445 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
446 return -EINTR;
447 return 1;
448 } else {
449 printk(KERN_ERR"kvm: Unsupported userspace halt!");
450 return 0;
451 }
452}
453
454static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
455 struct kvm_run *kvm_run)
456{
457 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
458 return 0;
459}
460
461static int handle_external_interrupt(struct kvm_vcpu *vcpu,
462 struct kvm_run *kvm_run)
463{
464 return 1;
465}
466
467static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
468 struct kvm_run *kvm_run) = {
469 [EXIT_REASON_VM_PANIC] = handle_vm_error,
470 [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio,
471 [EXIT_REASON_PAL_CALL] = handle_pal_call,
472 [EXIT_REASON_SAL_CALL] = handle_sal_call,
473 [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6,
474 [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown,
475 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
476 [EXIT_REASON_IPI] = handle_ipi,
477 [EXIT_REASON_PTC_G] = handle_global_purge,
478
479};
480
481static const int kvm_vti_max_exit_handlers =
482 sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
483
484static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu)
485{
486}
487
488static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
489{
490 struct exit_ctl_data *p_exit_data;
491
492 p_exit_data = kvm_get_exit_data(vcpu);
493 return p_exit_data->exit_reason;
494}
495
496/*
497 * The guest has exited. See if we can fix it or if we need userspace
498 * assistance.
499 */
500static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
501{
502 u32 exit_reason = kvm_get_exit_reason(vcpu);
503 vcpu->arch.last_exit = exit_reason;
504
505 if (exit_reason < kvm_vti_max_exit_handlers
506 && kvm_vti_exit_handlers[exit_reason])
507 return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
508 else {
509 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
510 kvm_run->hw.hardware_exit_reason = exit_reason;
511 }
512 return 0;
513}
514
515static inline void vti_set_rr6(unsigned long rr6)
516{
517 ia64_set_rr(RR6, rr6);
518 ia64_srlz_i();
519}
520
521static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
522{
523 unsigned long pte;
524 struct kvm *kvm = vcpu->kvm;
525 int r;
526
527 /*Insert a pair of tr to map vmm*/
528 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
529 r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
530 if (r < 0)
531 goto out;
532 vcpu->arch.vmm_tr_slot = r;
533 /*Insert a pair of tr to map vm data*/
534 pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
535 r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
536 pte, KVM_VM_DATA_SHIFT);
537 if (r < 0)
538 goto out;
539 vcpu->arch.vm_tr_slot = r;
540 r = 0;
541out:
542 return r;
543
544}
545
546static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
547{
548
549 ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
550 ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
551
552}
553
554static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
555{
556 int cpu = smp_processor_id();
557
558 if (vcpu->arch.last_run_cpu != cpu ||
559 per_cpu(last_vcpu, cpu) != vcpu) {
560 per_cpu(last_vcpu, cpu) = vcpu;
561 vcpu->arch.last_run_cpu = cpu;
562 kvm_flush_tlb_all();
563 }
564
565 vcpu->arch.host_rr6 = ia64_get_rr(RR6);
566 vti_set_rr6(vcpu->arch.vmm_rr);
567 return kvm_insert_vmm_mapping(vcpu);
568}
569static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
570{
571 kvm_purge_vmm_mapping(vcpu);
572 vti_set_rr6(vcpu->arch.host_rr6);
573}
574
575static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
576{
577 union context *host_ctx, *guest_ctx;
578 int r;
579
580 /*Get host and guest context with guest address space.*/
581 host_ctx = kvm_get_host_context(vcpu);
582 guest_ctx = kvm_get_guest_context(vcpu);
583
584 r = kvm_vcpu_pre_transition(vcpu);
585 if (r < 0)
586 goto out;
587 kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
588 kvm_vcpu_post_transition(vcpu);
589 r = 0;
590out:
591 return r;
592}
593
594static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
595{
596 int r;
597
598again:
599 preempt_disable();
600
601 kvm_prepare_guest_switch(vcpu);
602 local_irq_disable();
603
604 if (signal_pending(current)) {
605 local_irq_enable();
606 preempt_enable();
607 r = -EINTR;
608 kvm_run->exit_reason = KVM_EXIT_INTR;
609 goto out;
610 }
611
612 vcpu->guest_mode = 1;
613 kvm_guest_enter();
614
615 r = vti_vcpu_run(vcpu, kvm_run);
616 if (r < 0) {
617 local_irq_enable();
618 preempt_enable();
619 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
620 goto out;
621 }
622
623 vcpu->arch.launched = 1;
624 vcpu->guest_mode = 0;
625 local_irq_enable();
626
627 /*
628 * We must have an instruction between local_irq_enable() and
629 * kvm_guest_exit(), so the timer interrupt isn't delayed by
630 * the interrupt shadow. The stat.exits increment will do nicely.
631 * But we need to prevent reordering, hence this barrier():
632 */
633 barrier();
634
635 kvm_guest_exit();
636
637 preempt_enable();
638
639 r = kvm_handle_exit(kvm_run, vcpu);
640
641 if (r > 0) {
642 if (!need_resched())
643 goto again;
644 }
645
646out:
647 if (r > 0) {
648 kvm_resched(vcpu);
649 goto again;
650 }
651
652 return r;
653}
654
655static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
656{
657 struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
658
659 if (!vcpu->mmio_is_write)
660 memcpy(&p->data, vcpu->mmio_data, 8);
661 p->state = STATE_IORESP_READY;
662}
663
664int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
665{
666 int r;
667 sigset_t sigsaved;
668
669 vcpu_load(vcpu);
670
671 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
672 kvm_vcpu_block(vcpu);
673 vcpu_put(vcpu);
674 return -EAGAIN;
675 }
676
677 if (vcpu->sigset_active)
678 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
679
680 if (vcpu->mmio_needed) {
681 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
682 kvm_set_mmio_data(vcpu);
683 vcpu->mmio_read_completed = 1;
684 vcpu->mmio_needed = 0;
685 }
686 r = __vcpu_run(vcpu, kvm_run);
687
688 if (vcpu->sigset_active)
689 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
690
691 vcpu_put(vcpu);
692 return r;
693}
694
695/*
696 * Allocate 16MB of memory for each VM to hold its specific data.
697 * Its memory map is defined in kvm_host.h.
698 */
699static struct kvm *kvm_alloc_kvm(void)
700{
701
702 struct kvm *kvm;
703 uint64_t vm_base;
704
705 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
706
707 if (!vm_base)
708 return ERR_PTR(-ENOMEM);
709 printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
710
711 /* Zero all pages before use! */
712 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
713
714 kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
715 kvm->arch.vm_base = vm_base;
716
717 return kvm;
718}
719
720struct kvm_io_range {
721 unsigned long start;
722 unsigned long size;
723 unsigned long type;
724};
725
726static const struct kvm_io_range io_ranges[] = {
727 {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
728 {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
729 {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
730 {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
731 {PIB_START, PIB_SIZE, GPFN_PIB},
732};
733
734static void kvm_build_io_pmt(struct kvm *kvm)
735{
736 unsigned long i, j;
737
738 /* Mark I/O ranges */
739 for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
740 i++) {
741 for (j = io_ranges[i].start;
742 j < io_ranges[i].start + io_ranges[i].size;
743 j += PAGE_SIZE)
744 kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
745 io_ranges[i].type, 0);
746 }
747
748}
749
750/*Use unused rids to virtualize guest rid.*/
751#define GUEST_PHYSICAL_RR0 0x1739
752#define GUEST_PHYSICAL_RR4 0x2739
753#define VMM_INIT_RR 0x1660
754
755static void kvm_init_vm(struct kvm *kvm)
756{
757 long vm_base;
758
759 BUG_ON(!kvm);
760
761 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
762 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
763 kvm->arch.vmm_init_rr = VMM_INIT_RR;
764
765 vm_base = kvm->arch.vm_base;
766 if (vm_base) {
767 kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
768 kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
769 kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
770 }
771
772 /*
773 *Fill P2M entries for MMIO/IO ranges
774 */
775 kvm_build_io_pmt(kvm);
776
777}
778
779struct kvm *kvm_arch_create_vm(void)
780{
781 struct kvm *kvm = kvm_alloc_kvm();
782
783 if (IS_ERR(kvm))
784 return ERR_PTR(-ENOMEM);
785 kvm_init_vm(kvm);
786
787 return kvm;
788
789}
790
791static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
792 struct kvm_irqchip *chip)
793{
794 int r;
795
796 r = 0;
797 switch (chip->chip_id) {
798 case KVM_IRQCHIP_IOAPIC:
799 memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
800 sizeof(struct kvm_ioapic_state));
801 break;
802 default:
803 r = -EINVAL;
804 break;
805 }
806 return r;
807}
808
809static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
810{
811 int r;
812
813 r = 0;
814 switch (chip->chip_id) {
815 case KVM_IRQCHIP_IOAPIC:
816 memcpy(ioapic_irqchip(kvm),
817 &chip->chip.ioapic,
818 sizeof(struct kvm_ioapic_state));
819 break;
820 default:
821 r = -EINVAL;
822 break;
823 }
824 return r;
825}
826
827#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
828
829int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
830{
831 int i;
832 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
833 int r;
834
835 vcpu_load(vcpu);
836
837 for (i = 0; i < 16; i++) {
838 vpd->vgr[i] = regs->vpd.vgr[i];
839 vpd->vbgr[i] = regs->vpd.vbgr[i];
840 }
841 for (i = 0; i < 128; i++)
842 vpd->vcr[i] = regs->vpd.vcr[i];
843 vpd->vhpi = regs->vpd.vhpi;
844 vpd->vnat = regs->vpd.vnat;
845 vpd->vbnat = regs->vpd.vbnat;
846 vpd->vpsr = regs->vpd.vpsr;
847
848 vpd->vpr = regs->vpd.vpr;
849
850 r = -EFAULT;
851 if (copy_from_user(&vcpu->arch.guest, regs->saved_guest,
852 sizeof(union context)))
853 goto out;
854
855 if (copy_from_user(vcpu + 1, regs->saved_stack +
856 sizeof(struct kvm_vcpu),
857 IA64_STK_OFFSET - sizeof(struct kvm_vcpu)))
858 goto out;
859
860 vcpu->arch.exit_data =
861 ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
862
863 RESTORE_REGS(mp_state);
864 RESTORE_REGS(vmm_rr);
865 memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
866 memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
867 RESTORE_REGS(itr_regions);
868 RESTORE_REGS(dtr_regions);
869 RESTORE_REGS(tc_regions);
870 RESTORE_REGS(irq_check);
871 RESTORE_REGS(itc_check);
872 RESTORE_REGS(timer_check);
873 RESTORE_REGS(timer_pending);
874 RESTORE_REGS(last_itc);
875 for (i = 0; i < 8; i++) {
876 vcpu->arch.vrr[i] = regs->vrr[i];
877 vcpu->arch.ibr[i] = regs->ibr[i];
878 vcpu->arch.dbr[i] = regs->dbr[i];
879 }
880 for (i = 0; i < 4; i++)
881 vcpu->arch.insvc[i] = regs->insvc[i];
882 RESTORE_REGS(xtp);
883 RESTORE_REGS(metaphysical_rr0);
884 RESTORE_REGS(metaphysical_rr4);
885 RESTORE_REGS(metaphysical_saved_rr0);
886 RESTORE_REGS(metaphysical_saved_rr4);
887 RESTORE_REGS(fp_psr);
888 RESTORE_REGS(saved_gp);
889
890 vcpu->arch.irq_new_pending = 1;
891 vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
892 set_bit(KVM_REQ_RESUME, &vcpu->requests);
893
894 vcpu_put(vcpu);
895 r = 0;
896out:
897 return r;
898}
899
900long kvm_arch_vm_ioctl(struct file *filp,
901 unsigned int ioctl, unsigned long arg)
902{
903 struct kvm *kvm = filp->private_data;
904 void __user *argp = (void __user *)arg;
905 int r = -EINVAL;
906
907 switch (ioctl) {
908 case KVM_SET_MEMORY_REGION: {
909 struct kvm_memory_region kvm_mem;
910 struct kvm_userspace_memory_region kvm_userspace_mem;
911
912 r = -EFAULT;
913 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
914 goto out;
915 kvm_userspace_mem.slot = kvm_mem.slot;
916 kvm_userspace_mem.flags = kvm_mem.flags;
917 kvm_userspace_mem.guest_phys_addr =
918 kvm_mem.guest_phys_addr;
919 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
920 r = kvm_vm_ioctl_set_memory_region(kvm,
921 &kvm_userspace_mem, 0);
922 if (r)
923 goto out;
924 break;
925 }
926 case KVM_CREATE_IRQCHIP:
927 r = -EFAULT;
928 r = kvm_ioapic_init(kvm);
929 if (r)
930 goto out;
931 break;
932 case KVM_IRQ_LINE: {
933 struct kvm_irq_level irq_event;
934
935 r = -EFAULT;
936 if (copy_from_user(&irq_event, argp, sizeof irq_event))
937 goto out;
938 if (irqchip_in_kernel(kvm)) {
939 mutex_lock(&kvm->lock);
940 kvm_ioapic_set_irq(kvm->arch.vioapic,
941 irq_event.irq,
942 irq_event.level);
943 mutex_unlock(&kvm->lock);
944 r = 0;
945 }
946 break;
947 }
948 case KVM_GET_IRQCHIP: {
949 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
950 struct kvm_irqchip chip;
951
952 r = -EFAULT;
953 if (copy_from_user(&chip, argp, sizeof chip))
954 goto out;
955 r = -ENXIO;
956 if (!irqchip_in_kernel(kvm))
957 goto out;
958 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
959 if (r)
960 goto out;
961 r = -EFAULT;
962 if (copy_to_user(argp, &chip, sizeof chip))
963 goto out;
964 r = 0;
965 break;
966 }
967 case KVM_SET_IRQCHIP: {
968 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
969 struct kvm_irqchip chip;
970
971 r = -EFAULT;
972 if (copy_from_user(&chip, argp, sizeof chip))
973 goto out;
974 r = -ENXIO;
975 if (!irqchip_in_kernel(kvm))
976 goto out;
977 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
978 if (r)
979 goto out;
980 r = 0;
981 break;
982 }
983 default:
984 ;
985 }
986out:
987 return r;
988}
989
990int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
991 struct kvm_sregs *sregs)
992{
993 return -EINVAL;
994}
995
996int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
997 struct kvm_sregs *sregs)
998{
999 return -EINVAL;
1000
1001}
1002int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1003 struct kvm_translation *tr)
1004{
1005
1006 return -EINVAL;
1007}
1008
1009static int kvm_alloc_vmm_area(void)
1010{
1011 if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
1012 kvm_vmm_base = __get_free_pages(GFP_KERNEL,
1013 get_order(KVM_VMM_SIZE));
1014 if (!kvm_vmm_base)
1015 return -ENOMEM;
1016
1017 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1018 kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
1019
1020 printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
1021 kvm_vmm_base, kvm_vm_buffer);
1022 }
1023
1024 return 0;
1025}
1026
1027static void kvm_free_vmm_area(void)
1028{
1029 if (kvm_vmm_base) {
1030 /*Zero this area before freeing it to avoid leaking data!!*/
1031 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1032 free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
1033 kvm_vmm_base = 0;
1034 kvm_vm_buffer = 0;
1035 kvm_vsa_base = 0;
1036 }
1037}
1038
1039/*
1040 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
1041 * cached on it. Left blank for IA64.
1042 */
1043void decache_vcpus_on_cpu(int cpu)
1044{
1045}
1046
1047static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1048{
1049}
1050
1051static int vti_init_vpd(struct kvm_vcpu *vcpu)
1052{
1053 int i;
1054 union cpuid3_t cpuid3;
1055 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1056
1057 if (IS_ERR(vpd))
1058 return PTR_ERR(vpd);
1059
1060 /* CPUID init */
1061 for (i = 0; i < 5; i++)
1062 vpd->vcpuid[i] = ia64_get_cpuid(i);
1063
1064 /* Limit the CPUID number to 5 */
1065 cpuid3.value = vpd->vcpuid[3];
1066 cpuid3.number = 4; /* 5 - 1 */
1067 vpd->vcpuid[3] = cpuid3.value;
1068
1069 /*Set vac and vdc fields*/
1070 vpd->vac.a_from_int_cr = 1;
1071 vpd->vac.a_to_int_cr = 1;
1072 vpd->vac.a_from_psr = 1;
1073 vpd->vac.a_from_cpuid = 1;
1074 vpd->vac.a_cover = 1;
1075 vpd->vac.a_bsw = 1;
1076 vpd->vac.a_int = 1;
1077 vpd->vdc.d_vmsw = 1;
1078
1079 /*Set virtual buffer*/
1080 vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
1081
1082 return 0;
1083}
1084
1085static int vti_create_vp(struct kvm_vcpu *vcpu)
1086{
1087 long ret;
1088 struct vpd *vpd = vcpu->arch.vpd;
1089 unsigned long vmm_ivt;
1090
1091 vmm_ivt = kvm_vmm_info->vmm_ivt;
1092
1093 printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
1094
1095 ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
1096
1097 if (ret) {
1098 printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
1099 return -EINVAL;
1100 }
1101 return 0;
1102}
1103
1104static void init_ptce_info(struct kvm_vcpu *vcpu)
1105{
1106 ia64_ptce_info_t ptce = {0};
1107
1108 ia64_get_ptce(&ptce);
1109 vcpu->arch.ptce_base = ptce.base;
1110 vcpu->arch.ptce_count[0] = ptce.count[0];
1111 vcpu->arch.ptce_count[1] = ptce.count[1];
1112 vcpu->arch.ptce_stride[0] = ptce.stride[0];
1113 vcpu->arch.ptce_stride[1] = ptce.stride[1];
1114}
1115
1116static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
1117{
1118 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
1119
1120 if (hrtimer_cancel(p_ht))
1121 hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS);
1122}
1123
1124static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
1125{
1126 struct kvm_vcpu *vcpu;
1127 wait_queue_head_t *q;
1128
1129 vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer);
1130 if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
1131 goto out;
1132
1133 q = &vcpu->wq;
1134 if (waitqueue_active(q)) {
1135 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
1136 wake_up_interruptible(q);
1137 }
1138out:
1139 vcpu->arch.timer_check = 1;
1140 return HRTIMER_NORESTART;
1141}
1142
1143#define PALE_RESET_ENTRY 0x80000000ffffffb0UL
1144
1145int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1146{
1147 struct kvm_vcpu *v;
1148 int r;
1149 int i;
1150 long itc_offset;
1151 struct kvm *kvm = vcpu->kvm;
1152 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1153
1154 union context *p_ctx = &vcpu->arch.guest;
1155 struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
1156
1157 /*Init vcpu context for first run.*/
1158 if (IS_ERR(vmm_vcpu))
1159 return PTR_ERR(vmm_vcpu);
1160
1161 if (vcpu->vcpu_id == 0) {
1162 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
1163
1164 /*Set entry address for first run.*/
1165 regs->cr_iip = PALE_RESET_ENTRY;
1166
1167 /*Initialize itc offset for vcpus*/
1168 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
1169 for (i = 0; i < MAX_VCPU_NUM; i++) {
1170 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
1171 v->arch.itc_offset = itc_offset;
1172 v->arch.last_itc = 0;
1173 }
1174 } else
1175 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
1176
1177 r = -ENOMEM;
1178 vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
1179 if (!vcpu->arch.apic)
1180 goto out;
1181 vcpu->arch.apic->vcpu = vcpu;
1182
1183 p_ctx->gr[1] = 0;
1184 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
1185 p_ctx->gr[13] = (unsigned long)vmm_vcpu;
1186 p_ctx->psr = 0x1008522000UL;
1187 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
1188 p_ctx->caller_unat = 0;
1189 p_ctx->pr = 0x0;
1190 p_ctx->ar[36] = 0x0; /*unat*/
1191 p_ctx->ar[19] = 0x0; /*rnat*/
1192 p_ctx->ar[18] = (unsigned long)vmm_vcpu +
1193 ((sizeof(struct kvm_vcpu)+15) & ~15);
1194 p_ctx->ar[64] = 0x0; /*pfs*/
1195 p_ctx->cr[0] = 0x7e04UL;
1196 p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
1197 p_ctx->cr[8] = 0x3c;
1198
1199 /*Initialize region registers*/
1200 p_ctx->rr[0] = 0x30;
1201 p_ctx->rr[1] = 0x30;
1202 p_ctx->rr[2] = 0x30;
1203 p_ctx->rr[3] = 0x30;
1204 p_ctx->rr[4] = 0x30;
1205 p_ctx->rr[5] = 0x30;
1206 p_ctx->rr[7] = 0x30;
1207
1208 /*Initialize branch register 0*/
1209 p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
1210
1211 vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
1212 vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
1213 vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
1214
1215 hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1216 vcpu->arch.hlt_timer.function = hlt_timer_fn;
1217
1218 vcpu->arch.last_run_cpu = -1;
1219 vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
1220 vcpu->arch.vsa_base = kvm_vsa_base;
1221 vcpu->arch.__gp = kvm_vmm_gp;
1222 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
1223 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
1224 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
1225 init_ptce_info(vcpu);
1226
1227 r = 0;
1228out:
1229 return r;
1230}
1231
1232static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
1233{
1234 unsigned long psr;
1235 int r;
1236
1237 local_irq_save(psr);
1238 r = kvm_insert_vmm_mapping(vcpu);
1239 if (r)
1240 goto fail;
1241 r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
1242 if (r)
1243 goto fail;
1244
1245 r = vti_init_vpd(vcpu);
1246 if (r) {
1247 printk(KERN_DEBUG"kvm: vpd init error!!\n");
1248 goto uninit;
1249 }
1250
1251 r = vti_create_vp(vcpu);
1252 if (r)
1253 goto uninit;
1254
1255 kvm_purge_vmm_mapping(vcpu);
1256 local_irq_restore(psr);
1257
1258 return 0;
1259uninit:
1260 kvm_vcpu_uninit(vcpu);
1261fail:
1262 return r;
1263}
1264
1265struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1266 unsigned int id)
1267{
1268 struct kvm_vcpu *vcpu;
1269 unsigned long vm_base = kvm->arch.vm_base;
1270 int r;
1271 int cpu;
1272
1273 r = -ENOMEM;
1274 if (!vm_base) {
1275 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
1276 goto fail;
1277 }
1278 vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
1279 vcpu->kvm = kvm;
1280
1281 cpu = get_cpu();
1282 vti_vcpu_load(vcpu, cpu);
1283 r = vti_vcpu_setup(vcpu, id);
1284 put_cpu();
1285
1286 if (r) {
1287 printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
1288 goto fail;
1289 }
1290
1291 return vcpu;
1292fail:
1293 return ERR_PTR(r);
1294}
1295
1296int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1297{
1298 return 0;
1299}
1300
1301int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1302{
1303 return -EINVAL;
1304}
1305
1306int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1307{
1308 return -EINVAL;
1309}
1310
1311int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
1312 struct kvm_debug_guest *dbg)
1313{
1314 return -EINVAL;
1315}
1316
1317static void free_kvm(struct kvm *kvm)
1318{
1319 unsigned long vm_base = kvm->arch.vm_base;
1320
1321 if (vm_base) {
1322 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
1323 free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
1324 }
1325
1326}
1327
1328static void kvm_release_vm_pages(struct kvm *kvm)
1329{
1330 struct kvm_memory_slot *memslot;
1331 int i, j;
1332 unsigned long base_gfn;
1333
1334 for (i = 0; i < kvm->nmemslots; i++) {
1335 memslot = &kvm->memslots[i];
1336 base_gfn = memslot->base_gfn;
1337
1338 for (j = 0; j < memslot->npages; j++) {
1339 if (memslot->rmap[j])
1340 put_page((struct page *)memslot->rmap[j]);
1341 }
1342 }
1343}
1344
1345void kvm_arch_destroy_vm(struct kvm *kvm)
1346{
1347 kfree(kvm->arch.vioapic);
1348 kvm_release_vm_pages(kvm);
1349 kvm_free_physmem(kvm);
1350 free_kvm(kvm);
1351}
1352
1353void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1354{
1355}
1356
1357void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1358{
1359 if (cpu != vcpu->cpu) {
1360 vcpu->cpu = cpu;
1361 if (vcpu->arch.ht_active)
1362 kvm_migrate_hlt_timer(vcpu);
1363 }
1364}
1365
1366#define SAVE_REGS(_x) regs->_x = vcpu->arch._x
1367
1368int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1369{
1370 int i;
1371 int r;
1372 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1373 vcpu_load(vcpu);
1374
1375 for (i = 0; i < 16; i++) {
1376 regs->vpd.vgr[i] = vpd->vgr[i];
1377 regs->vpd.vbgr[i] = vpd->vbgr[i];
1378 }
1379 for (i = 0; i < 128; i++)
1380 regs->vpd.vcr[i] = vpd->vcr[i];
1381 regs->vpd.vhpi = vpd->vhpi;
1382 regs->vpd.vnat = vpd->vnat;
1383 regs->vpd.vbnat = vpd->vbnat;
1384 regs->vpd.vpsr = vpd->vpsr;
1385 regs->vpd.vpr = vpd->vpr;
1386
1387 r = -EFAULT;
1388 if (copy_to_user(regs->saved_guest, &vcpu->arch.guest,
1389 sizeof(union context)))
1390 goto out;
1391 if (copy_to_user(regs->saved_stack, (void *)vcpu,
1392 IA64_STK_OFFSET))
1393 goto out;
1394
1395 SAVE_REGS(mp_state);
1396 SAVE_REGS(vmm_rr);
1397 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
1398 memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
1399 SAVE_REGS(itr_regions);
1400 SAVE_REGS(dtr_regions);
1401 SAVE_REGS(tc_regions);
1402 SAVE_REGS(irq_check);
1403 SAVE_REGS(itc_check);
1404 SAVE_REGS(timer_check);
1405 SAVE_REGS(timer_pending);
1406 SAVE_REGS(last_itc);
1407 for (i = 0; i < 8; i++) {
1408 regs->vrr[i] = vcpu->arch.vrr[i];
1409 regs->ibr[i] = vcpu->arch.ibr[i];
1410 regs->dbr[i] = vcpu->arch.dbr[i];
1411 }
1412 for (i = 0; i < 4; i++)
1413 regs->insvc[i] = vcpu->arch.insvc[i];
1414 regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
1415 SAVE_REGS(xtp);
1416 SAVE_REGS(metaphysical_rr0);
1417 SAVE_REGS(metaphysical_rr4);
1418 SAVE_REGS(metaphysical_saved_rr0);
1419 SAVE_REGS(metaphysical_saved_rr4);
1420 SAVE_REGS(fp_psr);
1421 SAVE_REGS(saved_gp);
1422 vcpu_put(vcpu);
1423 r = 0;
1424out:
1425 return r;
1426}
1427
1428void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
1429{
1430
1431 hrtimer_cancel(&vcpu->arch.hlt_timer);
1432 kfree(vcpu->arch.apic);
1433}
1434
1435
1436long kvm_arch_vcpu_ioctl(struct file *filp,
1437 unsigned int ioctl, unsigned long arg)
1438{
1439 return -EINVAL;
1440}
1441
1442int kvm_arch_set_memory_region(struct kvm *kvm,
1443 struct kvm_userspace_memory_region *mem,
1444 struct kvm_memory_slot old,
1445 int user_alloc)
1446{
1447 unsigned long i;
1448 struct page *page;
1449 int npages = mem->memory_size >> PAGE_SHIFT;
1450 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
1451 unsigned long base_gfn = memslot->base_gfn;
1452
1453 for (i = 0; i < npages; i++) {
1454 page = gfn_to_page(kvm, base_gfn + i);
1455 kvm_set_pmt_entry(kvm, base_gfn + i,
1456 page_to_pfn(page) << PAGE_SHIFT,
1457 _PAGE_AR_RWX|_PAGE_MA_WB);
1458 memslot->rmap[i] = (unsigned long)page;
1459 }
1460
1461 return 0;
1462}
1463
1464
1465long kvm_arch_dev_ioctl(struct file *filp,
1466 unsigned int ioctl, unsigned long arg)
1467{
1468 return -EINVAL;
1469}
1470
1471void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1472{
1473 kvm_vcpu_uninit(vcpu);
1474}
1475
1476static int vti_cpu_has_kvm_support(void)
1477{
1478 long avail = 1, status = 1, control = 1;
1479 long ret;
1480
1481 ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
1482 if (ret)
1483 goto out;
1484
1485 if (!(avail & PAL_PROC_VM_BIT))
1486 goto out;
1487
1488 printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
1489
1490 ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
1491 if (ret)
1492 goto out;
1493 printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
1494
1495 if (!(vp_env_info & VP_OPCODE)) {
1496 printk(KERN_WARNING"kvm: No opcode ability on hardware, "
1497 "vm_env_info:0x%lx\n", vp_env_info);
1498 }
1499
1500 return 1;
1501out:
1502 return 0;
1503}
1504
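/*
 * Copy the VMM module image into the reserved VMM area and patch the ia64
 * function descriptors (fdesc = { ip, gp }) of its entry points so that both
 * the instruction pointer and the global pointer refer to the relocated copy.
 */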
1505static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
1506 struct module *module)
1507{
1508 unsigned long module_base;
1509 unsigned long vmm_size;
1510
1511 unsigned long vmm_offset, func_offset, fdesc_offset;
1512 struct fdesc *p_fdesc;
1513
1514 BUG_ON(!module);
1515
1516 if (!kvm_vmm_base) {
1517 printk("kvm: kvm area hasn't been initialized yet!!\n");
1518 return -EFAULT;
1519 }
1520
1521 /*Calculate new position of relocated vmm module.*/
1522 module_base = (unsigned long)module->module_core;
1523 vmm_size = module->core_size;
1524 if (unlikely(vmm_size > KVM_VMM_SIZE))
1525 return -EFAULT;
1526
1527 memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
1528 kvm_flush_icache(kvm_vmm_base, vmm_size);
1529
1530 /*Recalculate kvm_vmm_info based on new VMM*/
1531 vmm_offset = vmm_info->vmm_ivt - module_base;
1532 kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
1533 printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
1534 kvm_vmm_info->vmm_ivt);
1535
1536 fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
1537 kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
1538 fdesc_offset);
1539 func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
1540 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1541 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1542 p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
1543
1544 printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
1545 KVM_VMM_BASE+func_offset);
1546
1547 fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
1548 kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
1549 fdesc_offset);
1550 func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
1551 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1552 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1553 p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
1554
1555 kvm_vmm_gp = p_fdesc->gp;
1556
1557 printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
1558 kvm_vmm_info->vmm_entry);
1559 printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
1560 KVM_VMM_BASE + func_offset);
1561
1562 return 0;
1563}
1564
1565int kvm_arch_init(void *opaque)
1566{
1567 int r;
1568 struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
1569
1570 if (!vti_cpu_has_kvm_support()) {
1571 printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
1572 r = -EOPNOTSUPP;
1573 goto out;
1574 }
1575
1576 if (kvm_vmm_info) {
1577 printk(KERN_ERR "kvm: Already loaded VMM module!\n");
1578 r = -EEXIST;
1579 goto out;
1580 }
1581
1582 r = -ENOMEM;
1583 kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
1584 if (!kvm_vmm_info)
1585 goto out;
1586
1587 if (kvm_alloc_vmm_area())
1588 goto out_free0;
1589
1590 r = kvm_relocate_vmm(vmm_info, vmm_info->module);
1591 if (r)
1592 goto out_free1;
1593
1594 return 0;
1595
1596out_free1:
1597 kvm_free_vmm_area();
1598out_free0:
1599 kfree(kvm_vmm_info);
1600out:
1601 return r;
1602}
1603
1604void kvm_arch_exit(void)
1605{
1606 kvm_free_vmm_area();
1607 kfree(kvm_vmm_info);
1608 kvm_vmm_info = NULL;
1609}
1610
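/*
 * The VMM records dirty pages in a bitmap kept at a fixed offset
 * (KVM_MEM_DIRTY_LOG_OFS) inside the per-VM data area. Copy the bits that
 * belong to the requested slot into the slot's dirty_bitmap and clear them
 * in the shared bitmap, so each dirty page is reported at most once.
 */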
1611static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1612 struct kvm_dirty_log *log)
1613{
1614 struct kvm_memory_slot *memslot;
1615 int r, i;
1616 long n, base;
1617 unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
1618 + KVM_MEM_DIRTY_LOG_OFS);
1619
1620 r = -EINVAL;
1621 if (log->slot >= KVM_MEMORY_SLOTS)
1622 goto out;
1623
1624 memslot = &kvm->memslots[log->slot];
1625 r = -ENOENT;
1626 if (!memslot->dirty_bitmap)
1627 goto out;
1628
1629 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
1630 base = memslot->base_gfn / BITS_PER_LONG;
1631
1632 for (i = 0; i < n/sizeof(long); ++i) {
1633 memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
1634 dirty_bitmap[base + i] = 0;
1635 }
1636 r = 0;
1637out:
1638 return r;
1639}
1640
1641int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1642 struct kvm_dirty_log *log)
1643{
1644 int r;
1645 int n;
1646 struct kvm_memory_slot *memslot;
1647 int is_dirty = 0;
1648
1649 spin_lock(&kvm->arch.dirty_log_lock);
1650
1651 r = kvm_ia64_sync_dirty_log(kvm, log);
1652 if (r)
1653 goto out;
1654
1655 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1656 if (r)
1657 goto out;
1658
1659 /* If nothing is dirty, don't bother messing with page tables. */
1660 if (is_dirty) {
1661 kvm_flush_remote_tlbs(kvm);
1662 memslot = &kvm->memslots[log->slot];
1663 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
1664 memset(memslot->dirty_bitmap, 0, n);
1665 }
1666 r = 0;
1667out:
1668 spin_unlock(&kvm->arch.dirty_log_lock);
1669 return r;
1670}
1671
1672int kvm_arch_hardware_setup(void)
1673{
1674 return 0;
1675}
1676
1677void kvm_arch_hardware_unsetup(void)
1678{
1679}
1680
1681static void vcpu_kick_intr(void *info)
1682{
1683#ifdef DEBUG
1684 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
1685 printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
1686#endif
1687}
1688
1689void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
1690{
1691 int ipi_pcpu = vcpu->cpu;
1692
1693 if (waitqueue_active(&vcpu->wq))
1694 wake_up_interruptible(&vcpu->wq);
1695
1696 if (vcpu->guest_mode)
1697 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
1698}
1699
1700int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
1701{
1702
1703 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1704
1705 if (!test_and_set_bit(vec, &vpd->irr[0])) {
1706 vcpu->arch.irq_new_pending = 1;
1707 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
1708 kvm_vcpu_kick(vcpu);
1709 else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
1710 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
1711 if (waitqueue_active(&vcpu->wq))
1712 wake_up_interruptible(&vcpu->wq);
1713 }
1714 return 1;
1715 }
1716 return 0;
1717}
1718
1719int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
1720{
1721 return apic->vcpu->vcpu_id == dest;
1722}
1723
1724int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
1725{
1726 return 0;
1727}
1728
1729struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
1730 unsigned long bitmap)
1731{
1732 struct kvm_vcpu *lvcpu = kvm->vcpus[0];
1733 int i;
1734
1735 for (i = 1; i < KVM_MAX_VCPUS; i++) {
1736 if (!kvm->vcpus[i])
1737 continue;
1738 if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
1739 lvcpu = kvm->vcpus[i];
1740 }
1741
1742 return lvcpu;
1743}
1744
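/*
 * Scan a 256-bit vector (eight 32-bit words, e.g. the vpd->irr[] pending
 * interrupt bits) from the top down and return the highest set bit number,
 * or -1 if the vector is empty.
 */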
1745static int find_highest_bits(int *dat)
1746{
1747 u32 bits, bitnum;
1748 int i;
1749
1750 /* loop for all 256 bits */
1751 for (i = 7; i >= 0 ; i--) {
1752 bits = dat[i];
1753 if (bits) {
1754 bitnum = fls(bits);
1755 return i * 32 + bitnum - 1;
1756 }
1757 }
1758
1759 return -1;
1760}
1761
1762int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
1763{
1764 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1765
1766 if (vpd->irr[0] & (1UL << NMI_VECTOR))
1767 return NMI_VECTOR;
1768 if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
1769 return ExtINT_VECTOR;
1770
1771 return find_highest_bits((int *)&vpd->irr[0]);
1772}
1773
1774int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
1775{
1776 if (kvm_highest_pending_irq(vcpu) != -1)
1777 return 1;
1778 return 0;
1779}
1780
1781int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
1782{
1783 return 0;
1784}
1785
1786gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
1787{
1788 return gfn;
1789}
1790
1791int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1792{
1793 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
1794}
1795
1796int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1797 struct kvm_mp_state *mp_state)
1798{
1799 return -EINVAL;
1800}
1801
1802int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1803 struct kvm_mp_state *mp_state)
1804{
1805 return -EINVAL;
1806}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
new file mode 100644
index 000000000000..091f936c4485
--- /dev/null
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -0,0 +1,500 @@
1/*
2 * PAL/SAL call delegation
3 *
4 * Copyright (c) 2004 Li Susie <susie.li@intel.com>
5 * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
6 * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 */
21
22#include <linux/kvm_host.h>
23#include <linux/smp.h>
24
25#include "vti.h"
26#include "misc.h"
27
28#include <asm/pal.h>
29#include <asm/sal.h>
30#include <asm/tlb.h>
31
32/*
33 * Handy macros to make sure that the PAL return values start out
34 * as something meaningful.
35 */
36#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \
37 { \
38 x.status = PAL_STATUS_UNIMPLEMENTED; \
39 x.v0 = 0; \
40 x.v1 = 0; \
41 x.v2 = 0; \
42 }
43
44#define INIT_PAL_STATUS_SUCCESS(x) \
45 { \
46 x.status = PAL_STATUS_SUCCESS; \
47 x.v0 = 0; \
48 x.v1 = 0; \
49 x.v2 = 0; \
50 }
51
52static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
53 u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
54 struct exit_ctl_data *p;
55
56 if (vcpu) {
57 p = &vcpu->arch.exit_data;
58 if (p->exit_reason == EXIT_REASON_PAL_CALL) {
59 *gr28 = p->u.pal_data.gr28;
60 *gr29 = p->u.pal_data.gr29;
61 *gr30 = p->u.pal_data.gr30;
62 *gr31 = p->u.pal_data.gr31;
63 return ;
64 }
65 }
66 printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
67}
68
69static void set_pal_result(struct kvm_vcpu *vcpu,
70 struct ia64_pal_retval result) {
71
72 struct exit_ctl_data *p;
73
74 p = kvm_get_exit_data(vcpu);
75 if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
76 p->u.pal_data.ret = result;
77 return ;
78 }
79 INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
80}
81
82static void set_sal_result(struct kvm_vcpu *vcpu,
83 struct sal_ret_values result) {
84 struct exit_ctl_data *p;
85
86 p = kvm_get_exit_data(vcpu);
87 if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
88 p->u.sal_data.ret = result;
89 return ;
90 }
91 printk(KERN_WARNING"Failed to set sal result!!\n");
92}
93
94struct cache_flush_args {
95 u64 cache_type;
96 u64 operation;
97 u64 progress;
98 long status;
99};
100
101cpumask_t cpu_cache_coherent_map;
102
103static void remote_pal_cache_flush(void *data)
104{
105 struct cache_flush_args *args = data;
106 long status;
107 u64 progress = args->progress;
108
109 status = ia64_pal_cache_flush(args->cache_type, args->operation,
110 &progress, NULL);
111 if (status != 0)
112 args->status = status;
113}
114
115static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
116{
117 u64 gr28, gr29, gr30, gr31;
118 struct ia64_pal_retval result = {0, 0, 0, 0};
119 struct cache_flush_args args = {0, 0, 0, 0};
120 long psr;
121
122 gr28 = gr29 = gr30 = gr31 = 0;
123 kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
124
125 if (gr31 != 0)
126 printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
127
128 /* Always call the host PAL with int=1 (interrupts enabled) */
129 gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
130 args.cache_type = gr29;
131 args.operation = gr30;
132 smp_call_function(remote_pal_cache_flush,
133 (void *)&args, 1, 1);
134 if (args.status != 0)
135 printk(KERN_ERR"pal_cache_flush error,"
136 " status:0x%lx\n", args.status);
137 /*
138 * Call Host PAL cache flush
139 * Clear psr.ic when calling PAL_CACHE_FLUSH
140 */
141 local_irq_save(psr);
142 result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
143 &result.v0);
144 local_irq_restore(psr);
145 if (result.status != 0)
146 printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
147 "in1:%lx,in2:%lx\n",
148 vcpu, result.status, gr29, gr30);
149
150#if 0
151 if (gr29 == PAL_CACHE_TYPE_COHERENT) {
152 cpus_setall(vcpu->arch.cache_coherent_map);
153 cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
154 cpus_setall(cpu_cache_coherent_map);
155 cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
156 }
157#endif
158 return result;
159}
160
161struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
162{
163
164 struct ia64_pal_retval result;
165
166 PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
167 return result;
168}
169
170static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
171{
172
173 struct ia64_pal_retval result;
174
175 PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
176
177 /*
178 * PAL_FREQ_BASE may not be implemented on some platforms,
179 * call SAL instead.
180 */
181 if (result.v0 == 0) {
182 result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
183 &result.v0,
184 &result.v1);
185 result.v2 = 0;
186 }
187
188 return result;
189}
190
191static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
192{
193
194 struct ia64_pal_retval result;
195
196 PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
197 return result;
198}
199
200static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
201{
202 struct ia64_pal_retval result;
203
204 INIT_PAL_STATUS_UNIMPLEMENTED(result);
205 return result;
206}
207
208static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
209{
210
211 struct ia64_pal_retval result;
212
213 INIT_PAL_STATUS_SUCCESS(result);
214 return result;
215}
216
217static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
218{
219
220 struct ia64_pal_retval result = {0, 0, 0, 0};
221 unsigned long in0, in1, in2, in3;
222
223 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
224 result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
225 &result.v2, in2);
226
227 return result;
228}
229
230static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
231{
232
233 pal_cache_config_info_t ci;
234 long status;
235 unsigned long in0, in1, in2, in3, r9, r10;
236
237 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
238 status = ia64_pal_cache_config_info(in1, in2, &ci);
239 r9 = ci.pcci_info_1.pcci1_data;
240 r10 = ci.pcci_info_2.pcci2_data;
241 return ((struct ia64_pal_retval){status, r9, r10, 0});
242}
243
244#define GUEST_IMPL_VA_MSB 59
245#define GUEST_RID_BITS 18
246
247static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
248{
249
250 pal_vm_info_1_u_t vminfo1;
251 pal_vm_info_2_u_t vminfo2;
252 struct ia64_pal_retval result;
253
254 PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
255 if (!result.status) {
256 vminfo1.pvi1_val = result.v0;
257 vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
258 vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
259 result.v0 = vminfo1.pvi1_val;
260 vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
261 vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
262 result.v1 = vminfo2.pvi2_val;
263 }
264
265 return result;
266}
267
268static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
269{
270 struct ia64_pal_retval result;
271
272 INIT_PAL_STATUS_UNIMPLEMENTED(result);
273
274 return result;
275}
276
277static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
278{
279 u64 index = 0;
280 struct exit_ctl_data *p;
281
282 p = kvm_get_exit_data(vcpu);
283 if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
284 index = p->u.pal_data.gr28;
285
286 return index;
287}
288
289int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
290{
291
292 u64 gr28;
293 struct ia64_pal_retval result;
294 int ret = 1;
295
296 gr28 = kvm_get_pal_call_index(vcpu);
297 /*printk("pal_call index:%lx\n",gr28);*/
298 switch (gr28) {
299 case PAL_CACHE_FLUSH:
300 result = pal_cache_flush(vcpu);
301 break;
302 case PAL_CACHE_SUMMARY:
303 result = pal_cache_summary(vcpu);
304 break;
305 case PAL_HALT_LIGHT:
306 {
307 vcpu->arch.timer_pending = 1;
308 INIT_PAL_STATUS_SUCCESS(result);
309 if (kvm_highest_pending_irq(vcpu) == -1)
310 ret = kvm_emulate_halt(vcpu);
311
312 }
313 break;
314
315 case PAL_FREQ_RATIOS:
316 result = pal_freq_ratios(vcpu);
317 break;
318
319 case PAL_FREQ_BASE:
320 result = pal_freq_base(vcpu);
321 break;
322
323 case PAL_LOGICAL_TO_PHYSICAL :
324 result = pal_logical_to_physica(vcpu);
325 break;
326
327 case PAL_VM_SUMMARY :
328 result = pal_vm_summary(vcpu);
329 break;
330
331 case PAL_VM_INFO :
332 result = pal_vm_info(vcpu);
333 break;
334 case PAL_PLATFORM_ADDR :
335 result = pal_platform_addr(vcpu);
336 break;
337 case PAL_CACHE_INFO:
338 result = pal_cache_info(vcpu);
339 break;
340 case PAL_PTCE_INFO:
341 INIT_PAL_STATUS_SUCCESS(result);
342 result.v1 = (1L << 32) | 1L;
343 break;
344 case PAL_VM_PAGE_SIZE:
345 result.status = ia64_pal_vm_page_size(&result.v0,
346 &result.v1);
347 break;
348 case PAL_RSE_INFO:
349 result.status = ia64_pal_rse_info(&result.v0,
350 (pal_hints_u_t *)&result.v1);
351 break;
352 case PAL_PROC_GET_FEATURES:
353 result = pal_proc_get_features(vcpu);
354 break;
355 case PAL_DEBUG_INFO:
356 result.status = ia64_pal_debug_info(&result.v0,
357 &result.v1);
358 break;
359 case PAL_VERSION:
360 result.status = ia64_pal_version(
361 (pal_version_u_t *)&result.v0,
362 (pal_version_u_t *)&result.v1);
363
364 break;
365 case PAL_FIXED_ADDR:
366 result.status = PAL_STATUS_SUCCESS;
367 result.v0 = vcpu->vcpu_id;
368 break;
369 default:
370 INIT_PAL_STATUS_UNIMPLEMENTED(result);
371 printk(KERN_WARNING"kvm: Unsupported pal call,"
372 " index:0x%lx\n", gr28);
373 }
374 set_pal_result(vcpu, result);
375 return ret;
376}
377
378static struct sal_ret_values sal_emulator(struct kvm *kvm,
379 long index, unsigned long in1,
380 unsigned long in2, unsigned long in3,
381 unsigned long in4, unsigned long in5,
382 unsigned long in6, unsigned long in7)
383{
384 unsigned long r9 = 0;
385 unsigned long r10 = 0;
386 long r11 = 0;
387 long status;
388
389 status = 0;
390 switch (index) {
391 case SAL_FREQ_BASE:
392 status = ia64_sal_freq_base(in1, &r9, &r10);
393 break;
394 case SAL_PCI_CONFIG_READ:
395 printk(KERN_WARNING"kvm: Not allowed to call here!"
396 " SAL_PCI_CONFIG_READ\n");
397 break;
398 case SAL_PCI_CONFIG_WRITE:
399 printk(KERN_WARNING"kvm: Not allowed to call here!"
400 " SAL_PCI_CONFIG_WRITE\n");
401 break;
402 case SAL_SET_VECTORS:
403 if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
404 if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
405 status = -2;
406 } else {
407 kvm->arch.rdv_sal_data.boot_ip = in2;
408 kvm->arch.rdv_sal_data.boot_gp = in3;
409 }
410 printk("Rendvous called! iip:%lx\n\n", in2);
411 } else
412 printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
413 "ignored...\n", in1);
414 break;
415 case SAL_GET_STATE_INFO:
416 /* No more info. */
417 status = -5;
418 r9 = 0;
419 break;
420 case SAL_GET_STATE_INFO_SIZE:
421 /* Return a dummy size. */
422 status = 0;
423 r9 = 128;
424 break;
425 case SAL_CLEAR_STATE_INFO:
426 /* Noop. */
427 break;
428 case SAL_MC_RENDEZ:
429 printk(KERN_WARNING
430 "kvm: called SAL_MC_RENDEZ. ignored...\n");
431 break;
432 case SAL_MC_SET_PARAMS:
433 printk(KERN_WARNING
434 "kvm: called SAL_MC_SET_PARAMS.ignored!\n");
435 break;
436 case SAL_CACHE_FLUSH:
437 if (1) {
438 /*Flush using SAL.
439 This method is faster but has a side
440 effect on other vcpus running on
441 this cpu. */
442 status = ia64_sal_cache_flush(in1);
443 } else {
444 /*Maybe need to implement the method
445 without side effect!*/
446 status = 0;
447 }
448 break;
449 case SAL_CACHE_INIT:
450 printk(KERN_WARNING
451 "kvm: called SAL_CACHE_INIT. ignored...\n");
452 break;
453 case SAL_UPDATE_PAL:
454 printk(KERN_WARNING
455 "kvm: CALLED SAL_UPDATE_PAL. ignored...\n");
456 break;
457 default:
458 printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
459 " index:%ld\n", index);
460 status = -1;
461 break;
462 }
463 return ((struct sal_ret_values) {status, r9, r10, r11});
464}
465
466static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
467 u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
468
469 struct exit_ctl_data *p;
470
471 p = kvm_get_exit_data(vcpu);
472
473 if (p) {
474 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
475 *in0 = p->u.sal_data.in0;
476 *in1 = p->u.sal_data.in1;
477 *in2 = p->u.sal_data.in2;
478 *in3 = p->u.sal_data.in3;
479 *in4 = p->u.sal_data.in4;
480 *in5 = p->u.sal_data.in5;
481 *in6 = p->u.sal_data.in6;
482 *in7 = p->u.sal_data.in7;
483 return ;
484 }
485 }
486 *in0 = 0;
487}
488
489void kvm_sal_emul(struct kvm_vcpu *vcpu)
490{
491
492 struct sal_ret_values result;
493 u64 index, in1, in2, in3, in4, in5, in6, in7;
494
495 kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
496 &in3, &in4, &in5, &in6, &in7);
497 result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
498 in4, in5, in6, in7);
499 set_sal_result(vcpu, result);
500}
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
new file mode 100644
index 000000000000..13980d9b8bcf
--- /dev/null
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -0,0 +1,273 @@
1/*
2 * kvm_minstate.h: min save macros
3 * Copyright (c) 2007, Intel Corporation.
4 *
5 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 *
21 */
22
23
24#include <asm/asmmacro.h>
25#include <asm/types.h>
26#include <asm/kregs.h>
27#include "asm-offsets.h"
28
29#define KVM_MINSTATE_START_SAVE_MIN \
30 mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
31 ;; \
32 mov.m r28 = ar.rnat; \
33 addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
34 ;; \
35 lfetch.fault.excl.nt1 [r22]; \
36 addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
37 mov r23 = ar.bspstore; /* save ar.bspstore */ \
38 ;; \
39 mov ar.bspstore = r22; /* switch to kernel RBS */\
40 ;; \
41 mov r18 = ar.bsp; \
42 mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */
43
44
45
46#define KVM_MINSTATE_END_SAVE_MIN \
47 bsw.1; /* switch back to bank 1 (must be last in insn group) */\
48 ;;
49
50
51#define PAL_VSA_SYNC_READ \
52 /* begin to call pal vps sync_read */ \
53 add r25 = VMM_VPD_BASE_OFFSET, r21; \
54 adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \
55 ;; \
56 ld8 r25 = [r25]; /* read vpd base */ \
57 ld8 r20 = [r20]; \
58 ;; \
59 add r20 = PAL_VPS_SYNC_READ,r20; \
60 ;; \
61{ .mii; \
62 nop 0x0; \
63 mov r24 = ip; \
64 mov b0 = r20; \
65 ;; \
66}; \
67{ .mmb; \
68 add r24 = 0x20, r24; \
69 nop 0x0; \
70 br.cond.sptk b0; /* call the service */ \
71 ;; \
72};
73
74
75
76#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21
77
78/*
79 * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
80 * the minimum state necessary that allows us to turn psr.ic back
81 * on.
82 *
83 * Assumed state upon entry:
84 * psr.ic: off
85 * r31: contains saved predicates (pr)
86 *
87 * Upon exit, the state is as follows:
88 * psr.ic: off
89 * r2 = points to &pt_regs.r16
90 * r8 = contents of ar.ccv
91 * r9 = contents of ar.csd
92 * r10 = contents of ar.ssd
93 * r11 = FPSR_DEFAULT
94 * r12 = kernel sp (kernel virtual address)
95 * r13 = points to current task_struct (kernel virtual address)
96 * p15 = TRUE if psr.i is set in cr.ipsr
97 * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
98 * preserved
99 *
100 * Note that psr.ic is NOT turned on by this macro. This is so that
101 * we can pass interruption state as arguments to a handler.
102 */
103
104
105#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
106
107#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
108 KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
109 mov r27 = ar.rsc; /* M */ \
110 mov r20 = r1; /* A */ \
111 mov r25 = ar.unat; /* M */ \
112 mov r29 = cr.ipsr; /* M */ \
113 mov r26 = ar.pfs; /* I */ \
114 mov r18 = cr.isr; \
115 COVER; /* B;; (or nothing) */ \
116 ;; \
117 tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \
118 mov r1 = r16; \
119/* mov r21=r16; */ \
120 /* switch from user to kernel RBS: */ \
121 ;; \
122 invala; /* M */ \
123 SAVE_IFS; \
124 ;; \
125 KVM_MINSTATE_START_SAVE_MIN \
126 adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \
127 adds r16 = PT(CR_IPSR),r1; \
128 ;; \
129 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
130 st8 [r16] = r29; /* save cr.ipsr */ \
131 ;; \
132 lfetch.fault.excl.nt1 [r17]; \
133 tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \
134 mov r29 = b0 \
135 ;; \
136 adds r16 = PT(R8),r1; /* initialize first base pointer */\
137 adds r17 = PT(R9),r1; /* initialize second base pointer */\
138 ;; \
139.mem.offset 0,0; st8.spill [r16] = r8,16; \
140.mem.offset 8,0; st8.spill [r17] = r9,16; \
141 ;; \
142.mem.offset 0,0; st8.spill [r16] = r10,24; \
143.mem.offset 8,0; st8.spill [r17] = r11,24; \
144 ;; \
145 mov r9 = cr.iip; /* M */ \
146 mov r10 = ar.fpsr; /* M */ \
147 ;; \
148 st8 [r16] = r9,16; /* save cr.iip */ \
149 st8 [r17] = r30,16; /* save cr.ifs */ \
150 sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \
151 ;; \
152 st8 [r16] = r25,16; /* save ar.unat */ \
153 st8 [r17] = r26,16; /* save ar.pfs */ \
154 shl r18 = r18,16; /* calc ar.rsc used for "loadrs" */\
155 ;; \
156 st8 [r16] = r27,16; /* save ar.rsc */ \
157 st8 [r17] = r28,16; /* save ar.rnat */ \
158 ;; /* avoid RAW on r16 & r17 */ \
159 st8 [r16] = r23,16; /* save ar.bspstore */ \
160 st8 [r17] = r31,16; /* save predicates */ \
161 ;; \
162 st8 [r16] = r29,16; /* save b0 */ \
163 st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\
164 ;; \
165.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \
166.mem.offset 8,0; st8.spill [r17] = r12,16; \
167 adds r12 = -16,r1; /* switch to kernel memory stack */ \
168 ;; \
169.mem.offset 0,0; st8.spill [r16] = r13,16; \
170.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\
171 mov r13 = r21; /* establish `current' */ \
172 ;; \
173.mem.offset 0,0; st8.spill [r16] = r15,16; \
174.mem.offset 8,0; st8.spill [r17] = r14,16; \
175 ;; \
176.mem.offset 0,0; st8.spill [r16] = r2,16; \
177.mem.offset 8,0; st8.spill [r17] = r3,16; \
178 adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \
179 ;; \
180 adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \
181 adds r17 = VMM_VCPU_ISR_OFFSET,r13; \
182 mov r26 = cr.iipa; \
183 mov r27 = cr.isr; \
184 ;; \
185 st8 [r16] = r26; \
186 st8 [r17] = r27; \
187 ;; \
188 EXTRA; \
189 mov r8 = ar.ccv; \
190 mov r9 = ar.csd; \
191 mov r10 = ar.ssd; \
192 movl r11 = FPSR_DEFAULT; /* L-unit */ \
193 adds r17 = VMM_VCPU_GP_OFFSET,r13; \
194 ;; \
195 ld8 r1 = [r17];/* establish kernel global pointer */ \
196 ;; \
197 PAL_VSA_SYNC_READ \
198 KVM_MINSTATE_END_SAVE_MIN
199
200/*
201 * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
202 *
203 * Assumed state upon entry:
204 * psr.ic: on
205 * r2: points to &pt_regs.f6
206 * r3: points to &pt_regs.f7
207 * r8: contents of ar.ccv
208 * r9: contents of ar.csd
209 * r10: contents of ar.ssd
210 * r11: FPSR_DEFAULT
211 *
212 * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
213 */
214#define KVM_SAVE_REST \
215.mem.offset 0,0; st8.spill [r2] = r16,16; \
216.mem.offset 8,0; st8.spill [r3] = r17,16; \
217 ;; \
218.mem.offset 0,0; st8.spill [r2] = r18,16; \
219.mem.offset 8,0; st8.spill [r3] = r19,16; \
220 ;; \
221.mem.offset 0,0; st8.spill [r2] = r20,16; \
222.mem.offset 8,0; st8.spill [r3] = r21,16; \
223 mov r18=b6; \
224 ;; \
225.mem.offset 0,0; st8.spill [r2] = r22,16; \
226.mem.offset 8,0; st8.spill [r3] = r23,16; \
227 mov r19 = b7; \
228 ;; \
229.mem.offset 0,0; st8.spill [r2] = r24,16; \
230.mem.offset 8,0; st8.spill [r3] = r25,16; \
231 ;; \
232.mem.offset 0,0; st8.spill [r2] = r26,16; \
233.mem.offset 8,0; st8.spill [r3] = r27,16; \
234 ;; \
235.mem.offset 0,0; st8.spill [r2] = r28,16; \
236.mem.offset 8,0; st8.spill [r3] = r29,16; \
237 ;; \
238.mem.offset 0,0; st8.spill [r2] = r30,16; \
239.mem.offset 8,0; st8.spill [r3] = r31,32; \
240 ;; \
241 mov ar.fpsr = r11; \
242 st8 [r2] = r8,8; \
243 adds r24 = PT(B6)-PT(F7),r3; \
244 adds r25 = PT(B7)-PT(F7),r3; \
245 ;; \
246 st8 [r24] = r18,16; /* b6 */ \
247 st8 [r25] = r19,16; /* b7 */ \
248 adds r2 = PT(R4)-PT(F6),r2; \
249 adds r3 = PT(R5)-PT(F7),r3; \
250 ;; \
251 st8 [r24] = r9; /* ar.csd */ \
252 st8 [r25] = r10; /* ar.ssd */ \
253 ;; \
254 mov r18 = ar.unat; \
255 adds r19 = PT(EML_UNAT)-PT(R4),r2; \
256 ;; \
257 st8 [r19] = r18; /* eml_unat */ \
258
259
260#define KVM_SAVE_EXTRA \
261.mem.offset 0,0; st8.spill [r2] = r4,16; \
262.mem.offset 8,0; st8.spill [r3] = r5,16; \
263 ;; \
264.mem.offset 0,0; st8.spill [r2] = r6,16; \
265.mem.offset 8,0; st8.spill [r3] = r7; \
266 ;; \
267 mov r26 = ar.unat; \
268 ;; \
269 st8 [r2] = r26;/* eml_unat */ \
270
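/*
 * The three variants below differ only in whether a "cover" is issued
 * (saving cr.ifs into r30) and whether r19 is preserved in r15 before
 * the minimal state save.
 */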
271#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
272#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
273#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
new file mode 100644
index 000000000000..6d6cbcb14893
--- /dev/null
+++ b/arch/ia64/kvm/lapic.h
@@ -0,0 +1,25 @@
1#ifndef __KVM_IA64_LAPIC_H
2#define __KVM_IA64_LAPIC_H
3
4#include <linux/kvm_host.h>
5
6/*
7 * vlsapic
8 */
  9struct kvm_lapic {
10 struct kvm_vcpu *vcpu;
11 uint64_t insvc[4];
12 uint64_t vhpi;
13 uint8_t xtp;
14 uint8_t pal_init_pending;
15 uint8_t pad[2];
16};
17
18int kvm_create_lapic(struct kvm_vcpu *vcpu);
19void kvm_free_lapic(struct kvm_vcpu *vcpu);
20
21int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
22int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
23int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
24
25#endif
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
new file mode 100644
index 000000000000..e585c4607344
--- /dev/null
+++ b/arch/ia64/kvm/misc.h
@@ -0,0 +1,93 @@
1#ifndef __KVM_IA64_MISC_H
2#define __KVM_IA64_MISC_H
3
4#include <linux/kvm_host.h>
5/*
6 * misc.h
7 * Copyright (C) 2007, Intel Corporation.
8 * Xiantao Zhang (xiantao.zhang@intel.com)
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
21 * Place - Suite 330, Boston, MA 02111-1307 USA.
22 *
23 */
24
25/*
 26 * Return the p2m table base address on the host side.
27 */
28static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
29{
30 return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
31}
32
33static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
34 u64 paddr, u64 mem_flags)
35{
36 uint64_t *pmt_base = kvm_host_get_pmt(kvm);
37 unsigned long pte;
38
39 pte = PAGE_ALIGN(paddr) | mem_flags;
40 pmt_base[gfn] = pte;
41}
42
 43/* Translate a host address to a guest address */
44
45static inline void *to_guest(struct kvm *kvm, void *addr)
46{
47 return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
48 KVM_VM_DATA_BASE);
49}
50
 51/* Translate a guest address to a host address */
52
53static inline void *to_host(struct kvm *kvm, void *addr)
54{
55 return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
56 + kvm->arch.vm_base);
57}
58
59/* Get host context of the vcpu */
60static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
61{
62 union context *ctx = &vcpu->arch.host;
63 return to_guest(vcpu->kvm, ctx);
64}
65
66/* Get guest context of the vcpu */
67static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
68{
69 union context *ctx = &vcpu->arch.guest;
70 return to_guest(vcpu->kvm, ctx);
71}
72
 73/* Get the exit data passed back from the guest VMM */
74static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
75{
76 return &vcpu->arch.exit_data;
77}
78
 79/* Get the vcpu's MMIO I/O request for the kvm module */
80static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
81{
82 struct exit_ctl_data *p_ctl_data;
83
84 if (vcpu) {
85 p_ctl_data = kvm_get_exit_data(vcpu);
86 if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
87 return &p_ctl_data->u.ioreq;
88 }
89
90 return NULL;
91}
92
93#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
new file mode 100644
index 000000000000..351bf70da463
--- /dev/null
+++ b/arch/ia64/kvm/mmio.c
@@ -0,0 +1,341 @@
1/*
2 * mmio.c: MMIO emulation components.
3 * Copyright (c) 2004, Intel Corporation.
4 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
5 * Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
6 *
7 * Copyright (c) 2007 Intel Corporation KVM support.
8 * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
9 * Xiantao Zhang (xiantao.zhang@intel.com)
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22 * Place - Suite 330, Boston, MA 02111-1307 USA.
23 *
24 */
25
26#include <linux/kvm_host.h>
27
28#include "vcpu.h"
29
30static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
31{
32 VLSAPIC_XTP(v) = val;
33}
34
35/*
36 * LSAPIC OFFSET
37 */
38#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20))
39#define PIB_OFST_INTA 0x1E0000
40#define PIB_OFST_XTP 0x1E0008
41
42/*
43 * execute write IPI op.
44 */
45static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
46 uint64_t addr, uint64_t data)
47{
48 struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
49 unsigned long psr;
50
51 local_irq_save(psr);
52
53 p->exit_reason = EXIT_REASON_IPI;
54 p->u.ipi_data.addr.val = addr;
55 p->u.ipi_data.data.val = data;
56 vmm_transition(current_vcpu);
57
58 local_irq_restore(psr);
59
60}
61
62void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
63 unsigned long length, unsigned long val)
64{
65 addr &= (PIB_SIZE - 1);
66
67 switch (addr) {
68 case PIB_OFST_INTA:
69 /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
70 panic_vm(v);
71 break;
72 case PIB_OFST_XTP:
73 if (length == 1) {
74 vlsapic_write_xtp(v, val);
75 } else {
76 /*panic_domain(NULL,
77 "Undefined write on PIB XTP\n");*/
78 panic_vm(v);
79 }
80 break;
81 default:
82 if (PIB_LOW_HALF(addr)) {
83 /*lower half */
84 if (length != 8)
85 /*panic_domain(NULL,
86 "Can't LHF write with size %ld!\n",
87 length);*/
88 panic_vm(v);
89 else
90 vlsapic_write_ipi(v, addr, val);
 91	} else {	/* upper half */
 92		/* printk("IPI-UHF write %lx\n", addr); */
93 panic_vm(v);
94 }
95 break;
96 }
97}
98
99unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
100 unsigned long length)
101{
102 uint64_t result = 0;
103
104 addr &= (PIB_SIZE - 1);
105
106 switch (addr) {
107 case PIB_OFST_INTA:
108 if (length == 1) /* 1 byte load */
109 ; /* There is no i8259, there is no INTA access*/
110 else
111 /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
112 panic_vm(v);
113
114 break;
115 case PIB_OFST_XTP:
116 if (length == 1) {
117 result = VLSAPIC_XTP(v);
118 /* printk("read xtp %lx\n", result); */
119 } else {
120 /*panic_domain(NULL,
121 "Undefined read on PIB XTP\n");*/
122 panic_vm(v);
123 }
124 break;
125 default:
126 panic_vm(v);
127 break;
128 }
129 return result;
130}
131
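/*
 * Forward a guest MMIO access of size s: accesses falling in the PIB
 * range are handled by the virtual LSAPIC above; anything else is turned
 * into an ioreq (EXIT_REASON_MMIO_INSTRUCTION) and completed across a
 * vmm_transition().
 */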
132static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
133 u16 s, int ma, int dir)
134{
135 unsigned long iot;
136 struct exit_ctl_data *p = &vcpu->arch.exit_data;
137 unsigned long psr;
138
139 iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
140
141 local_irq_save(psr);
142
143	/* Intercept accesses to the PIB range */
144 if (iot == GPFN_PIB) {
145 if (!dir)
146 lsapic_write(vcpu, src_pa, s, *dest);
147 else
148 *dest = lsapic_read(vcpu, src_pa, s);
149 goto out;
150 }
151 p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
152 p->u.ioreq.addr = src_pa;
153 p->u.ioreq.size = s;
154 p->u.ioreq.dir = dir;
155 if (dir == IOREQ_WRITE)
156 p->u.ioreq.data = *dest;
157 p->u.ioreq.state = STATE_IOREQ_READY;
158 vmm_transition(vcpu);
159
160 if (p->u.ioreq.state == STATE_IORESP_READY) {
161 if (dir == IOREQ_READ)
162 *dest = p->u.ioreq.data;
163 } else
164 panic_vm(vcpu);
165out:
166 local_irq_restore(psr);
167 return ;
168}
169
170/*
171 * dir: 1 = read, 0 = write
172 * inst_type: 0 = integer, 1 = floating point
173 */
174#define SL_INTEGER	0	/* store/load integer */
175#define SL_FLOATING	1	/* store/load floating point */
176
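/*
 * Decode the faulting load/store bundle at cr.iip, work out the access
 * size, direction and target register, perform the access through
 * mmio_access(), and advance the guest IIP.
 */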
177void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
178{
179 struct kvm_pt_regs *regs;
180 IA64_BUNDLE bundle;
181 int slot, dir = 0;
182 int inst_type = -1;
183 u16 size = 0;
184 u64 data, slot1a, slot1b, temp, update_reg;
185 s32 imm;
186 INST64 inst;
187
188 regs = vcpu_regs(vcpu);
189
190 if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
191		/* if fetching the code fails, return and try again */
192 return;
193 }
194 slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
195 if (!slot)
196 inst.inst = bundle.slot0;
197 else if (slot == 1) {
198 slot1a = bundle.slot1a;
199 slot1b = bundle.slot1b;
200 inst.inst = slot1a + (slot1b << 18);
201 } else if (slot == 2)
202 inst.inst = bundle.slot2;
203
204 /* Integer Load/Store */
205 if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
206 inst_type = SL_INTEGER;
207 size = (inst.M1.x6 & 0x3);
208 if ((inst.M1.x6 >> 2) > 0xb) {
209 /*write*/
210 dir = IOREQ_WRITE;
211 data = vcpu_get_gr(vcpu, inst.M4.r2);
212 } else if ((inst.M1.x6 >> 2) < 0xb) {
213 /*read*/
214 dir = IOREQ_READ;
215 }
216 } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
217 /* Integer Load + Reg update */
218 inst_type = SL_INTEGER;
219 dir = IOREQ_READ;
220 size = (inst.M2.x6 & 0x3);
221 temp = vcpu_get_gr(vcpu, inst.M2.r3);
222 update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
223 temp += update_reg;
224 vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
225 } else if (inst.M3.major == 5) {
226 /*Integer Load/Store + Imm update*/
227 inst_type = SL_INTEGER;
228 size = (inst.M3.x6&0x3);
229 if ((inst.M5.x6 >> 2) > 0xb) {
230 /*write*/
231 dir = IOREQ_WRITE;
232 data = vcpu_get_gr(vcpu, inst.M5.r2);
233 temp = vcpu_get_gr(vcpu, inst.M5.r3);
234 imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
235 (inst.M5.imm7 << 23);
236 temp += imm >> 23;
237 vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
238
239 } else if ((inst.M3.x6 >> 2) < 0xb) {
240 /*read*/
241 dir = IOREQ_READ;
242 temp = vcpu_get_gr(vcpu, inst.M3.r3);
243 imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
244 (inst.M3.imm7 << 23);
245 temp += imm >> 23;
246 vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
247
248 }
249 } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
250 && inst.M9.m == 0 && inst.M9.x == 0) {
251 /* Floating-point spill*/
252 struct ia64_fpreg v;
253
254 inst_type = SL_FLOATING;
255 dir = IOREQ_WRITE;
256 vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
257 /* Write high word. FIXME: this is a kludge! */
258 v.u.bits[1] &= 0x3ffff;
259 mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
260 data = v.u.bits[0];
261 size = 3;
262 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
263 /* Floating-point spill + Imm update */
264 struct ia64_fpreg v;
265
266 inst_type = SL_FLOATING;
267 dir = IOREQ_WRITE;
268 vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
269 temp = vcpu_get_gr(vcpu, inst.M10.r3);
270 imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
271 (inst.M10.imm7 << 23);
272 temp += imm >> 23;
273 vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
274
275		/* Write high word. FIXME: this is a kludge! */
276 v.u.bits[1] &= 0x3ffff;
277 mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
278 data = v.u.bits[0];
279 size = 3;
280 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
281 /* Floating-point stf8 + Imm update */
282 struct ia64_fpreg v;
283 inst_type = SL_FLOATING;
284 dir = IOREQ_WRITE;
285 size = 3;
286 vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
287 data = v.u.bits[0]; /* Significand. */
288 temp = vcpu_get_gr(vcpu, inst.M10.r3);
289 imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
290 (inst.M10.imm7 << 23);
291 temp += imm >> 23;
292 vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
293 } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
294 && inst.M15.x6 <= 0x2f) {
295 temp = vcpu_get_gr(vcpu, inst.M15.r3);
296 imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
297 (inst.M15.imm7 << 23);
298 temp += imm >> 23;
299 vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
300
301 vcpu_increment_iip(vcpu);
302 return;
303 } else if (inst.M12.major == 6 && inst.M12.m == 1
304 && inst.M12.x == 1 && inst.M12.x6 == 1) {
305		/* Floating-point Load Pair + Imm ldfp8 M12 */
306 struct ia64_fpreg v;
307
308 inst_type = SL_FLOATING;
309 dir = IOREQ_READ;
310 size = 8; /*ldfd*/
311 mmio_access(vcpu, padr, &data, size, ma, dir);
312 v.u.bits[0] = data;
313 v.u.bits[1] = 0x1003E;
314 vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
315 padr += 8;
316 mmio_access(vcpu, padr, &data, size, ma, dir);
317 v.u.bits[0] = data;
318 v.u.bits[1] = 0x1003E;
319 vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
320 padr += 8;
321 vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
322 vcpu_increment_iip(vcpu);
323 return;
324 } else {
325 inst_type = -1;
326 panic_vm(vcpu);
327 }
328
329 size = 1 << size;
330 if (dir == IOREQ_WRITE) {
331 mmio_access(vcpu, padr, &data, size, ma, dir);
332 } else {
333 mmio_access(vcpu, padr, &data, size, ma, dir);
334 if (inst_type == SL_INTEGER)
335 vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
336 else
337 panic_vm(vcpu);
338
339 }
340 vcpu_increment_iip(vcpu);
341}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
new file mode 100644
index 000000000000..e4f15d641b22
--- /dev/null
+++ b/arch/ia64/kvm/optvfault.S
@@ -0,0 +1,918 @@
1/*
  2 * arch/ia64/kvm/optvfault.S
  3 * optimized virtualization fault handlers
4 *
5 * Copyright (C) 2006 Intel Co
6 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
7 */
8
9#include <asm/asmmacro.h>
10#include <asm/processor.h>
11
12#include "vti.h"
13#include "asm-offsets.h"
14
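// Each ACCE_* define below enables the corresponding fast-path handler in
// this file; if a define is removed, that handler simply branches back to
// kvm_virtualization_fault_back.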
15#define ACCE_MOV_FROM_AR
16#define ACCE_MOV_FROM_RR
17#define ACCE_MOV_TO_RR
18#define ACCE_RSM
19#define ACCE_SSM
20#define ACCE_MOV_TO_PSR
21#define ACCE_THASH
22
23//mov r1=ar3
24GLOBAL_ENTRY(kvm_asm_mov_from_ar)
25#ifndef ACCE_MOV_FROM_AR
26 br.many kvm_virtualization_fault_back
27#endif
28 add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
29 add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
30 extr.u r17=r25,6,7
31 ;;
32 ld8 r18=[r18]
33 mov r19=ar.itc
34 mov r24=b0
35 ;;
36 add r19=r19,r18
37 addl r20=@gprel(asm_mov_to_reg),gp
38 ;;
39 st8 [r16] = r19
40 adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
41 shladd r17=r17,4,r20
42 ;;
43 mov b0=r17
44 br.sptk.few b0
45 ;;
46END(kvm_asm_mov_from_ar)
47
48
49// mov r1=rr[r3]
50GLOBAL_ENTRY(kvm_asm_mov_from_rr)
51#ifndef ACCE_MOV_FROM_RR
52 br.many kvm_virtualization_fault_back
53#endif
54 extr.u r16=r25,20,7
55 extr.u r17=r25,6,7
56 addl r20=@gprel(asm_mov_from_reg),gp
57 ;;
58 adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
59 shladd r16=r16,4,r20
60 mov r24=b0
61 ;;
62 add r27=VMM_VCPU_VRR0_OFFSET,r21
63 mov b0=r16
64 br.many b0
65 ;;
66kvm_asm_mov_from_rr_back_1:
67 adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
68 adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
69 shr.u r26=r19,61
70 ;;
71 shladd r17=r17,4,r22
72 shladd r27=r26,3,r27
73 ;;
74 ld8 r19=[r27]
75 mov b0=r17
76 br.many b0
77END(kvm_asm_mov_from_rr)
78
79
80// mov rr[r3]=r2
81GLOBAL_ENTRY(kvm_asm_mov_to_rr)
82#ifndef ACCE_MOV_TO_RR
83 br.many kvm_virtualization_fault_back
84#endif
85 extr.u r16=r25,20,7
86 extr.u r17=r25,13,7
87 addl r20=@gprel(asm_mov_from_reg),gp
88 ;;
89 adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
90 shladd r16=r16,4,r20
91 mov r22=b0
92 ;;
93 add r27=VMM_VCPU_VRR0_OFFSET,r21
94 mov b0=r16
95 br.many b0
96 ;;
97kvm_asm_mov_to_rr_back_1:
98 adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
99 shr.u r23=r19,61
100 shladd r17=r17,4,r20
101 ;;
102 //if rr6, go back
103 cmp.eq p6,p0=6,r23
104 mov b0=r22
105 (p6) br.cond.dpnt.many kvm_virtualization_fault_back
106 ;;
107 mov r28=r19
108 mov b0=r17
109 br.many b0
110kvm_asm_mov_to_rr_back_2:
111 adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
112 shladd r27=r23,3,r27
113 ;; // vrr.rid<<4 |0xe
114 st8 [r27]=r19
115 mov b0=r30
116 ;;
117 extr.u r16=r19,8,26
118 extr.u r18 =r19,2,6
119 mov r17 =0xe
120 ;;
121 shladd r16 = r16, 4, r17
122 extr.u r19 =r19,0,8
123 ;;
124 shl r16 = r16,8
125 ;;
126 add r19 = r19, r16
127 ;; //set ve 1
128 dep r19=-1,r19,0,1
129 cmp.lt p6,p0=14,r18
130 ;;
131 (p6) mov r18=14
132 ;;
133 (p6) dep r19=r18,r19,2,6
134 ;;
135 cmp.eq p6,p0=0,r23
136 ;;
137 cmp.eq.or p6,p0=4,r23
138 ;;
139 adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
140 (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
141 ;;
142 ld4 r16=[r16]
143 cmp.eq p7,p0=r0,r0
144 (p6) shladd r17=r23,1,r17
145 ;;
146 (p6) st8 [r17]=r19
147 (p6) tbit.nz p6,p7=r16,0
148 ;;
149 (p7) mov rr[r28]=r19
150 mov r24=r22
151 br.many b0
152END(kvm_asm_mov_to_rr)
153
154
155//rsm
156GLOBAL_ENTRY(kvm_asm_rsm)
157#ifndef ACCE_RSM
158 br.many kvm_virtualization_fault_back
159#endif
160 add r16=VMM_VPD_BASE_OFFSET,r21
161 extr.u r26=r25,6,21
162 extr.u r27=r25,31,2
163 ;;
164 ld8 r16=[r16]
165 extr.u r28=r25,36,1
166 dep r26=r27,r26,21,2
167 ;;
168 add r17=VPD_VPSR_START_OFFSET,r16
169 add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
170 //r26 is imm24
171 dep r26=r28,r26,23,1
172 ;;
173 ld8 r18=[r17]
174 movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
175 ld4 r23=[r22]
176 sub r27=-1,r26
177 mov r24=b0
178 ;;
179 mov r20=cr.ipsr
180 or r28=r27,r28
181 and r19=r18,r27
182 ;;
183 st8 [r17]=r19
184 and r20=r20,r28
185	/* Commented out due to lack of fp lazy algorithm support
186 adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
187 ;;
188 ld8 r27=[r27]
189 ;;
190 tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
191 ;;
192 (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
193 */
194 ;;
195 mov cr.ipsr=r20
196 tbit.nz p6,p0=r23,0
197 ;;
198 tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
199 (p6) br.dptk kvm_resume_to_guest
200 ;;
201 add r26=VMM_VCPU_META_RR0_OFFSET,r21
202 add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
203 dep r23=-1,r23,0,1
204 ;;
205 ld8 r26=[r26]
206 ld8 r27=[r27]
207 st4 [r22]=r23
208 dep.z r28=4,61,3
209 ;;
210 mov rr[r0]=r26
211 ;;
212 mov rr[r28]=r27
213 ;;
214 srlz.d
215 br.many kvm_resume_to_guest
216END(kvm_asm_rsm)
217
218
219//ssm
220GLOBAL_ENTRY(kvm_asm_ssm)
221#ifndef ACCE_SSM
222 br.many kvm_virtualization_fault_back
223#endif
224 add r16=VMM_VPD_BASE_OFFSET,r21
225 extr.u r26=r25,6,21
226 extr.u r27=r25,31,2
227 ;;
228 ld8 r16=[r16]
229 extr.u r28=r25,36,1
230 dep r26=r27,r26,21,2
231 ;; //r26 is imm24
232 add r27=VPD_VPSR_START_OFFSET,r16
233 dep r26=r28,r26,23,1
234 ;; //r19 vpsr
235 ld8 r29=[r27]
236 mov r24=b0
237 ;;
238 add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
239 mov r20=cr.ipsr
240 or r19=r29,r26
241 ;;
242 ld4 r23=[r22]
243 st8 [r27]=r19
244 or r20=r20,r26
245 ;;
246 mov cr.ipsr=r20
247 movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
248 ;;
249 and r19=r28,r19
250 tbit.z p6,p0=r23,0
251 ;;
252 cmp.ne.or p6,p0=r28,r19
253 (p6) br.dptk kvm_asm_ssm_1
254 ;;
255 add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
256 add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
257 dep r23=0,r23,0,1
258 ;;
259 ld8 r26=[r26]
260 ld8 r27=[r27]
261 st4 [r22]=r23
262 dep.z r28=4,61,3
263 ;;
264 mov rr[r0]=r26
265 ;;
266 mov rr[r28]=r27
267 ;;
268 srlz.d
269 ;;
270kvm_asm_ssm_1:
271 tbit.nz p6,p0=r29,IA64_PSR_I_BIT
272 ;;
273 tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
274 (p6) br.dptk kvm_resume_to_guest
275 ;;
276 add r29=VPD_VTPR_START_OFFSET,r16
277 add r30=VPD_VHPI_START_OFFSET,r16
278 ;;
279 ld8 r29=[r29]
280 ld8 r30=[r30]
281 ;;
282 extr.u r17=r29,4,4
283 extr.u r18=r29,16,1
284 ;;
285 dep r17=r18,r17,4,1
286 ;;
287 cmp.gt p6,p0=r30,r17
288 (p6) br.dpnt.few kvm_asm_dispatch_vexirq
289 br.many kvm_resume_to_guest
290END(kvm_asm_ssm)
291
292
293//mov psr.l=r2
294GLOBAL_ENTRY(kvm_asm_mov_to_psr)
295#ifndef ACCE_MOV_TO_PSR
296 br.many kvm_virtualization_fault_back
297#endif
298 add r16=VMM_VPD_BASE_OFFSET,r21
299 extr.u r26=r25,13,7 //r2
300 ;;
301 ld8 r16=[r16]
302 addl r20=@gprel(asm_mov_from_reg),gp
303 ;;
304 adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
305 shladd r26=r26,4,r20
306 mov r24=b0
307 ;;
308 add r27=VPD_VPSR_START_OFFSET,r16
309 mov b0=r26
310 br.many b0
311 ;;
312kvm_asm_mov_to_psr_back:
313 ld8 r17=[r27]
314 add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
315 dep r19=0,r19,32,32
316 ;;
317 ld4 r23=[r22]
318 dep r18=0,r17,0,32
319 ;;
320 add r30=r18,r19
321 movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
322 ;;
323 st8 [r27]=r30
324 and r27=r28,r30
325 and r29=r28,r17
326 ;;
327 cmp.eq p5,p0=r29,r27
328 cmp.eq p6,p7=r28,r27
329 (p5) br.many kvm_asm_mov_to_psr_1
330 ;;
331 //virtual to physical
332 (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
333 (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
334 (p7) dep r23=-1,r23,0,1
335 ;;
336 //physical to virtual
337 (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
338 (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
339 (p6) dep r23=0,r23,0,1
340 ;;
341 ld8 r26=[r26]
342 ld8 r27=[r27]
343 st4 [r22]=r23
344 dep.z r28=4,61,3
345 ;;
346 mov rr[r0]=r26
347 ;;
348 mov rr[r28]=r27
349 ;;
350 srlz.d
351 ;;
352kvm_asm_mov_to_psr_1:
353 mov r20=cr.ipsr
354 movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
355 ;;
356 or r19=r19,r28
357 dep r20=0,r20,0,32
358 ;;
359 add r20=r19,r20
360 mov b0=r24
361 ;;
362	/* Commented out due to lack of fp lazy algorithm support
363 adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
364 ;;
365 ld8 r27=[r27]
366 ;;
367 tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
368 ;;
369 (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
370 ;;
371 */
372 mov cr.ipsr=r20
373 cmp.ne p6,p0=r0,r0
374 ;;
375 tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
376 tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
377 (p6) br.dpnt.few kvm_resume_to_guest
378 ;;
379 add r29=VPD_VTPR_START_OFFSET,r16
380 add r30=VPD_VHPI_START_OFFSET,r16
381 ;;
382 ld8 r29=[r29]
383 ld8 r30=[r30]
384 ;;
385 extr.u r17=r29,4,4
386 extr.u r18=r29,16,1
387 ;;
388 dep r17=r18,r17,4,1
389 ;;
390 cmp.gt p6,p0=r30,r17
391 (p6) br.dpnt.few kvm_asm_dispatch_vexirq
392 br.many kvm_resume_to_guest
393END(kvm_asm_mov_to_psr)
394
395
396ENTRY(kvm_asm_dispatch_vexirq)
397//increment iip
398 mov r16=cr.ipsr
399 ;;
400 extr.u r17=r16,IA64_PSR_RI_BIT,2
401 tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
402 ;;
403 (p6) mov r18=cr.iip
404 (p6) mov r17=r0
405 (p7) add r17=1,r17
406 ;;
407 (p6) add r18=0x10,r18
408 dep r16=r17,r16,IA64_PSR_RI_BIT,2
409 ;;
410 (p6) mov cr.iip=r18
411 mov cr.ipsr=r16
412 mov r30 =1
413 br.many kvm_dispatch_vexirq
414END(kvm_asm_dispatch_vexirq)
415
416// thash
417// TODO: add support when pta.vf = 1
418GLOBAL_ENTRY(kvm_asm_thash)
419#ifndef ACCE_THASH
420 br.many kvm_virtualization_fault_back
421#endif
422 extr.u r17=r25,20,7 // get r3 from opcode in r25
423 extr.u r18=r25,6,7 // get r1 from opcode in r25
424 addl r20=@gprel(asm_mov_from_reg),gp
425 ;;
426 adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
427 shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17)
428 adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs
429 ;;
430 mov r24=b0
431 ;;
432 ld8 r16=[r16] // get VPD addr
433 mov b0=r17
434 br.many b0 // r19 return value
435 ;;
436kvm_asm_thash_back1:
437 shr.u r23=r19,61 // get RR number
438 adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr
439 adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta
440 ;;
441 shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr
442 ld8 r17=[r16] // get PTA
443 mov r26=1
444 ;;
445 extr.u r29=r17,2,6 // get pta.size
446 ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value
447 ;;
448 extr.u r25=r25,2,6 // get rr.ps
449 shl r22=r26,r29 // 1UL << pta.size
450 ;;
451 shr.u r23=r19,r25 // vaddr >> rr.ps
452 adds r26=3,r29 // pta.size + 3
453 shl r27=r17,3 // pta << 3
454 ;;
455 shl r23=r23,3 // (vaddr >> rr.ps) << 3
456 shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3)
457 movl r16=7<<61
458 ;;
459 adds r22=-1,r22 // (1UL << pta.size) - 1
460 shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size
461 and r19=r19,r16 // vaddr & VRN_MASK
462 ;;
463 and r22=r22,r23 // vhpt_offset
464 or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
465 adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
466 ;;
467 or r19=r19,r22 // calc pval
468 shladd r17=r18,4,r26
469 adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
470 ;;
471 mov b0=r17
472 br.many b0
473END(kvm_asm_thash)
474
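// The MOV_TO_REG()/MOV_FROM_REG() macros below each expand to a single
// 16-byte bundle, so asm_mov_to_reg and asm_mov_from_reg act as jump
// tables indexed by (register number * 16); r19 carries the value and
// r30 the return branch target.  Bank 0 registers r16-r31 are reached
// through the separate *_BANK0_REG stubs, which switch banks with bsw.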
475#define MOV_TO_REG0 \
476{; \
477 nop.b 0x0; \
478 nop.b 0x0; \
479 nop.b 0x0; \
480 ;; \
481};
482
483
484#define MOV_TO_REG(n) \
485{; \
486 mov r##n##=r19; \
487 mov b0=r30; \
488 br.sptk.many b0; \
489 ;; \
490};
491
492
493#define MOV_FROM_REG(n) \
494{; \
495 mov r19=r##n##; \
496 mov b0=r30; \
497 br.sptk.many b0; \
498 ;; \
499};
500
501
502#define MOV_TO_BANK0_REG(n) \
503ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \
504{; \
505 mov r26=r2; \
506 mov r2=r19; \
507 bsw.1; \
508 ;; \
509}; \
510{; \
511 mov r##n##=r2; \
512 nop.b 0x0; \
513 bsw.0; \
514 ;; \
515}; \
516{; \
517 mov r2=r26; \
518 mov b0=r30; \
519 br.sptk.many b0; \
520 ;; \
521}; \
522END(asm_mov_to_bank0_reg##n##)
523
524
525#define MOV_FROM_BANK0_REG(n) \
526ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \
527{; \
528 mov r26=r2; \
529 nop.b 0x0; \
530 bsw.1; \
531 ;; \
532}; \
533{; \
534 mov r2=r##n##; \
535 nop.b 0x0; \
536 bsw.0; \
537 ;; \
538}; \
539{; \
540 mov r19=r2; \
541 mov r2=r26; \
542 mov b0=r30; \
543}; \
544{; \
545 nop.b 0x0; \
546 nop.b 0x0; \
547 br.sptk.many b0; \
548 ;; \
549}; \
550END(asm_mov_from_bank0_reg##n##)
551
552
553#define JMP_TO_MOV_TO_BANK0_REG(n) \
554{; \
555 nop.b 0x0; \
556 nop.b 0x0; \
557 br.sptk.many asm_mov_to_bank0_reg##n##; \
558 ;; \
559}
560
561
562#define JMP_TO_MOV_FROM_BANK0_REG(n) \
563{; \
564 nop.b 0x0; \
565 nop.b 0x0; \
566 br.sptk.many asm_mov_from_bank0_reg##n##; \
567 ;; \
568}
569
570
571MOV_FROM_BANK0_REG(16)
572MOV_FROM_BANK0_REG(17)
573MOV_FROM_BANK0_REG(18)
574MOV_FROM_BANK0_REG(19)
575MOV_FROM_BANK0_REG(20)
576MOV_FROM_BANK0_REG(21)
577MOV_FROM_BANK0_REG(22)
578MOV_FROM_BANK0_REG(23)
579MOV_FROM_BANK0_REG(24)
580MOV_FROM_BANK0_REG(25)
581MOV_FROM_BANK0_REG(26)
582MOV_FROM_BANK0_REG(27)
583MOV_FROM_BANK0_REG(28)
584MOV_FROM_BANK0_REG(29)
585MOV_FROM_BANK0_REG(30)
586MOV_FROM_BANK0_REG(31)
587
588
589// mov from reg table
590ENTRY(asm_mov_from_reg)
591 MOV_FROM_REG(0)
592 MOV_FROM_REG(1)
593 MOV_FROM_REG(2)
594 MOV_FROM_REG(3)
595 MOV_FROM_REG(4)
596 MOV_FROM_REG(5)
597 MOV_FROM_REG(6)
598 MOV_FROM_REG(7)
599 MOV_FROM_REG(8)
600 MOV_FROM_REG(9)
601 MOV_FROM_REG(10)
602 MOV_FROM_REG(11)
603 MOV_FROM_REG(12)
604 MOV_FROM_REG(13)
605 MOV_FROM_REG(14)
606 MOV_FROM_REG(15)
607 JMP_TO_MOV_FROM_BANK0_REG(16)
608 JMP_TO_MOV_FROM_BANK0_REG(17)
609 JMP_TO_MOV_FROM_BANK0_REG(18)
610 JMP_TO_MOV_FROM_BANK0_REG(19)
611 JMP_TO_MOV_FROM_BANK0_REG(20)
612 JMP_TO_MOV_FROM_BANK0_REG(21)
613 JMP_TO_MOV_FROM_BANK0_REG(22)
614 JMP_TO_MOV_FROM_BANK0_REG(23)
615 JMP_TO_MOV_FROM_BANK0_REG(24)
616 JMP_TO_MOV_FROM_BANK0_REG(25)
617 JMP_TO_MOV_FROM_BANK0_REG(26)
618 JMP_TO_MOV_FROM_BANK0_REG(27)
619 JMP_TO_MOV_FROM_BANK0_REG(28)
620 JMP_TO_MOV_FROM_BANK0_REG(29)
621 JMP_TO_MOV_FROM_BANK0_REG(30)
622 JMP_TO_MOV_FROM_BANK0_REG(31)
623 MOV_FROM_REG(32)
624 MOV_FROM_REG(33)
625 MOV_FROM_REG(34)
626 MOV_FROM_REG(35)
627 MOV_FROM_REG(36)
628 MOV_FROM_REG(37)
629 MOV_FROM_REG(38)
630 MOV_FROM_REG(39)
631 MOV_FROM_REG(40)
632 MOV_FROM_REG(41)
633 MOV_FROM_REG(42)
634 MOV_FROM_REG(43)
635 MOV_FROM_REG(44)
636 MOV_FROM_REG(45)
637 MOV_FROM_REG(46)
638 MOV_FROM_REG(47)
639 MOV_FROM_REG(48)
640 MOV_FROM_REG(49)
641 MOV_FROM_REG(50)
642 MOV_FROM_REG(51)
643 MOV_FROM_REG(52)
644 MOV_FROM_REG(53)
645 MOV_FROM_REG(54)
646 MOV_FROM_REG(55)
647 MOV_FROM_REG(56)
648 MOV_FROM_REG(57)
649 MOV_FROM_REG(58)
650 MOV_FROM_REG(59)
651 MOV_FROM_REG(60)
652 MOV_FROM_REG(61)
653 MOV_FROM_REG(62)
654 MOV_FROM_REG(63)
655 MOV_FROM_REG(64)
656 MOV_FROM_REG(65)
657 MOV_FROM_REG(66)
658 MOV_FROM_REG(67)
659 MOV_FROM_REG(68)
660 MOV_FROM_REG(69)
661 MOV_FROM_REG(70)
662 MOV_FROM_REG(71)
663 MOV_FROM_REG(72)
664 MOV_FROM_REG(73)
665 MOV_FROM_REG(74)
666 MOV_FROM_REG(75)
667 MOV_FROM_REG(76)
668 MOV_FROM_REG(77)
669 MOV_FROM_REG(78)
670 MOV_FROM_REG(79)
671 MOV_FROM_REG(80)
672 MOV_FROM_REG(81)
673 MOV_FROM_REG(82)
674 MOV_FROM_REG(83)
675 MOV_FROM_REG(84)
676 MOV_FROM_REG(85)
677 MOV_FROM_REG(86)
678 MOV_FROM_REG(87)
679 MOV_FROM_REG(88)
680 MOV_FROM_REG(89)
681 MOV_FROM_REG(90)
682 MOV_FROM_REG(91)
683 MOV_FROM_REG(92)
684 MOV_FROM_REG(93)
685 MOV_FROM_REG(94)
686 MOV_FROM_REG(95)
687 MOV_FROM_REG(96)
688 MOV_FROM_REG(97)
689 MOV_FROM_REG(98)
690 MOV_FROM_REG(99)
691 MOV_FROM_REG(100)
692 MOV_FROM_REG(101)
693 MOV_FROM_REG(102)
694 MOV_FROM_REG(103)
695 MOV_FROM_REG(104)
696 MOV_FROM_REG(105)
697 MOV_FROM_REG(106)
698 MOV_FROM_REG(107)
699 MOV_FROM_REG(108)
700 MOV_FROM_REG(109)
701 MOV_FROM_REG(110)
702 MOV_FROM_REG(111)
703 MOV_FROM_REG(112)
704 MOV_FROM_REG(113)
705 MOV_FROM_REG(114)
706 MOV_FROM_REG(115)
707 MOV_FROM_REG(116)
708 MOV_FROM_REG(117)
709 MOV_FROM_REG(118)
710 MOV_FROM_REG(119)
711 MOV_FROM_REG(120)
712 MOV_FROM_REG(121)
713 MOV_FROM_REG(122)
714 MOV_FROM_REG(123)
715 MOV_FROM_REG(124)
716 MOV_FROM_REG(125)
717 MOV_FROM_REG(126)
718 MOV_FROM_REG(127)
719END(asm_mov_from_reg)
720
721
722/* must be in bank 0
723 * parameters:
724 * r31: pr
725 * r24: b0
726 */
727ENTRY(kvm_resume_to_guest)
728 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
729 ;;
730 ld8 r1 =[r16]
731 adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
732 ;;
733 mov r16=cr.ipsr
734 ;;
735 ld8 r20 = [r20]
736 adds r19=VMM_VPD_BASE_OFFSET,r21
737 ;;
738 ld8 r25=[r19]
739 extr.u r17=r16,IA64_PSR_RI_BIT,2
740 tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
741 ;;
742 (p6) mov r18=cr.iip
743 (p6) mov r17=r0
744 ;;
745 (p6) add r18=0x10,r18
746 (p7) add r17=1,r17
747 ;;
748 (p6) mov cr.iip=r18
749 dep r16=r17,r16,IA64_PSR_RI_BIT,2
750 ;;
751 mov cr.ipsr=r16
752 adds r19= VPD_VPSR_START_OFFSET,r25
753 add r28=PAL_VPS_RESUME_NORMAL,r20
754 add r29=PAL_VPS_RESUME_HANDLER,r20
755 ;;
756 ld8 r19=[r19]
757 mov b0=r29
758 cmp.ne p6,p7 = r0,r0
759 ;;
760 tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
761 ;;
762 (p6) ld8 r26=[r25]
763 (p7) mov b0=r28
764 mov pr=r31,-2
765 br.sptk.many b0 // call pal service
766 ;;
767END(kvm_resume_to_guest)
768
769
770MOV_TO_BANK0_REG(16)
771MOV_TO_BANK0_REG(17)
772MOV_TO_BANK0_REG(18)
773MOV_TO_BANK0_REG(19)
774MOV_TO_BANK0_REG(20)
775MOV_TO_BANK0_REG(21)
776MOV_TO_BANK0_REG(22)
777MOV_TO_BANK0_REG(23)
778MOV_TO_BANK0_REG(24)
779MOV_TO_BANK0_REG(25)
780MOV_TO_BANK0_REG(26)
781MOV_TO_BANK0_REG(27)
782MOV_TO_BANK0_REG(28)
783MOV_TO_BANK0_REG(29)
784MOV_TO_BANK0_REG(30)
785MOV_TO_BANK0_REG(31)
786
787
788// mov to reg table
789ENTRY(asm_mov_to_reg)
790 MOV_TO_REG0
791 MOV_TO_REG(1)
792 MOV_TO_REG(2)
793 MOV_TO_REG(3)
794 MOV_TO_REG(4)
795 MOV_TO_REG(5)
796 MOV_TO_REG(6)
797 MOV_TO_REG(7)
798 MOV_TO_REG(8)
799 MOV_TO_REG(9)
800 MOV_TO_REG(10)
801 MOV_TO_REG(11)
802 MOV_TO_REG(12)
803 MOV_TO_REG(13)
804 MOV_TO_REG(14)
805 MOV_TO_REG(15)
806 JMP_TO_MOV_TO_BANK0_REG(16)
807 JMP_TO_MOV_TO_BANK0_REG(17)
808 JMP_TO_MOV_TO_BANK0_REG(18)
809 JMP_TO_MOV_TO_BANK0_REG(19)
810 JMP_TO_MOV_TO_BANK0_REG(20)
811 JMP_TO_MOV_TO_BANK0_REG(21)
812 JMP_TO_MOV_TO_BANK0_REG(22)
813 JMP_TO_MOV_TO_BANK0_REG(23)
814 JMP_TO_MOV_TO_BANK0_REG(24)
815 JMP_TO_MOV_TO_BANK0_REG(25)
816 JMP_TO_MOV_TO_BANK0_REG(26)
817 JMP_TO_MOV_TO_BANK0_REG(27)
818 JMP_TO_MOV_TO_BANK0_REG(28)
819 JMP_TO_MOV_TO_BANK0_REG(29)
820 JMP_TO_MOV_TO_BANK0_REG(30)
821 JMP_TO_MOV_TO_BANK0_REG(31)
822 MOV_TO_REG(32)
823 MOV_TO_REG(33)
824 MOV_TO_REG(34)
825 MOV_TO_REG(35)
826 MOV_TO_REG(36)
827 MOV_TO_REG(37)
828 MOV_TO_REG(38)
829 MOV_TO_REG(39)
830 MOV_TO_REG(40)
831 MOV_TO_REG(41)
832 MOV_TO_REG(42)
833 MOV_TO_REG(43)
834 MOV_TO_REG(44)
835 MOV_TO_REG(45)
836 MOV_TO_REG(46)
837 MOV_TO_REG(47)
838 MOV_TO_REG(48)
839 MOV_TO_REG(49)
840 MOV_TO_REG(50)
841 MOV_TO_REG(51)
842 MOV_TO_REG(52)
843 MOV_TO_REG(53)
844 MOV_TO_REG(54)
845 MOV_TO_REG(55)
846 MOV_TO_REG(56)
847 MOV_TO_REG(57)
848 MOV_TO_REG(58)
849 MOV_TO_REG(59)
850 MOV_TO_REG(60)
851 MOV_TO_REG(61)
852 MOV_TO_REG(62)
853 MOV_TO_REG(63)
854 MOV_TO_REG(64)
855 MOV_TO_REG(65)
856 MOV_TO_REG(66)
857 MOV_TO_REG(67)
858 MOV_TO_REG(68)
859 MOV_TO_REG(69)
860 MOV_TO_REG(70)
861 MOV_TO_REG(71)
862 MOV_TO_REG(72)
863 MOV_TO_REG(73)
864 MOV_TO_REG(74)
865 MOV_TO_REG(75)
866 MOV_TO_REG(76)
867 MOV_TO_REG(77)
868 MOV_TO_REG(78)
869 MOV_TO_REG(79)
870 MOV_TO_REG(80)
871 MOV_TO_REG(81)
872 MOV_TO_REG(82)
873 MOV_TO_REG(83)
874 MOV_TO_REG(84)
875 MOV_TO_REG(85)
876 MOV_TO_REG(86)
877 MOV_TO_REG(87)
878 MOV_TO_REG(88)
879 MOV_TO_REG(89)
880 MOV_TO_REG(90)
881 MOV_TO_REG(91)
882 MOV_TO_REG(92)
883 MOV_TO_REG(93)
884 MOV_TO_REG(94)
885 MOV_TO_REG(95)
886 MOV_TO_REG(96)
887 MOV_TO_REG(97)
888 MOV_TO_REG(98)
889 MOV_TO_REG(99)
890 MOV_TO_REG(100)
891 MOV_TO_REG(101)
892 MOV_TO_REG(102)
893 MOV_TO_REG(103)
894 MOV_TO_REG(104)
895 MOV_TO_REG(105)
896 MOV_TO_REG(106)
897 MOV_TO_REG(107)
898 MOV_TO_REG(108)
899 MOV_TO_REG(109)
900 MOV_TO_REG(110)
901 MOV_TO_REG(111)
902 MOV_TO_REG(112)
903 MOV_TO_REG(113)
904 MOV_TO_REG(114)
905 MOV_TO_REG(115)
906 MOV_TO_REG(116)
907 MOV_TO_REG(117)
908 MOV_TO_REG(118)
909 MOV_TO_REG(119)
910 MOV_TO_REG(120)
911 MOV_TO_REG(121)
912 MOV_TO_REG(122)
913 MOV_TO_REG(123)
914 MOV_TO_REG(124)
915 MOV_TO_REG(125)
916 MOV_TO_REG(126)
917 MOV_TO_REG(127)
918END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
new file mode 100644
index 000000000000..5a33f7ed29a0
--- /dev/null
+++ b/arch/ia64/kvm/process.c
@@ -0,0 +1,970 @@
1/*
2 * process.c: handle interruption inject for guests.
3 * Copyright (c) 2005, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * Shaofan Li (Susue Li) <susie.li@intel.com>
19 * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com>
20 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
21 * Xiantao Zhang (xiantao.zhang@intel.com)
22 */
23#include "vcpu.h"
24
25#include <asm/pal.h>
26#include <asm/sal.h>
27#include <asm/fpswa.h>
28#include <asm/kregs.h>
29#include <asm/tlb.h>
30
31fpswa_interface_t *vmm_fpswa_interface;
32
33#define IA64_VHPT_TRANS_VECTOR 0x0000
34#define IA64_INST_TLB_VECTOR 0x0400
35#define IA64_DATA_TLB_VECTOR 0x0800
36#define IA64_ALT_INST_TLB_VECTOR 0x0c00
37#define IA64_ALT_DATA_TLB_VECTOR 0x1000
38#define IA64_DATA_NESTED_TLB_VECTOR 0x1400
39#define IA64_INST_KEY_MISS_VECTOR 0x1800
40#define IA64_DATA_KEY_MISS_VECTOR 0x1c00
41#define IA64_DIRTY_BIT_VECTOR 0x2000
42#define IA64_INST_ACCESS_BIT_VECTOR 0x2400
43#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800
44#define IA64_BREAK_VECTOR 0x2c00
45#define IA64_EXTINT_VECTOR 0x3000
46#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000
47#define IA64_KEY_PERMISSION_VECTOR 0x5100
48#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200
49#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300
50#define IA64_GENEX_VECTOR 0x5400
51#define IA64_DISABLED_FPREG_VECTOR 0x5500
52#define IA64_NAT_CONSUMPTION_VECTOR 0x5600
53#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */
54#define IA64_DEBUG_VECTOR 0x5900
55#define IA64_UNALIGNED_REF_VECTOR 0x5a00
56#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00
57#define IA64_FP_FAULT_VECTOR 0x5c00
58#define IA64_FP_TRAP_VECTOR 0x5d00
59#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00
60#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00
61#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000
62
63/* SDM vol2 5.5 - IVA based interruption handling */
64#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
65 IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \
66 IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
67
68#define DOMN_PAL_REQUEST 0x110000
69#define DOMN_SAL_REQUEST 0x110001
70
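/* Map interruption vector numbers to their IVA-relative offsets. */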
71static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
72 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
73 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
74 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
75 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
76 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
77 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
78 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
79};
80
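/*
 * Move the vcpu into the interruption state: when vpsr.ic is set, save
 * psr/iip/ifs/iipa into the virtual control registers, then build the
 * new guest psr from INITIAL_PSR_VALUE_AT_INTERRUPTION and dcr.be/pp.
 */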
81static void collect_interruption(struct kvm_vcpu *vcpu)
82{
83 u64 ipsr;
84 u64 vdcr;
85 u64 vifs;
86 unsigned long vpsr;
87 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
88
89 vpsr = vcpu_get_psr(vcpu);
90 vcpu_bsw0(vcpu);
91 if (vpsr & IA64_PSR_IC) {
92
 93		/* Sync the mpsr id/da/dd/ss/ed bits into vipsr, since we
 94		 * still want these bits set in mpsr after the guest does
 95		 * rfi.
 96		 */
97
98 ipsr = regs->cr_ipsr;
99 vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
100 | IA64_PSR_DD | IA64_PSR_SS
101 | IA64_PSR_ED));
102 vcpu_set_ipsr(vcpu, vpsr);
103
104		/* Currently, for traps, we do not advance IIP to the next
105		 * instruction, because we assume the caller has already
106		 * set up IIP correctly.
107		 */
108
109 vcpu_set_iip(vcpu , regs->cr_iip);
110
111 /* set vifs.v to zero */
112 vifs = VCPU(vcpu, ifs);
113 vifs &= ~IA64_IFS_V;
114 vcpu_set_ifs(vcpu, vifs);
115
116 vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
117 }
118
119 vdcr = VCPU(vcpu, dcr);
120
121 /* Set guest psr
122	 * up/mfl/mfh/pk/dt/rt/mc/it remain unchanged
123 * be: set to the value of dcr.be
124 * pp: set to the value of dcr.pp
125 */
126 vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
127 vpsr |= (vdcr & IA64_DCR_BE);
128
129 /* VDCR pp bit position is different from VPSR pp bit */
130 if (vdcr & IA64_DCR_PP) {
131 vpsr |= IA64_PSR_PP;
132 } else {
133		vpsr &= ~IA64_PSR_PP;
134 }
135
136 vcpu_set_psr(vcpu, vpsr);
137
138}
139
140void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
141{
142 u64 viva;
143 struct kvm_pt_regs *regs;
144 union ia64_isr pt_isr;
145
146 regs = vcpu_regs(vcpu);
147
148 /* clear cr.isr.ir (incomplete register frame)*/
149 pt_isr.val = VMX(vcpu, cr_isr);
150 pt_isr.ir = 0;
151 VMX(vcpu, cr_isr) = pt_isr.val;
152
153 collect_interruption(vcpu);
154
155 viva = vcpu_get_iva(vcpu);
156 regs->cr_iip = viva + vec;
157}
158
159static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
160{
161 union ia64_rr rr, rr1;
162
163 rr.val = vcpu_get_rr(vcpu, ifa);
164 rr1.val = 0;
165 rr1.ps = rr.ps;
166 rr1.rid = rr.rid;
167 return (rr1.val);
168}
169
170
171/*
172 * Set vIFA & vITIR & vIHA, when vPSR.ic =1
173 * Parameter:
174 * set_ifa: if true, set vIFA
175 * set_itir: if true, set vITIR
176 * set_iha: if true, set vIHA
177 */
178void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
179 int set_ifa, int set_itir, int set_iha)
180{
181 long vpsr;
182 u64 value;
183
184 vpsr = VCPU(vcpu, vpsr);
185 /* Vol2, Table 8-1 */
186 if (vpsr & IA64_PSR_IC) {
187 if (set_ifa)
188 vcpu_set_ifa(vcpu, vadr);
189 if (set_itir) {
190 value = vcpu_get_itir_on_fault(vcpu, vadr);
191 vcpu_set_itir(vcpu, value);
192 }
193
194 if (set_iha) {
195 value = vcpu_thash(vcpu, vadr);
196 vcpu_set_iha(vcpu, value);
197 }
198 }
199}
200
201/*
202 * Data TLB Fault
203 * @ Data TLB vector
204 * Refer to SDM Vol2 Table 5-6 & 8-1
205 */
206void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
207{
208 /* If vPSR.ic, IFA, ITIR, IHA */
209 set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
210 inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
211}
212
213/*
214 * Instruction TLB Fault
215 * @ Instruction TLB vector
216 * Refer to SDM Vol2 Table 5-6 & 8-1
217 */
218void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
219{
220 /* If vPSR.ic, IFA, ITIR, IHA */
221 set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
222 inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
223}
224
225
226
227/*
228 * Data Nested TLB Fault
229 * @ Data Nested TLB Vector
230 * Refer to SDM Vol2 Table 5-6 & 8-1
231 */
232void nested_dtlb(struct kvm_vcpu *vcpu)
233{
234 inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
235}
236
237/*
238 * Alternate Data TLB Fault
239 * @ Alternate Data TLB vector
240 * Refer to SDM Vol2 Table 5-6 & 8-1
241 */
242void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
243{
244 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
245 inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
246}
247
248
249/*
250 * Alternate Instruction TLB Fault
251 * @ Alternate Instruction TLB vector
252 * Refer to SDM Vol2 Table 5-6 & 8-1
253 */
254void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
255{
256 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
257 inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
258}
259
260/* Deal with:
261 * VHPT Translation Vector
262 */
263static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
264{
265 /* If vPSR.ic, IFA, ITIR, IHA*/
266 set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
267 inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
268
269
270}
271
272/*
273 * VHPT Instruction Fault
274 * @ VHPT Translation vector
275 * Refer to SDM Vol2 Table 5-6 & 8-1
276 */
277void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
278{
279 _vhpt_fault(vcpu, vadr);
280}
281
282
283/*
284 * VHPT Data Fault
285 * @ VHPT Translation vector
286 * Refer to SDM Vol2 Table 5-6 & 8-1
287 */
288void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
289{
290 _vhpt_fault(vcpu, vadr);
291}
292
293
294
295/*
296 * Deal with:
297 * General Exception vector
298 */
299void _general_exception(struct kvm_vcpu *vcpu)
300{
301 inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
302}
303
304
305/*
306 * Illegal Operation Fault
307 * @ General Exception Vector
308 * Refer to SDM Vol2 Table 5-6 & 8-1
309 */
310void illegal_op(struct kvm_vcpu *vcpu)
311{
312 _general_exception(vcpu);
313}
314
315/*
316 * Illegal Dependency Fault
317 * @ General Exception Vector
318 * Refer to SDM Vol2 Table 5-6 & 8-1
319 */
320void illegal_dep(struct kvm_vcpu *vcpu)
321{
322 _general_exception(vcpu);
323}
324
325/*
326 * Reserved Register/Field Fault
327 * @ General Exception Vector
328 * Refer to SDM Vol2 Table 5-6 & 8-1
329 */
330void rsv_reg_field(struct kvm_vcpu *vcpu)
331{
332 _general_exception(vcpu);
333}
334/*
335 * Privileged Operation Fault
336 * @ General Exception Vector
337 * Refer to SDM Vol2 Table 5-6 & 8-1
338 */
339
340void privilege_op(struct kvm_vcpu *vcpu)
341{
342 _general_exception(vcpu);
343}
344
345/*
346 * Unimplemented Data Address Fault
347 * @ General Exception Vector
348 * Refer to SDM Vol2 Table 5-6 & 8-1
349 */
350void unimpl_daddr(struct kvm_vcpu *vcpu)
351{
352 _general_exception(vcpu);
353}
354
355/*
356 * Privileged Register Fault
357 * @ General Exception Vector
358 * Refer to SDM Vol2 Table 5-6 & 8-1
359 */
360void privilege_reg(struct kvm_vcpu *vcpu)
361{
362 _general_exception(vcpu);
363}
364
365/* Deal with
366 * Nat consumption vector
367 * Parameter:
368 * vadr: optional, ignored when t == REGISTER
369 */
370static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
371 enum tlb_miss_type t)
372{
373 /* If vPSR.ic && t == DATA/INST, IFA */
374 if (t == DATA || t == INSTRUCTION) {
375 /* IFA */
376 set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
377 }
378
379 inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
380}
381
382/*
383 * Instruction Nat Page Consumption Fault
384 * @ Nat Consumption Vector
385 * Refer to SDM Vol2 Table 5-6 & 8-1
386 */
387void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
388{
389 _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
390}
391
392/*
393 * Register Nat Consumption Fault
394 * @ Nat Consumption Vector
395 * Refer to SDM Vol2 Table 5-6 & 8-1
396 */
397void rnat_consumption(struct kvm_vcpu *vcpu)
398{
399 _nat_consumption_fault(vcpu, 0, REGISTER);
400}
401
402/*
403 * Data Nat Page Consumption Fault
404 * @ Nat Consumption Vector
405 * Refer to SDM Vol2 Table 5-6 & 8-1
406 */
407void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
408{
409 _nat_consumption_fault(vcpu, vadr, DATA);
410}
411
412/* Deal with
413 * Page not present vector
414 */
415static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
416{
417 /* If vPSR.ic, IFA, ITIR */
418 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
419 inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
420}
421
422
423void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
424{
425 __page_not_present(vcpu, vadr);
426}
427
428
429void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
430{
431 __page_not_present(vcpu, vadr);
432}
433
434
435/* Deal with
436 * Data access rights vector
437 */
438void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
439{
440 /* If vPSR.ic, IFA, ITIR */
441 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
442 inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
443}
444
445fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
446 unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
447 unsigned long *ifs, struct kvm_pt_regs *regs)
448{
449 fp_state_t fp_state;
450 fpswa_ret_t ret;
451 struct kvm_vcpu *vcpu = current_vcpu;
452
453 uint64_t old_rr7 = ia64_get_rr(7UL<<61);
454
455 if (!vmm_fpswa_interface)
456 return (fpswa_ret_t) {-1, 0, 0, 0};
457
458 /*
459	 * Just let the fpswa driver use the hardware fp registers;
460	 * no fp register is valid in memory.
461 */
462 memset(&fp_state, 0, sizeof(fp_state_t));
463
464 /*
465 * unsigned long (*EFI_FPSWA) (
466 * unsigned long trap_type,
467 * void *Bundle,
468 * unsigned long *pipsr,
469 * unsigned long *pfsr,
470 * unsigned long *pisr,
471 * unsigned long *ppreds,
472 * unsigned long *pifs,
473 * void *fp_state);
474 */
475	/* Call the host fpswa interface directly to virtualize
476	 * the guest fpswa request.
477	 */
478 ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
479 ia64_srlz_d();
480
481 ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
482 ipsr, fpsr, isr, pr, ifs, &fp_state);
483 ia64_set_rr(7UL << 61, old_rr7);
484 ia64_srlz_d();
485 return ret;
486}
487
488/*
489 * Handle floating-point assist faults and traps for domain.
490 */
491unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
492 unsigned long isr)
493{
494 struct kvm_vcpu *v = current_vcpu;
495 IA64_BUNDLE bundle;
496 unsigned long fault_ip;
497 fpswa_ret_t ret;
498
499 fault_ip = regs->cr_iip;
500 /*
501 * When the FP trap occurs, the trapping instruction is completed.
502	 * If ipsr.ri == 0, the trapping instruction is in the previous
503	 * bundle.
504 */
505 if (!fp_fault && (ia64_psr(regs)->ri == 0))
506 fault_ip -= 16;
507
508 if (fetch_code(v, fault_ip, &bundle))
509 return -EAGAIN;
510
511 if (!bundle.i64[0] && !bundle.i64[1])
512 return -EACCES;
513
514 ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
515 &isr, &regs->pr, &regs->cr_ifs, regs);
516 return ret.status;
517}
518
519void reflect_interruption(u64 ifa, u64 isr, u64 iim,
520 u64 vec, struct kvm_pt_regs *regs)
521{
522 u64 vector;
523	int status;
524 struct kvm_vcpu *vcpu = current_vcpu;
525 u64 vpsr = VCPU(vcpu, vpsr);
526
527 vector = vec2off[vec];
528
529 if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
530 panic_vm(vcpu);
531 return;
532 }
533
534 switch (vec) {
535 case 32: /*IA64_FP_FAULT_VECTOR*/
536 status = vmm_handle_fpu_swa(1, regs, isr);
537 if (!status) {
538 vcpu_increment_iip(vcpu);
539 return;
540 } else if (-EAGAIN == status)
541 return;
542 break;
543 case 33: /*IA64_FP_TRAP_VECTOR*/
544 status = vmm_handle_fpu_swa(0, regs, isr);
545 if (!status)
546 return ;
547 else if (-EAGAIN == status) {
548 vcpu_decrement_iip(vcpu);
549 return ;
550 }
551 break;
552 }
553
554 VCPU(vcpu, isr) = isr;
555 VCPU(vcpu, iipa) = regs->cr_iip;
556 if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
557 VCPU(vcpu, iim) = iim;
558 else
559 set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
560
561 inject_guest_interruption(vcpu, vector);
562}
563
564static void set_pal_call_data(struct kvm_vcpu *vcpu)
565{
566 struct exit_ctl_data *p = &vcpu->arch.exit_data;
567
568	/* FIXME: for both the static and stacked conventions, firmware
569	 * has put the parameters in gr28-gr31 before
570	 * breaking into the vmm. */
571
572 p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28);
573 p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29);
574 p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
575 p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
576 p->exit_reason = EXIT_REASON_PAL_CALL;
577}
578
579static void set_pal_call_result(struct kvm_vcpu *vcpu)
580{
581 struct exit_ctl_data *p = &vcpu->arch.exit_data;
582
583 if (p->exit_reason == EXIT_REASON_PAL_CALL) {
584 vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
585 vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
586 vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
587 vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
588 } else
589 panic_vm(vcpu);
590}
591
592static void set_sal_call_data(struct kvm_vcpu *vcpu)
593{
594 struct exit_ctl_data *p = &vcpu->arch.exit_data;
595
596 p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
597 p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
598 p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
599 p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
600 p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
601 p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
602 p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
603 p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
604 p->exit_reason = EXIT_REASON_SAL_CALL;
605}
606
607static void set_sal_call_result(struct kvm_vcpu *vcpu)
608{
609 struct exit_ctl_data *p = &vcpu->arch.exit_data;
610
611 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
612 vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
613 vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
614 vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
615 vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
616 } else
617 panic_vm(vcpu);
618}
619
620void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
621 unsigned long isr, unsigned long iim)
622{
623 struct kvm_vcpu *v = current_vcpu;
624
625 if (ia64_psr(regs)->cpl == 0) {
626 /* Allow hypercalls only when cpl = 0. */
627 if (iim == DOMN_PAL_REQUEST) {
628 set_pal_call_data(v);
629 vmm_transition(v);
630 set_pal_call_result(v);
631 vcpu_increment_iip(v);
632 return;
633 } else if (iim == DOMN_SAL_REQUEST) {
634 set_sal_call_data(v);
635 vmm_transition(v);
636 set_sal_call_result(v);
637 vcpu_increment_iip(v);
638 return;
639 }
640 }
641 reflect_interruption(ifa, isr, iim, 11, regs);
642}
643
644void check_pending_irq(struct kvm_vcpu *vcpu)
645{
646 int mask, h_pending, h_inservice;
647 u64 isr;
648 unsigned long vpsr;
649 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
650
651 h_pending = highest_pending_irq(vcpu);
652 if (h_pending == NULL_VECTOR) {
653 update_vhpi(vcpu, NULL_VECTOR);
654 return;
655 }
656 h_inservice = highest_inservice_irq(vcpu);
657
658 vpsr = VCPU(vcpu, vpsr);
659 mask = irq_masked(vcpu, h_pending, h_inservice);
660 if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
661 isr = vpsr & IA64_PSR_RI;
662 update_vhpi(vcpu, h_pending);
663 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
664 } else if (mask == IRQ_MASKED_BY_INSVC) {
665 if (VCPU(vcpu, vhpi))
666 update_vhpi(vcpu, NULL_VECTOR);
667 } else {
668		/* masked by vpsr.i or vtpr. */
669 update_vhpi(vcpu, h_pending);
670 }
671}
672
673static void generate_exirq(struct kvm_vcpu *vcpu)
674{
675 unsigned vpsr;
676 uint64_t isr;
677
678 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
679
680 vpsr = VCPU(vcpu, vpsr);
681 isr = vpsr & IA64_PSR_RI;
682 if (!(vpsr & IA64_PSR_IC))
683 panic_vm(vcpu);
684 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
685}
686
687void vhpi_detection(struct kvm_vcpu *vcpu)
688{
689 uint64_t threshold, vhpi;
690 union ia64_tpr vtpr;
691 struct ia64_psr vpsr;
692
693 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
694 vtpr.val = VCPU(vcpu, tpr);
695
696 threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
697 vhpi = VCPU(vcpu, vhpi);
698 if (vhpi > threshold) {
699		/* interrupt activated */
700 generate_exirq(vcpu);
701 }
702}
703
704
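/*
 * Called on the way back to the guest: fire the virtual interval timer
 * if it has expired and deliver any newly pending interrupts.
 */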
705void leave_hypervisor_tail(void)
706{
707 struct kvm_vcpu *v = current_vcpu;
708
709 if (VMX(v, timer_check)) {
710 VMX(v, timer_check) = 0;
711 if (VMX(v, itc_check)) {
712 if (vcpu_get_itc(v) > VCPU(v, itm)) {
713 if (!(VCPU(v, itv) & (1 << 16))) {
714 vcpu_pend_interrupt(v, VCPU(v, itv)
715 & 0xff);
716 VMX(v, itc_check) = 0;
717 } else {
718 v->arch.timer_pending = 1;
719 }
720 VMX(v, last_itc) = VCPU(v, itm) + 1;
721 }
722 }
723 }
724
725 rmb();
726 if (v->arch.irq_new_pending) {
727 v->arch.irq_new_pending = 0;
728 VMX(v, irq_check) = 0;
729 check_pending_irq(v);
730 return;
731 }
732 if (VMX(v, irq_check)) {
733 VMX(v, irq_check) = 0;
734 vhpi_detection(v);
735 }
736}
737
738
739static inline void handle_lds(struct kvm_pt_regs *regs)
740{
741 regs->cr_ipsr |= IA64_PSR_ED;
742}
743
744void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
745{
746 unsigned long pte;
747 union ia64_rr rr;
748
749 rr.val = ia64_get_rr(vadr);
750 pte = vadr & _PAGE_PPN_MASK;
751 pte = pte | PHY_PAGE_WB;
752 thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
753 return;
754}
755
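/*
 * Common guest TLB-miss handler: service physical-mode accesses directly,
 * insert VTLB hits into the machine VHPT, otherwise walk the guest VHPT
 * and inject the appropriate fault when the translation cannot be found.
 */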
756void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
757{
758 unsigned long vpsr;
759 int type;
760
761 u64 vhpt_adr, gppa, pteval, rr, itir;
762 union ia64_isr misr;
763 union ia64_pta vpta;
764 struct thash_data *data;
765 struct kvm_vcpu *v = current_vcpu;
766
767 vpsr = VCPU(v, vpsr);
768 misr.val = VMX(v, cr_isr);
769
770 type = vec;
771
772 if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
773 if (vec == 2) {
774 if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
775 emulate_io_inst(v, ((vadr << 1) >> 1), 4);
776 return;
777 }
778 }
779 physical_tlb_miss(v, vadr, type);
780 return;
781 }
782 data = vtlb_lookup(v, vadr, type);
783 if (data != 0) {
784 if (type == D_TLB) {
785 gppa = (vadr & ((1UL << data->ps) - 1))
786 + (data->ppn >> (data->ps - 12) << data->ps);
787 if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
788 if (data->pl >= ((regs->cr_ipsr >>
789 IA64_PSR_CPL0_BIT) & 3))
790 emulate_io_inst(v, gppa, data->ma);
791 else {
792 vcpu_set_isr(v, misr.val);
793 data_access_rights(v, vadr);
794 }
795 return ;
796 }
797 }
798 thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
799
800 } else if (type == D_TLB) {
801 if (misr.sp) {
802 handle_lds(regs);
803 return;
804 }
805
806 rr = vcpu_get_rr(v, vadr);
807 itir = rr & (RR_RID_MASK | RR_PS_MASK);
808
809 if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
810 if (vpsr & IA64_PSR_IC) {
811 vcpu_set_isr(v, misr.val);
812 alt_dtlb(v, vadr);
813 } else {
814 nested_dtlb(v);
815 }
816 return ;
817 }
818
819 vpta.val = vcpu_get_pta(v);
820 /* avoid recursively walking (short format) VHPT */
821
822 vhpt_adr = vcpu_thash(v, vadr);
823 if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
824 /* VHPT successfully read. */
825 if (!(pteval & _PAGE_P)) {
826 if (vpsr & IA64_PSR_IC) {
827 vcpu_set_isr(v, misr.val);
828 dtlb_fault(v, vadr);
829 } else {
830 nested_dtlb(v);
831 }
832 } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
833 thash_purge_and_insert(v, pteval, itir,
834 vadr, D_TLB);
835 } else if (vpsr & IA64_PSR_IC) {
836 vcpu_set_isr(v, misr.val);
837 dtlb_fault(v, vadr);
838 } else {
839 nested_dtlb(v);
840 }
841 } else {
842 /* Can't read VHPT. */
843 if (vpsr & IA64_PSR_IC) {
844 vcpu_set_isr(v, misr.val);
845 dvhpt_fault(v, vadr);
846 } else {
847 nested_dtlb(v);
848 }
849 }
850 } else if (type == I_TLB) {
851 if (!(vpsr & IA64_PSR_IC))
852 misr.ni = 1;
853 if (!vhpt_enabled(v, vadr, INST_REF)) {
854 vcpu_set_isr(v, misr.val);
855 alt_itlb(v, vadr);
856 return;
857 }
858
859 vpta.val = vcpu_get_pta(v);
860
861 vhpt_adr = vcpu_thash(v, vadr);
862 if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
863 /* VHPT successfully read. */
864 if (pteval & _PAGE_P) {
865 if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
866 vcpu_set_isr(v, misr.val);
867 itlb_fault(v, vadr);
868 return ;
869 }
870 rr = vcpu_get_rr(v, vadr);
871 itir = rr & (RR_RID_MASK | RR_PS_MASK);
872 thash_purge_and_insert(v, pteval, itir,
873 vadr, I_TLB);
874 } else {
875 vcpu_set_isr(v, misr.val);
876 inst_page_not_present(v, vadr);
877 }
878 } else {
879 vcpu_set_isr(v, misr.val);
880 ivhpt_fault(v, vadr);
881 }
882 }
883}
884
885void kvm_vexirq(struct kvm_vcpu *vcpu)
886{
887 u64 vpsr, isr;
888 struct kvm_pt_regs *regs;
889
890 regs = vcpu_regs(vcpu);
891 vpsr = VCPU(vcpu, vpsr);
892 isr = vpsr & IA64_PSR_RI;
893 reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
894}
895
896void kvm_ia64_handle_irq(struct kvm_vcpu *v)
897{
898 struct exit_ctl_data *p = &v->arch.exit_data;
899 long psr;
900
901 local_irq_save(psr);
902 p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
903 vmm_transition(v);
904 local_irq_restore(psr);
905
906 VMX(v, timer_check) = 1;
907
908}
909
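/* Handle one queued ptc.ga request on this vcpu: temporarily install the rr
 * value (and page-size bits) of the address being purged into region 0,
 * purge the range, then restore the original region state. */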
910static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
911{
912 u64 oldrid, moldrid, oldpsbits, vaddr;
913 struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
914 vaddr = p->vaddr;
915
916 oldrid = VMX(v, vrr[0]);
917 VMX(v, vrr[0]) = p->rr;
918 oldpsbits = VMX(v, psbits[0]);
919 VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
920 moldrid = ia64_get_rr(0x0);
921 ia64_set_rr(0x0, vrrtomrr(p->rr));
922 ia64_srlz_d();
923
924 vaddr = PAGEALIGN(vaddr, p->ps);
925 thash_purge_entries_remote(v, vaddr, p->ps);
926
927 VMX(v, vrr[0]) = oldrid;
928 VMX(v, psbits[0]) = oldpsbits;
929 ia64_set_rr(0x0, moldrid);
930 ia64_dv_serialize_data();
931}
932
933static void vcpu_do_resume(struct kvm_vcpu *vcpu)
934{
935 /* Re-initialize the VHPT and VTLB when resuming */
936 vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
937 thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
938 vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
939 thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
940
941 ia64_set_pta(vcpu->arch.vhpt.pta.val);
942}
943
944static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
945{
946 if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
947 vcpu_do_resume(vcpu);
948 return;
949 }
950
951 if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
952 thash_purge_all(vcpu);
953 return;
954 }
955
956 if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
957 while (vcpu->arch.ptc_g_count > 0)
958 ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
959 }
960}
961
962void vmm_transition(struct kvm_vcpu *vcpu)
963{
964 ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
965 0, 0, 0, 0, 0, 0);
966 vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
967 ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
968 0, 0, 0, 0, 0, 0);
969 kvm_do_resume_op(vcpu);
970}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
new file mode 100644
index 000000000000..30897d44d61e
--- /dev/null
+++ b/arch/ia64/kvm/trampoline.S
@@ -0,0 +1,1038 @@
1/* Save all processor states
2 *
3 * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
4 * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com>
5 */
6
7#include <asm/asmmacro.h>
8#include "asm-offsets.h"
9
10
11#define CTX(name) VMM_CTX_##name##_OFFSET
12
13 /*
14 * r32: context_t base address
15 */
16#define SAVE_BRANCH_REGS \
17 add r2 = CTX(B0),r32; \
18 add r3 = CTX(B1),r32; \
19 mov r16 = b0; \
20 mov r17 = b1; \
21 ;; \
22 st8 [r2]=r16,16; \
23 st8 [r3]=r17,16; \
24 ;; \
25 mov r16 = b2; \
26 mov r17 = b3; \
27 ;; \
28 st8 [r2]=r16,16; \
29 st8 [r3]=r17,16; \
30 ;; \
31 mov r16 = b4; \
32 mov r17 = b5; \
33 ;; \
34 st8 [r2]=r16; \
35 st8 [r3]=r17; \
36 ;;
37
38 /*
39 * r33: context_t base address
40 */
41#define RESTORE_BRANCH_REGS \
42 add r2 = CTX(B0),r33; \
43 add r3 = CTX(B1),r33; \
44 ;; \
45 ld8 r16=[r2],16; \
46 ld8 r17=[r3],16; \
47 ;; \
48 mov b0 = r16; \
49 mov b1 = r17; \
50 ;; \
51 ld8 r16=[r2],16; \
52 ld8 r17=[r3],16; \
53 ;; \
54 mov b2 = r16; \
55 mov b3 = r17; \
56 ;; \
57 ld8 r16=[r2]; \
58 ld8 r17=[r3]; \
59 ;; \
60 mov b4=r16; \
61 mov b5=r17; \
62 ;;
63
64
65 /*
66 * r32: context_t base address
67 * bsw == 1
68 * Save all bank1 general registers, r4 ~ r7
69 */
70#define SAVE_GENERAL_REGS \
71 add r2=CTX(R4),r32; \
72 add r3=CTX(R5),r32; \
73 ;; \
74.mem.offset 0,0; \
75 st8.spill [r2]=r4,16; \
76.mem.offset 8,0; \
77 st8.spill [r3]=r5,16; \
78 ;; \
79.mem.offset 0,0; \
80 st8.spill [r2]=r6,48; \
81.mem.offset 8,0; \
82 st8.spill [r3]=r7,48; \
83 ;; \
84.mem.offset 0,0; \
85 st8.spill [r2]=r12; \
86.mem.offset 8,0; \
87 st8.spill [r3]=r13; \
88 ;;
89
90 /*
91 * r33: context_t base address
92 * bsw == 1
93 */
94#define RESTORE_GENERAL_REGS \
95 add r2=CTX(R4),r33; \
96 add r3=CTX(R5),r33; \
97 ;; \
98 ld8.fill r4=[r2],16; \
99 ld8.fill r5=[r3],16; \
100 ;; \
101 ld8.fill r6=[r2],48; \
102 ld8.fill r7=[r3],48; \
103 ;; \
104 ld8.fill r12=[r2]; \
105 ld8.fill r13 =[r3]; \
106 ;;
107
108
109
110
111 /*
112 * r32: context_t base address
113 */
114#define SAVE_KERNEL_REGS \
115 add r2 = CTX(KR0),r32; \
116 add r3 = CTX(KR1),r32; \
117 mov r16 = ar.k0; \
118 mov r17 = ar.k1; \
119 ;; \
120 st8 [r2] = r16,16; \
121 st8 [r3] = r17,16; \
122 ;; \
123 mov r16 = ar.k2; \
124 mov r17 = ar.k3; \
125 ;; \
126 st8 [r2] = r16,16; \
127 st8 [r3] = r17,16; \
128 ;; \
129 mov r16 = ar.k4; \
130 mov r17 = ar.k5; \
131 ;; \
132 st8 [r2] = r16,16; \
133 st8 [r3] = r17,16; \
134 ;; \
135 mov r16 = ar.k6; \
136 mov r17 = ar.k7; \
137 ;; \
138 st8 [r2] = r16; \
139 st8 [r3] = r17; \
140 ;;
141
142
143
144 /*
145 * r33: context_t base address
146 */
147#define RESTORE_KERNEL_REGS \
148 add r2 = CTX(KR0),r33; \
149 add r3 = CTX(KR1),r33; \
150 ;; \
151 ld8 r16=[r2],16; \
152 ld8 r17=[r3],16; \
153 ;; \
154 mov ar.k0=r16; \
155 mov ar.k1=r17; \
156 ;; \
157 ld8 r16=[r2],16; \
158 ld8 r17=[r3],16; \
159 ;; \
160 mov ar.k2=r16; \
161 mov ar.k3=r17; \
162 ;; \
163 ld8 r16=[r2],16; \
164 ld8 r17=[r3],16; \
165 ;; \
166 mov ar.k4=r16; \
167 mov ar.k5=r17; \
168 ;; \
169 ld8 r16=[r2],16; \
170 ld8 r17=[r3],16; \
171 ;; \
172 mov ar.k6=r16; \
173 mov ar.k7=r17; \
174 ;;
175
176
177
178 /*
179 * r32: context_t base address
180 */
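/* Each st8 post-increments r2 by CTX(next) - CTX(current), so a single
 * pointer walks the application-register save area field by field. */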
181#define SAVE_APP_REGS \
182 add r2 = CTX(BSPSTORE),r32; \
183 mov r16 = ar.bspstore; \
184 ;; \
185 st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
186 mov r16 = ar.rnat; \
187 ;; \
188 st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \
189 mov r16 = ar.fcr; \
190 ;; \
191 st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \
192 mov r16 = ar.eflag; \
193 ;; \
194 st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \
195 mov r16 = ar.cflg; \
196 ;; \
197 st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \
198 mov r16 = ar.fsr; \
199 ;; \
200 st8 [r2] = r16,CTX(FIR)-CTX(FSR); \
201 mov r16 = ar.fir; \
202 ;; \
203 st8 [r2] = r16,CTX(FDR)-CTX(FIR); \
204 mov r16 = ar.fdr; \
205 ;; \
206 st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \
207 mov r16 = ar.unat; \
208 ;; \
209 st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \
210 mov r16 = ar.fpsr; \
211 ;; \
212 st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \
213 mov r16 = ar.pfs; \
214 ;; \
215 st8 [r2] = r16,CTX(LC)-CTX(PFS); \
216 mov r16 = ar.lc; \
217 ;; \
218 st8 [r2] = r16; \
219 ;;
220
221 /*
222 * r33: context_t base address
223 */
224#define RESTORE_APP_REGS \
225 add r2=CTX(BSPSTORE),r33; \
226 ;; \
227 ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \
228 ;; \
229 mov ar.bspstore=r16; \
230 ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \
231 ;; \
232 mov ar.rnat=r16; \
233 ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \
234 ;; \
235 mov ar.fcr=r16; \
236 ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \
237 ;; \
238 mov ar.eflag=r16; \
239 ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \
240 ;; \
241 mov ar.cflg=r16; \
242 ld8 r16=[r2],CTX(FIR)-CTX(FSR); \
243 ;; \
244 mov ar.fsr=r16; \
245 ld8 r16=[r2],CTX(FDR)-CTX(FIR); \
246 ;; \
247 mov ar.fir=r16; \
248 ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \
249 ;; \
250 mov ar.fdr=r16; \
251 ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \
252 ;; \
253 mov ar.unat=r16; \
254 ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \
255 ;; \
256 mov ar.fpsr=r16; \
257 ld8 r16=[r2],CTX(LC)-CTX(PFS); \
258 ;; \
259 mov ar.pfs=r16; \
260 ld8 r16=[r2]; \
261 ;; \
262 mov ar.lc=r16; \
263 ;;
264
265 /*
266 * r32: context_t base address
267 */
268#define SAVE_CTL_REGS \
269 add r2 = CTX(DCR),r32; \
270 mov r16 = cr.dcr; \
271 ;; \
272 st8 [r2] = r16,CTX(IVA)-CTX(DCR); \
273 ;; \
274 mov r16 = cr.iva; \
275 ;; \
276 st8 [r2] = r16,CTX(PTA)-CTX(IVA); \
277 ;; \
278 mov r16 = cr.pta; \
279 ;; \
280 st8 [r2] = r16 ; \
281 ;;
282
283 /*
284 * r33: context_t base address
285 */
286#define RESTORE_CTL_REGS \
287 add r2 = CTX(DCR),r33; \
288 ;; \
289 ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \
290 ;; \
291 mov cr.dcr = r16; \
292 dv_serialize_data; \
293 ;; \
294 ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \
295 ;; \
296 mov cr.iva = r16; \
297 dv_serialize_data; \
298 ;; \
299 ld8 r16 = [r2]; \
300 ;; \
301 mov cr.pta = r16; \
302 dv_serialize_data; \
303 ;;
304
305
306 /*
307 * r32: context_t base address
308 */
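/* rr6 is intentionally skipped: the 16-byte post-increment after the rr5
 * store jumps over its slot before rr7 is saved (RESTORE does the same). */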
309#define SAVE_REGION_REGS \
310 add r2=CTX(RR0),r32; \
311 mov r16=rr[r0]; \
312 dep.z r18=1,61,3; \
313 ;; \
314 st8 [r2]=r16,8; \
315 mov r17=rr[r18]; \
316 dep.z r18=2,61,3; \
317 ;; \
318 st8 [r2]=r17,8; \
319 mov r16=rr[r18]; \
320 dep.z r18=3,61,3; \
321 ;; \
322 st8 [r2]=r16,8; \
323 mov r17=rr[r18]; \
324 dep.z r18=4,61,3; \
325 ;; \
326 st8 [r2]=r17,8; \
327 mov r16=rr[r18]; \
328 dep.z r18=5,61,3; \
329 ;; \
330 st8 [r2]=r16,8; \
331 mov r17=rr[r18]; \
332 dep.z r18=7,61,3; \
333 ;; \
334 st8 [r2]=r17,16; \
335 mov r16=rr[r18]; \
336 ;; \
337 st8 [r2]=r16,8; \
338 ;;
339
340 /*
341 * r33:context_t base address
342 */
343#define RESTORE_REGION_REGS \
344 add r2=CTX(RR0),r33;\
345 mov r18=r0; \
346 ;; \
347 ld8 r20=[r2],8; \
348 ;; /* rr0 */ \
349 ld8 r21=[r2],8; \
350 ;; /* rr1 */ \
351 ld8 r22=[r2],8; \
352 ;; /* rr2 */ \
353 ld8 r23=[r2],8; \
354 ;; /* rr3 */ \
355 ld8 r24=[r2],8; \
356 ;; /* rr4 */ \
357 ld8 r25=[r2],16; \
358 ;; /* rr5 */ \
359 ld8 r27=[r2]; \
360 ;; /* rr7 */ \
361 mov rr[r18]=r20; \
362 dep.z r18=1,61,3; \
363 ;; /* rr1 */ \
364 mov rr[r18]=r21; \
365 dep.z r18=2,61,3; \
366 ;; /* rr2 */ \
367 mov rr[r18]=r22; \
368 dep.z r18=3,61,3; \
369 ;; /* rr3 */ \
370 mov rr[r18]=r23; \
371 dep.z r18=4,61,3; \
372 ;; /* rr4 */ \
373 mov rr[r18]=r24; \
374 dep.z r18=5,61,3; \
375 ;; /* rr5 */ \
376 mov rr[r18]=r25; \
377 dep.z r18=7,61,3; \
378 ;; /* rr7 */ \
379 mov rr[r18]=r27; \
380 ;; \
381 srlz.i; \
382 ;;
383
384
385
386 /*
387 * r32: context_t base address
388 * r36~r39:scratch registers
389 */
390#define SAVE_DEBUG_REGS \
391 add r2=CTX(IBR0),r32; \
392 add r3=CTX(DBR0),r32; \
393 mov r16=ibr[r0]; \
394 mov r17=dbr[r0]; \
395 ;; \
396 st8 [r2]=r16,8; \
397 st8 [r3]=r17,8; \
398 add r18=1,r0; \
399 ;; \
400 mov r16=ibr[r18]; \
401 mov r17=dbr[r18]; \
402 ;; \
403 st8 [r2]=r16,8; \
404 st8 [r3]=r17,8; \
405 add r18=2,r0; \
406 ;; \
407 mov r16=ibr[r18]; \
408 mov r17=dbr[r18]; \
409 ;; \
410 st8 [r2]=r16,8; \
411 st8 [r3]=r17,8; \
412 add r18=2,r0; \
413 ;; \
414 mov r16=ibr[r18]; \
415 mov r17=dbr[r18]; \
416 ;; \
417 st8 [r2]=r16,8; \
418 st8 [r3]=r17,8; \
419 add r18=3,r0; \
420 ;; \
421 mov r16=ibr[r18]; \
422 mov r17=dbr[r18]; \
423 ;; \
424 st8 [r2]=r16,8; \
425 st8 [r3]=r17,8; \
426 add r18=4,r0; \
427 ;; \
428 mov r16=ibr[r18]; \
429 mov r17=dbr[r18]; \
430 ;; \
431 st8 [r2]=r16,8; \
432 st8 [r3]=r17,8; \
433 add r18=5,r0; \
434 ;; \
435 mov r16=ibr[r18]; \
436 mov r17=dbr[r18]; \
437 ;; \
438 st8 [r2]=r16,8; \
439 st8 [r3]=r17,8; \
440 add r18=6,r0; \
441 ;; \
442 mov r16=ibr[r18]; \
443 mov r17=dbr[r18]; \
444 ;; \
445 st8 [r2]=r16,8; \
446 st8 [r3]=r17,8; \
447 add r18=7,r0; \
448 ;; \
449 mov r16=ibr[r18]; \
450 mov r17=dbr[r18]; \
451 ;; \
452 st8 [r2]=r16,8; \
453 st8 [r3]=r17,8; \
454 ;;
455
456
457/*
458 * r33: pointer to the context_t structure
459 * ar.lc is clobbered.
460 */
461#define RESTORE_DEBUG_REGS \
462 add r2=CTX(IBR0),r33; \
463 add r3=CTX(DBR0),r33; \
464 mov r16=7; \
465 mov r17=r0; \
466 ;; \
467 mov ar.lc = r16; \
468 ;; \
4691: \
470 ld8 r18=[r2],8; \
471 ld8 r19=[r3],8; \
472 ;; \
473 mov ibr[r17]=r18; \
474 mov dbr[r17]=r19; \
475 ;; \
476 srlz.i; \
477 ;; \
478 add r17=1,r17; \
479 br.cloop.sptk 1b; \
480 ;;
481
482
483 /*
484 * r32: context_t base address
485 */
486#define SAVE_FPU_LOW \
487 add r2=CTX(F2),r32; \
488 add r3=CTX(F3),r32; \
489 ;; \
490 stf.spill.nta [r2]=f2,32; \
491 stf.spill.nta [r3]=f3,32; \
492 ;; \
493 stf.spill.nta [r2]=f4,32; \
494 stf.spill.nta [r3]=f5,32; \
495 ;; \
496 stf.spill.nta [r2]=f6,32; \
497 stf.spill.nta [r3]=f7,32; \
498 ;; \
499 stf.spill.nta [r2]=f8,32; \
500 stf.spill.nta [r3]=f9,32; \
501 ;; \
502 stf.spill.nta [r2]=f10,32; \
503 stf.spill.nta [r3]=f11,32; \
504 ;; \
505 stf.spill.nta [r2]=f12,32; \
506 stf.spill.nta [r3]=f13,32; \
507 ;; \
508 stf.spill.nta [r2]=f14,32; \
509 stf.spill.nta [r3]=f15,32; \
510 ;; \
511 stf.spill.nta [r2]=f16,32; \
512 stf.spill.nta [r3]=f17,32; \
513 ;; \
514 stf.spill.nta [r2]=f18,32; \
515 stf.spill.nta [r3]=f19,32; \
516 ;; \
517 stf.spill.nta [r2]=f20,32; \
518 stf.spill.nta [r3]=f21,32; \
519 ;; \
520 stf.spill.nta [r2]=f22,32; \
521 stf.spill.nta [r3]=f23,32; \
522 ;; \
523 stf.spill.nta [r2]=f24,32; \
524 stf.spill.nta [r3]=f25,32; \
525 ;; \
526 stf.spill.nta [r2]=f26,32; \
527 stf.spill.nta [r3]=f27,32; \
528 ;; \
529 stf.spill.nta [r2]=f28,32; \
530 stf.spill.nta [r3]=f29,32; \
531 ;; \
532 stf.spill.nta [r2]=f30; \
533 stf.spill.nta [r3]=f31; \
534 ;;
535
536 /*
537 * r32: context_t base address
538 */
539#define SAVE_FPU_HIGH \
540 add r2=CTX(F32),r32; \
541 add r3=CTX(F33),r32; \
542 ;; \
543 stf.spill.nta [r2]=f32,32; \
544 stf.spill.nta [r3]=f33,32; \
545 ;; \
546 stf.spill.nta [r2]=f34,32; \
547 stf.spill.nta [r3]=f35,32; \
548 ;; \
549 stf.spill.nta [r2]=f36,32; \
550 stf.spill.nta [r3]=f37,32; \
551 ;; \
552 stf.spill.nta [r2]=f38,32; \
553 stf.spill.nta [r3]=f39,32; \
554 ;; \
555 stf.spill.nta [r2]=f40,32; \
556 stf.spill.nta [r3]=f41,32; \
557 ;; \
558 stf.spill.nta [r2]=f42,32; \
559 stf.spill.nta [r3]=f43,32; \
560 ;; \
561 stf.spill.nta [r2]=f44,32; \
562 stf.spill.nta [r3]=f45,32; \
563 ;; \
564 stf.spill.nta [r2]=f46,32; \
565 stf.spill.nta [r3]=f47,32; \
566 ;; \
567 stf.spill.nta [r2]=f48,32; \
568 stf.spill.nta [r3]=f49,32; \
569 ;; \
570 stf.spill.nta [r2]=f50,32; \
571 stf.spill.nta [r3]=f51,32; \
572 ;; \
573 stf.spill.nta [r2]=f52,32; \
574 stf.spill.nta [r3]=f53,32; \
575 ;; \
576 stf.spill.nta [r2]=f54,32; \
577 stf.spill.nta [r3]=f55,32; \
578 ;; \
579 stf.spill.nta [r2]=f56,32; \
580 stf.spill.nta [r3]=f57,32; \
581 ;; \
582 stf.spill.nta [r2]=f58,32; \
583 stf.spill.nta [r3]=f59,32; \
584 ;; \
585 stf.spill.nta [r2]=f60,32; \
586 stf.spill.nta [r3]=f61,32; \
587 ;; \
588 stf.spill.nta [r2]=f62,32; \
589 stf.spill.nta [r3]=f63,32; \
590 ;; \
591 stf.spill.nta [r2]=f64,32; \
592 stf.spill.nta [r3]=f65,32; \
593 ;; \
594 stf.spill.nta [r2]=f66,32; \
595 stf.spill.nta [r3]=f67,32; \
596 ;; \
597 stf.spill.nta [r2]=f68,32; \
598 stf.spill.nta [r3]=f69,32; \
599 ;; \
600 stf.spill.nta [r2]=f70,32; \
601 stf.spill.nta [r3]=f71,32; \
602 ;; \
603 stf.spill.nta [r2]=f72,32; \
604 stf.spill.nta [r3]=f73,32; \
605 ;; \
606 stf.spill.nta [r2]=f74,32; \
607 stf.spill.nta [r3]=f75,32; \
608 ;; \
609 stf.spill.nta [r2]=f76,32; \
610 stf.spill.nta [r3]=f77,32; \
611 ;; \
612 stf.spill.nta [r2]=f78,32; \
613 stf.spill.nta [r3]=f79,32; \
614 ;; \
615 stf.spill.nta [r2]=f80,32; \
616 stf.spill.nta [r3]=f81,32; \
617 ;; \
618 stf.spill.nta [r2]=f82,32; \
619 stf.spill.nta [r3]=f83,32; \
620 ;; \
621 stf.spill.nta [r2]=f84,32; \
622 stf.spill.nta [r3]=f85,32; \
623 ;; \
624 stf.spill.nta [r2]=f86,32; \
625 stf.spill.nta [r3]=f87,32; \
626 ;; \
627 stf.spill.nta [r2]=f88,32; \
628 stf.spill.nta [r3]=f89,32; \
629 ;; \
630 stf.spill.nta [r2]=f90,32; \
631 stf.spill.nta [r3]=f91,32; \
632 ;; \
633 stf.spill.nta [r2]=f92,32; \
634 stf.spill.nta [r3]=f93,32; \
635 ;; \
636 stf.spill.nta [r2]=f94,32; \
637 stf.spill.nta [r3]=f95,32; \
638 ;; \
639 stf.spill.nta [r2]=f96,32; \
640 stf.spill.nta [r3]=f97,32; \
641 ;; \
642 stf.spill.nta [r2]=f98,32; \
643 stf.spill.nta [r3]=f99,32; \
644 ;; \
645 stf.spill.nta [r2]=f100,32; \
646 stf.spill.nta [r3]=f101,32; \
647 ;; \
648 stf.spill.nta [r2]=f102,32; \
649 stf.spill.nta [r3]=f103,32; \
650 ;; \
651 stf.spill.nta [r2]=f104,32; \
652 stf.spill.nta [r3]=f105,32; \
653 ;; \
654 stf.spill.nta [r2]=f106,32; \
655 stf.spill.nta [r3]=f107,32; \
656 ;; \
657 stf.spill.nta [r2]=f108,32; \
658 stf.spill.nta [r3]=f109,32; \
659 ;; \
660 stf.spill.nta [r2]=f110,32; \
661 stf.spill.nta [r3]=f111,32; \
662 ;; \
663 stf.spill.nta [r2]=f112,32; \
664 stf.spill.nta [r3]=f113,32; \
665 ;; \
666 stf.spill.nta [r2]=f114,32; \
667 stf.spill.nta [r3]=f115,32; \
668 ;; \
669 stf.spill.nta [r2]=f116,32; \
670 stf.spill.nta [r3]=f117,32; \
671 ;; \
672 stf.spill.nta [r2]=f118,32; \
673 stf.spill.nta [r3]=f119,32; \
674 ;; \
675 stf.spill.nta [r2]=f120,32; \
676 stf.spill.nta [r3]=f121,32; \
677 ;; \
678 stf.spill.nta [r2]=f122,32; \
679 stf.spill.nta [r3]=f123,32; \
680 ;; \
681 stf.spill.nta [r2]=f124,32; \
682 stf.spill.nta [r3]=f125,32; \
683 ;; \
684 stf.spill.nta [r2]=f126; \
685 stf.spill.nta [r3]=f127; \
686 ;;
687
688 /*
689 * r33: pointer to the context_t structure
690 */
691#define RESTORE_FPU_LOW \
692 add r2 = CTX(F2), r33; \
693 add r3 = CTX(F3), r33; \
694 ;; \
695 ldf.fill.nta f2 = [r2], 32; \
696 ldf.fill.nta f3 = [r3], 32; \
697 ;; \
698 ldf.fill.nta f4 = [r2], 32; \
699 ldf.fill.nta f5 = [r3], 32; \
700 ;; \
701 ldf.fill.nta f6 = [r2], 32; \
702 ldf.fill.nta f7 = [r3], 32; \
703 ;; \
704 ldf.fill.nta f8 = [r2], 32; \
705 ldf.fill.nta f9 = [r3], 32; \
706 ;; \
707 ldf.fill.nta f10 = [r2], 32; \
708 ldf.fill.nta f11 = [r3], 32; \
709 ;; \
710 ldf.fill.nta f12 = [r2], 32; \
711 ldf.fill.nta f13 = [r3], 32; \
712 ;; \
713 ldf.fill.nta f14 = [r2], 32; \
714 ldf.fill.nta f15 = [r3], 32; \
715 ;; \
716 ldf.fill.nta f16 = [r2], 32; \
717 ldf.fill.nta f17 = [r3], 32; \
718 ;; \
719 ldf.fill.nta f18 = [r2], 32; \
720 ldf.fill.nta f19 = [r3], 32; \
721 ;; \
722 ldf.fill.nta f20 = [r2], 32; \
723 ldf.fill.nta f21 = [r3], 32; \
724 ;; \
725 ldf.fill.nta f22 = [r2], 32; \
726 ldf.fill.nta f23 = [r3], 32; \
727 ;; \
728 ldf.fill.nta f24 = [r2], 32; \
729 ldf.fill.nta f25 = [r3], 32; \
730 ;; \
731 ldf.fill.nta f26 = [r2], 32; \
732 ldf.fill.nta f27 = [r3], 32; \
733 ;; \
734 ldf.fill.nta f28 = [r2], 32; \
735 ldf.fill.nta f29 = [r3], 32; \
736 ;; \
737 ldf.fill.nta f30 = [r2], 32; \
738 ldf.fill.nta f31 = [r3], 32; \
739 ;;
740
741
742
743 /*
744 * r33: pointer to the context_t structure
745 */
746#define RESTORE_FPU_HIGH \
747 add r2 = CTX(F32), r33; \
748 add r3 = CTX(F33), r33; \
749 ;; \
750 ldf.fill.nta f32 = [r2], 32; \
751 ldf.fill.nta f33 = [r3], 32; \
752 ;; \
753 ldf.fill.nta f34 = [r2], 32; \
754 ldf.fill.nta f35 = [r3], 32; \
755 ;; \
756 ldf.fill.nta f36 = [r2], 32; \
757 ldf.fill.nta f37 = [r3], 32; \
758 ;; \
759 ldf.fill.nta f38 = [r2], 32; \
760 ldf.fill.nta f39 = [r3], 32; \
761 ;; \
762 ldf.fill.nta f40 = [r2], 32; \
763 ldf.fill.nta f41 = [r3], 32; \
764 ;; \
765 ldf.fill.nta f42 = [r2], 32; \
766 ldf.fill.nta f43 = [r3], 32; \
767 ;; \
768 ldf.fill.nta f44 = [r2], 32; \
769 ldf.fill.nta f45 = [r3], 32; \
770 ;; \
771 ldf.fill.nta f46 = [r2], 32; \
772 ldf.fill.nta f47 = [r3], 32; \
773 ;; \
774 ldf.fill.nta f48 = [r2], 32; \
775 ldf.fill.nta f49 = [r3], 32; \
776 ;; \
777 ldf.fill.nta f50 = [r2], 32; \
778 ldf.fill.nta f51 = [r3], 32; \
779 ;; \
780 ldf.fill.nta f52 = [r2], 32; \
781 ldf.fill.nta f53 = [r3], 32; \
782 ;; \
783 ldf.fill.nta f54 = [r2], 32; \
784 ldf.fill.nta f55 = [r3], 32; \
785 ;; \
786 ldf.fill.nta f56 = [r2], 32; \
787 ldf.fill.nta f57 = [r3], 32; \
788 ;; \
789 ldf.fill.nta f58 = [r2], 32; \
790 ldf.fill.nta f59 = [r3], 32; \
791 ;; \
792 ldf.fill.nta f60 = [r2], 32; \
793 ldf.fill.nta f61 = [r3], 32; \
794 ;; \
795 ldf.fill.nta f62 = [r2], 32; \
796 ldf.fill.nta f63 = [r3], 32; \
797 ;; \
798 ldf.fill.nta f64 = [r2], 32; \
799 ldf.fill.nta f65 = [r3], 32; \
800 ;; \
801 ldf.fill.nta f66 = [r2], 32; \
802 ldf.fill.nta f67 = [r3], 32; \
803 ;; \
804 ldf.fill.nta f68 = [r2], 32; \
805 ldf.fill.nta f69 = [r3], 32; \
806 ;; \
807 ldf.fill.nta f70 = [r2], 32; \
808 ldf.fill.nta f71 = [r3], 32; \
809 ;; \
810 ldf.fill.nta f72 = [r2], 32; \
811 ldf.fill.nta f73 = [r3], 32; \
812 ;; \
813 ldf.fill.nta f74 = [r2], 32; \
814 ldf.fill.nta f75 = [r3], 32; \
815 ;; \
816 ldf.fill.nta f76 = [r2], 32; \
817 ldf.fill.nta f77 = [r3], 32; \
818 ;; \
819 ldf.fill.nta f78 = [r2], 32; \
820 ldf.fill.nta f79 = [r3], 32; \
821 ;; \
822 ldf.fill.nta f80 = [r2], 32; \
823 ldf.fill.nta f81 = [r3], 32; \
824 ;; \
825 ldf.fill.nta f82 = [r2], 32; \
826 ldf.fill.nta f83 = [r3], 32; \
827 ;; \
828 ldf.fill.nta f84 = [r2], 32; \
829 ldf.fill.nta f85 = [r3], 32; \
830 ;; \
831 ldf.fill.nta f86 = [r2], 32; \
832 ldf.fill.nta f87 = [r3], 32; \
833 ;; \
834 ldf.fill.nta f88 = [r2], 32; \
835 ldf.fill.nta f89 = [r3], 32; \
836 ;; \
837 ldf.fill.nta f90 = [r2], 32; \
838 ldf.fill.nta f91 = [r3], 32; \
839 ;; \
840 ldf.fill.nta f92 = [r2], 32; \
841 ldf.fill.nta f93 = [r3], 32; \
842 ;; \
843 ldf.fill.nta f94 = [r2], 32; \
844 ldf.fill.nta f95 = [r3], 32; \
845 ;; \
846 ldf.fill.nta f96 = [r2], 32; \
847 ldf.fill.nta f97 = [r3], 32; \
848 ;; \
849 ldf.fill.nta f98 = [r2], 32; \
850 ldf.fill.nta f99 = [r3], 32; \
851 ;; \
852 ldf.fill.nta f100 = [r2], 32; \
853 ldf.fill.nta f101 = [r3], 32; \
854 ;; \
855 ldf.fill.nta f102 = [r2], 32; \
856 ldf.fill.nta f103 = [r3], 32; \
857 ;; \
858 ldf.fill.nta f104 = [r2], 32; \
859 ldf.fill.nta f105 = [r3], 32; \
860 ;; \
861 ldf.fill.nta f106 = [r2], 32; \
862 ldf.fill.nta f107 = [r3], 32; \
863 ;; \
864 ldf.fill.nta f108 = [r2], 32; \
865 ldf.fill.nta f109 = [r3], 32; \
866 ;; \
867 ldf.fill.nta f110 = [r2], 32; \
868 ldf.fill.nta f111 = [r3], 32; \
869 ;; \
870 ldf.fill.nta f112 = [r2], 32; \
871 ldf.fill.nta f113 = [r3], 32; \
872 ;; \
873 ldf.fill.nta f114 = [r2], 32; \
874 ldf.fill.nta f115 = [r3], 32; \
875 ;; \
876 ldf.fill.nta f116 = [r2], 32; \
877 ldf.fill.nta f117 = [r3], 32; \
878 ;; \
879 ldf.fill.nta f118 = [r2], 32; \
880 ldf.fill.nta f119 = [r3], 32; \
881 ;; \
882 ldf.fill.nta f120 = [r2], 32; \
883 ldf.fill.nta f121 = [r3], 32; \
884 ;; \
885 ldf.fill.nta f122 = [r2], 32; \
886 ldf.fill.nta f123 = [r3], 32; \
887 ;; \
888 ldf.fill.nta f124 = [r2], 32; \
889 ldf.fill.nta f125 = [r3], 32; \
890 ;; \
891 ldf.fill.nta f126 = [r2], 32; \
892 ldf.fill.nta f127 = [r3], 32; \
893 ;;
894
895 /*
896 * r32: context_t base address
897 */
898#define SAVE_PTK_REGS \
899 add r2=CTX(PKR0), r32; \
900 mov r16=7; \
901 ;; \
902 mov ar.lc=r16; \
903 mov r17=r0; \
904 ;; \
9051: \
906 mov r18=pkr[r17]; \
907 ;; \
908 srlz.i; \
909 ;; \
910 st8 [r2]=r18, 8; \
911 ;; \
912 add r17 =1,r17; \
913 ;; \
914 br.cloop.sptk 1b; \
915 ;;
916
917/*
918 * r33: pointer to the context_t structure
919 * ar.lc is clobbered.
920 */
921#define RESTORE_PTK_REGS \
922 add r2=CTX(PKR0), r33; \
923 mov r16=7; \
924 ;; \
925 mov ar.lc=r16; \
926 mov r17=r0; \
927 ;; \
9281: \
929 ld8 r18=[r2], 8; \
930 ;; \
931 mov pkr[r17]=r18; \
932 ;; \
933 srlz.i; \
934 ;; \
935 add r17 =1,r17; \
936 ;; \
937 br.cloop.sptk 1b; \
938 ;;
939
940
941/*
942 * void vmm_trampoline( context_t * from,
943 * context_t * to)
944 *
945 * from: r32
946 * to: r33
947 * note: interrupts must be disabled before calling this function.
948 */
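/* psr.dfl and psr.dfh are cleared before the FPU spills below so that the
 * low and high floating point register partitions can be accessed. */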
949GLOBAL_ENTRY(vmm_trampoline)
950 mov r16 = psr
951 adds r2 = CTX(PSR), r32
952 ;;
953 st8 [r2] = r16, 8 // psr
954 mov r17 = pr
955 ;;
956 st8 [r2] = r17, 8 // pr
957 mov r18 = ar.unat
958 ;;
959 st8 [r2] = r18
960 mov r17 = ar.rsc
961 ;;
962 adds r2 = CTX(RSC),r32
963 ;;
964 st8 [r2]= r17
965 mov ar.rsc =0
966 flushrs
967 ;;
968 SAVE_GENERAL_REGS
969 ;;
970 SAVE_KERNEL_REGS
971 ;;
972 SAVE_APP_REGS
973 ;;
974 SAVE_BRANCH_REGS
975 ;;
976 SAVE_CTL_REGS
977 ;;
978 SAVE_REGION_REGS
979 ;;
980 //SAVE_DEBUG_REGS
981 ;;
982 rsm psr.dfl
983 ;;
984 srlz.d
985 ;;
986 SAVE_FPU_LOW
987 ;;
988 rsm psr.dfh
989 ;;
990 srlz.d
991 ;;
992 SAVE_FPU_HIGH
993 ;;
994 SAVE_PTK_REGS
995 ;;
996 RESTORE_PTK_REGS
997 ;;
998 RESTORE_FPU_HIGH
999 ;;
1000 RESTORE_FPU_LOW
1001 ;;
1002 //RESTORE_DEBUG_REGS
1003 ;;
1004 RESTORE_REGION_REGS
1005 ;;
1006 RESTORE_CTL_REGS
1007 ;;
1008 RESTORE_BRANCH_REGS
1009 ;;
1010 RESTORE_APP_REGS
1011 ;;
1012 RESTORE_KERNEL_REGS
1013 ;;
1014 RESTORE_GENERAL_REGS
1015 ;;
1016 adds r2=CTX(PSR), r33
1017 ;;
1018 ld8 r16=[r2], 8 // psr
1019 ;;
1020 mov psr.l=r16
1021 ;;
1022 srlz.d
1023 ;;
1024 ld8 r16=[r2], 8 // pr
1025 ;;
1026 mov pr =r16,-1
1027 ld8 r16=[r2] // unat
1028 ;;
1029 mov ar.unat=r16
1030 ;;
1031 adds r2=CTX(RSC),r33
1032 ;;
1033 ld8 r16 =[r2]
1034 ;;
1035 mov ar.rsc = r16
1036 ;;
1037 br.ret.sptk.few b0
1038END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
new file mode 100644
index 000000000000..e44027ce5667
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.c
@@ -0,0 +1,2163 @@
1/*
2 * kvm_vcpu.c: handling all virtual cpu related things.
3 * Copyright (c) 2005, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * Shaofan Li (Susue Li) <susie.li@intel.com>
19 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
20 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
21 * Xiantao Zhang <xiantao.zhang@intel.com>
22 */
23
24#include <linux/kvm_host.h>
25#include <linux/types.h>
26
27#include <asm/processor.h>
28#include <asm/ia64regs.h>
29#include <asm/gcc_intrin.h>
30#include <asm/kregs.h>
31#include <asm/pgtable.h>
32#include <asm/tlb.h>
33
34#include "asm-offsets.h"
35#include "vcpu.h"
36
37/*
38 * Special notes:
39 * - Index by it/dt/rt sequence
40 * - Only existing mode transitions are allowed in this table
41 * - RSE is placed in lazy mode when emulating guest partial mode
42 * - If gva happens to fall in rr0 or rr4, the only allowed case is identity
43 * mapping (gva=gpa), or panic! (How?)
44 */
45int mm_switch_table[8][8] = {
46 /* 2004/09/12(Kevin): Allow switch to self */
47 /*
48 * (it,dt,rt): (0,0,0) -> (1,1,1)
49 * This kind of transition usually occurs in the very early
50 * stage of Linux boot up procedure. Another case is in efi
51 * and pal calls. (see "arch/ia64/kernel/head.S")
52 *
53 * (it,dt,rt): (0,0,0) -> (0,1,1)
54 * This kind of transition is found when OSYa exits efi boot
55 * service. Due to gva = gpa in this case (Same region),
56 * data access can be satisfied though itlb entry for physical
57 * emulation is hit.
58 */
59 {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V},
60 {0, 0, 0, 0, 0, 0, 0, 0},
61 {0, 0, 0, 0, 0, 0, 0, 0},
62 /*
63 * (it,dt,rt): (0,1,1) -> (1,1,1)
64 * This kind of transition is found in OSYa.
65 *
66 * (it,dt,rt): (0,1,1) -> (0,0,0)
67 * This kind of transition is found in OSYa
68 */
69 {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V},
70 /* (1,0,0)->(1,1,1) */
71 {0, 0, 0, 0, 0, 0, 0, SW_P2V},
72 /*
73 * (it,dt,rt): (1,0,1) -> (1,1,1)
74 * This kind of transition usually occurs when Linux returns
75 * from the low level TLB miss handlers.
76 * (see "arch/ia64/kernel/ivt.S")
77 */
78 {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V},
79 {0, 0, 0, 0, 0, 0, 0, 0},
80 /*
81 * (it,dt,rt): (1,1,1) -> (1,0,1)
82 * This kind of transition usually occurs in Linux low level
83 * TLB miss handler. (see "arch/ia64/kernel/ivt.S")
84 *
85 * (it,dt,rt): (1,1,1) -> (0,0,0)
86 * This kind of transition usually occurs in pal and efi calls,
87 * which requires running in physical mode.
88 * (see "arch/ia64/kernel/head.S")
89 * (1,1,1)->(1,0,0)
90 */
91
92 {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF},
93};
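/* Indexed as mm_switch_table[MODE_IND(old_psr)][MODE_IND(new_psr)], where
 * MODE_IND() packs the (it, dt, rt) bits into a 3-bit value. */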
94
95void physical_mode_init(struct kvm_vcpu *vcpu)
96{
97 vcpu->arch.mode_flags = GUEST_IN_PHY;
98}
99
100void switch_to_physical_rid(struct kvm_vcpu *vcpu)
101{
102 unsigned long psr;
103
104 /* Save original virtual mode rr[0] and rr[4] */
105 psr = ia64_clear_ic();
106 ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
107 ia64_srlz_d();
108 ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
109 ia64_srlz_d();
110
111 ia64_set_psr(psr);
112 return;
113}
114
115
116void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
117{
118 unsigned long psr;
119
120 psr = ia64_clear_ic();
121 ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
122 ia64_srlz_d();
123 ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
124 ia64_srlz_d();
125 ia64_set_psr(psr);
126 return;
127}
128
129static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
130{
131 return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
132}
133
134void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
135 struct ia64_psr new_psr)
136{
137 int act;
138 act = mm_switch_action(old_psr, new_psr);
139 switch (act) {
140 case SW_V2P:
141 /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
142 old_psr.val, new_psr.val);*/
143 switch_to_physical_rid(vcpu);
144 /*
145 * Set RSE to enforced lazy mode, to prevent active RSE
146 * save/restore while the guest is in physical mode.
147 */
148 vcpu->arch.mode_flags |= GUEST_IN_PHY;
149 break;
150 case SW_P2V:
151 switch_to_virtual_rid(vcpu);
152 /*
153 * Recover the old mode, which was saved when entering
154 * guest physical mode.
155 */
156 vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
157 break;
158 case SW_SELF:
159 break;
160 case SW_NOP:
161 break;
162 default:
163 /* Sanity check */
164 break;
165 }
166 return;
167}
168
169
170
171/*
172 * In physical mode, tc/tr inserts for regions 0 and 4 use
173 * RID[0] and RID[4], which are reserved for physical mode emulation.
174 * However, what those inserted tc/tr entries want is the rid for
175 * virtual mode, so the original virtual rid needs to be restored
176 * before the insert.
177 *
178 * Operations which require such a switch include:
179 * - insertions (itc.*, itr.*)
180 * - purges (ptc.* and ptr.*)
181 * - tpa
182 * - tak
183 * - thash?, ttag?
184 * All of the above need the actual virtual rid for the destination entry.
185 */
186
187void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
188 struct ia64_psr new_psr)
189{
190
191 if ((old_psr.dt != new_psr.dt)
192 || (old_psr.it != new_psr.it)
193 || (old_psr.rt != new_psr.rt))
194 switch_mm_mode(vcpu, old_psr, new_psr);
195
196 return;
197}
198
199
200/*
201 * In physical mode, tc/tr inserts for regions 0 and 4 use
202 * RID[0] and RID[4], which are reserved for physical mode emulation.
203 * However, what those inserted tc/tr entries want is the rid for
204 * virtual mode, so the original virtual rid needs to be restored
205 * before the insert.
206 *
207 * Operations which require such a switch include:
208 * - insertions (itc.*, itr.*)
209 * - purges (ptc.* and ptr.*)
210 * - tpa
211 * - tak
212 * - thash?, ttag?
213 * All of the above need the actual virtual rid for the destination entry.
214 */
215
216void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
217{
218 if (is_physical_mode(vcpu)) {
219 vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
220 switch_to_virtual_rid(vcpu);
221 }
222 return;
223}
224
225/* Recover always follows prepare */
226void recover_if_physical_mode(struct kvm_vcpu *vcpu)
227{
228 if (is_physical_mode(vcpu))
229 switch_to_physical_rid(vcpu);
230 vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
231 return;
232}
233
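/* RPT(x): byte offset of field x within struct kvm_pt_regs (open-coded offsetof). */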
234#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x)
235
236static u16 gr_info[32] = {
237 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
238 RPT(r1), RPT(r2), RPT(r3),
239 RPT(r4), RPT(r5), RPT(r6), RPT(r7),
240 RPT(r8), RPT(r9), RPT(r10), RPT(r11),
241 RPT(r12), RPT(r13), RPT(r14), RPT(r15),
242 RPT(r16), RPT(r17), RPT(r18), RPT(r19),
243 RPT(r20), RPT(r21), RPT(r22), RPT(r23),
244 RPT(r24), RPT(r25), RPT(r26), RPT(r27),
245 RPT(r28), RPT(r29), RPT(r30), RPT(r31)
246};
247
248#define IA64_FIRST_STACKED_GR 32
249#define IA64_FIRST_ROTATING_FR 32
250
251static inline unsigned long
252rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
253{
254 reg += rrb;
255 if (reg >= sor)
256 reg -= sor;
257 return reg;
258}
259
260/*
261 * Return the (rotated) index for floating point register
262 * REGNUM (REGNUM must be in the range 32-127;
263 * the result is in the range 0-95).
264 */
265static inline unsigned long fph_index(struct kvm_pt_regs *regs,
266 long regnum)
267{
268 unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
269 return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
270}
271
272
273/*
274 * The inverse of the above: given bspstore and the number of
275 * registers, calculate ar.bsp.
276 */
277static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
278 long num_regs)
279{
280 long delta = ia64_rse_slot_num(addr) + num_regs;
281 int i = 0;
282
283 if (num_regs < 0)
284 delta -= 0x3e;
285 if (delta < 0) {
286 while (delta <= -0x3f) {
287 i--;
288 delta += 0x3f;
289 }
290 } else {
291 while (delta >= 0x3f) {
292 i++;
293 delta -= 0x3f;
294 }
295 }
296
297 return addr + num_regs + i;
298}
299
300static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
301 unsigned long *val, int *nat)
302{
303 unsigned long *bsp, *addr, *rnat_addr, *bspstore;
304 unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
305 unsigned long nat_mask;
306 unsigned long old_rsc, new_rsc;
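 /* Decode cr.ifs: sof = size of frame, sor = size of rotating portion, rrb_gr = GR rename base. */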
307 long sof = (regs->cr_ifs) & 0x7f;
308 long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
309 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
310 long ridx = r1 - 32;
311
312 if (ridx < sor)
313 ridx = rotate_reg(sor, rrb_gr, ridx);
314
315 old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
316 new_rsc = old_rsc&(~(0x3));
317 ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
318
319 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
320 bsp = kbs + (regs->loadrs >> 19);
321
322 addr = kvm_rse_skip_regs(bsp, -sof + ridx);
323 nat_mask = 1UL << ia64_rse_slot_num(addr);
324 rnat_addr = ia64_rse_rnat_addr(addr);
325
326 if (addr >= bspstore) {
327 ia64_flushrs();
328 ia64_mf();
329 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
330 }
331 *val = *addr;
332 if (nat) {
333 if (bspstore < rnat_addr)
334 *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
335 & nat_mask);
336 else
337 *nat = (int)!!((*rnat_addr) & nat_mask);
338 ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
339 }
340}
341
342void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
343 unsigned long val, unsigned long nat)
344{
345 unsigned long *bsp, *bspstore, *addr, *rnat_addr;
346 unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
347 unsigned long nat_mask;
348 unsigned long old_rsc, new_rsc, psr;
349 unsigned long rnat;
350 long sof = (regs->cr_ifs) & 0x7f;
351 long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
352 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
353 long ridx = r1 - 32;
354
355 if (ridx < sor)
356 ridx = rotate_reg(sor, rrb_gr, ridx);
357
358 old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
359 /* put RSC to lazy mode, and set loadrs 0 */
360 new_rsc = old_rsc & (~0x3fff0003);
361 ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
362 bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
363
364 addr = kvm_rse_skip_regs(bsp, -sof + ridx);
365 nat_mask = 1UL << ia64_rse_slot_num(addr);
366 rnat_addr = ia64_rse_rnat_addr(addr);
367
368 local_irq_save(psr);
369 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
370 if (addr >= bspstore) {
371
372 ia64_flushrs();
373 ia64_mf();
374 *addr = val;
375 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
376 rnat = ia64_getreg(_IA64_REG_AR_RNAT);
377 if (bspstore < rnat_addr)
378 rnat = rnat & (~nat_mask);
379 else
380 *rnat_addr = (*rnat_addr)&(~nat_mask);
381
382 ia64_mf();
383 ia64_loadrs();
384 ia64_setreg(_IA64_REG_AR_RNAT, rnat);
385 } else {
386 rnat = ia64_getreg(_IA64_REG_AR_RNAT);
387 *addr = val;
388 if (bspstore < rnat_addr)
389 rnat = rnat&(~nat_mask);
390 else
391 *rnat_addr = (*rnat_addr) & (~nat_mask);
392
393 ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore);
394 ia64_setreg(_IA64_REG_AR_RNAT, rnat);
395 }
396 local_irq_restore(psr);
397 ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
398}
399
400void getreg(unsigned long regnum, unsigned long *val,
401 int *nat, struct kvm_pt_regs *regs)
402{
403 unsigned long addr, *unat;
404 if (regnum >= IA64_FIRST_STACKED_GR) {
405 get_rse_reg(regs, regnum, val, nat);
406 return;
407 }
408
409 /*
410 * Now look at registers in [0-31] range and init correct UNAT
411 */
412 addr = (unsigned long)regs;
413 unat = &regs->eml_unat;
414
415 addr += gr_info[regnum];
416
417 *val = *(unsigned long *)addr;
418 /*
419 * do it only when requested
420 */
421 if (nat)
422 *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
423}
424
425void setreg(unsigned long regnum, unsigned long val,
426 int nat, struct kvm_pt_regs *regs)
427{
428 unsigned long addr;
429 unsigned long bitmask;
430 unsigned long *unat;
431
432 /*
433 * First takes care of stacked registers
434 */
435 if (regnum >= IA64_FIRST_STACKED_GR) {
436 set_rse_reg(regs, regnum, val, nat);
437 return;
438 }
439
440 /*
441 * Now look at registers in [0-31] range and init correct UNAT
442 */
443 addr = (unsigned long)regs;
444 unat = &regs->eml_unat;
445 /*
446 * add offset from base of struct
447 * and do it !
448 */
449 addr += gr_info[regnum];
450
451 *(unsigned long *)addr = val;
452
453 /*
454 * We need to clear the corresponding UNAT bit to fully emulate the load
455 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
456 */
457 bitmask = 1UL << ((addr >> 3) & 0x3f);
458 if (nat)
459 *unat |= bitmask;
460 else
461 *unat &= ~bitmask;
462
463}
464
465u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
466{
467 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
468 u64 val;
469
470 if (!reg)
471 return 0;
472 getreg(reg, &val, 0, regs);
473 return val;
474}
475
476void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat)
477{
478 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
479 long sof = (regs->cr_ifs) & 0x7f;
480
481 if (!reg)
482 return;
483 if (reg >= sof + 32)
484 return;
485 setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/
486}
487
488void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
489 struct kvm_pt_regs *regs)
490{
491 /* Take floating register rotation into consideration*/
492 if (regnum >= IA64_FIRST_ROTATING_FR)
493 regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
494#define CASE_FIXED_FP(reg) \
495 case (reg) : \
496 ia64_stf_spill(fpval, reg); \
497 break
498
499 switch (regnum) {
500 CASE_FIXED_FP(0);
501 CASE_FIXED_FP(1);
502 CASE_FIXED_FP(2);
503 CASE_FIXED_FP(3);
504 CASE_FIXED_FP(4);
505 CASE_FIXED_FP(5);
506
507 CASE_FIXED_FP(6);
508 CASE_FIXED_FP(7);
509 CASE_FIXED_FP(8);
510 CASE_FIXED_FP(9);
511 CASE_FIXED_FP(10);
512 CASE_FIXED_FP(11);
513
514 CASE_FIXED_FP(12);
515 CASE_FIXED_FP(13);
516 CASE_FIXED_FP(14);
517 CASE_FIXED_FP(15);
518 CASE_FIXED_FP(16);
519 CASE_FIXED_FP(17);
520 CASE_FIXED_FP(18);
521 CASE_FIXED_FP(19);
522 CASE_FIXED_FP(20);
523 CASE_FIXED_FP(21);
524 CASE_FIXED_FP(22);
525 CASE_FIXED_FP(23);
526 CASE_FIXED_FP(24);
527 CASE_FIXED_FP(25);
528 CASE_FIXED_FP(26);
529 CASE_FIXED_FP(27);
530 CASE_FIXED_FP(28);
531 CASE_FIXED_FP(29);
532 CASE_FIXED_FP(30);
533 CASE_FIXED_FP(31);
534 CASE_FIXED_FP(32);
535 CASE_FIXED_FP(33);
536 CASE_FIXED_FP(34);
537 CASE_FIXED_FP(35);
538 CASE_FIXED_FP(36);
539 CASE_FIXED_FP(37);
540 CASE_FIXED_FP(38);
541 CASE_FIXED_FP(39);
542 CASE_FIXED_FP(40);
543 CASE_FIXED_FP(41);
544 CASE_FIXED_FP(42);
545 CASE_FIXED_FP(43);
546 CASE_FIXED_FP(44);
547 CASE_FIXED_FP(45);
548 CASE_FIXED_FP(46);
549 CASE_FIXED_FP(47);
550 CASE_FIXED_FP(48);
551 CASE_FIXED_FP(49);
552 CASE_FIXED_FP(50);
553 CASE_FIXED_FP(51);
554 CASE_FIXED_FP(52);
555 CASE_FIXED_FP(53);
556 CASE_FIXED_FP(54);
557 CASE_FIXED_FP(55);
558 CASE_FIXED_FP(56);
559 CASE_FIXED_FP(57);
560 CASE_FIXED_FP(58);
561 CASE_FIXED_FP(59);
562 CASE_FIXED_FP(60);
563 CASE_FIXED_FP(61);
564 CASE_FIXED_FP(62);
565 CASE_FIXED_FP(63);
566 CASE_FIXED_FP(64);
567 CASE_FIXED_FP(65);
568 CASE_FIXED_FP(66);
569 CASE_FIXED_FP(67);
570 CASE_FIXED_FP(68);
571 CASE_FIXED_FP(69);
572 CASE_FIXED_FP(70);
573 CASE_FIXED_FP(71);
574 CASE_FIXED_FP(72);
575 CASE_FIXED_FP(73);
576 CASE_FIXED_FP(74);
577 CASE_FIXED_FP(75);
578 CASE_FIXED_FP(76);
579 CASE_FIXED_FP(77);
580 CASE_FIXED_FP(78);
581 CASE_FIXED_FP(79);
582 CASE_FIXED_FP(80);
583 CASE_FIXED_FP(81);
584 CASE_FIXED_FP(82);
585 CASE_FIXED_FP(83);
586 CASE_FIXED_FP(84);
587 CASE_FIXED_FP(85);
588 CASE_FIXED_FP(86);
589 CASE_FIXED_FP(87);
590 CASE_FIXED_FP(88);
591 CASE_FIXED_FP(89);
592 CASE_FIXED_FP(90);
593 CASE_FIXED_FP(91);
594 CASE_FIXED_FP(92);
595 CASE_FIXED_FP(93);
596 CASE_FIXED_FP(94);
597 CASE_FIXED_FP(95);
598 CASE_FIXED_FP(96);
599 CASE_FIXED_FP(97);
600 CASE_FIXED_FP(98);
601 CASE_FIXED_FP(99);
602 CASE_FIXED_FP(100);
603 CASE_FIXED_FP(101);
604 CASE_FIXED_FP(102);
605 CASE_FIXED_FP(103);
606 CASE_FIXED_FP(104);
607 CASE_FIXED_FP(105);
608 CASE_FIXED_FP(106);
609 CASE_FIXED_FP(107);
610 CASE_FIXED_FP(108);
611 CASE_FIXED_FP(109);
612 CASE_FIXED_FP(110);
613 CASE_FIXED_FP(111);
614 CASE_FIXED_FP(112);
615 CASE_FIXED_FP(113);
616 CASE_FIXED_FP(114);
617 CASE_FIXED_FP(115);
618 CASE_FIXED_FP(116);
619 CASE_FIXED_FP(117);
620 CASE_FIXED_FP(118);
621 CASE_FIXED_FP(119);
622 CASE_FIXED_FP(120);
623 CASE_FIXED_FP(121);
624 CASE_FIXED_FP(122);
625 CASE_FIXED_FP(123);
626 CASE_FIXED_FP(124);
627 CASE_FIXED_FP(125);
628 CASE_FIXED_FP(126);
629 CASE_FIXED_FP(127);
630 }
631#undef CASE_FIXED_FP
632}
633
634void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
635 struct kvm_pt_regs *regs)
636{
637 /* Take floating register rotation into consideration*/
638 if (regnum >= IA64_FIRST_ROTATING_FR)
639 regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
640
641#define CASE_FIXED_FP(reg) \
642 case (reg) : \
643 ia64_ldf_fill(reg, fpval); \
644 break
645
646 switch (regnum) {
647 CASE_FIXED_FP(2);
648 CASE_FIXED_FP(3);
649 CASE_FIXED_FP(4);
650 CASE_FIXED_FP(5);
651
652 CASE_FIXED_FP(6);
653 CASE_FIXED_FP(7);
654 CASE_FIXED_FP(8);
655 CASE_FIXED_FP(9);
656 CASE_FIXED_FP(10);
657 CASE_FIXED_FP(11);
658
659 CASE_FIXED_FP(12);
660 CASE_FIXED_FP(13);
661 CASE_FIXED_FP(14);
662 CASE_FIXED_FP(15);
663 CASE_FIXED_FP(16);
664 CASE_FIXED_FP(17);
665 CASE_FIXED_FP(18);
666 CASE_FIXED_FP(19);
667 CASE_FIXED_FP(20);
668 CASE_FIXED_FP(21);
669 CASE_FIXED_FP(22);
670 CASE_FIXED_FP(23);
671 CASE_FIXED_FP(24);
672 CASE_FIXED_FP(25);
673 CASE_FIXED_FP(26);
674 CASE_FIXED_FP(27);
675 CASE_FIXED_FP(28);
676 CASE_FIXED_FP(29);
677 CASE_FIXED_FP(30);
678 CASE_FIXED_FP(31);
679 CASE_FIXED_FP(32);
680 CASE_FIXED_FP(33);
681 CASE_FIXED_FP(34);
682 CASE_FIXED_FP(35);
683 CASE_FIXED_FP(36);
684 CASE_FIXED_FP(37);
685 CASE_FIXED_FP(38);
686 CASE_FIXED_FP(39);
687 CASE_FIXED_FP(40);
688 CASE_FIXED_FP(41);
689 CASE_FIXED_FP(42);
690 CASE_FIXED_FP(43);
691 CASE_FIXED_FP(44);
692 CASE_FIXED_FP(45);
693 CASE_FIXED_FP(46);
694 CASE_FIXED_FP(47);
695 CASE_FIXED_FP(48);
696 CASE_FIXED_FP(49);
697 CASE_FIXED_FP(50);
698 CASE_FIXED_FP(51);
699 CASE_FIXED_FP(52);
700 CASE_FIXED_FP(53);
701 CASE_FIXED_FP(54);
702 CASE_FIXED_FP(55);
703 CASE_FIXED_FP(56);
704 CASE_FIXED_FP(57);
705 CASE_FIXED_FP(58);
706 CASE_FIXED_FP(59);
707 CASE_FIXED_FP(60);
708 CASE_FIXED_FP(61);
709 CASE_FIXED_FP(62);
710 CASE_FIXED_FP(63);
711 CASE_FIXED_FP(64);
712 CASE_FIXED_FP(65);
713 CASE_FIXED_FP(66);
714 CASE_FIXED_FP(67);
715 CASE_FIXED_FP(68);
716 CASE_FIXED_FP(69);
717 CASE_FIXED_FP(70);
718 CASE_FIXED_FP(71);
719 CASE_FIXED_FP(72);
720 CASE_FIXED_FP(73);
721 CASE_FIXED_FP(74);
722 CASE_FIXED_FP(75);
723 CASE_FIXED_FP(76);
724 CASE_FIXED_FP(77);
725 CASE_FIXED_FP(78);
726 CASE_FIXED_FP(79);
727 CASE_FIXED_FP(80);
728 CASE_FIXED_FP(81);
729 CASE_FIXED_FP(82);
730 CASE_FIXED_FP(83);
731 CASE_FIXED_FP(84);
732 CASE_FIXED_FP(85);
733 CASE_FIXED_FP(86);
734 CASE_FIXED_FP(87);
735 CASE_FIXED_FP(88);
736 CASE_FIXED_FP(89);
737 CASE_FIXED_FP(90);
738 CASE_FIXED_FP(91);
739 CASE_FIXED_FP(92);
740 CASE_FIXED_FP(93);
741 CASE_FIXED_FP(94);
742 CASE_FIXED_FP(95);
743 CASE_FIXED_FP(96);
744 CASE_FIXED_FP(97);
745 CASE_FIXED_FP(98);
746 CASE_FIXED_FP(99);
747 CASE_FIXED_FP(100);
748 CASE_FIXED_FP(101);
749 CASE_FIXED_FP(102);
750 CASE_FIXED_FP(103);
751 CASE_FIXED_FP(104);
752 CASE_FIXED_FP(105);
753 CASE_FIXED_FP(106);
754 CASE_FIXED_FP(107);
755 CASE_FIXED_FP(108);
756 CASE_FIXED_FP(109);
757 CASE_FIXED_FP(110);
758 CASE_FIXED_FP(111);
759 CASE_FIXED_FP(112);
760 CASE_FIXED_FP(113);
761 CASE_FIXED_FP(114);
762 CASE_FIXED_FP(115);
763 CASE_FIXED_FP(116);
764 CASE_FIXED_FP(117);
765 CASE_FIXED_FP(118);
766 CASE_FIXED_FP(119);
767 CASE_FIXED_FP(120);
768 CASE_FIXED_FP(121);
769 CASE_FIXED_FP(122);
770 CASE_FIXED_FP(123);
771 CASE_FIXED_FP(124);
772 CASE_FIXED_FP(125);
773 CASE_FIXED_FP(126);
774 CASE_FIXED_FP(127);
775 }
776}
777
778void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
779 struct ia64_fpreg *val)
780{
781 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
782
783 getfpreg(reg, val, regs); /* FIXME: handle NATs later*/
784}
785
786void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
787 struct ia64_fpreg *val)
788{
789 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
790
791 if (reg > 1)
792 setfpreg(reg, val, regs); /* FIXME: handle NATs later*/
793}
794
795/************************************************************************
796 * lsapic timer
797 ***********************************************************************/
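/* Guest ITC = host ar.itc + per-vcpu offset; last_itc keeps successive reads monotonic. */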
798u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
799{
800 unsigned long guest_itc;
801 guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
802
803 if (guest_itc >= VMX(vcpu, last_itc)) {
804 VMX(vcpu, last_itc) = guest_itc;
805 return guest_itc;
806 } else
807 return VMX(vcpu, last_itc);
808}
809
810static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
811static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
812{
813 struct kvm_vcpu *v;
814 int i;
815 long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
816 unsigned long vitv = VCPU(vcpu, itv);
817
818 if (vcpu->vcpu_id == 0) {
819 for (i = 0; i < MAX_VCPU_NUM; i++) {
820 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
821 VMX(v, itc_offset) = itc_offset;
822 VMX(v, last_itc) = 0;
823 }
824 }
825 VMX(vcpu, last_itc) = 0;
826 if (VCPU(vcpu, itm) <= val) {
827 VMX(vcpu, itc_check) = 0;
828 vcpu_unpend_interrupt(vcpu, vitv);
829 } else {
830 VMX(vcpu, itc_check) = 1;
831 vcpu_set_itm(vcpu, VCPU(vcpu, itm));
832 }
833
834}
835
836static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
837{
838 return ((u64)VCPU(vcpu, itm));
839}
840
841static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
842{
843 unsigned long vitv = VCPU(vcpu, itv);
844 VCPU(vcpu, itm) = val;
845
846 if (val > vcpu_get_itc(vcpu)) {
847 VMX(vcpu, itc_check) = 1;
848 vcpu_unpend_interrupt(vcpu, vitv);
849 VMX(vcpu, timer_pending) = 0;
850 } else
851 VMX(vcpu, itc_check) = 0;
852}
853
854#define ITV_VECTOR(itv) (itv&0xff)
855#define ITV_IRQ_MASK(itv) (itv&(1<<16))
856
857static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
858{
859 VCPU(vcpu, itv) = val;
860 if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
861 vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
862 vcpu->arch.timer_pending = 0;
863 }
864}
865
866static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
867{
868 int vec;
869
870 vec = highest_inservice_irq(vcpu);
871 if (vec == NULL_VECTOR)
872 return;
873 VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
874 VCPU(vcpu, eoi) = 0;
875 vcpu->arch.irq_new_pending = 1;
876
877}
878
879/* See Table 5-8 in SDM vol2 for the definition */
880int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
881{
882 union ia64_tpr vtpr;
883
884 vtpr.val = VCPU(vcpu, tpr);
885
886 if (h_inservice == NMI_VECTOR)
887 return IRQ_MASKED_BY_INSVC;
888
889 if (h_pending == NMI_VECTOR) {
890 /* Non Maskable Interrupt */
891 return IRQ_NO_MASKED;
892 }
893
894 if (h_inservice == ExtINT_VECTOR)
895 return IRQ_MASKED_BY_INSVC;
896
897 if (h_pending == ExtINT_VECTOR) {
898 if (vtpr.mmi) {
899 /* mask all external IRQ */
900 return IRQ_MASKED_BY_VTPR;
901 } else
902 return IRQ_NO_MASKED;
903 }
904
905 if (is_higher_irq(h_pending, h_inservice)) {
906 if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
907 return IRQ_NO_MASKED;
908 else
909 return IRQ_MASKED_BY_VTPR;
910 } else {
911 return IRQ_MASKED_BY_INSVC;
912 }
913}
914
915void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
916{
917 long spsr;
918 int ret;
919
920 local_irq_save(spsr);
921 ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
922 local_irq_restore(spsr);
923
924 vcpu->arch.irq_new_pending = 1;
925}
926
927void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
928{
929 long spsr;
930 int ret;
931
932 local_irq_save(spsr);
933 ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
934 local_irq_restore(spsr);
935 if (ret) {
936 vcpu->arch.irq_new_pending = 1;
937 wmb();
938 }
939}
940
941void update_vhpi(struct kvm_vcpu *vcpu, int vec)
942{
943 u64 vhpi;
944
945 if (vec == NULL_VECTOR)
946 vhpi = 0;
947 else if (vec == NMI_VECTOR)
948 vhpi = 32;
949 else if (vec == ExtINT_VECTOR)
950 vhpi = 16;
951 else
952 vhpi = vec >> 4;
953
954 VCPU(vcpu, vhpi) = vhpi;
955 if (VCPU(vcpu, vac).a_int)
956 ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
957 (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
958}
959
960u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
961{
962 int vec, h_inservice, mask;
963
964 vec = highest_pending_irq(vcpu);
965 h_inservice = highest_inservice_irq(vcpu);
966 mask = irq_masked(vcpu, vec, h_inservice);
967 if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
968 if (VCPU(vcpu, vhpi))
969 update_vhpi(vcpu, NULL_VECTOR);
970 return IA64_SPURIOUS_INT_VECTOR;
971 }
972 if (mask == IRQ_MASKED_BY_VTPR) {
973 update_vhpi(vcpu, vec);
974 return IA64_SPURIOUS_INT_VECTOR;
975 }
976 VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
977 vcpu_unpend_interrupt(vcpu, vec);
978 return (u64)vec;
979}
980
981/**************************************************************************
982 Privileged operation emulation routines
983 **************************************************************************/
984u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
985{
986 union ia64_pta vpta;
987 union ia64_rr vrr;
988 u64 pval;
989 u64 vhpt_offset;
990
991 vpta.val = vcpu_get_pta(vcpu);
992 vrr.val = vcpu_get_rr(vcpu, vadr);
993 vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
994 if (vpta.vf) {
995 pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
996 vpta.val, 0, 0, 0, 0);
997 } else {
998 pval = (vadr & VRN_MASK) | vhpt_offset |
999 (vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
1000 }
1001 return pval;
1002}
1003
1004u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
1005{
1006 union ia64_rr vrr;
1007 union ia64_pta vpta;
1008 u64 pval;
1009
1010 vpta.val = vcpu_get_pta(vcpu);
1011 vrr.val = vcpu_get_rr(vcpu, vadr);
1012 if (vpta.vf) {
1013 pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
1014 0, 0, 0, 0, 0);
1015 } else
1016 pval = 1;
1017
1018 return pval;
1019}
1020
1021u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
1022{
1023 struct thash_data *data;
1024 union ia64_pta vpta;
1025 u64 key;
1026
1027 vpta.val = vcpu_get_pta(vcpu);
1028 if (vpta.vf == 0) {
1029 key = 1;
1030 return key;
1031 }
1032 data = vtlb_lookup(vcpu, vadr, D_TLB);
1033 if (!data || !data->p)
1034 key = 1;
1035 else
1036 key = data->key;
1037
1038 return key;
1039}
1040
1041
1042
1043void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
1044{
1045 unsigned long thash, vadr;
1046
1047 vadr = vcpu_get_gr(vcpu, inst.M46.r3);
1048 thash = vcpu_thash(vcpu, vadr);
1049 vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
1050}
1051
1052
1053void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
1054{
1055 unsigned long tag, vadr;
1056
1057 vadr = vcpu_get_gr(vcpu, inst.M46.r3);
1058 tag = vcpu_ttag(vcpu, vadr);
1059 vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
1060}
1061
1062int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
1063{
1064 struct thash_data *data;
1065 union ia64_isr visr, pt_isr;
1066 struct kvm_pt_regs *regs;
1067 struct ia64_psr vpsr;
1068
1069 regs = vcpu_regs(vcpu);
1070 pt_isr.val = VMX(vcpu, cr_isr);
1071 visr.val = 0;
1072 visr.ei = pt_isr.ei;
1073 visr.ir = pt_isr.ir;
1074 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1075 visr.na = 1;
1076
1077 data = vhpt_lookup(vadr);
1078 if (data) {
1079 if (data->p == 0) {
1080 vcpu_set_isr(vcpu, visr.val);
1081 data_page_not_present(vcpu, vadr);
1082 return IA64_FAULT;
1083 } else if (data->ma == VA_MATTR_NATPAGE) {
1084 vcpu_set_isr(vcpu, visr.val);
1085 dnat_page_consumption(vcpu, vadr);
1086 return IA64_FAULT;
1087 } else {
1088 *padr = (data->gpaddr >> data->ps << data->ps) |
1089 (vadr & (PSIZE(data->ps) - 1));
1090 return IA64_NO_FAULT;
1091 }
1092 }
1093
1094 data = vtlb_lookup(vcpu, vadr, D_TLB);
1095 if (data) {
1096 if (data->p == 0) {
1097 vcpu_set_isr(vcpu, visr.val);
1098 data_page_not_present(vcpu, vadr);
1099 return IA64_FAULT;
1100 } else if (data->ma == VA_MATTR_NATPAGE) {
1101 vcpu_set_isr(vcpu, visr.val);
1102 dnat_page_consumption(vcpu, vadr);
1103 return IA64_FAULT;
1104 } else {
1105 *padr = ((data->ppn >> (data->ps - 12)) << data->ps)
1106 | (vadr & (PSIZE(data->ps) - 1));
1107 return IA64_NO_FAULT;
1108 }
1109 }
1110 if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
1111 if (vpsr.ic) {
1112 vcpu_set_isr(vcpu, visr.val);
1113 alt_dtlb(vcpu, vadr);
1114 return IA64_FAULT;
1115 } else {
1116 nested_dtlb(vcpu);
1117 return IA64_FAULT;
1118 }
1119 } else {
1120 if (vpsr.ic) {
1121 vcpu_set_isr(vcpu, visr.val);
1122 dvhpt_fault(vcpu, vadr);
1123 return IA64_FAULT;
1124 } else {
1125 nested_dtlb(vcpu);
1126 return IA64_FAULT;
1127 }
1128 }
1129
1130 return IA64_NO_FAULT;
1131}
1132
1133
1134int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
1135{
1136 unsigned long r1, r3;
1137
1138 r3 = vcpu_get_gr(vcpu, inst.M46.r3);
1139
1140 if (vcpu_tpa(vcpu, r3, &r1))
1141 return IA64_FAULT;
1142
1143 vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
1144 return(IA64_NO_FAULT);
1145}
1146
1147void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
1148{
1149 unsigned long r1, r3;
1150
1151 r3 = vcpu_get_gr(vcpu, inst.M46.r3);
1152 r1 = vcpu_tak(vcpu, r3);
1153 vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
1154}
1155
1156
1157/************************************
1158 * Insert/Purge translation register/cache
1159 ************************************/
1160void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
1161{
1162 thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
1163}
1164
1165void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
1166{
1167 thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
1168}
1169
1170void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
1171{
1172 u64 ps, va, rid;
1173 struct thash_data *p_itr;
1174
1175 ps = itir_ps(itir);
1176 va = PAGEALIGN(ifa, ps);
1177 pte &= ~PAGE_FLAGS_RV_MASK;
1178 rid = vcpu_get_rr(vcpu, ifa);
1179 rid = rid & RR_RID_MASK;
1180 p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
1181 vcpu_set_tr(p_itr, pte, itir, va, rid);
1182 vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
1183}
1184
1185
1186void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
1187{
1188 u64 gpfn;
1189 u64 ps, va, rid;
1190 struct thash_data *p_dtr;
1191
1192 ps = itir_ps(itir);
1193 va = PAGEALIGN(ifa, ps);
1194 pte &= ~PAGE_FLAGS_RV_MASK;
1195
1196 if (ps != _PAGE_SIZE_16M)
1197 thash_purge_entries(vcpu, va, ps);
1198 gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
1199 if (__gpfn_is_io(gpfn))
1200 pte |= VTLB_PTE_IO;
1201 rid = vcpu_get_rr(vcpu, va);
1202 rid = rid & RR_RID_MASK;
1203 p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
1204 vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
1205 pte, itir, va, rid);
1206 vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
1207}
1208
1209void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
1210{
1211 int index;
1212 u64 va;
1213
1214 va = PAGEALIGN(ifa, ps);
1215 while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
1216 vcpu->arch.dtrs[index].page_flags = 0;
1217
1218 thash_purge_entries(vcpu, va, ps);
1219}
1220
1221void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
1222{
1223 int index;
1224 u64 va;
1225
1226 va = PAGEALIGN(ifa, ps);
1227 while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
1228 vcpu->arch.itrs[index].page_flags = 0;
1229
1230 thash_purge_entries(vcpu, va, ps);
1231}
1232
1233void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
1234{
1235 va = PAGEALIGN(va, ps);
1236 thash_purge_entries(vcpu, va, ps);
1237}
1238
1239void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
1240{
1241 thash_purge_all(vcpu);
1242}
1243
1244void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
1245{
1246 struct exit_ctl_data *p = &vcpu->arch.exit_data;
1247 long psr;
1248 local_irq_save(psr);
1249 p->exit_reason = EXIT_REASON_PTC_G;
1250
1251 p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
1252 p->u.ptc_g_data.vaddr = va;
1253 p->u.ptc_g_data.ps = ps;
1254 vmm_transition(vcpu);
1255 /* Do Local Purge Here*/
1256 vcpu_ptc_l(vcpu, va, ps);
1257 local_irq_restore(psr);
1258}
1259
1260
1261void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
1262{
1263 vcpu_ptc_ga(vcpu, va, ps);
1264}
1265
1266void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
1267{
1268 unsigned long ifa;
1269
1270 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1271 vcpu_ptc_e(vcpu, ifa);
1272}
1273
1274void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
1275{
1276 unsigned long ifa, itir;
1277
1278 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1279 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1280 vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
1281}
1282
1283void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
1284{
1285 unsigned long ifa, itir;
1286
1287 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1288 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1289 vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
1290}
1291
1292void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
1293{
1294 unsigned long ifa, itir;
1295
1296 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1297 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1298 vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
1299}
1300
1301void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
1302{
1303 unsigned long ifa, itir;
1304
1305 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1306 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1307 vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
1308}
1309
1310void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
1311{
1312 unsigned long ifa, itir;
1313
1314 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1315 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1316 vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
1317}
1318
1319void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
1320{
1321 unsigned long itir, ifa, pte, slot;
1322
1323 slot = vcpu_get_gr(vcpu, inst.M45.r3);
1324 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1325 itir = vcpu_get_itir(vcpu);
1326 ifa = vcpu_get_ifa(vcpu);
1327 vcpu_itr_d(vcpu, slot, pte, itir, ifa);
1328}
1329
1330
1331
1332void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
1333{
1334 unsigned long itir, ifa, pte, slot;
1335
1336 slot = vcpu_get_gr(vcpu, inst.M45.r3);
1337 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1338 itir = vcpu_get_itir(vcpu);
1339 ifa = vcpu_get_ifa(vcpu);
1340 vcpu_itr_i(vcpu, slot, pte, itir, ifa);
1341}
1342
1343void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
1344{
1345 unsigned long itir, ifa, pte;
1346
1347 itir = vcpu_get_itir(vcpu);
1348 ifa = vcpu_get_ifa(vcpu);
1349 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1350 vcpu_itc_d(vcpu, pte, itir, ifa);
1351}
1352
1353void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
1354{
1355 unsigned long itir, ifa, pte;
1356
1357 itir = vcpu_get_itir(vcpu);
1358 ifa = vcpu_get_ifa(vcpu);
1359 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1360 vcpu_itc_i(vcpu, pte, itir, ifa);
1361}
1362
1363/*************************************
1364 * Moves to semi-privileged registers
1365 *************************************/
1366
1367void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
1368{
1369 unsigned long imm;
1370
1371 if (inst.M30.s)
1372 imm = -inst.M30.imm;
1373 else
1374 imm = inst.M30.imm;
1375
1376 vcpu_set_itc(vcpu, imm);
1377}
1378
1379void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
1380{
1381 unsigned long r2;
1382
1383 r2 = vcpu_get_gr(vcpu, inst.M29.r2);
1384 vcpu_set_itc(vcpu, r2);
1385}
1386
1387
1388void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
1389{
1390 unsigned long r1;
1391
1392 r1 = vcpu_get_itc(vcpu);
1393 vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
1394}
1395/**************************************************************************
1396 VCPU protection key register access routines
1397 **************************************************************************/
1398
1399unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
1400{
1401 return ((unsigned long)ia64_get_pkr(reg));
1402}
1403
1404void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
1405{
1406 ia64_set_pkr(reg, val);
1407}
1408
1409
1410unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
1411{
1412 union ia64_rr rr, rr1;
1413
1414 rr.val = vcpu_get_rr(vcpu, ifa);
1415 rr1.val = 0;
1416 rr1.ps = rr.ps;
1417 rr1.rid = rr.rid;
1418 return (rr1.val);
1419}
1420
1421
1422
1423/********************************
1424 * Moves to privileged registers
1425 ********************************/
1426unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
1427 unsigned long val)
1428{
1429 union ia64_rr oldrr, newrr;
1430 unsigned long rrval;
1431 struct exit_ctl_data *p = &vcpu->arch.exit_data;
1432 unsigned long psr;
1433
1434 oldrr.val = vcpu_get_rr(vcpu, reg);
1435 newrr.val = val;
1436 vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
1437
1438 switch ((unsigned long)(reg >> VRN_SHIFT)) {
1439 case VRN6:
1440 vcpu->arch.vmm_rr = vrrtomrr(val);
1441 local_irq_save(psr);
1442 p->exit_reason = EXIT_REASON_SWITCH_RR6;
1443 vmm_transition(vcpu);
1444 local_irq_restore(psr);
1445 break;
1446 case VRN4:
1447 rrval = vrrtomrr(val);
1448 vcpu->arch.metaphysical_saved_rr4 = rrval;
1449 if (!is_physical_mode(vcpu))
1450 ia64_set_rr(reg, rrval);
1451 break;
1452 case VRN0:
1453 rrval = vrrtomrr(val);
1454 vcpu->arch.metaphysical_saved_rr0 = rrval;
1455 if (!is_physical_mode(vcpu))
1456 ia64_set_rr(reg, rrval);
1457 break;
1458 default:
1459 ia64_set_rr(reg, vrrtomrr(val));
1460 break;
1461 }
1462
1463 return (IA64_NO_FAULT);
1464}
1465
1466
1467
1468void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
1469{
1470 unsigned long r3, r2;
1471
1472 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1473 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1474 vcpu_set_rr(vcpu, r3, r2);
1475}
1476
1477void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
1478{
1479}
1480
1481void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
1482{
1483}
1484
1485void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
1486{
1487 unsigned long r3, r2;
1488
1489 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1490 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1491 vcpu_set_pmc(vcpu, r3, r2);
1492}
1493
1494void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
1495{
1496 unsigned long r3, r2;
1497
1498 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1499 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1500 vcpu_set_pmd(vcpu, r3, r2);
1501}
1502
1503void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
1504{
1505 u64 r3, r2;
1506
1507 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1508 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1509 vcpu_set_pkr(vcpu, r3, r2);
1510}
1511
1512
1513
1514void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
1515{
1516 unsigned long r3, r1;
1517
1518 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1519 r1 = vcpu_get_rr(vcpu, r3);
1520 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1521}
1522
1523void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
1524{
1525 unsigned long r3, r1;
1526
1527 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1528 r1 = vcpu_get_pkr(vcpu, r3);
1529 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1530}
1531
1532void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
1533{
1534 unsigned long r3, r1;
1535
1536 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1537 r1 = vcpu_get_dbr(vcpu, r3);
1538 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1539}
1540
1541void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
1542{
1543 unsigned long r3, r1;
1544
1545 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1546 r1 = vcpu_get_ibr(vcpu, r3);
1547 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1548}
1549
1550void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
1551{
1552 unsigned long r3, r1;
1553
1554 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1555 r1 = vcpu_get_pmc(vcpu, r3);
1556 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1557}
1558
1559
1560unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
1561{
1562 /* FIXME: This could get called as a result of a rsvd-reg fault */
1563 if (reg > (ia64_get_cpuid(3) & 0xff))
1564 return 0;
1565 else
1566 return ia64_get_cpuid(reg);
1567}
1568
1569void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
1570{
1571 unsigned long r3, r1;
1572
1573 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1574 r1 = vcpu_get_cpuid(vcpu, r3);
1575 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1576}
1577
1578void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
1579{
1580 VCPU(vcpu, tpr) = val;
1581 vcpu->arch.irq_check = 1;
1582}
1583
1584unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
1585{
1586 unsigned long r2;
1587
1588 r2 = vcpu_get_gr(vcpu, inst.M32.r2);
1589 VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
1590
1591 switch (inst.M32.cr3) {
1592 case 0:
1593 vcpu_set_dcr(vcpu, r2);
1594 break;
1595 case 1:
1596 vcpu_set_itm(vcpu, r2);
1597 break;
1598 case 66:
1599 vcpu_set_tpr(vcpu, r2);
1600 break;
1601 case 67:
1602 vcpu_set_eoi(vcpu, r2);
1603 break;
1604 default:
1605 break;
1606 }
1607
1608 return 0;
1609}
1610
1611
1612unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
1613{
1614 unsigned long tgt = inst.M33.r1;
1615 unsigned long val;
1616
1617 switch (inst.M33.cr3) {
1618 case 65:
1619 val = vcpu_get_ivr(vcpu);
1620 vcpu_set_gr(vcpu, tgt, val, 0);
1621 break;
1622
1623 case 67:
1624 vcpu_set_gr(vcpu, tgt, 0L, 0);
1625 break;
1626 default:
1627 val = VCPU(vcpu, vcr[inst.M33.cr3]);
1628 vcpu_set_gr(vcpu, tgt, val, 0);
1629 break;
1630 }
1631
1632 return 0;
1633}
1634
1635
1636
1637void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
1638{
1639
1640 unsigned long mask;
1641 struct kvm_pt_regs *regs;
1642 struct ia64_psr old_psr, new_psr;
1643
1644 old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1645
1646 regs = vcpu_regs(vcpu);
1647	/* We only support a guest with:
1648	 * vpsr.pk = 0
1649	 * vpsr.is = 0
1650	 * Anything else causes a panic.
1651	 */
1652 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
1653 panic_vm(vcpu);
1654
1655	/*
1656	 * The IA64_PSR bits id/da/dd/ss/ed/ia become 0 after each
1657	 * instruction completes successfully, so they are not tracked in
1658	 * the virtual PSR; they are handled via the machine PSR instead.
1659	 */
1660 VCPU(vcpu, vpsr) = val
1661 & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
1662 IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
1663
1664 if (!old_psr.i && (val & IA64_PSR_I)) {
1665 /* vpsr.i 0->1 */
1666 vcpu->arch.irq_check = 1;
1667 }
1668 new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1669
1670	/*
1671	 * All vIA64_PSR bits are propagated to mPSR (v->tf->tf_special.psr),
1672	 * except for the following bits:
1673	 * ic/i/dt/si/rt/mc/it/bn/vm
1674	 */
1675 mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
1676 IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
1677 IA64_PSR_VM;
1678
1679 regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
1680
1681 check_mm_mode_switch(vcpu, old_psr, new_psr);
1682
1683 return ;
1684}
1685
1686unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
1687{
1688 struct ia64_psr vpsr;
1689
1690 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1691 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1692
1693 if (!vpsr.ic)
1694 VCPU(vcpu, ifs) = regs->cr_ifs;
1695 regs->cr_ifs = IA64_IFS_V;
1696 return (IA64_NO_FAULT);
1697}
1698
1699
1700
1701/**************************************************************************
1702 VCPU banked general register access routines
1703 **************************************************************************/
1704#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
1705 do { \
1706 __asm__ __volatile__ ( \
1707 ";;extr.u %0 = %3,%6,16;;\n" \
1708 "dep %1 = %0, %1, 0, 16;;\n" \
1709 "st8 [%4] = %1\n" \
1710 "extr.u %0 = %2, 16, 16;;\n" \
1711 "dep %3 = %0, %3, %6, 16;;\n" \
1712 "st8 [%5] = %3\n" \
1713 ::"r"(i), "r"(*b1unat), "r"(*b0unat), \
1714 "r"(*runat), "r"(b1unat), "r"(runat), \
1715 "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \
1716 } while (0)
1717
1718void vcpu_bsw0(struct kvm_vcpu *vcpu)
1719{
1720 unsigned long i;
1721
1722 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1723 unsigned long *r = &regs->r16;
1724 unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
1725 unsigned long *b1 = &VCPU(vcpu, vgr[0]);
1726 unsigned long *runat = &regs->eml_unat;
1727 unsigned long *b0unat = &VCPU(vcpu, vbnat);
1728 unsigned long *b1unat = &VCPU(vcpu, vnat);
1729
1730
1731 if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
1732 for (i = 0; i < 16; i++) {
1733 *b1++ = *r;
1734 *r++ = *b0++;
1735 }
1736 vcpu_bsw0_unat(i, b0unat, b1unat, runat,
1737 VMM_PT_REGS_R16_SLOT);
1738 VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
1739 }
1740}
1741
1742#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
1743 do { \
1744 __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \
1745 "dep %1 = %0, %1, 16, 16;;\n" \
1746 "st8 [%4] = %1\n" \
1747 "extr.u %0 = %2, 0, 16;;\n" \
1748 "dep %3 = %0, %3, %6, 16;;\n" \
1749 "st8 [%5] = %3\n" \
1750 ::"r"(i), "r"(*b0unat), "r"(*b1unat), \
1751 "r"(*runat), "r"(b0unat), "r"(runat), \
1752 "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \
1753 } while (0)
1754
1755void vcpu_bsw1(struct kvm_vcpu *vcpu)
1756{
1757 unsigned long i;
1758 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1759 unsigned long *r = &regs->r16;
1760 unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
1761 unsigned long *b1 = &VCPU(vcpu, vgr[0]);
1762 unsigned long *runat = &regs->eml_unat;
1763 unsigned long *b0unat = &VCPU(vcpu, vbnat);
1764 unsigned long *b1unat = &VCPU(vcpu, vnat);
1765
1766 if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
1767 for (i = 0; i < 16; i++) {
1768 *b0++ = *r;
1769 *r++ = *b1++;
1770 }
1771 vcpu_bsw1_unat(i, b0unat, b1unat, runat,
1772 VMM_PT_REGS_R16_SLOT);
1773 VCPU(vcpu, vpsr) |= IA64_PSR_BN;
1774 }
1775}
1776
1777
1778
1779
1780void vcpu_rfi(struct kvm_vcpu *vcpu)
1781{
1782 unsigned long ifs, psr;
1783 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1784
1785 psr = VCPU(vcpu, ipsr);
1786 if (psr & IA64_PSR_BN)
1787 vcpu_bsw1(vcpu);
1788 else
1789 vcpu_bsw0(vcpu);
1790 vcpu_set_psr(vcpu, psr);
1791 ifs = VCPU(vcpu, ifs);
1792 if (ifs >> 63)
1793 regs->cr_ifs = ifs;
1794 regs->cr_iip = VCPU(vcpu, iip);
1795}
1796
1797
1798/*
1799 * VPSR cannot track the be/up/ac/mfl/mfh/cpl/ri bits of the guest PSR,
1800 * so this function assembles the complete guest PSR from vpsr and cr.ipsr.
1801 */
1802
1803unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
1804{
1805 unsigned long mask;
1806 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1807
1808 mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
1809 IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
1810 return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
1811}
1812
1813void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
1814{
1815 unsigned long vpsr;
1816 unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
1817 | inst.M44.imm;
1818
1819 vpsr = vcpu_get_psr(vcpu);
1820 vpsr &= (~imm24);
1821 vcpu_set_psr(vcpu, vpsr);
1822}
1823
1824void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
1825{
1826 unsigned long vpsr;
1827 unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
1828 | inst.M44.imm;
1829
1830 vpsr = vcpu_get_psr(vcpu);
1831 vpsr |= imm24;
1832 vcpu_set_psr(vcpu, vpsr);
1833}
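/*
 * Worked example (illustrative only): rsm/ssm carry a 24-bit system-mask
 * immediate split across the M44 fields imm (bits 0-20), i2 (bits 21-22)
 * and i (bit 23), which kvm_rsm()/kvm_ssm() reassemble as
 * (i << 23) | (i2 << 21) | imm.  A guest "ssm 0x4000" (psr.i, bit 14)
 * encodes entirely within the imm field, so imm24 == 0x4000 and the
 * vcpu_set_psr() call above sees vpsr with the interrupt bit set.
 */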
1834
1835/* Generate Mask
1836 * Parameter:
1837 * bit -- starting bit
1838 * len -- how many bits
1839 */
1840#define MASK(bit,len) \
1841({ \
1842 __u64 ret; \
1843 \
1844 __asm __volatile("dep %0=-1, r0, %1, %2"\
1845 : "=r" (ret): \
1846 "M" (bit), \
1847 "M" (len)); \
1848 ret; \
1849})
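/*
 * Worked example (illustrative only): the dep instruction above deposits
 * "len" one-bits starting at position "bit" into a zeroed register, so
 *	MASK(0, 32)  == 0x00000000ffffffff
 *	MASK(32, 32) == 0xffffffff00000000
 *	MASK(35, 2)  == 0x0000001800000000
 * vcpu_set_psr_l() and kvm_mov_from_psr() below use these to split the
 * PSR into its lower and upper halves.
 */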
1850
1851void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
1852{
1853 val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
1854 vcpu_set_psr(vcpu, val);
1855}
1856
1857void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
1858{
1859 unsigned long val;
1860
1861 val = vcpu_get_gr(vcpu, inst.M35.r2);
1862 vcpu_set_psr_l(vcpu, val);
1863}
1864
1865void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
1866{
1867 unsigned long val;
1868
1869 val = vcpu_get_psr(vcpu);
1870 val = (val & MASK(0, 32)) | (val & MASK(35, 2));
1871 vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
1872}
1873
1874void vcpu_increment_iip(struct kvm_vcpu *vcpu)
1875{
1876 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1877 struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
1878 if (ipsr->ri == 2) {
1879 ipsr->ri = 0;
1880 regs->cr_iip += 16;
1881 } else
1882 ipsr->ri++;
1883}
1884
1885void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
1886{
1887 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1888 struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
1889
1890 if (ipsr->ri == 0) {
1891 ipsr->ri = 2;
1892 regs->cr_iip -= 16;
1893 } else
1894 ipsr->ri--;
1895}
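/*
 * Illustrative note: an IA64 bundle is 16 bytes wide and holds three
 * instruction slots, selected by ipsr.ri (0-2).  The two helpers above
 * step the guest ip one slot at a time; e.g. after emulating an
 * instruction in slot 2, vcpu_increment_iip() wraps ri back to 0 and
 * advances cr.iip to the next 16-byte bundle.
 */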
1896
1897/** Emulate a privileged operation.
1898 *
1899 *
1900 * @param vcpu virtual cpu
1901 * @cause the event that caused the virtualization fault
1902 * @opcode the instruction that caused the virtualization fault
1903 */
1904
1905void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
1906{
1907 unsigned long status, cause, opcode ;
1908 INST64 inst;
1909
1910 status = IA64_NO_FAULT;
1911 cause = VMX(vcpu, cause);
1912 opcode = VMX(vcpu, opcode);
1913 inst.inst = opcode;
1914 /*
1915 * Switch to actual virtual rid in rr0 and rr4,
1916 * which is required by some tlb related instructions.
1917 */
1918 prepare_if_physical_mode(vcpu);
1919
1920 switch (cause) {
1921 case EVENT_RSM:
1922 kvm_rsm(vcpu, inst);
1923 break;
1924 case EVENT_SSM:
1925 kvm_ssm(vcpu, inst);
1926 break;
1927 case EVENT_MOV_TO_PSR:
1928 kvm_mov_to_psr(vcpu, inst);
1929 break;
1930 case EVENT_MOV_FROM_PSR:
1931 kvm_mov_from_psr(vcpu, inst);
1932 break;
1933 case EVENT_MOV_FROM_CR:
1934 kvm_mov_from_cr(vcpu, inst);
1935 break;
1936 case EVENT_MOV_TO_CR:
1937 kvm_mov_to_cr(vcpu, inst);
1938 break;
1939 case EVENT_BSW_0:
1940 vcpu_bsw0(vcpu);
1941 break;
1942 case EVENT_BSW_1:
1943 vcpu_bsw1(vcpu);
1944 break;
1945 case EVENT_COVER:
1946 vcpu_cover(vcpu);
1947 break;
1948 case EVENT_RFI:
1949 vcpu_rfi(vcpu);
1950 break;
1951 case EVENT_ITR_D:
1952 kvm_itr_d(vcpu, inst);
1953 break;
1954 case EVENT_ITR_I:
1955 kvm_itr_i(vcpu, inst);
1956 break;
1957 case EVENT_PTR_D:
1958 kvm_ptr_d(vcpu, inst);
1959 break;
1960 case EVENT_PTR_I:
1961 kvm_ptr_i(vcpu, inst);
1962 break;
1963 case EVENT_ITC_D:
1964 kvm_itc_d(vcpu, inst);
1965 break;
1966 case EVENT_ITC_I:
1967 kvm_itc_i(vcpu, inst);
1968 break;
1969 case EVENT_PTC_L:
1970 kvm_ptc_l(vcpu, inst);
1971 break;
1972 case EVENT_PTC_G:
1973 kvm_ptc_g(vcpu, inst);
1974 break;
1975 case EVENT_PTC_GA:
1976 kvm_ptc_ga(vcpu, inst);
1977 break;
1978 case EVENT_PTC_E:
1979 kvm_ptc_e(vcpu, inst);
1980 break;
1981 case EVENT_MOV_TO_RR:
1982 kvm_mov_to_rr(vcpu, inst);
1983 break;
1984 case EVENT_MOV_FROM_RR:
1985 kvm_mov_from_rr(vcpu, inst);
1986 break;
1987 case EVENT_THASH:
1988 kvm_thash(vcpu, inst);
1989 break;
1990 case EVENT_TTAG:
1991 kvm_ttag(vcpu, inst);
1992 break;
1993 case EVENT_TPA:
1994 status = kvm_tpa(vcpu, inst);
1995 break;
1996 case EVENT_TAK:
1997 kvm_tak(vcpu, inst);
1998 break;
1999 case EVENT_MOV_TO_AR_IMM:
2000 kvm_mov_to_ar_imm(vcpu, inst);
2001 break;
2002 case EVENT_MOV_TO_AR:
2003 kvm_mov_to_ar_reg(vcpu, inst);
2004 break;
2005 case EVENT_MOV_FROM_AR:
2006 kvm_mov_from_ar_reg(vcpu, inst);
2007 break;
2008 case EVENT_MOV_TO_DBR:
2009 kvm_mov_to_dbr(vcpu, inst);
2010 break;
2011 case EVENT_MOV_TO_IBR:
2012 kvm_mov_to_ibr(vcpu, inst);
2013 break;
2014 case EVENT_MOV_TO_PMC:
2015 kvm_mov_to_pmc(vcpu, inst);
2016 break;
2017 case EVENT_MOV_TO_PMD:
2018 kvm_mov_to_pmd(vcpu, inst);
2019 break;
2020 case EVENT_MOV_TO_PKR:
2021 kvm_mov_to_pkr(vcpu, inst);
2022 break;
2023 case EVENT_MOV_FROM_DBR:
2024 kvm_mov_from_dbr(vcpu, inst);
2025 break;
2026 case EVENT_MOV_FROM_IBR:
2027 kvm_mov_from_ibr(vcpu, inst);
2028 break;
2029 case EVENT_MOV_FROM_PMC:
2030 kvm_mov_from_pmc(vcpu, inst);
2031 break;
2032 case EVENT_MOV_FROM_PKR:
2033 kvm_mov_from_pkr(vcpu, inst);
2034 break;
2035 case EVENT_MOV_FROM_CPUID:
2036 kvm_mov_from_cpuid(vcpu, inst);
2037 break;
2038 case EVENT_VMSW:
2039 status = IA64_FAULT;
2040 break;
2041 default:
2042 break;
2043 };
2044	/* Advance the guest iip unless the handler faulted (rfi updates iip itself) */
2045 if (status == IA64_NO_FAULT && cause != EVENT_RFI)
2046 vcpu_increment_iip(vcpu);
2047
2048 recover_if_physical_mode(vcpu);
2049}
2050
2051void init_vcpu(struct kvm_vcpu *vcpu)
2052{
2053 int i;
2054
2055 vcpu->arch.mode_flags = GUEST_IN_PHY;
2056 VMX(vcpu, vrr[0]) = 0x38;
2057 VMX(vcpu, vrr[1]) = 0x38;
2058 VMX(vcpu, vrr[2]) = 0x38;
2059 VMX(vcpu, vrr[3]) = 0x38;
2060 VMX(vcpu, vrr[4]) = 0x38;
2061 VMX(vcpu, vrr[5]) = 0x38;
2062 VMX(vcpu, vrr[6]) = 0x38;
2063 VMX(vcpu, vrr[7]) = 0x38;
2064 VCPU(vcpu, vpsr) = IA64_PSR_BN;
2065 VCPU(vcpu, dcr) = 0;
2066 /* pta.size must not be 0. The minimum is 15 (32k) */
2067 VCPU(vcpu, pta) = 15 << 2;
2068 VCPU(vcpu, itv) = 0x10000;
2069 VCPU(vcpu, itm) = 0;
2070 VMX(vcpu, last_itc) = 0;
2071
2072 VCPU(vcpu, lid) = VCPU_LID(vcpu);
2073 VCPU(vcpu, ivr) = 0;
2074 VCPU(vcpu, tpr) = 0x10000;
2075 VCPU(vcpu, eoi) = 0;
2076 VCPU(vcpu, irr[0]) = 0;
2077 VCPU(vcpu, irr[1]) = 0;
2078 VCPU(vcpu, irr[2]) = 0;
2079 VCPU(vcpu, irr[3]) = 0;
2080 VCPU(vcpu, pmv) = 0x10000;
2081 VCPU(vcpu, cmcv) = 0x10000;
2082 VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */
2083 VCPU(vcpu, lrr1) = 0x10000; /* default reset value? */
2084 update_vhpi(vcpu, NULL_VECTOR);
2085 VLSAPIC_XTP(vcpu) = 0x80; /* disabled */
2086
2087 for (i = 0; i < 4; i++)
2088 VLSAPIC_INSVC(vcpu, i) = 0;
2089}
2090
2091void kvm_init_all_rr(struct kvm_vcpu *vcpu)
2092{
2093 unsigned long psr;
2094
2095 local_irq_save(psr);
2096
2097	/* WARNING: virtual mode and physical mode must not co-exist
2098	 * in the same region.
2099	 */
2100
2101 vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
2102 vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
2103
2104 if (is_physical_mode(vcpu)) {
2105 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
2106 panic_vm(vcpu);
2107
2108 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
2109 ia64_dv_serialize_data();
2110 ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
2111 ia64_dv_serialize_data();
2112 } else {
2113 ia64_set_rr((VRN0 << VRN_SHIFT),
2114 vcpu->arch.metaphysical_saved_rr0);
2115 ia64_dv_serialize_data();
2116 ia64_set_rr((VRN4 << VRN_SHIFT),
2117 vcpu->arch.metaphysical_saved_rr4);
2118 ia64_dv_serialize_data();
2119 }
2120 ia64_set_rr((VRN1 << VRN_SHIFT),
2121 vrrtomrr(VMX(vcpu, vrr[VRN1])));
2122 ia64_dv_serialize_data();
2123 ia64_set_rr((VRN2 << VRN_SHIFT),
2124 vrrtomrr(VMX(vcpu, vrr[VRN2])));
2125 ia64_dv_serialize_data();
2126 ia64_set_rr((VRN3 << VRN_SHIFT),
2127 vrrtomrr(VMX(vcpu, vrr[VRN3])));
2128 ia64_dv_serialize_data();
2129 ia64_set_rr((VRN5 << VRN_SHIFT),
2130 vrrtomrr(VMX(vcpu, vrr[VRN5])));
2131 ia64_dv_serialize_data();
2132 ia64_set_rr((VRN7 << VRN_SHIFT),
2133 vrrtomrr(VMX(vcpu, vrr[VRN7])));
2134 ia64_dv_serialize_data();
2135 ia64_srlz_d();
2136 ia64_set_psr(psr);
2137}
2138
2139int vmm_entry(void)
2140{
2141 struct kvm_vcpu *v;
2142 v = current_vcpu;
2143
2144 ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
2145 0, 0, 0, 0, 0, 0);
2146 kvm_init_vtlb(v);
2147 kvm_init_vhpt(v);
2148 init_vcpu(v);
2149 kvm_init_all_rr(v);
2150 vmm_reset_entry();
2151
2152 return 0;
2153}
2154
2155void panic_vm(struct kvm_vcpu *v)
2156{
2157 struct exit_ctl_data *p = &v->arch.exit_data;
2158
2159 p->exit_reason = EXIT_REASON_VM_PANIC;
2160 vmm_transition(v);
2161	/* Never returns */
2162 while (1);
2163}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
new file mode 100644
index 000000000000..b0fcfb62c49e
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.h
@@ -0,0 +1,740 @@
1/*
2 * vcpu.h: vcpu routines
3 * Copyright (c) 2005, Intel Corporation.
4 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
5 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
6 *
7 * Copyright (c) 2007, Intel Corporation.
8 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
9 * Xiantao Zhang (xiantao.zhang@intel.com)
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22 * Place - Suite 330, Boston, MA 02111-1307 USA.
23 *
24 */
25
26
27#ifndef __KVM_VCPU_H__
28#define __KVM_VCPU_H__
29
30#include <asm/types.h>
31#include <asm/fpu.h>
32#include <asm/processor.h>
33
34#ifndef __ASSEMBLY__
35#include "vti.h"
36
37#include <linux/kvm_host.h>
38#include <linux/spinlock.h>
39
40typedef unsigned long IA64_INST;
41
42typedef union U_IA64_BUNDLE {
43 unsigned long i64[2];
44 struct { unsigned long template:5, slot0:41, slot1a:18,
45 slot1b:23, slot2:41; };
46 /* NOTE: following doesn't work because bitfields can't cross natural
47 size boundaries
48 struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
49} IA64_BUNDLE;
50
51typedef union U_INST64_A5 {
52 IA64_INST inst;
53 struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
54 imm9d:9, s:1, major:4; };
55} INST64_A5;
56
57typedef union U_INST64_B4 {
58 IA64_INST inst;
59 struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
60 wh:2, d:1, un1:1, major:4; };
61} INST64_B4;
62
63typedef union U_INST64_B8 {
64 IA64_INST inst;
65 struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
66} INST64_B8;
67
68typedef union U_INST64_B9 {
69 IA64_INST inst;
70 struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
71} INST64_B9;
72
73typedef union U_INST64_I19 {
74 IA64_INST inst;
75 struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
76} INST64_I19;
77
78typedef union U_INST64_I26 {
79 IA64_INST inst;
80 struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
81} INST64_I26;
82
83typedef union U_INST64_I27 {
84 IA64_INST inst;
85 struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
86} INST64_I27;
87
88typedef union U_INST64_I28 { /* not privileged (mov from AR) */
89 IA64_INST inst;
90 struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
91} INST64_I28;
92
93typedef union U_INST64_M28 {
94 IA64_INST inst;
95 struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
96} INST64_M28;
97
98typedef union U_INST64_M29 {
99 IA64_INST inst;
100 struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
101} INST64_M29;
102
103typedef union U_INST64_M30 {
104 IA64_INST inst;
105 struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
106 x3:3, s:1, major:4; };
107} INST64_M30;
108
109typedef union U_INST64_M31 {
110 IA64_INST inst;
111 struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
112} INST64_M31;
113
114typedef union U_INST64_M32 {
115 IA64_INST inst;
116 struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
117} INST64_M32;
118
119typedef union U_INST64_M33 {
120 IA64_INST inst;
121 struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
122} INST64_M33;
123
124typedef union U_INST64_M35 {
125 IA64_INST inst;
126 struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
127
128} INST64_M35;
129
130typedef union U_INST64_M36 {
131 IA64_INST inst;
132 struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
133} INST64_M36;
134
135typedef union U_INST64_M37 {
136 IA64_INST inst;
137 struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
138 i:1, major:4; };
139} INST64_M37;
140
141typedef union U_INST64_M41 {
142 IA64_INST inst;
143 struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
144} INST64_M41;
145
146typedef union U_INST64_M42 {
147 IA64_INST inst;
148 struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
149} INST64_M42;
150
151typedef union U_INST64_M43 {
152 IA64_INST inst;
153 struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
154} INST64_M43;
155
156typedef union U_INST64_M44 {
157 IA64_INST inst;
158 struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
159} INST64_M44;
160
161typedef union U_INST64_M45 {
162 IA64_INST inst;
163 struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
164} INST64_M45;
165
166typedef union U_INST64_M46 {
167 IA64_INST inst;
168 struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
169 x3:3, un1:1, major:4; };
170} INST64_M46;
171
172typedef union U_INST64_M47 {
173 IA64_INST inst;
174 struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
175} INST64_M47;
176
177typedef union U_INST64_M1{
178 IA64_INST inst;
179 struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
180 x6:6, m:1, major:4; };
181} INST64_M1;
182
183typedef union U_INST64_M2{
184 IA64_INST inst;
185 struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
186 x6:6, m:1, major:4; };
187} INST64_M2;
188
189typedef union U_INST64_M3{
190 IA64_INST inst;
191 struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
192 x6:6, s:1, major:4; };
193} INST64_M3;
194
195typedef union U_INST64_M4 {
196 IA64_INST inst;
197 struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
198 x6:6, m:1, major:4; };
199} INST64_M4;
200
201typedef union U_INST64_M5 {
202 IA64_INST inst;
203 struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
204 x6:6, s:1, major:4; };
205} INST64_M5;
206
207typedef union U_INST64_M6 {
208 IA64_INST inst;
209 struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
210 x6:6, m:1, major:4; };
211} INST64_M6;
212
213typedef union U_INST64_M9 {
214 IA64_INST inst;
215 struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
216 x6:6, m:1, major:4; };
217} INST64_M9;
218
219typedef union U_INST64_M10 {
220 IA64_INST inst;
221 struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
222 x6:6, s:1, major:4; };
223} INST64_M10;
224
225typedef union U_INST64_M12 {
226 IA64_INST inst;
227 struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
228 x6:6, m:1, major:4; };
229} INST64_M12;
230
231typedef union U_INST64_M15 {
232 IA64_INST inst;
233 struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
234 x6:6, s:1, major:4; };
235} INST64_M15;
236
237typedef union U_INST64 {
238 IA64_INST inst;
239 struct { unsigned long :37, major:4; } generic;
240 INST64_A5 A5; /* used in build_hypercall_bundle only */
241 INST64_B4 B4; /* used in build_hypercall_bundle only */
242 INST64_B8 B8; /* rfi, bsw.[01] */
243 INST64_B9 B9; /* break.b */
244 INST64_I19 I19; /* used in build_hypercall_bundle only */
245 INST64_I26 I26; /* mov register to ar (I unit) */
246 INST64_I27 I27; /* mov immediate to ar (I unit) */
247 INST64_I28 I28; /* mov from ar (I unit) */
248 INST64_M1 M1; /* ld integer */
249 INST64_M2 M2;
250 INST64_M3 M3;
251 INST64_M4 M4; /* st integer */
252 INST64_M5 M5;
253	INST64_M6 M6; /* ldfd floating point */
254	INST64_M9 M9; /* stfd floating point */
255	INST64_M10 M10; /* stfd floating point */
256	INST64_M12 M12; /* ldfd pair floating point */
257 INST64_M15 M15; /* lfetch + imm update */
258 INST64_M28 M28; /* purge translation cache entry */
259 INST64_M29 M29; /* mov register to ar (M unit) */
260 INST64_M30 M30; /* mov immediate to ar (M unit) */
261 INST64_M31 M31; /* mov from ar (M unit) */
262 INST64_M32 M32; /* mov reg to cr */
263 INST64_M33 M33; /* mov from cr */
264 INST64_M35 M35; /* mov to psr */
265 INST64_M36 M36; /* mov from psr */
266 INST64_M37 M37; /* break.m */
267 INST64_M41 M41; /* translation cache insert */
268 INST64_M42 M42; /* mov to indirect reg/translation reg insert*/
269 INST64_M43 M43; /* mov from indirect reg */
270 INST64_M44 M44; /* set/reset system mask */
271 INST64_M45 M45; /* translation purge */
272 INST64_M46 M46; /* translation access (tpa,tak) */
273 INST64_M47 M47; /* purge translation entry */
274} INST64;
275
276#define MASK_41 ((unsigned long)0x1ffffffffff)
277
278/* Virtual address memory attributes encoding */
279#define VA_MATTR_WB 0x0
280#define VA_MATTR_UC 0x4
281#define VA_MATTR_UCE 0x5
282#define VA_MATTR_WC 0x6
283#define VA_MATTR_NATPAGE 0x7
284
285#define PMASK(size) (~((size) - 1))
286#define PSIZE(size) (1UL<<(size))
287#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits))
288#define PAGEALIGN(va, ps) CLEARLSB(va, ps)
289#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
290#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */
291
292#define ARCH_PAGE_SHIFT 12
293
294#define INVALID_TI_TAG (1UL << 63)
295
296#define VTLB_PTE_P_BIT 0
297#define VTLB_PTE_IO_BIT 60
298#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT)
299#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT)
300
301#define vcpu_quick_region_check(_tr_regions,_ifa) \
302 (_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
303
304#define vcpu_quick_region_set(_tr_regions,_ifa) \
305 do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
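/*
 * Illustrative sketch: bits 63:61 of a virtual address select the region
 * (0-7), so these macros keep a small per-vcpu bitmap of regions that
 * currently hold translation registers.  For instance, inserting a TR for
 * ifa == 0xa000000000000000 (region 5) sets bit 5 of itr_regions or
 * dtr_regions, as done in vcpu_itr_i()/vcpu_itr_d() in vcpu.c.
 */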
306
307static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
308 u64 va, u64 rid)
309{
310 trp->page_flags = pte;
311 trp->itir = itir;
312 trp->vadr = va;
313 trp->rid = rid;
314}
315
316extern u64 kvm_lookup_mpa(u64 gpfn);
317extern u64 kvm_gpa_to_mpa(u64 gpa);
318
319/* Return I/O type if true */
320#define __gpfn_is_io(gpfn) \
321 ({ \
322 u64 pte, ret = 0; \
323 pte = kvm_lookup_mpa(gpfn); \
324 if (!(pte & GPFN_INV_MASK)) \
325 ret = pte & GPFN_IO_MASK; \
326 ret; \
327 })
328
329#endif
330
331#define IA64_NO_FAULT 0
332#define IA64_FAULT 1
333
334#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15)
335
336#define SW_BAD 0 /* Bad mode transition */
337#define SW_V2P 1 /* Physical mode emulation is activated */
338#define SW_P2V 2 /* Exit physical mode emulation */
339#define SW_SELF 3 /* No mode transition */
340#define SW_NOP 4 /* Mode transition, but without action required */
341
342#define GUEST_IN_PHY 0x1
343#define GUEST_PHY_EMUL 0x2
344
345#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
346
347#define VRN_SHIFT 61
348#define VRN_MASK 0xe000000000000000
349#define VRN0 0x0UL
350#define VRN1 0x1UL
351#define VRN2 0x2UL
352#define VRN3 0x3UL
353#define VRN4 0x4UL
354#define VRN5 0x5UL
355#define VRN6 0x6UL
356#define VRN7 0x7UL
357
358#define IRQ_NO_MASKED 0
359#define IRQ_MASKED_BY_VTPR 1
360#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */
361
362#define PTA_BASE_SHIFT 15
363
364#define IA64_PSR_VM_BIT 46
365#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
366
367/* Interruption Function State */
368#define IA64_IFS_V_BIT 63
369#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT)
370
371#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
372#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
373
374#ifndef __ASSEMBLY__
375
376#include <asm/gcc_intrin.h>
377
378#define is_physical_mode(v) \
379 ((v->arch.mode_flags) & GUEST_IN_PHY)
380
381#define is_virtual_mode(v) \
382 (!is_physical_mode(v))
383
384#define MODE_IND(psr) \
385 (((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
386
387#define _vmm_raw_spin_lock(x) \
388 do { \
389 __u32 *ia64_spinlock_ptr = (__u32 *) (x); \
390 __u64 ia64_spinlock_val; \
391 ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
392 if (unlikely(ia64_spinlock_val)) { \
393 do { \
394 while (*ia64_spinlock_ptr) \
395 ia64_barrier(); \
396 ia64_spinlock_val = \
397 ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
398 } while (ia64_spinlock_val); \
399 } \
400 } while (0)
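/*
 * Illustrative note: this is a test-and-test-and-set acquire loop.  The
 * lock word is grabbed with ia64_cmpxchg4_acq(ptr, 1, 0); on contention
 * the inner loop spins on plain reads until the word returns to 0 before
 * retrying the atomic exchange, which keeps the cache line mostly shared
 * while waiting.
 */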
401
402#define _vmm_raw_spin_unlock(x) \
403 do { barrier(); \
404 ((spinlock_t *)x)->raw_lock.lock = 0; } \
405while (0)
406
407void vmm_spin_lock(spinlock_t *lock);
408void vmm_spin_unlock(spinlock_t *lock);
409enum {
410 I_TLB = 1,
411 D_TLB = 2
412};
413
414union kvm_va {
415 struct {
416 unsigned long off : 60; /* intra-region offset */
417 unsigned long reg : 4; /* region number */
418 } f;
419 unsigned long l;
420 void *p;
421};
422
423#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \
424 _v.f.reg = 0; _v.l; })
425#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \
426 _v.f.reg = -1; _v.p; })
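/*
 * Worked example (illustrative only, assuming the VMM data is identity
 * mapped in the top region): __kvm_pa() clears the 4-bit region field and
 * __kvm_va() sets it to all ones, so
 *	__kvm_pa(0xf000000000001000) == 0x1000
 *	__kvm_va(0x1000)             == 0xf000000000001000
 */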
427
428#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \
429 _v.rid; })
430#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \
431 _v.ps; })
432#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \
433 _v.ve; })
434
435enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
436enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
437
438#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
439#define VMX(_v, _x) ((_v)->arch._x)
440
441#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
442#define VLSAPIC_XTP(_v) VMX(_v, xtp)
443
444static inline unsigned long itir_ps(unsigned long itir)
445{
446 return ((itir >> 2) & 0x3f);
447}
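/*
 * Worked example (illustrative only): itir keeps the log2 page size in
 * bits 7:2, so itir_ps(0x38) == 14 (a 16KB page) and itir_ps(0x60) == 24
 * (the 16MB case special-cased in vcpu_itr_d()).
 */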
448
449
450/**************************************************************************
451 VCPU control register access routines
452 **************************************************************************/
453
454static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
455{
456 return ((u64)VCPU(vcpu, itir));
457}
458
459static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
460{
461 VCPU(vcpu, itir) = val;
462}
463
464static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
465{
466 return ((u64)VCPU(vcpu, ifa));
467}
468
469static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
470{
471 VCPU(vcpu, ifa) = val;
472}
473
474static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
475{
476 return ((u64)VCPU(vcpu, iva));
477}
478
479static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
480{
481 return ((u64)VCPU(vcpu, pta));
482}
483
484static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
485{
486 return ((u64)VCPU(vcpu, lid));
487}
488
489static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
490{
491 return ((u64)VCPU(vcpu, tpr));
492}
493
494static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
495{
496 return (0UL); /*reads of eoi always return 0 */
497}
498
499static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
500{
501 return ((u64)VCPU(vcpu, irr[0]));
502}
503
504static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
505{
506 return ((u64)VCPU(vcpu, irr[1]));
507}
508
509static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
510{
511 return ((u64)VCPU(vcpu, irr[2]));
512}
513
514static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
515{
516 return ((u64)VCPU(vcpu, irr[3]));
517}
518
519static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
520{
521 ia64_setreg(_IA64_REG_CR_DCR, val);
522}
523
524static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
525{
526 VCPU(vcpu, isr) = val;
527}
528
529static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
530{
531 VCPU(vcpu, lid) = val;
532}
533
534static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
535{
536 VCPU(vcpu, ipsr) = val;
537}
538
539static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
540{
541 VCPU(vcpu, iip) = val;
542}
543
544static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
545{
546 VCPU(vcpu, ifs) = val;
547}
548
549static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
550{
551 VCPU(vcpu, iipa) = val;
552}
553
554static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
555{
556 VCPU(vcpu, iha) = val;
557}
558
559
560static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
561{
562 return vcpu->arch.vrr[reg>>61];
563}
564
565/**************************************************************************
566 VCPU debug breakpoint register access routines
567 **************************************************************************/
568
569static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
570{
571 __ia64_set_dbr(reg, val);
572}
573
574static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
575{
576 ia64_set_ibr(reg, val);
577}
578
579static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
580{
581 return ((u64)__ia64_get_dbr(reg));
582}
583
584static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
585{
586 return ((u64)ia64_get_ibr(reg));
587}
588
589/**************************************************************************
590 VCPU performance monitor register access routines
591 **************************************************************************/
592static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
593{
594 /* NOTE: Writes to unimplemented PMC registers are discarded */
595 ia64_set_pmc(reg, val);
596}
597
598static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
599{
600 /* NOTE: Writes to unimplemented PMD registers are discarded */
601 ia64_set_pmd(reg, val);
602}
603
604static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
605{
606 /* NOTE: Reads from unimplemented PMC registers return zero */
607 return ((u64)ia64_get_pmc(reg));
608}
609
610static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
611{
612 /* NOTE: Reads from unimplemented PMD registers return zero */
613 return ((u64)ia64_get_pmd(reg));
614}
615
616static inline unsigned long vrrtomrr(unsigned long val)
617{
618 union ia64_rr rr;
619 rr.val = val;
620 rr.rid = (rr.rid << 4) | 0xe;
621 if (rr.ps > PAGE_SHIFT)
622 rr.ps = PAGE_SHIFT;
623 rr.ve = 1;
624 return rr.val;
625}
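/*
 * Worked example (illustrative only, assuming the standard union ia64_rr
 * layout of ve in bit 0, ps in bits 7:2 and rid in bits 31:8, and a host
 * PAGE_SHIFT of 14): a guest rr value of 0x38 (rid 0, ps 14) becomes a
 * machine rr of 0xe39 -- rid (0 << 4) | 0xe, ps still 14, ve forced to 1.
 * init_vcpu() seeds every guest vrr with exactly this 0x38.
 */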
626
627
628static inline int highest_bits(int *dat)
629{
630 u32 bits, bitnum;
631 int i;
632
633 /* loop for all 256 bits */
634 for (i = 7; i >= 0 ; i--) {
635 bits = dat[i];
636 if (bits) {
637 bitnum = fls(bits);
638 return i * 32 + bitnum - 1;
639 }
640 }
641 return NULL_VECTOR;
642}
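/*
 * Worked example (illustrative only): the 256-bit irr/insvc state is
 * scanned as eight 32-bit words from the top down.  If dat[2] == 0x80 and
 * every higher word is zero, fls(0x80) == 8 and the routine returns
 * 2 * 32 + 8 - 1 == 71; with no bit set anywhere it returns NULL_VECTOR.
 */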
643
644/*
645 * Return true when the pending irq has higher priority than the
646 * in-service one.
647 */
648static inline int is_higher_irq(int pending, int inservice)
649{
650 return ((pending > inservice)
651 || ((pending != NULL_VECTOR)
652 && (inservice == NULL_VECTOR)));
653}
654
655static inline int is_higher_class(int pending, int mic)
656{
657 return ((pending >> 4) > mic);
658}
659
660/*
661 * Return the highest pending irq vector (0-255),
662 * or NULL_VECTOR when nothing is pending.
663 */
664static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
665{
666 if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
667 return NMI_VECTOR;
668 if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
669 return ExtINT_VECTOR;
670
671 return highest_bits((int *)&VCPU(vcpu, irr[0]));
672}
673
674static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
675{
676 if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
677 return NMI_VECTOR;
678 if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
679 return ExtINT_VECTOR;
680
681 return highest_bits((int *)&(VMX(vcpu, insvc[0])));
682}
683
684extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg,
685 struct ia64_fpreg *val);
686extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg,
687 struct ia64_fpreg *val);
688extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg);
689extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat);
690extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu);
691extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val);
692extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
693extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
694extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
695 u64 itir, u64 va, int type);
696extern struct thash_data *vhpt_lookup(u64 va);
697extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
698extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
699extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
700extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
701extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
702 u64 itir, u64 ifa, int type);
703extern void thash_purge_all(struct kvm_vcpu *v);
704extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
705 u64 va, int is_data);
706extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
707 u64 ps, int is_data);
708
709extern void vcpu_increment_iip(struct kvm_vcpu *v);
710extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
711extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
712extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
713extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
714extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
715extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
716extern void nested_dtlb(struct kvm_vcpu *vcpu);
717extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
718extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
719
720extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
721extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
722
723extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
724extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
725extern void vmm_transition(struct kvm_vcpu *vcpu);
726extern void vmm_trampoline(union context *from, union context *to);
727extern int vmm_entry(void);
728extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
729
730extern void vmm_reset_entry(void);
731void kvm_init_vtlb(struct kvm_vcpu *v);
732void kvm_init_vhpt(struct kvm_vcpu *v);
733void thash_init(struct thash_cb *hcb, u64 sz);
734
735void panic_vm(struct kvm_vcpu *v);
736
737extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
738 u64 arg4, u64 arg5, u64 arg6, u64 arg7);
739#endif
740#endif /* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
new file mode 100644
index 000000000000..2275bf4e681a
--- /dev/null
+++ b/arch/ia64/kvm/vmm.c
@@ -0,0 +1,66 @@
1/*
2 * vmm.c: vmm module interface with kvm module
3 *
4 * Copyright (c) 2007, Intel Corporation.
5 *
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 */
21
22
23#include<linux/module.h>
24#include<asm/fpswa.h>
25
26#include "vcpu.h"
27
28MODULE_AUTHOR("Intel");
29MODULE_LICENSE("GPL");
30
31extern char kvm_ia64_ivt;
32extern fpswa_interface_t *vmm_fpswa_interface;
33
34struct kvm_vmm_info vmm_info = {
35 .module = THIS_MODULE,
36 .vmm_entry = vmm_entry,
37 .tramp_entry = vmm_trampoline,
38 .vmm_ivt = (unsigned long)&kvm_ia64_ivt,
39};
40
41static int __init kvm_vmm_init(void)
42{
43
44 vmm_fpswa_interface = fpswa_interface;
45
46	/* Register vmm data with the kvm side */
47 return kvm_init(&vmm_info, 1024, THIS_MODULE);
48}
49
50static void __exit kvm_vmm_exit(void)
51{
52 kvm_exit();
53 return ;
54}
55
56void vmm_spin_lock(spinlock_t *lock)
57{
58 _vmm_raw_spin_lock(lock);
59}
60
61void vmm_spin_unlock(spinlock_t *lock)
62{
63 _vmm_raw_spin_unlock(lock);
64}
65module_init(kvm_vmm_init)
66module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
new file mode 100644
index 000000000000..3ee5f481c06d
--- /dev/null
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -0,0 +1,1424 @@
1/*
2 * /ia64/kvm_ivt.S
3 *
4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger <davidm@hpl.hp.com>
7 * Copyright (C) 2000, 2002-2003 Intel Co
8 * Asit Mallick <asit.k.mallick@intel.com>
9 * Suresh Siddha <suresh.b.siddha@intel.com>
10 * Kenneth Chen <kenneth.w.chen@intel.com>
11 * Fenghua Yu <fenghua.yu@intel.com>
12 *
13 *
14 * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
15 * for SMP
16 * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
17 * handler now uses virtual PT.
18 *
19 * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
20 * Supporting Intel virtualization architecture
21 *
22 */
23
24/*
25 * This file defines the interruption vector table used by the CPU.
26 * It does not include one entry per possible cause of interruption.
27 *
28 * The first 20 entries of the table contain 64 bundles each while the
29 * remaining 48 entries contain only 16 bundles each.
30 *
31 * The 64 bundles are used to allow inlining the whole handler for
32 * critical interruptions like TLB misses.
33 *
34 *
35 * For each entry, the comment is as follows:
36 *
37 *		// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
38 *		entry offset ----/     /         /                  /  /
39 *		entry number ---------/         /                  /  /
40 *		size of the entry -------------/                  /  /
41 *		vector name -------------------------------------/  /
42 *		interruptions triggering this vector ----------------/
49 *
50 * The table is 32KB in size and must be aligned on a 32KB
51 * boundary.
52 * (The CPU ignores the 15 lower bits of the address)
53 *
54 * Table is based upon EAS2.6 (Oct 1999)
55 */
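// Size check (illustrative): 20 entries x 64 bundles x 16 bytes = 20KB,
// plus 48 entries x 16 bundles x 16 bytes = 12KB, which is why the table
// below is 32KB and starts with ".align 32768".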
56
57
58#include <asm/asmmacro.h>
59#include <asm/cache.h>
60#include <asm/pgtable.h>
61
62#include "asm-offsets.h"
63#include "vcpu.h"
64#include "kvm_minstate.h"
65#include "vti.h"
66
67#if 1
68# define PSR_DEFAULT_BITS psr.ac
69#else
70# define PSR_DEFAULT_BITS 0
71#endif
72
73
74#define KVM_FAULT(n) \
75 kvm_fault_##n:; \
76 mov r19=n;; \
77 br.sptk.many kvm_fault_##n; \
78 ;; \
79
80
81#define KVM_REFLECT(n) \
82 mov r31=pr; \
83 mov r19=n; /* prepare to save predicates */ \
84 mov r29=cr.ipsr; \
85 ;; \
86 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
87(p7)br.sptk.many kvm_dispatch_reflection; \
88 br.sptk.many kvm_panic; \
89
90
91GLOBAL_ENTRY(kvm_panic)
92 br.sptk.many kvm_panic
93 ;;
94END(kvm_panic)
95
96
97
98
99
100 .section .text.ivt,"ax"
101
102 .align 32768 // align on 32KB boundary
103 .global kvm_ia64_ivt
104kvm_ia64_ivt:
105///////////////////////////////////////////////////////////////
106// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
107ENTRY(kvm_vhpt_miss)
108 KVM_FAULT(0)
109END(kvm_vhpt_miss)
110
111
112 .org kvm_ia64_ivt+0x400
113////////////////////////////////////////////////////////////////
114// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
115ENTRY(kvm_itlb_miss)
116 mov r31 = pr
117 mov r29=cr.ipsr;
118 ;;
119 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
120 (p6) br.sptk kvm_alt_itlb_miss
121 mov r19 = 1
122 br.sptk kvm_itlb_miss_dispatch
123 KVM_FAULT(1);
124END(kvm_itlb_miss)
125
126 .org kvm_ia64_ivt+0x0800
127//////////////////////////////////////////////////////////////////
128// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
129ENTRY(kvm_dtlb_miss)
130 mov r31 = pr
131 mov r29=cr.ipsr;
132 ;;
133 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
134(p6)br.sptk kvm_alt_dtlb_miss
135 br.sptk kvm_dtlb_miss_dispatch
136END(kvm_dtlb_miss)
137
138 .org kvm_ia64_ivt+0x0c00
139////////////////////////////////////////////////////////////////////
140// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
141ENTRY(kvm_alt_itlb_miss)
142 mov r16=cr.ifa // get address that caused the TLB miss
143 ;;
144 movl r17=PAGE_KERNEL
145 mov r24=cr.ipsr
146 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
147 ;;
148 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
149 ;;
150 or r19=r17,r19 // insert PTE control bits into r19
151 ;;
152 movl r20=IA64_GRANULE_SHIFT<<2
153 ;;
154 mov cr.itir=r20
155 ;;
156 itc.i r19 // insert the TLB entry
157 mov pr=r31,-1
158 rfi
159END(kvm_alt_itlb_miss)
160
161 .org kvm_ia64_ivt+0x1000
162/////////////////////////////////////////////////////////////////////
163// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
164ENTRY(kvm_alt_dtlb_miss)
165 mov r16=cr.ifa // get address that caused the TLB miss
166 ;;
167 movl r17=PAGE_KERNEL
168 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
169 mov r24=cr.ipsr
170 ;;
171 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
172 ;;
173 or r19=r19,r17 // insert PTE control bits into r19
174 ;;
175 movl r20=IA64_GRANULE_SHIFT<<2
176 ;;
177 mov cr.itir=r20
178 ;;
179 itc.d r19 // insert the TLB entry
180 mov pr=r31,-1
181 rfi
182END(kvm_alt_dtlb_miss)
183
184 .org kvm_ia64_ivt+0x1400
185//////////////////////////////////////////////////////////////////////
186// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
187ENTRY(kvm_nested_dtlb_miss)
188 KVM_FAULT(5)
189END(kvm_nested_dtlb_miss)
190
191 .org kvm_ia64_ivt+0x1800
192/////////////////////////////////////////////////////////////////////
193// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
194ENTRY(kvm_ikey_miss)
195 KVM_REFLECT(6)
196END(kvm_ikey_miss)
197
198 .org kvm_ia64_ivt+0x1c00
199/////////////////////////////////////////////////////////////////////
200// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
201ENTRY(kvm_dkey_miss)
202 KVM_REFLECT(7)
203END(kvm_dkey_miss)
204
205 .org kvm_ia64_ivt+0x2000
206////////////////////////////////////////////////////////////////////
207// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
208ENTRY(kvm_dirty_bit)
209 KVM_REFLECT(8)
210END(kvm_dirty_bit)
211
212 .org kvm_ia64_ivt+0x2400
213////////////////////////////////////////////////////////////////////
214// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
215ENTRY(kvm_iaccess_bit)
216 KVM_REFLECT(9)
217END(kvm_iaccess_bit)
218
219 .org kvm_ia64_ivt+0x2800
220///////////////////////////////////////////////////////////////////
221// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
222ENTRY(kvm_daccess_bit)
223 KVM_REFLECT(10)
224END(kvm_daccess_bit)
225
226 .org kvm_ia64_ivt+0x2c00
227/////////////////////////////////////////////////////////////////
228// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
229ENTRY(kvm_break_fault)
230 mov r31=pr
231 mov r19=11
232 mov r29=cr.ipsr
233 ;;
234 KVM_SAVE_MIN_WITH_COVER_R19
235 ;;
236 alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
237 mov out0=cr.ifa
238 mov out2=cr.isr // FIXME: pity to make this slow access twice
239 mov out3=cr.iim // FIXME: pity to make this slow access twice
240 adds r3=8,r2 // set up second base pointer
241 ;;
242 ssm psr.ic
243 ;;
244 srlz.i // guarantee that interruption collection is on
245 ;;
246 //(p15)ssm psr.i // restore psr.i
247 addl r14=@gprel(ia64_leave_hypervisor),gp
248 ;;
249 KVM_SAVE_REST
250 mov rp=r14
251 ;;
252 adds out1=16,sp
253 br.call.sptk.many b6=kvm_ia64_handle_break
254 ;;
255END(kvm_break_fault)
256
257 .org kvm_ia64_ivt+0x3000
258/////////////////////////////////////////////////////////////////
259// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
260ENTRY(kvm_interrupt)
261 mov r31=pr // prepare to save predicates
262 mov r19=12
263 mov r29=cr.ipsr
264 ;;
265 tbit.z p6,p7=r29,IA64_PSR_VM_BIT
266 tbit.z p0,p15=r29,IA64_PSR_I_BIT
267 ;;
268(p7) br.sptk kvm_dispatch_interrupt
269 ;;
270 mov r27=ar.rsc /* M */
271 mov r20=r1 /* A */
272 mov r25=ar.unat /* M */
273 mov r26=ar.pfs /* I */
274 mov r28=cr.iip /* M */
275 cover /* B (or nothing) */
276 ;;
277 mov r1=sp
278 ;;
279 invala /* M */
280 mov r30=cr.ifs
281 ;;
282 addl r1=-VMM_PT_REGS_SIZE,r1
283 ;;
284 adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
285 adds r16=PT(CR_IPSR),r1
286 ;;
287 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
288 st8 [r16]=r29 /* save cr.ipsr */
289 ;;
290 lfetch.fault.excl.nt1 [r17]
291 mov r29=b0
292 ;;
293 adds r16=PT(R8),r1 /* initialize first base pointer */
294 adds r17=PT(R9),r1 /* initialize second base pointer */
295 mov r18=r0 /* make sure r18 isn't NaT */
296 ;;
297.mem.offset 0,0; st8.spill [r16]=r8,16
298.mem.offset 8,0; st8.spill [r17]=r9,16
299 ;;
300.mem.offset 0,0; st8.spill [r16]=r10,24
301.mem.offset 8,0; st8.spill [r17]=r11,24
302 ;;
303 st8 [r16]=r28,16 /* save cr.iip */
304 st8 [r17]=r30,16 /* save cr.ifs */
305 mov r8=ar.fpsr /* M */
306 mov r9=ar.csd
307 mov r10=ar.ssd
308 movl r11=FPSR_DEFAULT /* L-unit */
309 ;;
310 st8 [r16]=r25,16 /* save ar.unat */
311 st8 [r17]=r26,16 /* save ar.pfs */
312 shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
313 ;;
314 st8 [r16]=r27,16 /* save ar.rsc */
315 adds r17=16,r17 /* skip over ar_rnat field */
316 ;;
317 st8 [r17]=r31,16 /* save predicates */
318 adds r16=16,r16 /* skip over ar_bspstore field */
319 ;;
320 st8 [r16]=r29,16 /* save b0 */
321 st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
322 ;;
323.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
324.mem.offset 8,0; st8.spill [r17]=r12,16
325 adds r12=-16,r1
326 /* switch to kernel memory stack (with 16 bytes of scratch) */
327 ;;
328.mem.offset 0,0; st8.spill [r16]=r13,16
329.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
330 ;;
331.mem.offset 0,0; st8.spill [r16]=r15,16
332.mem.offset 8,0; st8.spill [r17]=r14,16
333 dep r14=-1,r0,60,4
334 ;;
335.mem.offset 0,0; st8.spill [r16]=r2,16
336.mem.offset 8,0; st8.spill [r17]=r3,16
337 adds r2=VMM_PT_REGS_R16_OFFSET,r1
338 adds r14 = VMM_VCPU_GP_OFFSET,r13
339 ;;
340 mov r8=ar.ccv
341 ld8 r14 = [r14]
342 ;;
343 mov r1=r14 /* establish kernel global pointer */
344 ;;
345 bsw.1
346 ;;
347 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
348 mov out0=r13
349 ;;
350 ssm psr.ic
351 ;;
352 srlz.i
353 ;;
354 //(p15) ssm psr.i
355 adds r3=8,r2 // set up second base pointer for SAVE_REST
356 srlz.i // ensure everybody knows psr.ic is back on
357 ;;
358.mem.offset 0,0; st8.spill [r2]=r16,16
359.mem.offset 8,0; st8.spill [r3]=r17,16
360 ;;
361.mem.offset 0,0; st8.spill [r2]=r18,16
362.mem.offset 8,0; st8.spill [r3]=r19,16
363 ;;
364.mem.offset 0,0; st8.spill [r2]=r20,16
365.mem.offset 8,0; st8.spill [r3]=r21,16
366 mov r18=b6
367 ;;
368.mem.offset 0,0; st8.spill [r2]=r22,16
369.mem.offset 8,0; st8.spill [r3]=r23,16
370 mov r19=b7
371 ;;
372.mem.offset 0,0; st8.spill [r2]=r24,16
373.mem.offset 8,0; st8.spill [r3]=r25,16
374 ;;
375.mem.offset 0,0; st8.spill [r2]=r26,16
376.mem.offset 8,0; st8.spill [r3]=r27,16
377 ;;
378.mem.offset 0,0; st8.spill [r2]=r28,16
379.mem.offset 8,0; st8.spill [r3]=r29,16
380 ;;
381.mem.offset 0,0; st8.spill [r2]=r30,16
382.mem.offset 8,0; st8.spill [r3]=r31,32
383 ;;
384 mov ar.fpsr=r11 /* M-unit */
385 st8 [r2]=r8,8 /* ar.ccv */
386 adds r24=PT(B6)-PT(F7),r3
387 ;;
388 stf.spill [r2]=f6,32
389 stf.spill [r3]=f7,32
390 ;;
391 stf.spill [r2]=f8,32
392 stf.spill [r3]=f9,32
393 ;;
394 stf.spill [r2]=f10
395 stf.spill [r3]=f11
396 adds r25=PT(B7)-PT(F11),r3
397 ;;
398 st8 [r24]=r18,16 /* b6 */
399 st8 [r25]=r19,16 /* b7 */
400 ;;
401 st8 [r24]=r9 /* ar.csd */
402 st8 [r25]=r10 /* ar.ssd */
403 ;;
404 srlz.d // make sure we see the effect of cr.ivr
405 addl r14=@gprel(ia64_leave_nested),gp
406 ;;
407 mov rp=r14
408 br.call.sptk.many b6=kvm_ia64_handle_irq
409 ;;
410END(kvm_interrupt)
411
412 .global kvm_dispatch_vexirq
413 .org kvm_ia64_ivt+0x3400
414//////////////////////////////////////////////////////////////////////
415// 0x3400 Entry 13 (size 64 bundles) Reserved
416ENTRY(kvm_virtual_exirq)
417 mov r31=pr
418 mov r19=13
419 mov r30 =r0
420 ;;
421kvm_dispatch_vexirq:
422 cmp.eq p6,p0 = 1,r30
423 ;;
424(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
425 ;;
426(p6)ld8 r1 = [r29]
427 ;;
428 KVM_SAVE_MIN_WITH_COVER_R19
429 alloc r14=ar.pfs,0,0,1,0
430 mov out0=r13
431
432 ssm psr.ic
433 ;;
434 srlz.i // guarantee that interruption collection is on
435 ;;
436 //(p15) ssm psr.i // restore psr.i
437 adds r3=8,r2 // set up second base pointer
438 ;;
439 KVM_SAVE_REST
440 addl r14=@gprel(ia64_leave_hypervisor),gp
441 ;;
442 mov rp=r14
443 br.call.sptk.many b6=kvm_vexirq
444END(kvm_virtual_exirq)
445
446 .org kvm_ia64_ivt+0x3800
447/////////////////////////////////////////////////////////////////////
448// 0x3800 Entry 14 (size 64 bundles) Reserved
449 KVM_FAULT(14)
450 // this code segment is from 2.6.16.13
451
452
453 .org kvm_ia64_ivt+0x3c00
454///////////////////////////////////////////////////////////////////////
455// 0x3c00 Entry 15 (size 64 bundles) Reserved
456 KVM_FAULT(15)
457
458
459 .org kvm_ia64_ivt+0x4000
460///////////////////////////////////////////////////////////////////////
461// 0x4000 Entry 16 (size 64 bundles) Reserved
462 KVM_FAULT(16)
463
464 .org kvm_ia64_ivt+0x4400
465//////////////////////////////////////////////////////////////////////
466// 0x4400 Entry 17 (size 64 bundles) Reserved
467 KVM_FAULT(17)
468
469 .org kvm_ia64_ivt+0x4800
470//////////////////////////////////////////////////////////////////////
471// 0x4800 Entry 18 (size 64 bundles) Reserved
472 KVM_FAULT(18)
473
474 .org kvm_ia64_ivt+0x4c00
475//////////////////////////////////////////////////////////////////////
476// 0x4c00 Entry 19 (size 64 bundles) Reserved
477 KVM_FAULT(19)
478
479 .org kvm_ia64_ivt+0x5000
480//////////////////////////////////////////////////////////////////////
481// 0x5000 Entry 20 (size 16 bundles) Page Not Present
482ENTRY(kvm_page_not_present)
483 KVM_REFLECT(20)
484END(kvm_page_not_present)
485
486 .org kvm_ia64_ivt+0x5100
487///////////////////////////////////////////////////////////////////////
488// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
489ENTRY(kvm_key_permission)
490 KVM_REFLECT(21)
491END(kvm_key_permission)
492
493 .org kvm_ia64_ivt+0x5200
494//////////////////////////////////////////////////////////////////////
495// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
496ENTRY(kvm_iaccess_rights)
497 KVM_REFLECT(22)
498END(kvm_iaccess_rights)
499
500 .org kvm_ia64_ivt+0x5300
501//////////////////////////////////////////////////////////////////////
502// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
503ENTRY(kvm_daccess_rights)
504 KVM_REFLECT(23)
505END(kvm_daccess_rights)
506
507 .org kvm_ia64_ivt+0x5400
508/////////////////////////////////////////////////////////////////////
509// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
510ENTRY(kvm_general_exception)
511 KVM_REFLECT(24)
512 KVM_FAULT(24)
513END(kvm_general_exception)
514
515 .org kvm_ia64_ivt+0x5500
516//////////////////////////////////////////////////////////////////////
517// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
518ENTRY(kvm_disabled_fp_reg)
519 KVM_REFLECT(25)
520END(kvm_disabled_fp_reg)
521
522 .org kvm_ia64_ivt+0x5600
523////////////////////////////////////////////////////////////////////
524// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
525ENTRY(kvm_nat_consumption)
526 KVM_REFLECT(26)
527END(kvm_nat_consumption)
528
529 .org kvm_ia64_ivt+0x5700
530/////////////////////////////////////////////////////////////////////
531// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
532ENTRY(kvm_speculation_vector)
533 KVM_REFLECT(27)
534END(kvm_speculation_vector)
535
536 .org kvm_ia64_ivt+0x5800
537/////////////////////////////////////////////////////////////////////
538// 0x5800 Entry 28 (size 16 bundles) Reserved
539 KVM_FAULT(28)
540
541 .org kvm_ia64_ivt+0x5900
542///////////////////////////////////////////////////////////////////
543// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
544ENTRY(kvm_debug_vector)
545 KVM_FAULT(29)
546END(kvm_debug_vector)
547
548 .org kvm_ia64_ivt+0x5a00
549///////////////////////////////////////////////////////////////
550// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
551ENTRY(kvm_unaligned_access)
552 KVM_REFLECT(30)
553END(kvm_unaligned_access)
554
555 .org kvm_ia64_ivt+0x5b00
556//////////////////////////////////////////////////////////////////////
557// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
558ENTRY(kvm_unsupported_data_reference)
559 KVM_REFLECT(31)
560END(kvm_unsupported_data_reference)
561
562 .org kvm_ia64_ivt+0x5c00
563////////////////////////////////////////////////////////////////////
564// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
565ENTRY(kvm_floating_point_fault)
566 KVM_REFLECT(32)
567END(kvm_floating_point_fault)
568
569 .org kvm_ia64_ivt+0x5d00
570/////////////////////////////////////////////////////////////////////
571// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
572ENTRY(kvm_floating_point_trap)
573 KVM_REFLECT(33)
574END(kvm_floating_point_trap)
575
576 .org kvm_ia64_ivt+0x5e00
577//////////////////////////////////////////////////////////////////////
578// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
579ENTRY(kvm_lower_privilege_trap)
580 KVM_REFLECT(34)
581END(kvm_lower_privilege_trap)
582
583 .org kvm_ia64_ivt+0x5f00
584//////////////////////////////////////////////////////////////////////
585// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
586ENTRY(kvm_taken_branch_trap)
587 KVM_REFLECT(35)
588END(kvm_taken_branch_trap)
589
590 .org kvm_ia64_ivt+0x6000
591////////////////////////////////////////////////////////////////////
592// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
593ENTRY(kvm_single_step_trap)
594 KVM_REFLECT(36)
595END(kvm_single_step_trap)
596 .global kvm_virtualization_fault_back
597 .org kvm_ia64_ivt+0x6100
598/////////////////////////////////////////////////////////////////////
599// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
600ENTRY(kvm_virtualization_fault)
601 mov r31=pr
602 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
603 ;;
604 st8 [r16] = r1
605 adds r17 = VMM_VCPU_GP_OFFSET, r21
606 ;;
607 ld8 r1 = [r17]
608 cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
609 cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
610 cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
611 cmp.eq p9,p0=EVENT_RSM,r24
612 cmp.eq p10,p0=EVENT_SSM,r24
613 cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
614 cmp.eq p12,p0=EVENT_THASH,r24
615 (p6) br.dptk.many kvm_asm_mov_from_ar
616 (p7) br.dptk.many kvm_asm_mov_from_rr
617 (p8) br.dptk.many kvm_asm_mov_to_rr
618 (p9) br.dptk.many kvm_asm_rsm
619 (p10) br.dptk.many kvm_asm_ssm
620 (p11) br.dptk.many kvm_asm_mov_to_psr
621 (p12) br.dptk.many kvm_asm_thash
622 ;;
623kvm_virtualization_fault_back:
624 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
625 ;;
626 ld8 r1 = [r16]
627 ;;
628 mov r19=37
629 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
630 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
631 ;;
632 st8 [r16] = r24
633 st8 [r17] = r25
634 ;;
635 cmp.ne p6,p0=EVENT_RFI, r24
636 (p6) br.sptk kvm_dispatch_virtualization_fault
637 ;;
638 adds r18=VMM_VPD_BASE_OFFSET,r21
639 ;;
640 ld8 r18=[r18]
641 ;;
642 adds r18=VMM_VPD_VIFS_OFFSET,r18
643 ;;
644 ld8 r18=[r18]
645 ;;
646 tbit.z p6,p0=r18,63
647 (p6) br.sptk kvm_dispatch_virtualization_fault
648 ;;
649 // if vifs.v == 1, discard the current register frame
650 alloc r18=ar.pfs,0,0,0,0
651 br.sptk kvm_dispatch_virtualization_fault
652END(kvm_virtualization_fault)
653
654 .org kvm_ia64_ivt+0x6200
655//////////////////////////////////////////////////////////////
656// 0x6200 Entry 38 (size 16 bundles) Reserved
657 KVM_FAULT(38)
658
659 .org kvm_ia64_ivt+0x6300
660/////////////////////////////////////////////////////////////////
661// 0x6300 Entry 39 (size 16 bundles) Reserved
662 KVM_FAULT(39)
663
664 .org kvm_ia64_ivt+0x6400
665/////////////////////////////////////////////////////////////////
666// 0x6400 Entry 40 (size 16 bundles) Reserved
667 KVM_FAULT(40)
668
669 .org kvm_ia64_ivt+0x6500
670//////////////////////////////////////////////////////////////////
671// 0x6500 Entry 41 (size 16 bundles) Reserved
672 KVM_FAULT(41)
673
674 .org kvm_ia64_ivt+0x6600
675//////////////////////////////////////////////////////////////////
676// 0x6600 Entry 42 (size 16 bundles) Reserved
677 KVM_FAULT(42)
678
679 .org kvm_ia64_ivt+0x6700
680//////////////////////////////////////////////////////////////////
681// 0x6700 Entry 43 (size 16 bundles) Reserved
682 KVM_FAULT(43)
683
684 .org kvm_ia64_ivt+0x6800
685//////////////////////////////////////////////////////////////////
686// 0x6800 Entry 44 (size 16 bundles) Reserved
687 KVM_FAULT(44)
688
689 .org kvm_ia64_ivt+0x6900
690///////////////////////////////////////////////////////////////////
691// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception
692//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
693ENTRY(kvm_ia32_exception)
694 KVM_FAULT(45)
695END(kvm_ia32_exception)
696
697 .org kvm_ia64_ivt+0x6a00
698////////////////////////////////////////////////////////////////////
699// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
700ENTRY(kvm_ia32_intercept)
701 KVM_FAULT(47)
702END(kvm_ia32_intercept)
703
704 .org kvm_ia64_ivt+0x6c00
705/////////////////////////////////////////////////////////////////////
706// 0x6c00 Entry 48 (size 16 bundles) Reserved
707 KVM_FAULT(48)
708
709 .org kvm_ia64_ivt+0x6d00
710//////////////////////////////////////////////////////////////////////
711// 0x6d00 Entry 49 (size 16 bundles) Reserved
712 KVM_FAULT(49)
713
714 .org kvm_ia64_ivt+0x6e00
715//////////////////////////////////////////////////////////////////////
716// 0x6e00 Entry 50 (size 16 bundles) Reserved
717 KVM_FAULT(50)
718
719 .org kvm_ia64_ivt+0x6f00
720/////////////////////////////////////////////////////////////////////
721// 0x6f00 Entry 51 (size 16 bundles) Reserved
722 KVM_FAULT(52)
723
724 .org kvm_ia64_ivt+0x7100
725////////////////////////////////////////////////////////////////////
726// 0x7100 Entry 53 (size 16 bundles) Reserved
727 KVM_FAULT(53)
728
729 .org kvm_ia64_ivt+0x7200
730/////////////////////////////////////////////////////////////////////
731// 0x7200 Entry 54 (size 16 bundles) Reserved
732 KVM_FAULT(54)
733
734 .org kvm_ia64_ivt+0x7300
735////////////////////////////////////////////////////////////////////
736// 0x7300 Entry 55 (size 16 bundles) Reserved
737 KVM_FAULT(55)
738
739 .org kvm_ia64_ivt+0x7400
740////////////////////////////////////////////////////////////////////
741// 0x7400 Entry 56 (size 16 bundles) Reserved
742 KVM_FAULT(56)
743
744 .org kvm_ia64_ivt+0x7500
745/////////////////////////////////////////////////////////////////////
746// 0x7500 Entry 57 (size 16 bundles) Reserved
747 KVM_FAULT(57)
748
749 .org kvm_ia64_ivt+0x7600
750/////////////////////////////////////////////////////////////////////
751// 0x7600 Entry 58 (size 16 bundles) Reserved
752 KVM_FAULT(58)
753
754 .org kvm_ia64_ivt+0x7700
755////////////////////////////////////////////////////////////////////
756// 0x7700 Entry 59 (size 16 bundles) Reserved
757 KVM_FAULT(59)
758
759 .org kvm_ia64_ivt+0x7800
760////////////////////////////////////////////////////////////////////
761// 0x7800 Entry 60 (size 16 bundles) Reserved
762 KVM_FAULT(60)
763
764 .org kvm_ia64_ivt+0x7900
765/////////////////////////////////////////////////////////////////////
766// 0x7900 Entry 61 (size 16 bundles) Reserved
767 KVM_FAULT(61)
768
769 .org kvm_ia64_ivt+0x7a00
770/////////////////////////////////////////////////////////////////////
771// 0x7a00 Entry 62 (size 16 bundles) Reserved
772 KVM_FAULT(62)
773
774 .org kvm_ia64_ivt+0x7b00
775/////////////////////////////////////////////////////////////////////
776// 0x7b00 Entry 63 (size 16 bundles) Reserved
777 KVM_FAULT(63)
778
779 .org kvm_ia64_ivt+0x7c00
780////////////////////////////////////////////////////////////////////
781// 0x7c00 Entry 64 (size 16 bundles) Reserved
782 KVM_FAULT(64)
783
784 .org kvm_ia64_ivt+0x7d00
785/////////////////////////////////////////////////////////////////////
786// 0x7d00 Entry 65 (size 16 bundles) Reserved
787 KVM_FAULT(65)
788
789 .org kvm_ia64_ivt+0x7e00
790/////////////////////////////////////////////////////////////////////
791// 0x7e00 Entry 66 (size 16 bundles) Reserved
792 KVM_FAULT(66)
793
794 .org kvm_ia64_ivt+0x7f00
795////////////////////////////////////////////////////////////////////
796// 0x7f00 Entry 67 (size 16 bundles) Reserved
797 KVM_FAULT(67)
798
799 .org kvm_ia64_ivt+0x8000
800// There is no particular reason for this code to be here, other than that
801// there happens to be space here that would go unused otherwise. If this
802// fault ever gets "unreserved", simply move the following code to a more
803// suitable spot...
804
805
806ENTRY(kvm_dtlb_miss_dispatch)
807 mov r19 = 2
808 KVM_SAVE_MIN_WITH_COVER_R19
809 alloc r14=ar.pfs,0,0,3,0
810 mov out0=cr.ifa
811 mov out1=r15
812 adds r3=8,r2 // set up second base pointer
813 ;;
814 ssm psr.ic
815 ;;
816 srlz.i // guarantee that interruption collection is on
817 ;;
818 //(p15) ssm psr.i // restore psr.i
819 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
820 ;;
821 KVM_SAVE_REST
822 KVM_SAVE_EXTRA
823 mov rp=r14
824 ;;
825 adds out2=16,r12
826 br.call.sptk.many b6=kvm_page_fault
827END(kvm_dtlb_miss_dispatch)
828
829ENTRY(kvm_itlb_miss_dispatch)
830
831 KVM_SAVE_MIN_WITH_COVER_R19
832 alloc r14=ar.pfs,0,0,3,0
833 mov out0=cr.ifa
834 mov out1=r15
835 adds r3=8,r2 // set up second base pointer
836 ;;
837 ssm psr.ic
838 ;;
839 srlz.i // guarantee that interruption collection is on
840 ;;
841 //(p15) ssm psr.i // restore psr.i
842 addl r14=@gprel(ia64_leave_hypervisor),gp
843 ;;
844 KVM_SAVE_REST
845 mov rp=r14
846 ;;
847 adds out2=16,r12
848 br.call.sptk.many b6=kvm_page_fault
849END(kvm_itlb_miss_dispatch)
850
851ENTRY(kvm_dispatch_reflection)
852 /*
853 * Input:
854 * psr.ic: off
855 * r19: intr type (offset into ivt, see ia64_int.h)
856 * r31: contains saved predicates (pr)
857 */
858 KVM_SAVE_MIN_WITH_COVER_R19
859 alloc r14=ar.pfs,0,0,5,0
860 mov out0=cr.ifa
861 mov out1=cr.isr
862 mov out2=cr.iim
863 mov out3=r15
864 adds r3=8,r2 // set up second base pointer
865 ;;
866 ssm psr.ic
867 ;;
868 srlz.i // guarantee that interruption collection is on
869 ;;
870 //(p15) ssm psr.i // restore psr.i
871 addl r14=@gprel(ia64_leave_hypervisor),gp
872 ;;
873 KVM_SAVE_REST
874 mov rp=r14
875 ;;
876 adds out4=16,r12
877 br.call.sptk.many b6=reflect_interruption
878END(kvm_dispatch_reflection)
879
880ENTRY(kvm_dispatch_virtualization_fault)
881 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
882 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
883 ;;
884 st8 [r16] = r24
885 st8 [r17] = r25
886 ;;
887 KVM_SAVE_MIN_WITH_COVER_R19
888 ;;
889 alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
890 mov out0=r13 //vcpu
891 adds r3=8,r2 // set up second base pointer
892 ;;
893 ssm psr.ic
894 ;;
895 srlz.i // guarantee that interruption collection is on
896 ;;
897 //(p15) ssm psr.i // restore psr.i
898 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
899 ;;
900 KVM_SAVE_REST
901 KVM_SAVE_EXTRA
902 mov rp=r14
903 ;;
904 adds out1=16,sp //regs
905 br.call.sptk.many b6=kvm_emulate
906END(kvm_dispatch_virtualization_fault)
907
908
909ENTRY(kvm_dispatch_interrupt)
910 KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
911 ;;
912 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
913 //mov out0=cr.ivr // pass cr.ivr as first arg
914 adds r3=8,r2 // set up second base pointer for SAVE_REST
915 ;;
916 ssm psr.ic
917 ;;
918 srlz.i
919 ;;
920 //(p15) ssm psr.i
921 addl r14=@gprel(ia64_leave_hypervisor),gp
922 ;;
923 KVM_SAVE_REST
924 mov rp=r14
925 ;;
926 mov out0=r13 // pass pointer to pt_regs as second arg
927 br.call.sptk.many b6=kvm_ia64_handle_irq
928END(kvm_dispatch_interrupt)
929
930
931
932
933GLOBAL_ENTRY(ia64_leave_nested)
934 rsm psr.i
935 ;;
936 adds r21=PT(PR)+16,r12
937 ;;
938 lfetch [r21],PT(CR_IPSR)-PT(PR)
939 adds r2=PT(B6)+16,r12
940 adds r3=PT(R16)+16,r12
941 ;;
942 lfetch [r21]
943 ld8 r28=[r2],8 // load b6
944 adds r29=PT(R24)+16,r12
945
946 ld8.fill r16=[r3]
947 adds r3=PT(AR_CSD)-PT(R16),r3
948 adds r30=PT(AR_CCV)+16,r12
949 ;;
950 ld8.fill r24=[r29]
951 ld8 r15=[r30] // load ar.ccv
952 ;;
953 ld8 r29=[r2],16 // load b7
954 ld8 r30=[r3],16 // load ar.csd
955 ;;
956 ld8 r31=[r2],16 // load ar.ssd
957 ld8.fill r8=[r3],16
958 ;;
959 ld8.fill r9=[r2],16
960 ld8.fill r10=[r3],PT(R17)-PT(R10)
961 ;;
962 ld8.fill r11=[r2],PT(R18)-PT(R11)
963 ld8.fill r17=[r3],16
964 ;;
965 ld8.fill r18=[r2],16
966 ld8.fill r19=[r3],16
967 ;;
968 ld8.fill r20=[r2],16
969 ld8.fill r21=[r3],16
970 mov ar.csd=r30
971 mov ar.ssd=r31
972 ;;
973 rsm psr.i | psr.ic
974 // initiate turning off of interrupt and interruption collection
975 invala // invalidate ALAT
976 ;;
977 srlz.i
978 ;;
979 ld8.fill r22=[r2],24
980 ld8.fill r23=[r3],24
981 mov b6=r28
982 ;;
983 ld8.fill r25=[r2],16
984 ld8.fill r26=[r3],16
985 mov b7=r29
986 ;;
987 ld8.fill r27=[r2],16
988 ld8.fill r28=[r3],16
989 ;;
990 ld8.fill r29=[r2],16
991 ld8.fill r30=[r3],24
992 ;;
993 ld8.fill r31=[r2],PT(F9)-PT(R31)
994 adds r3=PT(F10)-PT(F6),r3
995 ;;
996 ldf.fill f9=[r2],PT(F6)-PT(F9)
997 ldf.fill f10=[r3],PT(F8)-PT(F10)
998 ;;
999 ldf.fill f6=[r2],PT(F7)-PT(F6)
1000 ;;
1001 ldf.fill f7=[r2],PT(F11)-PT(F7)
1002 ldf.fill f8=[r3],32
1003 ;;
1004 srlz.i // ensure interruption collection is off
1005 mov ar.ccv=r15
1006 ;;
1007 bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
1008 ;;
1009 ldf.fill f11=[r2]
1010// mov r18=r13
1011// mov r21=r13
1012 adds r16=PT(CR_IPSR)+16,r12
1013 adds r17=PT(CR_IIP)+16,r12
1014 ;;
1015 ld8 r29=[r16],16 // load cr.ipsr
1016 ld8 r28=[r17],16 // load cr.iip
1017 ;;
1018 ld8 r30=[r16],16 // load cr.ifs
1019 ld8 r25=[r17],16 // load ar.unat
1020 ;;
1021 ld8 r26=[r16],16 // load ar.pfs
1022 ld8 r27=[r17],16 // load ar.rsc
1023 cmp.eq p9,p0=r0,r0
1024 // set p9 to indicate that we should restore cr.ifs
1025 ;;
1026 ld8 r24=[r16],16 // load ar.rnat (may be garbage)
1027 ld8 r23=[r17],16// load ar.bspstore (may be garbage)
1028 ;;
1029 ld8 r31=[r16],16 // load predicates
1030 ld8 r22=[r17],16 // load b0
1031 ;;
1032 ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
1033 ld8.fill r1=[r17],16 // load r1
1034 ;;
1035 ld8.fill r12=[r16],16
1036 ld8.fill r13=[r17],16
1037 ;;
1038 ld8 r20=[r16],16 // ar.fpsr
1039 ld8.fill r15=[r17],16
1040 ;;
1041 ld8.fill r14=[r16],16
1042 ld8.fill r2=[r17]
1043 ;;
1044 ld8.fill r3=[r16]
1045 ;;
1046 mov r16=ar.bsp // get existing backing store pointer
1047 ;;
1048 mov b0=r22
1049 mov ar.pfs=r26
1050 mov cr.ifs=r30
1051 mov cr.ipsr=r29
1052 mov ar.fpsr=r20
1053 mov cr.iip=r28
1054 ;;
1055 mov ar.rsc=r27
1056 mov ar.unat=r25
1057 mov pr=r31,-1
1058 rfi
1059END(ia64_leave_nested)
1060
1061
1062
1063GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
1064 /*
1065 * work.need_resched etc. mustn't get changed
1066 * by this CPU before it returns to
1067 * user- or fsys-mode, hence we
1068 * disable interrupts early on:
1069 */
1070 adds r2 = PT(R4)+16,r12
1071 adds r3 = PT(R5)+16,r12
1072 adds r8 = PT(EML_UNAT)+16,r12
1073 ;;
1074 ld8 r8 = [r8]
1075 ;;
1076 mov ar.unat=r8
1077 ;;
1078 ld8.fill r4=[r2],16 //load r4
1079 ld8.fill r5=[r3],16 //load r5
1080 ;;
1081 ld8.fill r6=[r2] //load r6
1082 ld8.fill r7=[r3] //load r7
1083 ;;
1084END(ia64_leave_hypervisor_prepare)
1085//fall through
1086GLOBAL_ENTRY(ia64_leave_hypervisor)
1087 rsm psr.i
1088 ;;
1089 br.call.sptk.many b0=leave_hypervisor_tail
1090 ;;
1091 adds r20=PT(PR)+16,r12
1092 adds r8=PT(EML_UNAT)+16,r12
1093 ;;
1094 ld8 r8=[r8]
1095 ;;
1096 mov ar.unat=r8
1097 ;;
1098 lfetch [r20],PT(CR_IPSR)-PT(PR)
1099 adds r2 = PT(B6)+16,r12
1100 adds r3 = PT(B7)+16,r12
1101 ;;
1102 lfetch [r20]
1103 ;;
1104 ld8 r24=[r2],16 /* B6 */
1105 ld8 r25=[r3],16 /* B7 */
1106 ;;
1107 ld8 r26=[r2],16 /* ar_csd */
1108 ld8 r27=[r3],16 /* ar_ssd */
1109 mov b6 = r24
1110 ;;
1111 ld8.fill r8=[r2],16
1112 ld8.fill r9=[r3],16
1113 mov b7 = r25
1114 ;;
1115 mov ar.csd = r26
1116 mov ar.ssd = r27
1117 ;;
1118 ld8.fill r10=[r2],PT(R15)-PT(R10)
1119 ld8.fill r11=[r3],PT(R14)-PT(R11)
1120 ;;
1121 ld8.fill r15=[r2],PT(R16)-PT(R15)
1122 ld8.fill r14=[r3],PT(R17)-PT(R14)
1123 ;;
1124 ld8.fill r16=[r2],16
1125 ld8.fill r17=[r3],16
1126 ;;
1127 ld8.fill r18=[r2],16
1128 ld8.fill r19=[r3],16
1129 ;;
1130 ld8.fill r20=[r2],16
1131 ld8.fill r21=[r3],16
1132 ;;
1133 ld8.fill r22=[r2],16
1134 ld8.fill r23=[r3],16
1135 ;;
1136 ld8.fill r24=[r2],16
1137 ld8.fill r25=[r3],16
1138 ;;
1139 ld8.fill r26=[r2],16
1140 ld8.fill r27=[r3],16
1141 ;;
1142 ld8.fill r28=[r2],16
1143 ld8.fill r29=[r3],16
1144 ;;
1145 ld8.fill r30=[r2],PT(F6)-PT(R30)
1146 ld8.fill r31=[r3],PT(F7)-PT(R31)
1147 ;;
1148 rsm psr.i | psr.ic
1149 // initiate turning off of interrupt and interruption collection
1150 invala // invalidate ALAT
1151 ;;
1152 srlz.i // ensure interruption collection is off
1153 ;;
1154 bsw.0
1155 ;;
1156 adds r16 = PT(CR_IPSR)+16,r12
1157 adds r17 = PT(CR_IIP)+16,r12
1158 mov r21=r13 // get current
1159 ;;
1160 ld8 r31=[r16],16 // load cr.ipsr
1161 ld8 r30=[r17],16 // load cr.iip
1162 ;;
1163 ld8 r29=[r16],16 // load cr.ifs
1164 ld8 r28=[r17],16 // load ar.unat
1165 ;;
1166 ld8 r27=[r16],16 // load ar.pfs
1167 ld8 r26=[r17],16 // load ar.rsc
1168 ;;
1169 ld8 r25=[r16],16 // load ar.rnat
1170 ld8 r24=[r17],16 // load ar.bspstore
1171 ;;
1172 ld8 r23=[r16],16 // load predicates
1173 ld8 r22=[r17],16 // load b0
1174 ;;
1175 ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
1176 ld8.fill r1=[r17],16 //load r1
1177 ;;
1178 ld8.fill r12=[r16],16 //load r12
1179 ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
1180 ;;
1181 ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
1182 ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
1183 ;;
1184 ld8.fill r3=[r16] //load r3
1185 ld8 r18=[r17] //load ar_ccv
1186 ;;
1187 mov ar.fpsr=r19
1188 mov ar.ccv=r18
1189 shr.u r18=r20,16
1190 ;;
1191kvm_rbs_switch:
1192 mov r19=96
1193
1194kvm_dont_preserve_current_frame:
1195/*
1196 * To prevent leaking bits between the hypervisor and guest domain,
1197 * we must clear the stacked registers in the "invalid" partition here.
1198 * (Clearing runs at about 5 registers/cycle on McKinley.)
1199 */
1200# define pRecurse p6
1201# define pReturn p7
1202# define Nregs 14
1203
1204 alloc loc0=ar.pfs,2,Nregs-2,2,0
1205 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
1206 sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
1207 ;;
1208 mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
1209 shladd in0=loc1,3,r19
1210 mov in1=0
1211 ;;
1212 TEXT_ALIGN(32)
1213kvm_rse_clear_invalid:
1214 alloc loc0=ar.pfs,2,Nregs-2,2,0
1215 cmp.lt pRecurse,p0=Nregs*8,in0
1216 // if more than Nregs regs left to clear, (re)curse
1217 add out0=-Nregs*8,in0
1218 add out1=1,in1 // increment recursion count
1219 mov loc1=0
1220 mov loc2=0
1221 ;;
1222 mov loc3=0
1223 mov loc4=0
1224 mov loc5=0
1225 mov loc6=0
1226 mov loc7=0
1227(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
1228 ;;
1229 mov loc8=0
1230 mov loc9=0
1231 cmp.ne pReturn,p0=r0,in1
1232 // if recursion count != 0, we need to do a br.ret
1233 mov loc10=0
1234 mov loc11=0
1235(pReturn) br.ret.dptk.many b0
1236
1237# undef pRecurse
1238# undef pReturn
1239
1240// loadrs has already been shifted
1241 alloc r16=ar.pfs,0,0,0,0 // drop current register frame
1242 ;;
1243 loadrs
1244 ;;
1245 mov ar.bspstore=r24
1246 ;;
1247 mov ar.unat=r28
1248 mov ar.rnat=r25
1249 mov ar.rsc=r26
1250 ;;
1251 mov cr.ipsr=r31
1252 mov cr.iip=r30
1253 mov cr.ifs=r29
1254 mov ar.pfs=r27
1255 adds r18=VMM_VPD_BASE_OFFSET,r21
1256 ;;
1257 ld8 r18=[r18] //vpd
1258 adds r17=VMM_VCPU_ISR_OFFSET,r21
1259 ;;
1260 ld8 r17=[r17]
1261 adds r19=VMM_VPD_VPSR_OFFSET,r18
1262 ;;
1263 ld8 r19=[r19] //vpsr
1264 adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21
1265 ;;
1266 ld8 r20=[r20]
1267 ;;
1268//vsa_sync_write_start
1269 mov r25=r18
1270 adds r16= VMM_VCPU_GP_OFFSET,r21
1271 ;;
272 ld8 r16= [r16] // load guest gp (used below to form the return address in r24)
1273 movl r24=@gprel(ia64_vmm_entry) // calculate return address
1274 ;;
1275 add r24=r24,r16
1276 ;;
1277 add r16=PAL_VPS_SYNC_WRITE,r20
1278 ;;
1279 mov b0=r16
1280 br.cond.sptk b0 // call the service
1281 ;;
1282END(ia64_leave_hypervisor)
1283// fall through
1284GLOBAL_ENTRY(ia64_vmm_entry)
1285/*
1286 * must be at bank 0
1287 * parameter:
1288 * r17:cr.isr
1289 * r18:vpd
1290 * r19:vpsr
1291 * r20:__vsa_base
1292 * r22:b0
1293 * r23:predicate
1294 */
1295 mov r24=r22
1296 mov r25=r18
1297 tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
1298 ;;
1299 (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
1300 (p1) br.sptk.many ia64_vmm_entry_out
1301 ;;
1302 tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir
1303 ;;
1304 (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
1305 (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
1306 (p2) ld8 r26=[r25]
1307 ;;
1308ia64_vmm_entry_out:
1309 mov pr=r23,-2
1310 mov b0=r29
1311 ;;
1312 br.cond.sptk b0 // call pal service
1313END(ia64_vmm_entry)
1314
1315
1316
1317/*
1318 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
1319 * u64 arg3, u64 arg4, u64 arg5,
1320 * u64 arg6, u64 arg7);
1321 *
1322 * XXX: The currently defined services use only 4 args at the max. The
1323 * rest are not consumed.
1324 */
1325GLOBAL_ENTRY(ia64_call_vsa)
1326 .regstk 4,4,0,0
1327
1328rpsave = loc0
1329pfssave = loc1
1330psrsave = loc2
1331entry = loc3
1332hostret = r24
1333
1334 alloc pfssave=ar.pfs,4,4,0,0
1335 mov rpsave=rp
1336 adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
1337 ;;
1338 ld8 entry=[entry]
13391: mov hostret=ip
1340 mov r25=in1 // copy arguments
1341 mov r26=in2
1342 mov r27=in3
1343 mov psrsave=psr
1344 ;;
1345 tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
1346 tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
1347 ;;
1348 add hostret=2f-1b,hostret // calculate return address
1349 add entry=entry,in0
1350 ;;
1351 rsm psr.i | psr.ic
1352 ;;
1353 srlz.i
1354 mov b6=entry
1355 br.cond.sptk b6 // call the service
13562:
1357 // Architectural sequence for enabling interrupts if necessary
1358(p7) ssm psr.ic
1359 ;;
1360(p7) srlz.i
1361 ;;
1362//(p6) ssm psr.i
1363 ;;
1364 mov rp=rpsave
1365 mov ar.pfs=pfssave
1366 mov r8=r31
1367 ;;
1368 srlz.d
1369 br.ret.sptk rp
1370
1371END(ia64_call_vsa)
1372
1373#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100)
1374
1375GLOBAL_ENTRY(vmm_reset_entry)
1376 //set up ipsr, iip, vpd.vpsr, dcr
1377 // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
1378 // For DCR: all bits 0
1379 adds r14=-VMM_PT_REGS_SIZE, r12
1380 ;;
1381 movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
1382 movl r10=0x8000000000000000
1383 adds r16=PT(CR_IIP), r14
1384 adds r20=PT(R1), r14
1385 ;;
1386 rsm psr.ic | psr.i
1387 ;;
1388 srlz.i
1389 ;;
1390 bsw.0
1391 ;;
1392 mov r21 =r13
1393 ;;
1394 bsw.1
1395 ;;
1396 mov ar.rsc = 0
1397 ;;
1398 flushrs
1399 ;;
1400 mov ar.bspstore = 0
1401 // clear BSPSTORE
1402 ;;
1403 mov cr.ipsr=r6
1404 mov cr.ifs=r10
1405 ld8 r4 = [r16] // Set init iip for first run.
1406 ld8 r1 = [r20]
1407 ;;
1408 mov cr.iip=r4
1409 ;;
1410 adds r16=VMM_VPD_BASE_OFFSET,r13
1411 adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13
1412 ;;
1413 ld8 r18=[r16]
1414 ld8 r20=[r20]
1415 ;;
1416 adds r19=VMM_VPD_VPSR_OFFSET,r18
1417 ;;
1418 ld8 r19=[r19]
1419 mov r17=r0
1420 mov r22=r0
1421 mov r23=r0
1422 br.cond.sptk ia64_vmm_entry
1423 br.ret.sptk b0
1424END(vmm_reset_entry)
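The .org directives above place each handler at its architected offset in the interruption vector table: entries 0 through 19 are 64 bundles (0x400 bytes) apart, and entries 20 through 67 are 16 bundles (0x100 bytes) apart, starting at 0x5000. A standalone sketch of that arithmetic follows (illustrative only, not part of the patch; the helper name is made up):

#include <assert.h>
#include <stdio.h>

/*
 * Illustrative helper mirroring the ".org kvm_ia64_ivt+0x..." spacing above:
 * the first 20 vectors occupy 64 bundles (0x400 bytes) each, the rest occupy
 * 16 bundles (0x100 bytes) each, starting at offset 0x5000.
 */
static unsigned long ivt_entry_offset(unsigned int entry)
{
        assert(entry <= 67);
        return entry < 20 ? 0x400UL * entry
                          : 0x5000UL + 0x100UL * (entry - 20);
}

int main(void)
{
        /* Matches the vectors above: entry 11 -> 0x2c00, entry 37 -> 0x6100. */
        printf("%#lx %#lx\n", ivt_entry_offset(11), ivt_entry_offset(37));
        return 0;
}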
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
new file mode 100644
index 000000000000..f6c5617e16af
--- /dev/null
+++ b/arch/ia64/kvm/vti.h
@@ -0,0 +1,290 @@
1/*
2 * vti.h: prototypes for the general VT-related interface
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
6 * Fred Yang (fred.yang@intel.com)
7 * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
8 *
9 * Copyright (c) 2007, Intel Corporation.
10 * Zhang xiantao <xiantao.zhang@intel.com>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
23 * Place - Suite 330, Boston, MA 02111-1307 USA.
24 */
25#ifndef _KVM_VT_I_H
26#define _KVM_VT_I_H
27
28#ifndef __ASSEMBLY__
29#include <asm/page.h>
30
31#include <linux/kvm_host.h>
32
33/* define itr.i and itr.d in ia64_itr function */
34#define ITR 0x01
35#define DTR 0x02
36#define IaDTR 0x03
37
38#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
39#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/
40
41#define RR6 (6UL<<61)
42#define RR7 (7UL<<61)
43
44
45/* config_options in pal_vp_init_env */
46#define VP_INITIALIZE 1UL
47#define VP_FR_PMC (1UL<<1)
48#define VP_OPCODE (1UL<<8)
49#define VP_CAUSE (1UL<<9)
50#define VP_FW_ACC (1UL<<63)
51
52/* init vp env with initializing vm_buffer */
53#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\
54 VP_OPCODE | VP_CAUSE | VP_FW_ACC)
55/* init vp env without initializing vm_buffer */
56#define VP_INIT_ENV (VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC)
57
58#define PAL_VP_CREATE 265
59/* Stacked Virt. Initializes a new VPD for the operation of
60 * a new virtual processor in the virtual environment.
61 */
62#define PAL_VP_ENV_INFO 266
63/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
64#define PAL_VP_EXIT_ENV 267
65/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
66#define PAL_VP_INIT_ENV 268
67/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
68#define PAL_VP_REGISTER 269
69/*Stacked Virt. Register a different host IVT for the virtual processor.*/
70#define PAL_VP_RESUME 270
71/* Renamed from PAL_VP_RESUME */
72#define PAL_VP_RESTORE 270
73/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
74#define PAL_VP_SUSPEND 271
75/* Renamed from PAL_VP_SUSPEND */
76#define PAL_VP_SAVE 271
77/* Stacked Virt. Suspends operation for the specified virtual processor on
78 * the logical processor.
79 */
80#define PAL_VP_TERMINATE 272
81/* Stacked Virt. Terminates operation for the specified virtual processor.*/
82
83union vac {
84 unsigned long value;
85 struct {
86 int a_int:1;
87 int a_from_int_cr:1;
88 int a_to_int_cr:1;
89 int a_from_psr:1;
90 int a_from_cpuid:1;
91 int a_cover:1;
92 int a_bsw:1;
93 long reserved:57;
94 };
95};
96
97union vdc {
98 unsigned long value;
99 struct {
100 int d_vmsw:1;
101 int d_extint:1;
102 int d_ibr_dbr:1;
103 int d_pmc:1;
104 int d_to_pmd:1;
105 int d_itm:1;
106 long reserved:58;
107 };
108};
109
110struct vpd {
111 union vac vac;
112 union vdc vdc;
113 unsigned long virt_env_vaddr;
114 unsigned long reserved1[29];
115 unsigned long vhpi;
116 unsigned long reserved2[95];
117 unsigned long vgr[16];
118 unsigned long vbgr[16];
119 unsigned long vnat;
120 unsigned long vbnat;
121 unsigned long vcpuid[5];
122 unsigned long reserved3[11];
123 unsigned long vpsr;
124 unsigned long vpr;
125 unsigned long reserved4[76];
126 union {
127 unsigned long vcr[128];
128 struct {
129 unsigned long dcr;
130 unsigned long itm;
131 unsigned long iva;
132 unsigned long rsv1[5];
133 unsigned long pta;
134 unsigned long rsv2[7];
135 unsigned long ipsr;
136 unsigned long isr;
137 unsigned long rsv3;
138 unsigned long iip;
139 unsigned long ifa;
140 unsigned long itir;
141 unsigned long iipa;
142 unsigned long ifs;
143 unsigned long iim;
144 unsigned long iha;
145 unsigned long rsv4[38];
146 unsigned long lid;
147 unsigned long ivr;
148 unsigned long tpr;
149 unsigned long eoi;
150 unsigned long irr[4];
151 unsigned long itv;
152 unsigned long pmv;
153 unsigned long cmcv;
154 unsigned long rsv5[5];
155 unsigned long lrr0;
156 unsigned long lrr1;
157 unsigned long rsv6[46];
158 };
159 };
160 unsigned long reserved5[128];
161 unsigned long reserved6[3456];
162 unsigned long vmm_avail[128];
163 unsigned long reserved7[4096];
164};
165
166#define PAL_PROC_VM_BIT (1UL << 40)
167#define PAL_PROC_VMSW_BIT (1UL << 54)
168
169static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
170 u64 *vp_env_info)
171{
172 struct ia64_pal_retval iprv;
173 PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
174 *buffer_size = iprv.v0;
175 *vp_env_info = iprv.v1;
176 return iprv.status;
177}
178
179static inline s64 ia64_pal_vp_exit_env(u64 iva)
180{
181 struct ia64_pal_retval iprv;
182
183 PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
184 return iprv.status;
185}
186
187static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
188 u64 vbase_addr, u64 *vsa_base)
189{
190 struct ia64_pal_retval iprv;
191
192 PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
193 vbase_addr);
194 *vsa_base = iprv.v0;
195
196 return iprv.status;
197}
198
199static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
200{
201 struct ia64_pal_retval iprv;
202
203 PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
204
205 return iprv.status;
206}
207
208static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
209{
210 struct ia64_pal_retval iprv;
211
212 PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
213
214 return iprv.status;
215}
216
217#endif
218
219/*VPD field offset*/
220#define VPD_VAC_START_OFFSET 0
221#define VPD_VDC_START_OFFSET 8
222#define VPD_VHPI_START_OFFSET 256
223#define VPD_VGR_START_OFFSET 1024
224#define VPD_VBGR_START_OFFSET 1152
225#define VPD_VNAT_START_OFFSET 1280
226#define VPD_VBNAT_START_OFFSET 1288
227#define VPD_VCPUID_START_OFFSET 1296
228#define VPD_VPSR_START_OFFSET 1424
229#define VPD_VPR_START_OFFSET 1432
230#define VPD_VRSE_CFLE_START_OFFSET 1440
231#define VPD_VCR_START_OFFSET 2048
232#define VPD_VTPR_START_OFFSET 2576
233#define VPD_VRR_START_OFFSET 3072
234#define VPD_VMM_VAIL_START_OFFSET 31744
235
236/*Virtualization faults*/
237
238#define EVENT_MOV_TO_AR 1
239#define EVENT_MOV_TO_AR_IMM 2
240#define EVENT_MOV_FROM_AR 3
241#define EVENT_MOV_TO_CR 4
242#define EVENT_MOV_FROM_CR 5
243#define EVENT_MOV_TO_PSR 6
244#define EVENT_MOV_FROM_PSR 7
245#define EVENT_ITC_D 8
246#define EVENT_ITC_I 9
247#define EVENT_MOV_TO_RR 10
248#define EVENT_MOV_TO_DBR 11
249#define EVENT_MOV_TO_IBR 12
250#define EVENT_MOV_TO_PKR 13
251#define EVENT_MOV_TO_PMC 14
252#define EVENT_MOV_TO_PMD 15
253#define EVENT_ITR_D 16
254#define EVENT_ITR_I 17
255#define EVENT_MOV_FROM_RR 18
256#define EVENT_MOV_FROM_DBR 19
257#define EVENT_MOV_FROM_IBR 20
258#define EVENT_MOV_FROM_PKR 21
259#define EVENT_MOV_FROM_PMC 22
260#define EVENT_MOV_FROM_CPUID 23
261#define EVENT_SSM 24
262#define EVENT_RSM 25
263#define EVENT_PTC_L 26
264#define EVENT_PTC_G 27
265#define EVENT_PTC_GA 28
266#define EVENT_PTR_D 29
267#define EVENT_PTR_I 30
268#define EVENT_THASH 31
269#define EVENT_TTAG 32
270#define EVENT_TPA 33
271#define EVENT_TAK 34
272#define EVENT_PTC_E 35
273#define EVENT_COVER 36
274#define EVENT_RFI 37
275#define EVENT_BSW_0 38
276#define EVENT_BSW_1 39
277#define EVENT_VMSW 40
278
279/* PAL virtual services offsets */
280#define PAL_VPS_RESUME_NORMAL 0x0000
281#define PAL_VPS_RESUME_HANDLER 0x0400
282#define PAL_VPS_SYNC_READ 0x0800
283#define PAL_VPS_SYNC_WRITE 0x0c00
284#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000
285#define PAL_VPS_THASH 0x1400
286#define PAL_VPS_TTAG 0x1800
287#define PAL_VPS_RESTORE 0x1c00
288#define PAL_VPS_SAVE 0x2000
289
290#endif /* _KVM_VT_I_H */
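The VPD_*_START_OFFSET constants above are byte offsets into struct vpd, whose members are all 8-byte unsigned longs (or arrays of them). A standalone arithmetic sketch that re-derives a few of them from the declared member sizes (illustrative only, not part of the patch):

#include <stdio.h>

int main(void)
{
        unsigned long off = 0;

        off += 8;                 /* vac                                   */
        off += 8;                 /* vdc                                   */
        off += 8 + 29 * 8;        /* virt_env_vaddr + reserved1[29]        */
        printf("vhpi %lu\n", off);        /* 256,  VPD_VHPI_START_OFFSET   */
        off += 8 + 95 * 8;        /* vhpi + reserved2[95]                  */
        printf("vgr  %lu\n", off);        /* 1024, VPD_VGR_START_OFFSET    */
        off += 16 * 8 + 16 * 8 + 8 + 8 + 5 * 8 + 11 * 8;
                                  /* vgr, vbgr, vnat, vbnat, vcpuid[5], reserved3[11] */
        printf("vpsr %lu\n", off);        /* 1424, VPD_VPSR_START_OFFSET   */
        off += 8;                 /* vpsr                                  */
        printf("vpr  %lu\n", off);        /* 1432, VPD_VPR_START_OFFSET    */
        off += 8 + 76 * 8;        /* vpr + reserved4[76]                   */
        printf("vcr  %lu\n", off);        /* 2048, VPD_VCR_START_OFFSET    */
        return 0;
}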
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
new file mode 100644
index 000000000000..def4576d22b1
--- /dev/null
+++ b/arch/ia64/kvm/vtlb.c
@@ -0,0 +1,636 @@
1/*
2 * vtlb.c: guest virtual tlb handling module.
3 * Copyright (c) 2004, Intel Corporation.
4 * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
5 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
6 *
7 * Copyright (c) 2007, Intel Corporation.
8 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
9 * Xiantao Zhang <xiantao.zhang@intel.com>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22 * Place - Suite 330, Boston, MA 02111-1307 USA.
23 *
24 */
25
26#include "vcpu.h"
27
28#include <linux/rwsem.h>
29
30#include <asm/tlb.h>
31
32/*
33 * Check to see if the address rid:va is translated by the TLB
34 */
35
36static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
37{
38 return ((trp->p) && (trp->rid == rid)
39 && ((va-trp->vadr) < PSIZE(trp->ps)));
40}
41
42/*
43 * Only for GUEST TR format.
44 */
45static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
46{
47 u64 sa1, ea1;
48
49 if (!trp->p || trp->rid != rid)
50 return 0;
51
52 sa1 = trp->vadr;
53 ea1 = sa1 + PSIZE(trp->ps) - 1;
54 eva -= 1;
55 if ((sva > ea1) || (sa1 > eva))
56 return 0;
57 else
58 return 1;
59
60}
61
62void machine_tlb_purge(u64 va, u64 ps)
63{
64 ia64_ptcl(va, ps << 2);
65}
66
67void local_flush_tlb_all(void)
68{
69 int i, j;
70 unsigned long flags, count0, count1;
71 unsigned long stride0, stride1, addr;
72
73 addr = current_vcpu->arch.ptce_base;
74 count0 = current_vcpu->arch.ptce_count[0];
75 count1 = current_vcpu->arch.ptce_count[1];
76 stride0 = current_vcpu->arch.ptce_stride[0];
77 stride1 = current_vcpu->arch.ptce_stride[1];
78
79 local_irq_save(flags);
80 for (i = 0; i < count0; ++i) {
81 for (j = 0; j < count1; ++j) {
82 ia64_ptce(addr);
83 addr += stride1;
84 }
85 addr += stride0;
86 }
87 local_irq_restore(flags);
88 ia64_srlz_i(); /* srlz.i implies srlz.d */
89}
90
91int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
92{
93 union ia64_rr vrr;
94 union ia64_pta vpta;
95 struct ia64_psr vpsr;
96
97 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
98 vrr.val = vcpu_get_rr(vcpu, vadr);
99 vpta.val = vcpu_get_pta(vcpu);
100
101 if (vrr.ve & vpta.ve) {
102 switch (ref) {
103 case DATA_REF:
104 case NA_REF:
105 return vpsr.dt;
106 case INST_REF:
107 return vpsr.dt && vpsr.it && vpsr.ic;
108 case RSE_REF:
109 return vpsr.dt && vpsr.rt;
110
111 }
112 }
113 return 0;
114}
115
116struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
117{
118 u64 index, pfn, rid, pfn_bits;
119
120 pfn_bits = vpta.size - 5 - 8;
121 pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
122 rid = _REGION_ID(vrr);
123 index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
124 *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
125
126 return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
127 (index << 5));
128}
129
130struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
131{
132
133 struct thash_data *trp;
134 int i;
135 u64 rid;
136
137 rid = vcpu_get_rr(vcpu, va);
138 rid = rid & RR_RID_MASK;
139 if (type == D_TLB) {
140 if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
141 for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
142 i < NDTRS; i++, trp++) {
143 if (__is_tr_translated(trp, rid, va))
144 return trp;
145 }
146 }
147 } else {
148 if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
149 for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
150 i < NITRS; i++, trp++) {
151 if (__is_tr_translated(trp, rid, va))
152 return trp;
153 }
154 }
155 }
156
157 return NULL;
158}
159
160static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
161{
162 union ia64_rr rr;
163 struct thash_data *head;
164 unsigned long ps, gpaddr;
165
166 ps = itir_ps(itir);
167
168 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
169 (ifa & ((1UL << ps) - 1));
170
171 rr.val = ia64_get_rr(ifa);
172 head = (struct thash_data *)ia64_thash(ifa);
173 head->etag = INVALID_TI_TAG;
174 ia64_mf();
175 head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
176 head->itir = rr.ps << 2;
177 head->etag = ia64_ttag(ifa);
178 head->gpaddr = gpaddr;
179}
180
181void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
182{
183 u64 i, dirty_pages = 1;
184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
186 void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
187 + KVM_MEM_DIRTY_LOG_OFS;
188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
189
190 vmm_spin_lock(lock);
191 for (i = 0; i < dirty_pages; i++) {
192 /* avoid RMW */
193 if (!test_bit(base_gfn + i, dirty_bitmap))
194 set_bit(base_gfn + i , dirty_bitmap);
195 }
196 vmm_spin_unlock(lock);
197}
198
199void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
200{
201 u64 phy_pte, psr;
202 union ia64_rr mrr;
203
204 mrr.val = ia64_get_rr(va);
205 phy_pte = translate_phy_pte(&pte, itir, va);
206
207 if (itir_ps(itir) >= mrr.ps) {
208 vhpt_insert(phy_pte, itir, va, pte);
209 } else {
210 phy_pte &= ~PAGE_FLAGS_RV_MASK;
211 psr = ia64_clear_ic();
212 ia64_itc(type, va, phy_pte, itir_ps(itir));
213 ia64_set_psr(psr);
214 }
215
216 if (!(pte&VTLB_PTE_IO))
217 mark_pages_dirty(v, pte, itir_ps(itir));
218}
219
220/*
221 * vhpt lookup
222 */
223struct thash_data *vhpt_lookup(u64 va)
224{
225 struct thash_data *head;
226 u64 tag;
227
228 head = (struct thash_data *)ia64_thash(va);
229 tag = ia64_ttag(va);
230 if (head->etag == tag)
231 return head;
232 return NULL;
233}
234
235u64 guest_vhpt_lookup(u64 iha, u64 *pte)
236{
237 u64 ret;
238 struct thash_data *data;
239
240 data = __vtr_lookup(current_vcpu, iha, D_TLB);
241 if (data != NULL)
242 thash_vhpt_insert(current_vcpu, data->page_flags,
243 data->itir, iha, D_TLB);
244
245 asm volatile ("rsm psr.ic|psr.i;;"
246 "srlz.d;;"
247 "ld8.s r9=[%1];;"
248 "tnat.nz p6,p7=r9;;"
249 "(p6) mov %0=1;"
250 "(p6) mov r9=r0;"
251 "(p7) extr.u r9=r9,0,53;;"
252 "(p7) mov %0=r0;"
253 "(p7) st8 [%2]=r9;;"
254 "ssm psr.ic;;"
255 "srlz.d;;"
256 /* "ssm psr.i;;" should be restored here once interrupts are enabled in the vmm */
257 : "=r"(ret) : "r"(iha), "r"(pte):"memory");
258
259 return ret;
260}
261
262/*
263 * purge software guest tlb
264 */
265
266static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
267{
268 struct thash_data *cur;
269 u64 start, curadr, size, psbits, tag, rr_ps, num;
270 union ia64_rr vrr;
271 struct thash_cb *hcb = &v->arch.vtlb;
272
273 vrr.val = vcpu_get_rr(v, va);
274 psbits = VMX(v, psbits[(va >> 61)]);
275 start = va & ~((1UL << ps) - 1);
276 while (psbits) {
277 curadr = start;
278 rr_ps = __ffs(psbits);
279 psbits &= ~(1UL << rr_ps);
280 num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
281 size = PSIZE(rr_ps);
282 vrr.ps = rr_ps;
283 while (num) {
284 cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
285 if (cur->etag == tag && cur->ps == rr_ps)
286 cur->etag = INVALID_TI_TAG;
287 curadr += size;
288 num--;
289 }
290 }
291}
292
293
294/*
295 * purge VHPT and machine TLB
296 */
297static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
298{
299 struct thash_data *cur;
300 u64 start, size, tag, num;
301 union ia64_rr rr;
302
303 start = va & ~((1UL << ps) - 1);
304 rr.val = ia64_get_rr(va);
305 size = PSIZE(rr.ps);
306 num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
307 while (num) {
308 cur = (struct thash_data *)ia64_thash(start);
309 tag = ia64_ttag(start);
310 if (cur->etag == tag)
311 cur->etag = INVALID_TI_TAG;
312 start += size;
313 num--;
314 }
315 machine_tlb_purge(va, ps);
316}
317
318/*
319 * Insert an entry into hash TLB or VHPT.
320 * NOTES:
321 * 1: When inserting a VHPT entry via thash, "va" must be an
322 * address covered by the inserted machine VHPT entry.
323 * 2: The entry format is always the TLB format.
324 * 3: The caller needs to make sure the new entry will not overlap
325 * with any existing entry.
326 */
327void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
328{
329 struct thash_data *head;
330 union ia64_rr vrr;
331 u64 tag;
332 struct thash_cb *hcb = &v->arch.vtlb;
333
334 vrr.val = vcpu_get_rr(v, va);
335 vrr.ps = itir_ps(itir);
336 VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
337 head = vsa_thash(hcb->pta, va, vrr.val, &tag);
338 head->page_flags = pte;
339 head->itir = itir;
340 head->etag = tag;
341}
342
343int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
344{
345 struct thash_data *trp;
346 int i;
347 u64 end, rid;
348
349 rid = vcpu_get_rr(vcpu, va);
350 rid = rid & RR_RID_MASK;
351 end = va + PSIZE(ps);
352 if (type == D_TLB) {
353 if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
354 for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
355 i < NDTRS; i++, trp++) {
356 if (__is_tr_overlap(trp, rid, va, end))
357 return i;
358 }
359 }
360 } else {
361 if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
362 for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
363 i < NITRS; i++, trp++) {
364 if (__is_tr_overlap(trp, rid, va, end))
365 return i;
366 }
367 }
368 }
369 return -1;
370}
371
372/*
373 * Purge entries in VTLB and VHPT
374 */
375void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
376{
377 if (vcpu_quick_region_check(v->arch.tc_regions, va))
378 vtlb_purge(v, va, ps);
379 vhpt_purge(v, va, ps);
380}
381
382void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
383{
384 u64 old_va = va;
385 va = REGION_OFFSET(va);
386 if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
387 vtlb_purge(v, va, ps);
388 vhpt_purge(v, va, ps);
389}
390
391u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
392{
393 u64 ps, ps_mask, paddr, maddr;
394 union pte_flags phy_pte;
395
396 ps = itir_ps(itir);
397 ps_mask = ~((1UL << ps) - 1);
398 phy_pte.val = *pte;
399 paddr = *pte;
400 paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
401 maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT);
402 if (maddr & GPFN_IO_MASK) {
403 *pte |= VTLB_PTE_IO;
404 return -1;
405 }
406 maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
407 (paddr & ~PAGE_MASK);
408 phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
409 return phy_pte.val;
410}
411
412/*
413 * Purge overlapping TCs and then insert the new entry to emulate itc ops.
414 * Notes: Only TC entries can be purged and inserted this way.
415 * A return value of 1 indicates the access is MMIO.
416 */
417int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
418 u64 ifa, int type)
419{
420 u64 ps;
421 u64 phy_pte;
422 union ia64_rr vrr, mrr;
423 int ret = 0;
424
425 ps = itir_ps(itir);
426 vrr.val = vcpu_get_rr(v, ifa);
427 mrr.val = ia64_get_rr(ifa);
428
429 phy_pte = translate_phy_pte(&pte, itir, ifa);
430
431 /* Ensure WB attribute if pte is related to a normal mem page,
432 * which is required by vga acceleration since qemu maps shared
433 * vram buffer with WB.
434 */
435 if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) {
436 pte &= ~_PAGE_MA_MASK;
437 phy_pte &= ~_PAGE_MA_MASK;
438 }
439
440 if (pte & VTLB_PTE_IO)
441 ret = 1;
442
443 vtlb_purge(v, ifa, ps);
444 vhpt_purge(v, ifa, ps);
445
446 if (ps == mrr.ps) {
447 if (!(pte&VTLB_PTE_IO)) {
448 vhpt_insert(phy_pte, itir, ifa, pte);
449 } else {
450 vtlb_insert(v, pte, itir, ifa);
451 vcpu_quick_region_set(VMX(v, tc_regions), ifa);
452 }
453 } else if (ps > mrr.ps) {
454 vtlb_insert(v, pte, itir, ifa);
455 vcpu_quick_region_set(VMX(v, tc_regions), ifa);
456 if (!(pte&VTLB_PTE_IO))
457 vhpt_insert(phy_pte, itir, ifa, pte);
458 } else {
459 u64 psr;
460 phy_pte &= ~PAGE_FLAGS_RV_MASK;
461 psr = ia64_clear_ic();
462 ia64_itc(type, ifa, phy_pte, ps);
463 ia64_set_psr(psr);
464 }
465 if (!(pte&VTLB_PTE_IO))
466 mark_pages_dirty(v, pte, ps);
467
468 return ret;
469}
470
471/*
472 * Purge all TCs or VHPT entries including those in Hash table.
473 *
474 */
475
476void thash_purge_all(struct kvm_vcpu *v)
477{
478 int i;
479 struct thash_data *head;
480 struct thash_cb *vtlb, *vhpt;
481 vtlb = &v->arch.vtlb;
482 vhpt = &v->arch.vhpt;
483
484 for (i = 0; i < 8; i++)
485 VMX(v, psbits[i]) = 0;
486
487 head = vtlb->hash;
488 for (i = 0; i < vtlb->num; i++) {
489 head->page_flags = 0;
490 head->etag = INVALID_TI_TAG;
491 head->itir = 0;
492 head->next = 0;
493 head++;
494 }
495
496 head = vhpt->hash;
497 for (i = 0; i < vhpt->num; i++) {
498 head->page_flags = 0;
499 head->etag = INVALID_TI_TAG;
500 head->itir = 0;
501 head->next = 0;
502 head++;
503 }
504
505 local_flush_tlb_all();
506}
507
508
509/*
510 * Lookup the hash table and its collision chain to find an entry
511 * covering the address rid:va.
512 *
513 * INPUT:
514 * in: TLB format for both the VHPT and the VTLB.
515 */
516
517struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
518{
519 struct thash_data *cch;
520 u64 psbits, ps, tag;
521 union ia64_rr vrr;
522
523 struct thash_cb *hcb = &v->arch.vtlb;
524
525 cch = __vtr_lookup(v, va, is_data);
526 if (cch)
527 return cch;
528
529 if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
530 return NULL;
531
532 psbits = VMX(v, psbits[(va >> 61)]);
533 vrr.val = vcpu_get_rr(v, va);
534 while (psbits) {
535 ps = __ffs(psbits);
536 psbits &= ~(1UL << ps);
537 vrr.ps = ps;
538 cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
539 if (cch->etag == tag && cch->ps == ps)
540 return cch;
541 }
542
543 return NULL;
544}
545
546
547/*
548 * Initialize internal control data before service.
549 */
550void thash_init(struct thash_cb *hcb, u64 sz)
551{
552 int i;
553 struct thash_data *head;
554
555 hcb->pta.val = (unsigned long)hcb->hash;
556 hcb->pta.vf = 1;
557 hcb->pta.ve = 1;
558 hcb->pta.size = sz;
559 head = hcb->hash;
560 for (i = 0; i < hcb->num; i++) {
561 head->page_flags = 0;
562 head->itir = 0;
563 head->etag = INVALID_TI_TAG;
564 head->next = 0;
565 head++;
566 }
567}
568
569u64 kvm_lookup_mpa(u64 gpfn)
570{
571 u64 *base = (u64 *) KVM_P2M_BASE;
572 return *(base + gpfn);
573}
574
575u64 kvm_gpa_to_mpa(u64 gpa)
576{
577 u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
578 return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
579}
580
581
582/*
583 * Fetch guest bundle code.
584 * INPUT:
585 * gip: guest ip
586 * pbundle: used to return fetched bundle.
587 */
588int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
589{
590 u64 gpip = 0; /* guest physical IP*/
591 u64 *vpa;
592 struct thash_data *tlb;
593 u64 maddr;
594
595 if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
596 /* I-side physical mode */
597 gpip = gip;
598 } else {
599 tlb = vtlb_lookup(vcpu, gip, I_TLB);
600 if (tlb)
601 gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
602 (gip & (PSIZE(tlb->ps) - 1));
603 }
604 if (gpip) {
605 maddr = kvm_gpa_to_mpa(gpip);
606 } else {
607 tlb = vhpt_lookup(gip);
608 if (tlb == NULL) {
609 ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
610 return IA64_FAULT;
611 }
612 maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
613 | (gip & (PSIZE(tlb->ps) - 1));
614 }
615 vpa = (u64 *)__kvm_va(maddr);
616
617 pbundle->i64[0] = *vpa++;
618 pbundle->i64[1] = *vpa;
619
620 return IA64_NO_FAULT;
621}
622
623
624void kvm_init_vhpt(struct kvm_vcpu *v)
625{
626 v->arch.vhpt.num = VHPT_NUM_ENTRIES;
627 thash_init(&v->arch.vhpt, VHPT_SHIFT);
628 ia64_set_pta(v->arch.vhpt.pta.val);
629 /*Enable VHPT here?*/
630}
631
632void kvm_init_vtlb(struct kvm_vcpu *v)
633{
634 v->arch.vtlb.num = VTLB_NUM_ENTRIES;
635 thash_init(&v->arch.vtlb, VTLB_SHIFT);
636}
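vsa_thash() above builds a hash-bucket index from the low 8 bits of the region id plus the low pfn_bits bits of the virtual page number, and a tag from the remaining rid and pfn bits; each bucket (struct thash_data) is 32 bytes, hence the final shift by 5. A standalone sketch of that computation with made-up inputs (the rid, pfn, and table size below are illustrative values, not real VHPT parameters):

#include <stdio.h>

int main(void)
{
        unsigned long size = 20;               /* table of 2^20 bytes, like vpta.size  */
        unsigned long pfn_bits = size - 5 - 8; /* 5: 32-byte entries, 8: rid bits used */
        unsigned long rid = 0x1234;            /* example region id                    */
        unsigned long pfn = 0xabcdef;          /* example virtual page number          */

        unsigned long index = ((rid & 0xff) << pfn_bits) |
                              (pfn & ((1UL << pfn_bits) - 1));
        unsigned long tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);

        /* Byte offset of the bucket within the table that starts at pta.base. */
        printf("index=%#lx tag=%#lx offset=%#lx\n", index, tag, index << 5);
        return 0;
}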
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4bb2e9310a56..4e40c122bf26 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -626,20 +626,6 @@ config ADVANCED_OPTIONS
 comment "Default settings for advanced configuration options are used"
         depends on !ADVANCED_OPTIONS
 
-config HIGHMEM_START_BOOL
-        bool "Set high memory pool address"
-        depends on ADVANCED_OPTIONS && HIGHMEM
-        help
-          This option allows you to set the base address of the kernel virtual
-          area used to map high memory pages. This can be useful in
-          optimizing the layout of kernel virtual memory.
-
-          Say N here unless you know what you are doing.
-
-config HIGHMEM_START
-        hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL
-        default "0xfe000000"
-
 config LOWMEM_SIZE_BOOL
         bool "Set maximum low memory"
         depends on ADVANCED_OPTIONS
@@ -656,21 +642,76 @@ config LOWMEM_SIZE
656 hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL 642 hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL
657 default "0x30000000" 643 default "0x30000000"
658 644
645config RELOCATABLE
646 bool "Build a relocatable kernel (EXPERIMENTAL)"
647 depends on EXPERIMENTAL && ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE
648 help
649 This builds a kernel image that is capable of running at the
650 location the kernel is loaded at (some alignment restrictions may
651 exist).
652
653 One use is for the kexec on panic case where the recovery kernel
654 must live at a different physical address than the primary
655 kernel.
656
657 Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
658 it has been loaded at and the compile time physical addresses
659 CONFIG_PHYSICAL_START is ignored. However CONFIG_PHYSICAL_START
660 setting can still be useful to bootwrappers that need to know the
661 load location of the kernel (eg. u-boot/mkimage).
662
663config PAGE_OFFSET_BOOL
664 bool "Set custom page offset address"
665 depends on ADVANCED_OPTIONS
666 help
667 This option allows you to set the kernel virtual address at which
668 the kernel will map low memory. This can be useful in optimizing
669 the virtual memory layout of the system.
670
671 Say N here unless you know what you are doing.
672
673config PAGE_OFFSET
674 hex "Virtual address of memory base" if PAGE_OFFSET_BOOL
675 default "0xc0000000"
676
659config KERNEL_START_BOOL 677config KERNEL_START_BOOL
660 bool "Set custom kernel base address" 678 bool "Set custom kernel base address"
661 depends on ADVANCED_OPTIONS 679 depends on ADVANCED_OPTIONS
662 help 680 help
663 This option allows you to set the kernel virtual address at which 681 This option allows you to set the kernel virtual address at which
664 the kernel will map low memory (the kernel image will be linked at 682 the kernel will be loaded. Normally this should match PAGE_OFFSET
665 this address). This can be useful in optimizing the virtual memory 683 however there are times (like kdump) that one might not want them
666 layout of the system. 684 to be the same.
667 685
668 Say N here unless you know what you are doing. 686 Say N here unless you know what you are doing.
669 687
670config KERNEL_START 688config KERNEL_START
671 hex "Virtual address of kernel base" if KERNEL_START_BOOL 689 hex "Virtual address of kernel base" if KERNEL_START_BOOL
690 default PAGE_OFFSET if PAGE_OFFSET_BOOL
691 default "0xc2000000" if CRASH_DUMP
672 default "0xc0000000" 692 default "0xc0000000"
673 693
694config PHYSICAL_START_BOOL
695 bool "Set physical address where the kernel is loaded"
696 depends on ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE
697 help
698 This gives the physical address where the kernel is loaded.
699
700 Say N here unless you know what you are doing.
701
702config PHYSICAL_START
703 hex "Physical address where the kernel is loaded" if PHYSICAL_START_BOOL
704 default "0x02000000" if PPC_STD_MMU && CRASH_DUMP
705 default "0x00000000"
706
707config PHYSICAL_ALIGN
708 hex
709 default "0x10000000" if FSL_BOOKE
710 help
711	  This value sets the alignment restriction on the physical address
712	  where the kernel is loaded and run from. The kernel is compiled for
713	  an address which meets the above alignment restriction.
714
674config TASK_SIZE_BOOL 715config TASK_SIZE_BOOL
675 bool "Set custom user task size" 716 bool "Set custom user task size"
676 depends on ADVANCED_OPTIONS 717 depends on ADVANCED_OPTIONS
@@ -717,9 +758,17 @@ config PIN_TLB
717endmenu 758endmenu
718 759
719if PPC64 760if PPC64
761config PAGE_OFFSET
762 hex
763 default "0xc000000000000000"
720config KERNEL_START 764config KERNEL_START
721 hex 765 hex
766 default "0xc000000002000000" if CRASH_DUMP
722 default "0xc000000000000000" 767 default "0xc000000000000000"
768config PHYSICAL_START
769 hex
770 default "0x02000000" if CRASH_DUMP
771 default "0x00000000"
723endif 772endif
724 773
725source "net/Kconfig" 774source "net/Kconfig"
@@ -754,3 +803,4 @@ config PPC_CLOCK
754config PPC_LIB_RHEAP 803config PPC_LIB_RHEAP
755 bool 804 bool
756 805
806source "arch/powerpc/kvm/Kconfig"
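
The PAGE_OFFSET, KERNEL_START and PHYSICAL_START symbols introduced above are tied together by simple arithmetic. The stand-alone sketch below is only an illustration (not part of the patch) and assumes RAM starts at physical address 0: a kernel linked at KERNEL_START inside a linear map that begins at PAGE_OFFSET ends up at physical address KERNEL_START - PAGE_OFFSET, which is why the CRASH_DUMP defaults of 0xc2000000 and 0x02000000 go together.

#include <assert.h>

/* Defaults taken from the Kconfig hunk above (32-bit, CRASH_DUMP case). */
#define PAGE_OFFSET	0xc0000000UL	/* virtual base of the linear mapping */
#define KERNEL_START	0xc2000000UL	/* virtual address the kernel is linked at */
#define PHYSICAL_START	0x02000000UL	/* physical address the kernel is loaded at */

int main(void)
{
	/* Link address and load address stay consistent for the crash kernel. */
	assert(KERNEL_START - PAGE_OFFSET == PHYSICAL_START);
	return 0;
}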
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index a86d8d853214..807a2dce6263 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -151,6 +151,9 @@ config BOOTX_TEXT
151 151
152config PPC_EARLY_DEBUG 152config PPC_EARLY_DEBUG
153 bool "Early debugging (dangerous)" 153 bool "Early debugging (dangerous)"
154 # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water
155 # mark, which doesn't work with current 440 KVM.
156 depends on !KVM
154 help 157 help
155 Say Y to enable some early debugging facilities that may be available 158 Say Y to enable some early debugging facilities that may be available
156 for your processor/board combination. Those facilities are hacks 159 for your processor/board combination. Those facilities are hacks
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index e2ec4a91ccef..9dcdc036cdf7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -145,6 +145,7 @@ core-y += arch/powerpc/kernel/ \
145 arch/powerpc/platforms/ 145 arch/powerpc/platforms/
146core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ 146core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/
147core-$(CONFIG_XMON) += arch/powerpc/xmon/ 147core-$(CONFIG_XMON) += arch/powerpc/xmon/
148core-$(CONFIG_KVM) += arch/powerpc/kvm/
148 149
149drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ 150drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
150 151
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index 5ef2bdf8d189..2347294ff35b 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -27,6 +27,7 @@ zImage.chrp
27zImage.coff 27zImage.coff
28zImage.coff.lds 28zImage.coff.lds
29zImage.ep* 29zImage.ep*
30zImage.iseries
30zImage.*lds 31zImage.*lds
31zImage.miboot 32zImage.miboot
32zImage.pmac 33zImage.pmac
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 5ba50c673390..7822d25c9d31 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -40,7 +40,7 @@ $(obj)/ebony.o: BOOTCFLAGS += -mcpu=405
40$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405 40$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405
41$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405 41$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405
42$(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405 42$(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405
43$(obj)/virtex405-head.o: BOOTCFLAGS += -mcpu=405 43$(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405
44 44
45 45
46zlib := inffast.c inflate.c inftrees.c 46zlib := inffast.c inflate.c inftrees.c
diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts
index 6f3d38a1554f..39634124929b 100644
--- a/arch/powerpc/boot/dts/canyonlands.dts
+++ b/arch/powerpc/boot/dts/canyonlands.dts
@@ -142,8 +142,45 @@
142 #address-cells = <2>; 142 #address-cells = <2>;
143 #size-cells = <1>; 143 #size-cells = <1>;
144 clock-frequency = <0>; /* Filled in by U-Boot */ 144 clock-frequency = <0>; /* Filled in by U-Boot */
145 /* ranges property is supplied by U-Boot */
145 interrupts = <6 4>; 146 interrupts = <6 4>;
146 interrupt-parent = <&UIC1>; 147 interrupt-parent = <&UIC1>;
148
149 nor_flash@0,0 {
150 compatible = "amd,s29gl512n", "cfi-flash";
151 bank-width = <2>;
152 reg = <0 000000 4000000>;
153 #address-cells = <1>;
154 #size-cells = <1>;
155 partition@0 {
156 label = "kernel";
157 reg = <0 1e0000>;
158 };
159 partition@1e0000 {
160 label = "dtb";
161 reg = <1e0000 20000>;
162 };
163 partition@200000 {
164 label = "ramdisk";
165 reg = <200000 1400000>;
166 };
167 partition@1600000 {
168 label = "jffs2";
169 reg = <1600000 400000>;
170 };
171 partition@1a00000 {
172 label = "user";
173 reg = <1a00000 2560000>;
174 };
175 partition@3f60000 {
176 label = "env";
177 reg = <3f60000 40000>;
178 };
179 partition@3fa0000 {
180 label = "u-boot";
181 reg = <3fa0000 60000>;
182 };
183 };
147 }; 184 };
148 185
149 UART0: serial@ef600300 { 186 UART0: serial@ef600300 {
diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts
index 958a5ca53d35..0f2fc077d8db 100644
--- a/arch/powerpc/boot/dts/glacier.dts
+++ b/arch/powerpc/boot/dts/glacier.dts
@@ -145,8 +145,45 @@
145 #address-cells = <2>; 145 #address-cells = <2>;
146 #size-cells = <1>; 146 #size-cells = <1>;
147 clock-frequency = <0>; /* Filled in by U-Boot */ 147 clock-frequency = <0>; /* Filled in by U-Boot */
148 /* ranges property is supplied by U-Boot */
148 interrupts = <6 4>; 149 interrupts = <6 4>;
149 interrupt-parent = <&UIC1>; 150 interrupt-parent = <&UIC1>;
151
152 nor_flash@0,0 {
153 compatible = "amd,s29gl512n", "cfi-flash";
154 bank-width = <2>;
155 reg = <0 000000 4000000>;
156 #address-cells = <1>;
157 #size-cells = <1>;
158 partition@0 {
159 label = "kernel";
160 reg = <0 1e0000>;
161 };
162 partition@1e0000 {
163 label = "dtb";
164 reg = <1e0000 20000>;
165 };
166 partition@200000 {
167 label = "ramdisk";
168 reg = <200000 1400000>;
169 };
170 partition@1600000 {
171 label = "jffs2";
172 reg = <1600000 400000>;
173 };
174 partition@1a00000 {
175 label = "user";
176 reg = <1a00000 2560000>;
177 };
178 partition@3f60000 {
179 label = "env";
180 reg = <3f60000 40000>;
181 };
182 partition@3fa0000 {
183 label = "u-boot";
184 reg = <3fa0000 60000>;
185 };
186 };
150 }; 187 };
151 188
152 UART0: serial@ef600300 { 189 UART0: serial@ef600300 {
diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c
index aef3bdc89160..8c9ead94be06 100644
--- a/arch/powerpc/boot/ns16550.c
+++ b/arch/powerpc/boot/ns16550.c
@@ -55,10 +55,15 @@ static u8 ns16550_tstc(void)
55int ns16550_console_init(void *devp, struct serial_console_data *scdp) 55int ns16550_console_init(void *devp, struct serial_console_data *scdp)
56{ 56{
57 int n; 57 int n;
58 u32 reg_offset;
58 59
59 if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1) 60 if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1)
60 return -1; 61 return -1;
61 62
63 n = getprop(devp, "reg-offset", &reg_offset, sizeof(reg_offset));
64 if (n == sizeof(reg_offset))
65 reg_base += reg_offset;
66
62 n = getprop(devp, "reg-shift", &reg_shift, sizeof(reg_shift)); 67 n = getprop(devp, "reg-shift", &reg_shift, sizeof(reg_shift));
63 if (n != sizeof(reg_shift)) 68 if (n != sizeof(reg_shift))
64 reg_shift = 0; 69 reg_shift = 0;
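
The bootwrapper change above reads an optional reg-offset property and adds it to the mapped register base, on top of the existing reg-shift spacing. As a rough sketch (not from the patch; the helper name and the numbers in the usage note are made up for illustration), register N of the UART is then addressed as reg_base + reg-offset + (N << reg-shift):

#include <stdint.h>

static inline uint8_t *ns16550_reg_addr(uint8_t *reg_base, uint32_t reg_offset,
					 uint32_t reg_shift, unsigned int num)
{
	/* reg-offset skips a prefix inside the mapped range; reg-shift spaces
	 * the registers (0 = byte-packed, 2 = one register per 32-bit word). */
	return reg_base + reg_offset + (num << reg_shift);
}

/* Example: registers starting 0x200 bytes into the range, 4 bytes apart:
 * ns16550_reg_addr(base, 0x200, 2, 5) == base + 0x200 + 0x14 (the LSR). */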
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ce1e8d24e747..9177b21b1a95 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -106,4 +106,13 @@ PHONY += systbl_chk
106systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i 106systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i
107 $(call cmd,systbl_chk) 107 $(call cmd,systbl_chk)
108 108
109$(obj)/built-in.o: prom_init_check
110
111quiet_cmd_prom_init_check = CALL $<
112 cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
113
114PHONY += prom_init_check
115prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
116 $(call cmd,prom_init_check)
117
109clean-files := vmlinux.lds 118clean-files := vmlinux.lds
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 292c6d8db0e1..62134845af08 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,6 +23,9 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/suspend.h> 24#include <linux/suspend.h>
25#include <linux/hrtimer.h> 25#include <linux/hrtimer.h>
26#ifdef CONFIG_KVM
27#include <linux/kvm_host.h>
28#endif
26#ifdef CONFIG_PPC64 29#ifdef CONFIG_PPC64
27#include <linux/time.h> 30#include <linux/time.h>
28#include <linux/hardirq.h> 31#include <linux/hardirq.h>
@@ -93,10 +96,7 @@ int main(void)
93 DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); 96 DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
94 DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); 97 DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
95 DEFINE(TI_TASK, offsetof(struct thread_info, task)); 98 DEFINE(TI_TASK, offsetof(struct thread_info, task));
96#ifdef CONFIG_PPC32
97 DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain));
98 DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); 99 DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
99#endif /* CONFIG_PPC32 */
100 100
101#ifdef CONFIG_PPC64 101#ifdef CONFIG_PPC64
102 DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); 102 DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
@@ -165,13 +165,9 @@ int main(void)
165 165
166 /* Interrupt register frame */ 166 /* Interrupt register frame */
167 DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); 167 DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD);
168#ifndef CONFIG_PPC64 168 DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
169 DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); 169#ifdef CONFIG_PPC64
170#else /* CONFIG_PPC64 */
171 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); 170 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
172 /* 288 = # of volatile regs, int & fp, for leaf routines */
173 /* which do not stack a frame. See the PPC64 ABI. */
174 DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288);
175 /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ 171 /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
176 DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); 172 DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
177 DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); 173 DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
@@ -331,5 +327,30 @@ int main(void)
331 327
332 DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); 328 DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
333 329
330#ifdef CONFIG_KVM
331 DEFINE(TLBE_BYTES, sizeof(struct tlbe));
332
333 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
334 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
335 DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb));
336 DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
337 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
338 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
339 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
340 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
341 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
342 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
343 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
344 DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
345 DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
346 DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
347 DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
348 DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid));
349
350 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
351 DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
352 DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
353#endif
354
334 return 0; 355 return 0;
335} 356}
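
The CONFIG_KVM block added above feeds the usual asm-offsets machinery: each DEFINE() becomes a numeric constant in asm-offsets.h so that hand-written assembly such as booke_interrupts.S can address struct kvm_vcpu members by name. The user-space sketch below is only a reduced model of that idea; the structure, macro and printf output here are stand-ins, not the kernel's actual definitions.

#include <stddef.h>
#include <stdio.h>

struct vcpu_arch_model {		/* stand-in for the kvm_vcpu fields used above */
	unsigned long pc;
	unsigned long msr;
	unsigned long gpr[32];
};

#define DEFINE(sym, val) printf("#define %s %zu\n", #sym, (size_t)(val))

int main(void)
{
	/* The emitted constants are then usable from assembly,
	 * e.g. "lwz rX, VCPU_PC(rVCPU)". */
	DEFINE(VCPU_PC,   offsetof(struct vcpu_arch_model, pc));
	DEFINE(VCPU_MSR,  offsetof(struct vcpu_arch_model, msr));
	DEFINE(VCPU_GPRS, offsetof(struct vcpu_arch_model, gpr));
	return 0;
}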
diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S
index 5465e8de0e61..e3623e3e3451 100644
--- a/arch/powerpc/kernel/cpu_setup_44x.S
+++ b/arch/powerpc/kernel/cpu_setup_44x.S
@@ -33,7 +33,6 @@ _GLOBAL(__setup_cpu_440grx)
33 mtlr r4 33 mtlr r4
34 blr 34 blr
35_GLOBAL(__setup_cpu_460ex) 35_GLOBAL(__setup_cpu_460ex)
36_GLOBAL(__setup_cpu_460gt)
37 b __init_fpu_44x 36 b __init_fpu_44x
38_GLOBAL(__setup_cpu_440gx) 37_GLOBAL(__setup_cpu_440gx)
39_GLOBAL(__setup_cpu_440spe) 38_GLOBAL(__setup_cpu_440spe)
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index f1ee0b3f78f2..72d1d7395254 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -17,7 +17,13 @@
17#include <asm/cache.h> 17#include <asm/cache.h>
18 18
19_GLOBAL(__setup_cpu_603) 19_GLOBAL(__setup_cpu_603)
20 b setup_common_caches 20 mflr r4
21BEGIN_FTR_SECTION
22 bl __init_fpu_registers
23END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
24 bl setup_common_caches
25 mtlr r4
26 blr
21_GLOBAL(__setup_cpu_604) 27_GLOBAL(__setup_cpu_604)
22 mflr r4 28 mflr r4
23 bl setup_common_caches 29 bl setup_common_caches
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 26ffb44e2701..36080d4d1922 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -37,7 +37,6 @@ extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec);
37extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec); 37extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec);
38extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec); 38extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec);
39extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); 39extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec);
40extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec);
41extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); 40extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
42extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); 41extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
43extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); 42extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec);
@@ -1416,10 +1415,9 @@ static struct cpu_spec __initdata cpu_specs[] = {
1416 .pvr_value = 0x13020000, 1415 .pvr_value = 0x13020000,
1417 .cpu_name = "460GT", 1416 .cpu_name = "460GT",
1418 .cpu_features = CPU_FTRS_44X, 1417 .cpu_features = CPU_FTRS_44X,
1419 .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, 1418 .cpu_user_features = COMMON_USER_BOOKE,
1420 .icache_bsize = 32, 1419 .icache_bsize = 32,
1421 .dcache_bsize = 32, 1420 .dcache_bsize = 32,
1422 .cpu_setup = __setup_cpu_460gt,
1423 .machine_check = machine_check_440A, 1421 .machine_check = machine_check_440A,
1424 .platform = "ppc440", 1422 .platform = "ppc440",
1425 }, 1423 },
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 4ff744143566..e581524d85bc 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -371,6 +371,17 @@ skpinv: addi r6,r6,1 /* Increment */
371 371
372 bl early_init 372 bl early_init
373 373
374#ifdef CONFIG_RELOCATABLE
375 lis r3,kernstart_addr@ha
376 la r3,kernstart_addr@l(r3)
377#ifdef CONFIG_PHYS_64BIT
378 stw r23,0(r3)
379 stw r25,4(r3)
380#else
381 stw r25,0(r3)
382#endif
383#endif
384
374 mfspr r3,SPRN_TLB1CFG 385 mfspr r3,SPRN_TLB1CFG
375 andi. r3,r3,0xfff 386 andi. r3,r3,0xfff
376 lis r4,num_tlbcam_entries@ha 387 lis r4,num_tlbcam_entries@ha
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 9d2c56621f1e..92ccc6fcc5b0 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -152,7 +152,7 @@ _GLOBAL(low_choose_750fx_pll)
152 mtspr SPRN_HID1,r4 152 mtspr SPRN_HID1,r4
153 153
154 /* Store new HID1 image */ 154 /* Store new HID1 image */
155 rlwinm r6,r1,0,0,18 155 rlwinm r6,r1,0,0,(31-THREAD_SHIFT)
156 lwz r6,TI_CPU(r6) 156 lwz r6,TI_CPU(r6)
157 slwi r6,r6,2 157 slwi r6,r6,2
158 addis r6,r6,nap_save_hid1@ha 158 addis r6,r6,nap_save_hid1@ha
@@ -281,7 +281,7 @@ _GLOBAL(_tlbia)
281#endif /* CONFIG_SMP */ 281#endif /* CONFIG_SMP */
282#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */ 282#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
283#if defined(CONFIG_SMP) 283#if defined(CONFIG_SMP)
284 rlwinm r8,r1,0,0,18 284 rlwinm r8,r1,0,0,(31-THREAD_SHIFT)
285 lwz r8,TI_CPU(r8) 285 lwz r8,TI_CPU(r8)
286 oris r8,r8,10 286 oris r8,r8,10
287 mfmsr r10 287 mfmsr r10
@@ -377,7 +377,7 @@ _GLOBAL(_tlbie)
377#endif /* CONFIG_SMP */ 377#endif /* CONFIG_SMP */
378#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */ 378#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
379#if defined(CONFIG_SMP) 379#if defined(CONFIG_SMP)
380 rlwinm r8,r1,0,0,18 380 rlwinm r8,r1,0,0,(31-THREAD_SHIFT)
381 lwz r8,TI_CPU(r8) 381 lwz r8,TI_CPU(r8)
382 oris r8,r8,11 382 oris r8,r8,11
383 mfmsr r10 383 mfmsr r10
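
The rlwinm changes above replace a hard-coded mask with one derived from THREAD_SHIFT. Keeping bits 0..(31 - THREAD_SHIFT) of r1 clears the low THREAD_SHIFT bits, i.e. it rounds the stack pointer down to the start of the current thread's stack, where thread_info lives on 32-bit; the old ",18" only worked for THREAD_SHIFT == 13 (8KB stacks). A C equivalent, as a sketch only (THREAD_SHIFT value chosen for illustration):

#include <stdint.h>

#define THREAD_SHIFT	13			/* illustrative; now configurable */
#define THREAD_SIZE	(1UL << THREAD_SHIFT)

static inline uintptr_t thread_info_base(uintptr_t r1)
{
	/* Same effect as: rlwinm rD, r1, 0, 0, 31 - THREAD_SHIFT */
	return r1 & ~(THREAD_SIZE - 1);
}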
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index a3c491e88a72..942951e76586 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -27,23 +27,11 @@
27 27
28 .text 28 .text
29 29
30_GLOBAL(get_msr)
31 mfmsr r3
32 blr
33
34_GLOBAL(get_srr0)
35 mfsrr0 r3
36 blr
37
38_GLOBAL(get_srr1)
39 mfsrr1 r3
40 blr
41
42#ifdef CONFIG_IRQSTACKS 30#ifdef CONFIG_IRQSTACKS
43_GLOBAL(call_do_softirq) 31_GLOBAL(call_do_softirq)
44 mflr r0 32 mflr r0
45 std r0,16(r1) 33 std r0,16(r1)
46 stdu r1,THREAD_SIZE-112(r3) 34 stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
47 mr r1,r3 35 mr r1,r3
48 bl .__do_softirq 36 bl .__do_softirq
49 ld r1,0(r1) 37 ld r1,0(r1)
@@ -56,7 +44,7 @@ _GLOBAL(call_handle_irq)
56 mflr r0 44 mflr r0
57 std r0,16(r1) 45 std r0,16(r1)
58 mtctr r8 46 mtctr r8
59 stdu r1,THREAD_SIZE-112(r5) 47 stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
60 mr r1,r5 48 mr r1,r5
61 bctrl 49 bctrl
62 ld r1,0(r1) 50 ld r1,0(r1)
@@ -599,7 +587,7 @@ _GLOBAL(kexec_sequence)
599 std r0,16(r1) 587 std r0,16(r1)
600 588
601 /* switch stacks to newstack -- &kexec_stack.stack */ 589 /* switch stacks to newstack -- &kexec_stack.stack */
602 stdu r1,THREAD_SIZE-112(r3) 590 stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
603 mr r1,r3 591 mr r1,r3
604 592
605 li r0,0 593 li r0,0
@@ -616,7 +604,7 @@ _GLOBAL(kexec_sequence)
616 std r26,-48(r1) 604 std r26,-48(r1)
617 std r25,-56(r1) 605 std r25,-56(r1)
618 606
619 stdu r1,-112-64(r1) 607 stdu r1,-STACK_FRAME_OVERHEAD-64(r1)
620 608
621 /* save args into preserved regs */ 609 /* save args into preserved regs */
622 mr r31,r3 /* newstack (both) */ 610 mr r31,r3 /* newstack (both) */
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index fb698d47082d..e79ad8afda07 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -275,6 +275,8 @@ static int __devinit of_pci_phb_probe(struct of_device *dev,
275 275
276 /* Scan the bus */ 276 /* Scan the bus */
277 scan_phb(phb); 277 scan_phb(phb);
278 if (phb->bus == NULL)
279 return -ENXIO;
278 280
279 /* Claim resources. This might need some rework as well depending 281 /* Claim resources. This might need some rework as well depending
280 * whether we are doing probe-only or not, like assigning unassigned 282 * whether we are doing probe-only or not, like assigning unassigned
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ac163bd46cfd..c9bf17eec31b 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -7,17 +7,11 @@
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 */ 8 */
9 9
10#include <linux/types.h>
11#include <linux/threads.h> 10#include <linux/threads.h>
12#include <linux/module.h> 11#include <linux/module.h>
13 12
14#include <asm/processor.h>
15#include <asm/ptrace.h>
16#include <asm/page.h>
17#include <asm/lppaca.h> 13#include <asm/lppaca.h>
18#include <asm/paca.h> 14#include <asm/paca.h>
19#include <asm/mmu.h>
20
21 15
22/* This symbol is provided by the linker - let it fill in the paca 16/* This symbol is provided by the linker - let it fill in the paca
23 * field correctly */ 17 * field correctly */
@@ -65,60 +59,29 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = {
65 * processors. The processor VPD array needs one entry per physical 59 * processors. The processor VPD array needs one entry per physical
66 * processor (not thread). 60 * processor (not thread).
67 */ 61 */
68#define PACA_INIT(number) \ 62struct paca_struct paca[NR_CPUS];
69{ \
70 .lppaca_ptr = &lppaca[number], \
71 .lock_token = 0x8000, \
72 .paca_index = (number), /* Paca Index */ \
73 .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \
74 .hw_cpu_id = 0xffff, \
75 .slb_shadow_ptr = &slb_shadow[number], \
76 .__current = &init_task, \
77}
78
79struct paca_struct paca[] = {
80 PACA_INIT(0),
81#if NR_CPUS > 1
82 PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3),
83#if NR_CPUS > 4
84 PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7),
85#if NR_CPUS > 8
86 PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11),
87 PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15),
88 PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19),
89 PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23),
90 PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27),
91 PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31),
92#if NR_CPUS > 32
93 PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35),
94 PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39),
95 PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43),
96 PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47),
97 PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51),
98 PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55),
99 PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59),
100 PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63),
101#if NR_CPUS > 64
102 PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67),
103 PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71),
104 PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75),
105 PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79),
106 PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83),
107 PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87),
108 PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91),
109 PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95),
110 PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99),
111 PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103),
112 PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107),
113 PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111),
114 PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115),
115 PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119),
116 PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123),
117 PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127),
118#endif
119#endif
120#endif
121#endif
122#endif
123};
124EXPORT_SYMBOL(paca); 63EXPORT_SYMBOL(paca);
64
65void __init initialise_pacas(void)
66{
67 int cpu;
68
69 /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB
70 * of the TOC can be addressed using a single machine instruction.
71 */
72 unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL;
73
74 /* Can't use for_each_*_cpu, as they aren't functional yet */
75 for (cpu = 0; cpu < NR_CPUS; cpu++) {
76 struct paca_struct *new_paca = &paca[cpu];
77
78 new_paca->lppaca_ptr = &lppaca[cpu];
79 new_paca->lock_token = 0x8000;
80 new_paca->paca_index = cpu;
81 new_paca->kernel_toc = kernel_toc;
82 new_paca->hw_cpu_id = 0xffff;
83 new_paca->slb_shadow_ptr = &slb_shadow[cpu];
84 new_paca->__current = &init_task;
85
86 }
87}
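
The comment in initialise_pacas() above relies on the reach of PowerPC D-form loads and stores: the displacement is a signed 16-bit field, so biasing GPR2 32KB past the start of the TOC lets a single instruction address the whole 64KB TOC. A small stand-alone illustration (the example link address is made up):

#include <stdio.h>

int main(void)
{
	unsigned long toc_start = 0x100000;		/* illustrative link address */
	unsigned long kernel_toc = toc_start + 0x8000;	/* the value put in the paca */

	/* A signed 16-bit displacement off GPR2 reaches [-0x8000, +0x7fff]: */
	printf("lowest reachable:  0x%lx\n", kernel_toc - 0x8000);  /* toc_start */
	printf("highest reachable: 0x%lx\n", kernel_toc + 0x7fff);  /* toc_start + 64KB - 1 */
	return 0;
}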
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h
index fda05e2211d6..90e562771791 100644
--- a/arch/powerpc/kernel/ppc32.h
+++ b/arch/powerpc/kernel/ppc32.h
@@ -135,6 +135,4 @@ struct ucontext32 {
135 struct mcontext32 uc_mcontext; 135 struct mcontext32 uc_mcontext;
136}; 136};
137 137
138extern int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s);
139
140#endif /* _PPC64_PPC32_H */ 138#endif /* _PPC64_PPC32_H */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 703100d5e458..6caad17ea72e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1033,3 +1033,34 @@ void ppc64_runlatch_off(void)
1033 } 1033 }
1034} 1034}
1035#endif 1035#endif
1036
1037#if THREAD_SHIFT < PAGE_SHIFT
1038
1039static struct kmem_cache *thread_info_cache;
1040
1041struct thread_info *alloc_thread_info(struct task_struct *tsk)
1042{
1043 struct thread_info *ti;
1044
1045 ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
1046 if (unlikely(ti == NULL))
1047 return NULL;
1048#ifdef CONFIG_DEBUG_STACK_USAGE
1049 memset(ti, 0, THREAD_SIZE);
1050#endif
1051 return ti;
1052}
1053
1054void free_thread_info(struct thread_info *ti)
1055{
1056 kmem_cache_free(thread_info_cache, ti);
1057}
1058
1059void thread_info_cache_init(void)
1060{
1061 thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
1062 THREAD_SIZE, 0, NULL);
1063 BUG_ON(thread_info_cache == NULL);
1064}
1065
1066#endif /* THREAD_SHIFT < PAGE_SHIFT */
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 3bfe7837e820..2aefe2a4129a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -53,6 +53,7 @@
53#include <asm/pci-bridge.h> 53#include <asm/pci-bridge.h>
54#include <asm/phyp_dump.h> 54#include <asm/phyp_dump.h>
55#include <asm/kexec.h> 55#include <asm/kexec.h>
56#include <mm/mmu_decl.h>
56 57
57#ifdef DEBUG 58#ifdef DEBUG
58#define DBG(fmt...) printk(KERN_ERR fmt) 59#define DBG(fmt...) printk(KERN_ERR fmt)
@@ -978,7 +979,10 @@ static int __init early_init_dt_scan_memory(unsigned long node,
978 } 979 }
979#endif 980#endif
980 lmb_add(base, size); 981 lmb_add(base, size);
982
983 memstart_addr = min((u64)memstart_addr, base);
981 } 984 }
985
982 return 0; 986 return 0;
983} 987}
984 988
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
new file mode 100644
index 000000000000..8e24fc1821e8
--- /dev/null
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -0,0 +1,58 @@
1#!/bin/sh
2#
3# Copyright © 2008 IBM Corporation
4#
5# This program is free software; you can redistribute it and/or
6# modify it under the terms of the GNU General Public License
7# as published by the Free Software Foundation; either version
8# 2 of the License, or (at your option) any later version.
9
10# This script checks prom_init.o to see what external symbols it
11# is using, if it finds symbols not in the whitelist it returns
12# an error. The point of this is to discourage people from
13# intentionally or accidentally adding new code to prom_init.c
14# which has side effects on other parts of the kernel.
15
16# If you really need to reference something from prom_init.o add
17# it to the list below:
18
19WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
20_end enter_prom memcpy memset reloc_offset __secondary_hold
21__secondary_hold_acknowledge __secondary_hold_spinloop __start
22strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
23reloc_got2"
24
25NM="$1"
26OBJ="$2"
27
28ERROR=0
29
30for UNDEF in $($NM -u $OBJ | awk '{print $2}')
31do
32 # On 64-bit nm gives us the function descriptors, which have
33 # a leading . on the name, so strip it off here.
34 UNDEF="${UNDEF#.}"
35
36 if [ $KBUILD_VERBOSE ]; then
37 if [ $KBUILD_VERBOSE -ne 0 ]; then
38 echo "Checking prom_init.o symbol '$UNDEF'"
39 fi
40 fi
41
42 OK=0
43 for WHITE in $WHITELIST
44 do
45 if [ "$UNDEF" = "$WHITE" ]; then
46 OK=1
47 break
48 fi
49 done
50
51 if [ $OK -eq 0 ]; then
52 ERROR=1
53 echo "Error: External symbol '$UNDEF' referenced" \
54 "from prom_init.c" >&2
55 fi
56done
57
58exit $ERROR
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 9d30e10970ac..4c1de6af4c09 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -29,15 +29,12 @@
29#include <linux/security.h> 29#include <linux/security.h>
30#include <linux/signal.h> 30#include <linux/signal.h>
31#include <linux/compat.h> 31#include <linux/compat.h>
32#include <linux/elf.h>
33 32
34#include <asm/uaccess.h> 33#include <asm/uaccess.h>
35#include <asm/page.h> 34#include <asm/page.h>
36#include <asm/pgtable.h> 35#include <asm/pgtable.h>
37#include <asm/system.h> 36#include <asm/system.h>
38 37
39#include "ppc32.h"
40
41/* 38/*
42 * does not yet catch signals sent when the child dies. 39 * does not yet catch signals sent when the child dies.
43 * in exit.c or in signal.c. 40 * in exit.c or in signal.c.
@@ -67,27 +64,6 @@ static long compat_ptrace_old(struct task_struct *child, long request,
67 return -EPERM; 64 return -EPERM;
68} 65}
69 66
70static int compat_ptrace_getsiginfo(struct task_struct *child, compat_siginfo_t __user *data)
71{
72 siginfo_t lastinfo;
73 int error = -ESRCH;
74
75 read_lock(&tasklist_lock);
76 if (likely(child->sighand != NULL)) {
77 error = -EINVAL;
78 spin_lock_irq(&child->sighand->siglock);
79 if (likely(child->last_siginfo != NULL)) {
80 lastinfo = *child->last_siginfo;
81 error = 0;
82 }
83 spin_unlock_irq(&child->sighand->siglock);
84 }
85 read_unlock(&tasklist_lock);
86 if (!error)
87 return copy_siginfo_to_user32(data, &lastinfo);
88 return error;
89}
90
91long compat_arch_ptrace(struct task_struct *child, compat_long_t request, 67long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
92 compat_ulong_t caddr, compat_ulong_t cdata) 68 compat_ulong_t caddr, compat_ulong_t cdata)
93{ 69{
@@ -306,9 +282,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
306 0, PT_REGS_COUNT * sizeof(compat_long_t), 282 0, PT_REGS_COUNT * sizeof(compat_long_t),
307 compat_ptr(data)); 283 compat_ptr(data));
308 284
309 case PTRACE_GETSIGINFO:
310 return compat_ptrace_getsiginfo(child, compat_ptr(data));
311
312 case PTRACE_GETFPREGS: 285 case PTRACE_GETFPREGS:
313 case PTRACE_SETFPREGS: 286 case PTRACE_SETFPREGS:
314 case PTRACE_GETVRREGS: 287 case PTRACE_GETVRREGS:
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 31ada9fdfc5c..dff6308d1b5e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -170,6 +170,9 @@ void __init setup_paca(int cpu)
170 170
171void __init early_setup(unsigned long dt_ptr) 171void __init early_setup(unsigned long dt_ptr)
172{ 172{
173 /* Fill in any uninitialised pacas */
174 initialise_pacas();
175
173 /* Identify CPU type */ 176 /* Identify CPU type */
174 identify_cpu(0, mfspr(SPRN_PVR)); 177 identify_cpu(0, mfspr(SPRN_PVR));
175 178
@@ -435,7 +438,7 @@ void __init setup_system(void)
435 printk("htab_address = 0x%p\n", htab_address); 438 printk("htab_address = 0x%p\n", htab_address);
436 printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); 439 printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
437#if PHYSICAL_START > 0 440#if PHYSICAL_START > 0
438 printk("physical_start = 0x%x\n", PHYSICAL_START); 441 printk("physical_start = 0x%lx\n", PHYSICAL_START);
439#endif 442#endif
440 printk("-----------------------------------------------------\n"); 443 printk("-----------------------------------------------------\n");
441 444
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e3638eeaaae7..962944038430 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -13,7 +13,6 @@
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/stacktrace.h> 14#include <linux/stacktrace.h>
15#include <asm/ptrace.h> 15#include <asm/ptrace.h>
16#include <asm/asm-offsets.h>
17 16
18/* 17/*
19 * Save stack-backtrace addresses into a stack_trace buffer. 18 * Save stack-backtrace addresses into a stack_trace buffer.
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 7aad6203e411..7d6c9bb8c77f 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -154,8 +154,8 @@ static void udbg_console_write(struct console *con, const char *s,
154static struct console udbg_console = { 154static struct console udbg_console = {
155 .name = "udbg", 155 .name = "udbg",
156 .write = udbg_console_write, 156 .write = udbg_console_write,
157 .flags = CON_PRINTBUFFER | CON_ENABLED | CON_BOOT, 157 .flags = CON_PRINTBUFFER | CON_ENABLED | CON_BOOT | CON_ANYTIME,
158 .index = -1, 158 .index = 0,
159}; 159};
160 160
161static int early_console_initialized; 161static int early_console_initialized;
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
new file mode 100644
index 000000000000..f5d7a5eab96e
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -0,0 +1,224 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/types.h>
21#include <linux/string.h>
22#include <linux/kvm_host.h>
23#include <linux/highmem.h>
24#include <asm/mmu-44x.h>
25#include <asm/kvm_ppc.h>
26
27#include "44x_tlb.h"
28
29#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
30#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
31
32static unsigned int kvmppc_tlb_44x_pos;
33
34static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
35{
36 /* Mask off reserved bits. */
37 attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
38
39 if (!usermode) {
40 /* Guest is in supervisor mode, so we need to translate guest
41 * supervisor permissions into user permissions. */
42 attrib &= ~PPC44x_TLB_USER_PERM_MASK;
43 attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3;
44 }
45
46 /* Make sure host can always access this memory. */
47 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
48
49 return attrib;
50}
51
52/* Search the guest TLB for a matching entry. */
53int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
54 unsigned int as)
55{
56 int i;
57
58 /* XXX Replace loop with fancy data structures. */
59 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
60 struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
61 unsigned int tid;
62
63 if (eaddr < get_tlb_eaddr(tlbe))
64 continue;
65
66 if (eaddr > get_tlb_end(tlbe))
67 continue;
68
69 tid = get_tlb_tid(tlbe);
70 if (tid && (tid != pid))
71 continue;
72
73 if (!get_tlb_v(tlbe))
74 continue;
75
76 if (get_tlb_ts(tlbe) != as)
77 continue;
78
79 return i;
80 }
81
82 return -1;
83}
84
85struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
86{
87 unsigned int as = !!(vcpu->arch.msr & MSR_IS);
88 unsigned int index;
89
90 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
91 if (index == -1)
92 return NULL;
93 return &vcpu->arch.guest_tlb[index];
94}
95
96struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
97{
98 unsigned int as = !!(vcpu->arch.msr & MSR_DS);
99 unsigned int index;
100
101 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
102 if (index == -1)
103 return NULL;
104 return &vcpu->arch.guest_tlb[index];
105}
106
107static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
108{
109 return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
110}
111
112/* Must be called with mmap_sem locked for writing. */
113static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
114 unsigned int index)
115{
116 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
117 struct page *page = vcpu->arch.shadow_pages[index];
118
119 kunmap(vcpu->arch.shadow_pages[index]);
120
121 if (get_tlb_v(stlbe)) {
122 if (kvmppc_44x_tlbe_is_writable(stlbe))
123 kvm_release_page_dirty(page);
124 else
125 kvm_release_page_clean(page);
126 }
127}
128
129/* Caller must ensure that the specified guest TLB entry is safe to insert into
130 * the shadow TLB. */
131void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
132 u32 flags)
133{
134 struct page *new_page;
135 struct tlbe *stlbe;
136 hpa_t hpaddr;
137 unsigned int victim;
138
139 /* Future optimization: don't overwrite the TLB entry containing the
140 * current PC (or stack?). */
141 victim = kvmppc_tlb_44x_pos++;
142 if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
143 kvmppc_tlb_44x_pos = 0;
144 stlbe = &vcpu->arch.shadow_tlb[victim];
145
146 /* Get reference to new page. */
147 down_write(&current->mm->mmap_sem);
148 new_page = gfn_to_page(vcpu->kvm, gfn);
149 if (is_error_page(new_page)) {
150 printk(KERN_ERR "Couldn't get guest page!\n");
151 kvm_release_page_clean(new_page);
152 return;
153 }
154 hpaddr = page_to_phys(new_page);
155
156 /* Drop reference to old page. */
157 kvmppc_44x_shadow_release(vcpu, victim);
158 up_write(&current->mm->mmap_sem);
159
160 vcpu->arch.shadow_pages[victim] = new_page;
161
162 /* XXX Make sure (va, size) doesn't overlap any other
163 * entries. 440x6 user manual says the result would be
164 * "undefined." */
165
166 /* XXX what about AS? */
167
168 stlbe->tid = asid & 0xff;
169
170 /* Force TS=1 for all guest mappings. */
171 /* For now we hardcode 4KB mappings, but it will be important to
172 * use host large pages in the future. */
173 stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
174 | PPC44x_TLB_4K;
175
176 stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
177 stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
178 vcpu->arch.msr & MSR_PR);
179}
180
181void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid)
182{
183 unsigned int pid = asid & 0xff;
184 int i;
185
186 /* XXX Replace loop with fancy data structures. */
187 down_write(&current->mm->mmap_sem);
188 for (i = 0; i <= tlb_44x_hwater; i++) {
189 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
190 unsigned int tid;
191
192 if (!get_tlb_v(stlbe))
193 continue;
194
195 if (eaddr < get_tlb_eaddr(stlbe))
196 continue;
197
198 if (eaddr > get_tlb_end(stlbe))
199 continue;
200
201 tid = get_tlb_tid(stlbe);
202 if (tid && (tid != pid))
203 continue;
204
205 kvmppc_44x_shadow_release(vcpu, i);
206 stlbe->word0 = 0;
207 }
208 up_write(&current->mm->mmap_sem);
209}
210
211/* Invalidate all mappings, so that when they fault back in they will get the
212 * proper permission bits. */
213void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
214{
215 int i;
216
217 /* XXX Replace loop with fancy data structures. */
218 down_write(&current->mm->mmap_sem);
219 for (i = 0; i <= tlb_44x_hwater; i++) {
220 kvmppc_44x_shadow_release(vcpu, i);
221 vcpu->arch.shadow_tlb[i].word0 = 0;
222 }
223 up_write(&current->mm->mmap_sem);
224}
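
kvmppc_44x_tlb_shadow_attrib() above turns guest supervisor permissions into user permissions because the guest's kernel runs in the host's problem state. A worked example of the translation is sketched below; the TLB_* bit values are my reading of asm/mmu-44x.h (supervisor bits three places below the user bits, which is what the << 3 in the patch implies), not text quoted from the patch.

#include <stdio.h>

#define TLB_SR 0x01		/* supervisor read    */
#define TLB_SW 0x02		/* supervisor write   */
#define TLB_SX 0x04		/* supervisor execute */
#define TLB_UR 0x08		/* user read          */
#define TLB_UW 0x10		/* user write         */
#define TLB_UX 0x20		/* user execute       */

int main(void)
{
	unsigned int attrib = TLB_SR | TLB_SX;	/* a guest kernel text mapping */

	/* Guest supervisor mode runs as host user: copy the S bits up to U. */
	attrib &= ~(TLB_UR | TLB_UW | TLB_UX);
	attrib |= (attrib & (TLB_SR | TLB_SW | TLB_SX)) << 3;

	/* The host itself must always be able to reach the page. */
	attrib |= TLB_SR | TLB_SW | TLB_SX;

	printf("shadow attrib: 0x%02x\n", attrib);	/* 0x2f = UX|UR|SX|SW|SR */
	return 0;
}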
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
new file mode 100644
index 000000000000..2ccd46b6f6b7
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -0,0 +1,91 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __KVM_POWERPC_TLB_H__
21#define __KVM_POWERPC_TLB_H__
22
23#include <linux/kvm_host.h>
24#include <asm/mmu-44x.h>
25
26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
27 unsigned int pid, unsigned int as);
28extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
29extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
30
31/* TLB helper functions */
32static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
33{
34 return (tlbe->word0 >> 4) & 0xf;
35}
36
37static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
38{
39 return tlbe->word0 & 0xfffffc00;
40}
41
42static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
43{
44 unsigned int pgsize = get_tlb_size(tlbe);
45 return 1 << 10 << (pgsize << 1);
46}
47
48static inline gva_t get_tlb_end(const struct tlbe *tlbe)
49{
50 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
51}
52
53static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
54{
55 u64 word1 = tlbe->word1;
56 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
57}
58
59static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
60{
61 return tlbe->tid & 0xff;
62}
63
64static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
65{
66 return (tlbe->word0 >> 8) & 0x1;
67}
68
69static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
70{
71 return (tlbe->word0 >> 9) & 0x1;
72}
73
74static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu)
75{
76 return vcpu->arch.mmucr & 0xff;
77}
78
79static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
80{
81 return (vcpu->arch.mmucr >> 16) & 0x1;
82}
83
84static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
85{
86 unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
87
88 return get_tlb_raddr(tlbe) | (eaddr & pgmask);
89}
90
91#endif /* __KVM_POWERPC_TLB_H__ */
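
get_tlb_bytes() above decodes the 440 SIZE field as 1 << 10 << (size << 1), i.e. each increment of SIZE multiplies the page size by four. A tiny worked example (illustration only, covering the smallest encodings):

#include <stdio.h>

int main(void)
{
	for (unsigned int size = 0; size <= 3; size++)
		printf("SIZE=%u -> %u bytes\n", size, 1u << 10 << (size << 1));
	/* Prints 1024, 4096, 16384 and 65536 (1KB, 4KB, 16KB, 64KB). */
	return 0;
}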
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
new file mode 100644
index 000000000000..6b076010213b
--- /dev/null
+++ b/arch/powerpc/kvm/Kconfig
@@ -0,0 +1,42 @@
1#
2# KVM configuration
3#
4
5menuconfig VIRTUALIZATION
6 bool "Virtualization"
7 ---help---
8 Say Y here to get to see options for using your Linux host to run
9 other operating systems inside virtual machines (guests).
10 This option alone does not add any kernel code.
11
12 If you say N, all options in this submenu will be skipped and
13 disabled.
14
15if VIRTUALIZATION
16
17config KVM
18 bool "Kernel-based Virtual Machine (KVM) support"
19 depends on 44x && EXPERIMENTAL
20 select PREEMPT_NOTIFIERS
21 select ANON_INODES
22 # We can only run on Book E hosts so far
23 select KVM_BOOKE_HOST
24 ---help---
25 Support hosting virtualized guest machines. You will also
26 need to select one or more of the processor modules below.
27
28 This module provides access to the hardware capabilities through
29 a character device node named /dev/kvm.
30
31 If unsure, say N.
32
33config KVM_BOOKE_HOST
34 bool "KVM host support for Book E PowerPC processors"
35 depends on KVM && 44x
36 ---help---
37 Provides host support for KVM on Book E PowerPC processors. Currently
38 this works on 440 processors only.
39
40source drivers/virtio/Kconfig
41
42endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
new file mode 100644
index 000000000000..d0d358d367ec
--- /dev/null
+++ b/arch/powerpc/kvm/Makefile
@@ -0,0 +1,15 @@
1#
2# Makefile for Kernel-based Virtual Machine module
3#
4
5EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
6
7common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
8
9kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o
10obj-$(CONFIG_KVM) += kvm.o
11
12AFLAGS_booke_interrupts.o := -I$(obj)
13
14kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
15obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
new file mode 100644
index 000000000000..6d9884a6884a
--- /dev/null
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -0,0 +1,615 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19 */
20
21#include <linux/errno.h>
22#include <linux/err.h>
23#include <linux/kvm_host.h>
24#include <linux/module.h>
25#include <linux/vmalloc.h>
26#include <linux/fs.h>
27#include <asm/cputable.h>
28#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h>
30
31#include "44x_tlb.h"
32
33#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
34#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
35
36struct kvm_stats_debugfs_item debugfs_entries[] = {
37 { "exits", VCPU_STAT(sum_exits) },
38 { "mmio", VCPU_STAT(mmio_exits) },
39 { "dcr", VCPU_STAT(dcr_exits) },
40 { "sig", VCPU_STAT(signal_exits) },
41 { "light", VCPU_STAT(light_exits) },
42 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
43 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
44 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
45 { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) },
46 { "sysc", VCPU_STAT(syscall_exits) },
47 { "isi", VCPU_STAT(isi_exits) },
48 { "dsi", VCPU_STAT(dsi_exits) },
49 { "inst_emu", VCPU_STAT(emulated_inst_exits) },
50 { "dec", VCPU_STAT(dec_exits) },
51 { "ext_intr", VCPU_STAT(ext_intr_exits) },
52 { NULL }
53};
54
55static const u32 interrupt_msr_mask[16] = {
56 [BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
57 [BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
58 [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
59 [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
60 [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
61 [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
62 [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
63 [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
64 [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
65 [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
66 [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
67 [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
68 [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
69 [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
70 [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
71 [BOOKE_INTERRUPT_DEBUG] = MSR_ME,
72};
73
74const unsigned char exception_priority[] = {
75 [BOOKE_INTERRUPT_DATA_STORAGE] = 0,
76 [BOOKE_INTERRUPT_INST_STORAGE] = 1,
77 [BOOKE_INTERRUPT_ALIGNMENT] = 2,
78 [BOOKE_INTERRUPT_PROGRAM] = 3,
79 [BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
80 [BOOKE_INTERRUPT_SYSCALL] = 5,
81 [BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
82 [BOOKE_INTERRUPT_DTLB_MISS] = 7,
83 [BOOKE_INTERRUPT_ITLB_MISS] = 8,
84 [BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
85 [BOOKE_INTERRUPT_DEBUG] = 10,
86 [BOOKE_INTERRUPT_CRITICAL] = 11,
87 [BOOKE_INTERRUPT_WATCHDOG] = 12,
88 [BOOKE_INTERRUPT_EXTERNAL] = 13,
89 [BOOKE_INTERRUPT_FIT] = 14,
90 [BOOKE_INTERRUPT_DECREMENTER] = 15,
91};
92
93const unsigned char priority_exception[] = {
94 BOOKE_INTERRUPT_DATA_STORAGE,
95 BOOKE_INTERRUPT_INST_STORAGE,
96 BOOKE_INTERRUPT_ALIGNMENT,
97 BOOKE_INTERRUPT_PROGRAM,
98 BOOKE_INTERRUPT_FP_UNAVAIL,
99 BOOKE_INTERRUPT_SYSCALL,
100 BOOKE_INTERRUPT_AP_UNAVAIL,
101 BOOKE_INTERRUPT_DTLB_MISS,
102 BOOKE_INTERRUPT_ITLB_MISS,
103 BOOKE_INTERRUPT_MACHINE_CHECK,
104 BOOKE_INTERRUPT_DEBUG,
105 BOOKE_INTERRUPT_CRITICAL,
106 BOOKE_INTERRUPT_WATCHDOG,
107 BOOKE_INTERRUPT_EXTERNAL,
108 BOOKE_INTERRUPT_FIT,
109 BOOKE_INTERRUPT_DECREMENTER,
110};
111
112
113void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
114{
115 struct tlbe *tlbe;
116 int i;
117
118 printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
119 printk("| %2s | %3s | %8s | %8s | %8s |\n",
120 "nr", "tid", "word0", "word1", "word2");
121
122 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
123 tlbe = &vcpu->arch.guest_tlb[i];
124 if (tlbe->word0 & PPC44x_TLB_VALID)
125 printk(" G%2d | %02X | %08X | %08X | %08X |\n",
126 i, tlbe->tid, tlbe->word0, tlbe->word1,
127 tlbe->word2);
128 }
129
130 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
131 tlbe = &vcpu->arch.shadow_tlb[i];
132 if (tlbe->word0 & PPC44x_TLB_VALID)
133 printk(" S%2d | %02X | %08X | %08X | %08X |\n",
134 i, tlbe->tid, tlbe->word0, tlbe->word1,
135 tlbe->word2);
136 }
137}
138
139/* TODO: use vcpu_printf() */
140void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
141{
142 int i;
143
144 printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr);
145 printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
146 printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
147
148 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
149
150 for (i = 0; i < 32; i += 4) {
151 printk("gpr%02d: %08x %08x %08x %08x\n", i,
152 vcpu->arch.gpr[i],
153 vcpu->arch.gpr[i+1],
154 vcpu->arch.gpr[i+2],
155 vcpu->arch.gpr[i+3]);
156 }
157}
158
159/* Check if we are ready to deliver the interrupt */
160static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
161{
162 int r;
163
164 switch (interrupt) {
165 case BOOKE_INTERRUPT_CRITICAL:
166 r = vcpu->arch.msr & MSR_CE;
167 break;
168 case BOOKE_INTERRUPT_MACHINE_CHECK:
169 r = vcpu->arch.msr & MSR_ME;
170 break;
171 case BOOKE_INTERRUPT_EXTERNAL:
172 r = vcpu->arch.msr & MSR_EE;
173 break;
174 case BOOKE_INTERRUPT_DECREMENTER:
175 r = vcpu->arch.msr & MSR_EE;
176 break;
177 case BOOKE_INTERRUPT_FIT:
178 r = vcpu->arch.msr & MSR_EE;
179 break;
180 case BOOKE_INTERRUPT_WATCHDOG:
181 r = vcpu->arch.msr & MSR_CE;
182 break;
183 case BOOKE_INTERRUPT_DEBUG:
184 r = vcpu->arch.msr & MSR_DE;
185 break;
186 default:
187 r = 1;
188 }
189
190 return r;
191}
192
193static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
194{
195 switch (interrupt) {
196 case BOOKE_INTERRUPT_DECREMENTER:
197 vcpu->arch.tsr |= TSR_DIS;
198 break;
199 }
200
201 vcpu->arch.srr0 = vcpu->arch.pc;
202 vcpu->arch.srr1 = vcpu->arch.msr;
203 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
204 kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
205}
206
207/* Check pending exceptions and deliver one, if possible. */
208void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
209{
210 unsigned long *pending = &vcpu->arch.pending_exceptions;
211 unsigned int exception;
212 unsigned int priority;
213
214 priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
215 while (priority <= BOOKE_MAX_INTERRUPT) {
216 exception = priority_exception[priority];
217 if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
218 kvmppc_clear_exception(vcpu, exception);
219 kvmppc_deliver_interrupt(vcpu, exception);
220 break;
221 }
222
223 priority = find_next_bit(pending,
224 BITS_PER_BYTE * sizeof(*pending),
225 priority + 1);
226 }
227}
228
229static int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
230{
231 enum emulation_result er;
232 int r;
233
234 er = kvmppc_emulate_instruction(run, vcpu);
235 switch (er) {
236 case EMULATE_DONE:
237 /* Future optimization: only reload non-volatiles if they were
238 * actually modified. */
239 r = RESUME_GUEST_NV;
240 break;
241 case EMULATE_DO_MMIO:
242 run->exit_reason = KVM_EXIT_MMIO;
243 /* We must reload nonvolatiles because "update" load/store
244 * instructions modify register state. */
245 /* Future optimization: only reload non-volatiles if they were
246 * actually modified. */
247 r = RESUME_HOST_NV;
248 break;
249 case EMULATE_FAIL:
250 /* XXX Deliver Program interrupt to guest. */
251 printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
252 vcpu->arch.last_inst);
253 r = RESUME_HOST;
254 break;
255 default:
256 BUG();
257 }
258
259 return r;
260}
261
262/**
263 * kvmppc_handle_exit
264 *
265 * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
266 */
267int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
268 unsigned int exit_nr)
269{
270 enum emulation_result er;
271 int r = RESUME_HOST;
272
273 local_irq_enable();
274
275 run->exit_reason = KVM_EXIT_UNKNOWN;
276 run->ready_for_interrupt_injection = 1;
277
278 switch (exit_nr) {
279 case BOOKE_INTERRUPT_MACHINE_CHECK:
280 printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
281 kvmppc_dump_vcpu(vcpu);
282 r = RESUME_HOST;
283 break;
284
285 case BOOKE_INTERRUPT_EXTERNAL:
286 case BOOKE_INTERRUPT_DECREMENTER:
287 /* Since we switched IVPR back to the host's value, the host
288 * handled this interrupt the moment we enabled interrupts.
289 * Now we just offer it a chance to reschedule the guest. */
290
291 /* XXX At this point the TLB still holds our shadow TLB, so if
292 * we do reschedule the host will fault over it. Perhaps we
293 * should politely restore the host's entries to minimize
294 * misses before ceding control. */
295 if (need_resched())
296 cond_resched();
297 if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
298 vcpu->stat.dec_exits++;
299 else
300 vcpu->stat.ext_intr_exits++;
301 r = RESUME_GUEST;
302 break;
303
304 case BOOKE_INTERRUPT_PROGRAM:
305 if (vcpu->arch.msr & MSR_PR) {
306 /* Program traps generated by user-level software must be handled
307 * by the guest kernel. */
308 vcpu->arch.esr = vcpu->arch.fault_esr;
309 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
310 r = RESUME_GUEST;
311 break;
312 }
313
314 er = kvmppc_emulate_instruction(run, vcpu);
315 switch (er) {
316 case EMULATE_DONE:
317 /* Future optimization: only reload non-volatiles if
318 * they were actually modified by emulation. */
319 vcpu->stat.emulated_inst_exits++;
320 r = RESUME_GUEST_NV;
321 break;
322 case EMULATE_DO_DCR:
323 run->exit_reason = KVM_EXIT_DCR;
324 r = RESUME_HOST;
325 break;
326 case EMULATE_FAIL:
327 /* XXX Deliver Program interrupt to guest. */
328 printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
329 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
330 /* For debugging, encode the failing instruction and
331 * report it to userspace. */
332 run->hw.hardware_exit_reason = ~0ULL << 32;
333 run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
334 r = RESUME_HOST;
335 break;
336 default:
337 BUG();
338 }
339 break;
340
341 case BOOKE_INTERRUPT_DATA_STORAGE:
342 vcpu->arch.dear = vcpu->arch.fault_dear;
343 vcpu->arch.esr = vcpu->arch.fault_esr;
344 kvmppc_queue_exception(vcpu, exit_nr);
345 vcpu->stat.dsi_exits++;
346 r = RESUME_GUEST;
347 break;
348
349 case BOOKE_INTERRUPT_INST_STORAGE:
350 vcpu->arch.esr = vcpu->arch.fault_esr;
351 kvmppc_queue_exception(vcpu, exit_nr);
352 vcpu->stat.isi_exits++;
353 r = RESUME_GUEST;
354 break;
355
356 case BOOKE_INTERRUPT_SYSCALL:
357 kvmppc_queue_exception(vcpu, exit_nr);
358 vcpu->stat.syscall_exits++;
359 r = RESUME_GUEST;
360 break;
361
362 case BOOKE_INTERRUPT_DTLB_MISS: {
363 struct tlbe *gtlbe;
364 unsigned long eaddr = vcpu->arch.fault_dear;
365 gfn_t gfn;
366
367 /* Check the guest TLB. */
368 gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
369 if (!gtlbe) {
370 /* The guest didn't have a mapping for it. */
371 kvmppc_queue_exception(vcpu, exit_nr);
372 vcpu->arch.dear = vcpu->arch.fault_dear;
373 vcpu->arch.esr = vcpu->arch.fault_esr;
374 vcpu->stat.dtlb_real_miss_exits++;
375 r = RESUME_GUEST;
376 break;
377 }
378
379 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
380 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
381
382 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
383 /* The guest TLB had a mapping, but the shadow TLB
384 * didn't, and it is RAM. This could be because:
385 * a) the entry is mapping the host kernel, or
386 * b) the guest used a large mapping which we're faking
387 * Either way, we need to satisfy the fault without
388 * invoking the guest. */
389 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
390 gtlbe->word2);
391 vcpu->stat.dtlb_virt_miss_exits++;
392 r = RESUME_GUEST;
393 } else {
394 /* Guest has mapped and accessed a page which is not
395 * actually RAM. */
396 r = kvmppc_emulate_mmio(run, vcpu);
397 }
398
399 break;
400 }
401
402 case BOOKE_INTERRUPT_ITLB_MISS: {
403 struct tlbe *gtlbe;
404 unsigned long eaddr = vcpu->arch.pc;
405 gfn_t gfn;
406
407 r = RESUME_GUEST;
408
409 /* Check the guest TLB. */
410 gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
411 if (!gtlbe) {
412 /* The guest didn't have a mapping for it. */
413 kvmppc_queue_exception(vcpu, exit_nr);
414 vcpu->stat.itlb_real_miss_exits++;
415 break;
416 }
417
418 vcpu->stat.itlb_virt_miss_exits++;
419
420 gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
421
422 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
423 /* The guest TLB had a mapping, but the shadow TLB
424 * didn't. This could be because:
425 * a) the entry is mapping the host kernel, or
426 * b) the guest used a large mapping which we're faking
427 * Either way, we need to satisfy the fault without
428 * invoking the guest. */
429 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
430 gtlbe->word2);
431 } else {
432 /* Guest mapped and leaped at non-RAM! */
433 kvmppc_queue_exception(vcpu,
434 BOOKE_INTERRUPT_MACHINE_CHECK);
435 }
436
437 break;
438 }
439
440 default:
441 printk(KERN_EMERG "exit_nr %d\n", exit_nr);
442 BUG();
443 }
444
445 local_irq_disable();
446
447 kvmppc_check_and_deliver_interrupts(vcpu);
448
449 /* Do some exit accounting. */
450 vcpu->stat.sum_exits++;
451 if (!(r & RESUME_HOST)) {
452 /* To avoid clobbering exit_reason, only check for signals if
453 * we aren't already exiting to userspace for some other
454 * reason. */
455 if (signal_pending(current)) {
456 run->exit_reason = KVM_EXIT_INTR;
457 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
458
459 vcpu->stat.signal_exits++;
460 } else {
461 vcpu->stat.light_exits++;
462 }
463 } else {
464 switch (run->exit_reason) {
465 case KVM_EXIT_MMIO:
466 vcpu->stat.mmio_exits++;
467 break;
468 case KVM_EXIT_DCR:
469 vcpu->stat.dcr_exits++;
470 break;
471 case KVM_EXIT_INTR:
472 vcpu->stat.signal_exits++;
473 break;
474 }
475 }
476
477 return r;
478}
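The packed return value documented above kvmppc_handle_exit can be unpacked as in the quick sketch below; the flag bit positions are assumptions inferred from the (errcode << 2) packing here and the srawi shift in booke_interrupts.S, not taken from a header.

	/* Illustrative sketch only, not part of the patch. */
	static inline void decode_resume(int r)
	{
		int to_host   = r & RESUME_FLAG_HOST;	/* exit to userspace?        */
		int reload_nv = r & RESUME_FLAG_NV;	/* reload non-volatile GPRs? */
		int errcode   = r >> 2;			/* e.g. -EINTR after a signal
							   (arithmetic shift, as srawi does) */
		(void)to_host; (void)reload_nv; (void)errcode;
	}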
479
480/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
481int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
482{
483 struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
484
485 tlbe->tid = 0;
486 tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
487 tlbe->word1 = 0;
488 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
489
490 tlbe++;
491 tlbe->tid = 0;
492 tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
493 tlbe->word1 = 0xef600000;
494 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
495 | PPC44x_TLB_I | PPC44x_TLB_G;
496
497 vcpu->arch.pc = 0;
498 vcpu->arch.msr = 0;
499 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
500
501 /* Eye-catching number so we know if the guest takes an interrupt
502 * before it's programmed its own IVPR. */
503 vcpu->arch.ivpr = 0x55550000;
504
505 /* Since the guest can directly access the timebase, it must know the
506 * real timebase frequency. Accordingly, it must see the state of
507 * CCR1[TCS]. */
508 vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
509
510 return 0;
511}
512
513int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
514{
515 int i;
516
517 regs->pc = vcpu->arch.pc;
518 regs->cr = vcpu->arch.cr;
519 regs->ctr = vcpu->arch.ctr;
520 regs->lr = vcpu->arch.lr;
521 regs->xer = vcpu->arch.xer;
522 regs->msr = vcpu->arch.msr;
523 regs->srr0 = vcpu->arch.srr0;
524 regs->srr1 = vcpu->arch.srr1;
525 regs->pid = vcpu->arch.pid;
526 regs->sprg0 = vcpu->arch.sprg0;
527 regs->sprg1 = vcpu->arch.sprg1;
528 regs->sprg2 = vcpu->arch.sprg2;
529 regs->sprg3 = vcpu->arch.sprg3;
530 regs->sprg4 = vcpu->arch.sprg4;
531 regs->sprg5 = vcpu->arch.sprg5;
532 regs->sprg6 = vcpu->arch.sprg6;
533
534 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
535 regs->gpr[i] = vcpu->arch.gpr[i];
536
537 return 0;
538}
539
540int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
541{
542 int i;
543
544 vcpu->arch.pc = regs->pc;
545 vcpu->arch.cr = regs->cr;
546 vcpu->arch.ctr = regs->ctr;
547 vcpu->arch.lr = regs->lr;
548 vcpu->arch.xer = regs->xer;
549 vcpu->arch.msr = regs->msr;
550 vcpu->arch.srr0 = regs->srr0;
551 vcpu->arch.srr1 = regs->srr1;
552 vcpu->arch.sprg0 = regs->sprg0;
553 vcpu->arch.sprg1 = regs->sprg1;
554 vcpu->arch.sprg2 = regs->sprg2;
555 vcpu->arch.sprg3 = regs->sprg3;
556 vcpu->arch.sprg4 = regs->sprg4;
557 vcpu->arch.sprg5 = regs->sprg5;
558 vcpu->arch.sprg6 = regs->sprg6;
559
560 for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
561 vcpu->arch.gpr[i] = regs->gpr[i];
562
563 return 0;
564}
565
566int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
567 struct kvm_sregs *sregs)
568{
569 return -ENOTSUPP;
570}
571
572int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
573 struct kvm_sregs *sregs)
574{
575 return -ENOTSUPP;
576}
577
578int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
579{
580 return -ENOTSUPP;
581}
582
583int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
584{
585 return -ENOTSUPP;
586}
587
588/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
589int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
590 struct kvm_translation *tr)
591{
592 struct tlbe *gtlbe;
593 int index;
594 gva_t eaddr;
595 u8 pid;
596 u8 as;
597
598 eaddr = tr->linear_address;
599 pid = (tr->linear_address >> 32) & 0xff;
600 as = (tr->linear_address >> 40) & 0x1;
601
602 index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
603 if (index == -1) {
604 tr->valid = 0;
605 return 0;
606 }
607
608 gtlbe = &vcpu->arch.guest_tlb[index];
609
610 tr->physical_address = tlb_xlate(gtlbe, eaddr);
611 /* XXX what does "writeable" and "usermode" even mean? */
612 tr->valid = 1;
613
614 return 0;
615}
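For reference, a hedged sketch of how userspace could build the AS|PID|EADDR encoding that this ioctl decodes; the field positions simply mirror the shifts used above, and the helper name is made up for illustration.

	/* Illustrative sketch only, not part of the patch. */
	static inline __u64 pack_linear_address(__u32 eaddr, __u8 pid, __u8 as)
	{
		return (__u64)eaddr | ((__u64)pid << 32) | ((__u64)(as & 1) << 40);
	}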
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
new file mode 100644
index 000000000000..b480341bc31e
--- /dev/null
+++ b/arch/powerpc/kvm/booke_host.c
@@ -0,0 +1,83 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/errno.h>
21#include <linux/kvm_host.h>
22#include <linux/module.h>
23#include <asm/cacheflush.h>
24#include <asm/kvm_ppc.h>
25
26unsigned long kvmppc_booke_handlers;
27
28static int kvmppc_booke_init(void)
29{
30 unsigned long ivor[16];
31 unsigned long max_ivor = 0;
32 int i;
33
34 /* We install our own exception handlers by hijacking IVPR. IVPR ignores
35 * its low 16 bits, so the handlers need a 64KB-aligned 64KB allocation. */
36 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
37 VCPU_SIZE_ORDER);
38 if (!kvmppc_booke_handlers)
39 return -ENOMEM;
40
41 /* XXX make sure our handlers are smaller than Linux's */
42
43 /* Copy our interrupt handlers to match host IVORs. That way we don't
44 * have to swap the IVORs on every guest/host transition. */
45 ivor[0] = mfspr(SPRN_IVOR0);
46 ivor[1] = mfspr(SPRN_IVOR1);
47 ivor[2] = mfspr(SPRN_IVOR2);
48 ivor[3] = mfspr(SPRN_IVOR3);
49 ivor[4] = mfspr(SPRN_IVOR4);
50 ivor[5] = mfspr(SPRN_IVOR5);
51 ivor[6] = mfspr(SPRN_IVOR6);
52 ivor[7] = mfspr(SPRN_IVOR7);
53 ivor[8] = mfspr(SPRN_IVOR8);
54 ivor[9] = mfspr(SPRN_IVOR9);
55 ivor[10] = mfspr(SPRN_IVOR10);
56 ivor[11] = mfspr(SPRN_IVOR11);
57 ivor[12] = mfspr(SPRN_IVOR12);
58 ivor[13] = mfspr(SPRN_IVOR13);
59 ivor[14] = mfspr(SPRN_IVOR14);
60 ivor[15] = mfspr(SPRN_IVOR15);
61
62 for (i = 0; i < 16; i++) {
63 if (ivor[i] > max_ivor)
64 max_ivor = ivor[i];
65
66 memcpy((void *)kvmppc_booke_handlers + ivor[i],
67 kvmppc_handlers_start + i * kvmppc_handler_len,
68 kvmppc_handler_len);
69 }
70 flush_icache_range(kvmppc_booke_handlers,
71 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
72
73 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
74}
75
76static void __exit kvmppc_booke_exit(void)
77{
78 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
79 kvm_exit();
80}
81
82module_init(kvmppc_booke_init)
83module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
new file mode 100644
index 000000000000..3b653b5309b8
--- /dev/null
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -0,0 +1,436 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <asm/ppc_asm.h>
21#include <asm/kvm_asm.h>
22#include <asm/reg.h>
23#include <asm/mmu-44x.h>
24#include <asm/page.h>
25#include <asm/asm-offsets.h>
26
27#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS)
28
29#define VCPU_GPR(n) (VCPU_GPRS + (n * 4))
30
31/* The host stack layout: */
32#define HOST_R1 0 /* Implied by stwu. */
33#define HOST_CALLEE_LR 4
34#define HOST_RUN 8
35/* r2 is special: it holds 'current', and it is made nonvolatile in the
36 * kernel with the -ffixed-r2 gcc option. */
37#define HOST_R2 12
38#define HOST_NV_GPRS 16
39#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4))
40#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4)
41#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
42#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */
43
44#define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
45 (1<<BOOKE_INTERRUPT_DTLB_MISS))
46
47#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
48 (1<<BOOKE_INTERRUPT_DTLB_MISS))
49
50#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
51 (1<<BOOKE_INTERRUPT_INST_STORAGE) | \
52 (1<<BOOKE_INTERRUPT_PROGRAM) | \
53 (1<<BOOKE_INTERRUPT_DTLB_MISS))
54
55.macro KVM_HANDLER ivor_nr
56_GLOBAL(kvmppc_handler_\ivor_nr)
57 /* Get pointer to vcpu and record exit number. */
58 mtspr SPRN_SPRG0, r4
59 mfspr r4, SPRN_SPRG1
60 stw r5, VCPU_GPR(r5)(r4)
61 stw r6, VCPU_GPR(r6)(r4)
62 mfctr r5
63 lis r6, kvmppc_resume_host@h
64 stw r5, VCPU_CTR(r4)
65 li r5, \ivor_nr
66 ori r6, r6, kvmppc_resume_host@l
67 mtctr r6
68 bctr
69.endm
70
71_GLOBAL(kvmppc_handlers_start)
72KVM_HANDLER BOOKE_INTERRUPT_CRITICAL
73KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK
74KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE
75KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE
76KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL
77KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT
78KVM_HANDLER BOOKE_INTERRUPT_PROGRAM
79KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL
80KVM_HANDLER BOOKE_INTERRUPT_SYSCALL
81KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL
82KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER
83KVM_HANDLER BOOKE_INTERRUPT_FIT
84KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
85KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
86KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
87KVM_HANDLER BOOKE_INTERRUPT_DEBUG
88
89_GLOBAL(kvmppc_handler_len)
90 .long kvmppc_handler_1 - kvmppc_handler_0
91
92
93/* Registers:
94 * SPRG0: guest r4
95 * r4: vcpu pointer
96 * r5: KVM exit number
97 */
98_GLOBAL(kvmppc_resume_host)
99 stw r3, VCPU_GPR(r3)(r4)
100 mfcr r3
101 stw r3, VCPU_CR(r4)
102 stw r7, VCPU_GPR(r7)(r4)
103 stw r8, VCPU_GPR(r8)(r4)
104 stw r9, VCPU_GPR(r9)(r4)
105
106 li r6, 1
107 slw r6, r6, r5
108
109 /* Save the faulting instruction and all GPRs for emulation. */
110 andi. r7, r6, NEED_INST_MASK
111 beq ..skip_inst_copy
112 mfspr r9, SPRN_SRR0
113 mfmsr r8
114 ori r7, r8, MSR_DS
115 mtmsr r7
116 isync
117 lwz r9, 0(r9)
118 mtmsr r8
119 isync
120 stw r9, VCPU_LAST_INST(r4)
121
122 stw r15, VCPU_GPR(r15)(r4)
123 stw r16, VCPU_GPR(r16)(r4)
124 stw r17, VCPU_GPR(r17)(r4)
125 stw r18, VCPU_GPR(r18)(r4)
126 stw r19, VCPU_GPR(r19)(r4)
127 stw r20, VCPU_GPR(r20)(r4)
128 stw r21, VCPU_GPR(r21)(r4)
129 stw r22, VCPU_GPR(r22)(r4)
130 stw r23, VCPU_GPR(r23)(r4)
131 stw r24, VCPU_GPR(r24)(r4)
132 stw r25, VCPU_GPR(r25)(r4)
133 stw r26, VCPU_GPR(r26)(r4)
134 stw r27, VCPU_GPR(r27)(r4)
135 stw r28, VCPU_GPR(r28)(r4)
136 stw r29, VCPU_GPR(r29)(r4)
137 stw r30, VCPU_GPR(r30)(r4)
138 stw r31, VCPU_GPR(r31)(r4)
139..skip_inst_copy:
140
141 /* Also grab DEAR and ESR before the host can clobber them. */
142
143 andi. r7, r6, NEED_DEAR_MASK
144 beq ..skip_dear
145 mfspr r9, SPRN_DEAR
146 stw r9, VCPU_FAULT_DEAR(r4)
147..skip_dear:
148
149 andi. r7, r6, NEED_ESR_MASK
150 beq ..skip_esr
151 mfspr r9, SPRN_ESR
152 stw r9, VCPU_FAULT_ESR(r4)
153..skip_esr:
154
155 /* Save remaining volatile guest register state to vcpu. */
156 stw r0, VCPU_GPR(r0)(r4)
157 stw r1, VCPU_GPR(r1)(r4)
158 stw r2, VCPU_GPR(r2)(r4)
159 stw r10, VCPU_GPR(r10)(r4)
160 stw r11, VCPU_GPR(r11)(r4)
161 stw r12, VCPU_GPR(r12)(r4)
162 stw r13, VCPU_GPR(r13)(r4)
163 stw r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */
164 mflr r3
165 stw r3, VCPU_LR(r4)
166 mfxer r3
167 stw r3, VCPU_XER(r4)
168 mfspr r3, SPRN_SPRG0
169 stw r3, VCPU_GPR(r4)(r4)
170 mfspr r3, SPRN_SRR0
171 stw r3, VCPU_PC(r4)
172
173 /* Restore host stack pointer and PID before IVPR, since the host
174 * exception handlers use them. */
175 lwz r1, VCPU_HOST_STACK(r4)
176 lwz r3, VCPU_HOST_PID(r4)
177 mtspr SPRN_PID, r3
178
179 /* Restore host IVPR before re-enabling interrupts. We cheat and know
180 * that Linux IVPR is always 0xc0000000. */
181 lis r3, 0xc000
182 mtspr SPRN_IVPR, r3
183
184 /* Switch to kernel stack and jump to handler. */
185 LOAD_REG_ADDR(r3, kvmppc_handle_exit)
186 mtctr r3
187 lwz r3, HOST_RUN(r1)
188 lwz r2, HOST_R2(r1)
189 mr r14, r4 /* Save vcpu pointer. */
190
191 bctrl /* kvmppc_handle_exit() */
192
193 /* Restore vcpu pointer and the nonvolatiles we used. */
194 mr r4, r14
195 lwz r14, VCPU_GPR(r14)(r4)
196
197 /* Sometimes instruction emulation must restore complete GPR state. */
198 andi. r5, r3, RESUME_FLAG_NV
199 beq ..skip_nv_load
200 lwz r15, VCPU_GPR(r15)(r4)
201 lwz r16, VCPU_GPR(r16)(r4)
202 lwz r17, VCPU_GPR(r17)(r4)
203 lwz r18, VCPU_GPR(r18)(r4)
204 lwz r19, VCPU_GPR(r19)(r4)
205 lwz r20, VCPU_GPR(r20)(r4)
206 lwz r21, VCPU_GPR(r21)(r4)
207 lwz r22, VCPU_GPR(r22)(r4)
208 lwz r23, VCPU_GPR(r23)(r4)
209 lwz r24, VCPU_GPR(r24)(r4)
210 lwz r25, VCPU_GPR(r25)(r4)
211 lwz r26, VCPU_GPR(r26)(r4)
212 lwz r27, VCPU_GPR(r27)(r4)
213 lwz r28, VCPU_GPR(r28)(r4)
214 lwz r29, VCPU_GPR(r29)(r4)
215 lwz r30, VCPU_GPR(r30)(r4)
216 lwz r31, VCPU_GPR(r31)(r4)
217..skip_nv_load:
218
219 /* Should we return to the guest? */
220 andi. r5, r3, RESUME_FLAG_HOST
221 beq lightweight_exit
222
223 srawi r3, r3, 2 /* Shift -ERR back down. */
224
225heavyweight_exit:
226 /* Not returning to guest. */
227
228 /* We already saved guest volatile register state; now save the
229 * non-volatiles. */
230 stw r15, VCPU_GPR(r15)(r4)
231 stw r16, VCPU_GPR(r16)(r4)
232 stw r17, VCPU_GPR(r17)(r4)
233 stw r18, VCPU_GPR(r18)(r4)
234 stw r19, VCPU_GPR(r19)(r4)
235 stw r20, VCPU_GPR(r20)(r4)
236 stw r21, VCPU_GPR(r21)(r4)
237 stw r22, VCPU_GPR(r22)(r4)
238 stw r23, VCPU_GPR(r23)(r4)
239 stw r24, VCPU_GPR(r24)(r4)
240 stw r25, VCPU_GPR(r25)(r4)
241 stw r26, VCPU_GPR(r26)(r4)
242 stw r27, VCPU_GPR(r27)(r4)
243 stw r28, VCPU_GPR(r28)(r4)
244 stw r29, VCPU_GPR(r29)(r4)
245 stw r30, VCPU_GPR(r30)(r4)
246 stw r31, VCPU_GPR(r31)(r4)
247
248 /* Load host non-volatile register state from host stack. */
249 lwz r14, HOST_NV_GPR(r14)(r1)
250 lwz r15, HOST_NV_GPR(r15)(r1)
251 lwz r16, HOST_NV_GPR(r16)(r1)
252 lwz r17, HOST_NV_GPR(r17)(r1)
253 lwz r18, HOST_NV_GPR(r18)(r1)
254 lwz r19, HOST_NV_GPR(r19)(r1)
255 lwz r20, HOST_NV_GPR(r20)(r1)
256 lwz r21, HOST_NV_GPR(r21)(r1)
257 lwz r22, HOST_NV_GPR(r22)(r1)
258 lwz r23, HOST_NV_GPR(r23)(r1)
259 lwz r24, HOST_NV_GPR(r24)(r1)
260 lwz r25, HOST_NV_GPR(r25)(r1)
261 lwz r26, HOST_NV_GPR(r26)(r1)
262 lwz r27, HOST_NV_GPR(r27)(r1)
263 lwz r28, HOST_NV_GPR(r28)(r1)
264 lwz r29, HOST_NV_GPR(r29)(r1)
265 lwz r30, HOST_NV_GPR(r30)(r1)
266 lwz r31, HOST_NV_GPR(r31)(r1)
267
268 /* Return to kvm_vcpu_run(). */
269 lwz r4, HOST_STACK_LR(r1)
270 addi r1, r1, HOST_STACK_SIZE
271 mtlr r4
272 /* r3 still contains the return code from kvmppc_handle_exit(). */
273 blr
274
275
276/* Registers:
277 * r3: kvm_run pointer
278 * r4: vcpu pointer
279 */
280_GLOBAL(__kvmppc_vcpu_run)
281 stwu r1, -HOST_STACK_SIZE(r1)
282 stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
283
284 /* Save host state to stack. */
285 stw r3, HOST_RUN(r1)
286 mflr r3
287 stw r3, HOST_STACK_LR(r1)
288
289 /* Save host non-volatile register state to stack. */
290 stw r14, HOST_NV_GPR(r14)(r1)
291 stw r15, HOST_NV_GPR(r15)(r1)
292 stw r16, HOST_NV_GPR(r16)(r1)
293 stw r17, HOST_NV_GPR(r17)(r1)
294 stw r18, HOST_NV_GPR(r18)(r1)
295 stw r19, HOST_NV_GPR(r19)(r1)
296 stw r20, HOST_NV_GPR(r20)(r1)
297 stw r21, HOST_NV_GPR(r21)(r1)
298 stw r22, HOST_NV_GPR(r22)(r1)
299 stw r23, HOST_NV_GPR(r23)(r1)
300 stw r24, HOST_NV_GPR(r24)(r1)
301 stw r25, HOST_NV_GPR(r25)(r1)
302 stw r26, HOST_NV_GPR(r26)(r1)
303 stw r27, HOST_NV_GPR(r27)(r1)
304 stw r28, HOST_NV_GPR(r28)(r1)
305 stw r29, HOST_NV_GPR(r29)(r1)
306 stw r30, HOST_NV_GPR(r30)(r1)
307 stw r31, HOST_NV_GPR(r31)(r1)
308
309 /* Load guest non-volatiles. */
310 lwz r14, VCPU_GPR(r14)(r4)
311 lwz r15, VCPU_GPR(r15)(r4)
312 lwz r16, VCPU_GPR(r16)(r4)
313 lwz r17, VCPU_GPR(r17)(r4)
314 lwz r18, VCPU_GPR(r18)(r4)
315 lwz r19, VCPU_GPR(r19)(r4)
316 lwz r20, VCPU_GPR(r20)(r4)
317 lwz r21, VCPU_GPR(r21)(r4)
318 lwz r22, VCPU_GPR(r22)(r4)
319 lwz r23, VCPU_GPR(r23)(r4)
320 lwz r24, VCPU_GPR(r24)(r4)
321 lwz r25, VCPU_GPR(r25)(r4)
322 lwz r26, VCPU_GPR(r26)(r4)
323 lwz r27, VCPU_GPR(r27)(r4)
324 lwz r28, VCPU_GPR(r28)(r4)
325 lwz r29, VCPU_GPR(r29)(r4)
326 lwz r30, VCPU_GPR(r30)(r4)
327 lwz r31, VCPU_GPR(r31)(r4)
328
329lightweight_exit:
330 stw r2, HOST_R2(r1)
331
332 mfspr r3, SPRN_PID
333 stw r3, VCPU_HOST_PID(r4)
334 lwz r3, VCPU_PID(r4)
335 mtspr SPRN_PID, r3
336
337 /* Prevent all TLB updates. */
338 mfmsr r5
339 lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
340 ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
341 andc r6, r5, r6
342 mtmsr r6
343
344 /* Save the host's non-pinned TLB mappings, and load the guest mappings
345 * over them. Leave the host's "pinned" kernel mappings in place. */
346 /* XXX optimization: use generation count to avoid swapping unmodified
347 * entries. */
348 mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
349 lis r8, tlb_44x_hwater@ha
350 lwz r8, tlb_44x_hwater@l(r8)
351 addi r3, r4, VCPU_HOST_TLB - 4
352 addi r9, r4, VCPU_SHADOW_TLB - 4
353 li r6, 0
3541:
355 /* Save host entry. */
356 tlbre r7, r6, PPC44x_TLB_PAGEID
357 mfspr r5, SPRN_MMUCR
358 stwu r5, 4(r3)
359 stwu r7, 4(r3)
360 tlbre r7, r6, PPC44x_TLB_XLAT
361 stwu r7, 4(r3)
362 tlbre r7, r6, PPC44x_TLB_ATTRIB
363 stwu r7, 4(r3)
364 /* Load guest entry. */
365 lwzu r7, 4(r9)
366 mtspr SPRN_MMUCR, r7
367 lwzu r7, 4(r9)
368 tlbwe r7, r6, PPC44x_TLB_PAGEID
369 lwzu r7, 4(r9)
370 tlbwe r7, r6, PPC44x_TLB_XLAT
371 lwzu r7, 4(r9)
372 tlbwe r7, r6, PPC44x_TLB_ATTRIB
373 /* Increment index. */
374 addi r6, r6, 1
375 cmpw r6, r8
376 blt 1b
377 mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
378
379 iccci 0, 0 /* XXX hack */
380
381 /* Load some guest volatiles. */
382 lwz r0, VCPU_GPR(r0)(r4)
383 lwz r2, VCPU_GPR(r2)(r4)
384 lwz r9, VCPU_GPR(r9)(r4)
385 lwz r10, VCPU_GPR(r10)(r4)
386 lwz r11, VCPU_GPR(r11)(r4)
387 lwz r12, VCPU_GPR(r12)(r4)
388 lwz r13, VCPU_GPR(r13)(r4)
389 lwz r3, VCPU_LR(r4)
390 mtlr r3
391 lwz r3, VCPU_XER(r4)
392 mtxer r3
393
394 /* Switch the IVPR. XXX If we take a TLB miss after this we're screwed,
395 * so how do we make sure vcpu won't fault? */
396 lis r8, kvmppc_booke_handlers@ha
397 lwz r8, kvmppc_booke_handlers@l(r8)
398 mtspr SPRN_IVPR, r8
399
400 /* Save vcpu pointer for the exception handlers. */
401 mtspr SPRN_SPRG1, r4
402
403 /* Can't switch the stack pointer until after IVPR is switched,
404 * because host interrupt handlers would get confused. */
405 lwz r1, VCPU_GPR(r1)(r4)
406
407 /* XXX handle USPRG0 */
408 /* Host interrupt handlers may have clobbered these guest-readable
409 * SPRGs, so we need to reload them here with the guest's values. */
410 lwz r3, VCPU_SPRG4(r4)
411 mtspr SPRN_SPRG4, r3
412 lwz r3, VCPU_SPRG5(r4)
413 mtspr SPRN_SPRG5, r3
414 lwz r3, VCPU_SPRG6(r4)
415 mtspr SPRN_SPRG6, r3
416 lwz r3, VCPU_SPRG7(r4)
417 mtspr SPRN_SPRG7, r3
418
419 /* Finish loading guest volatiles and jump to guest. */
420 lwz r3, VCPU_CTR(r4)
421 mtctr r3
422 lwz r3, VCPU_CR(r4)
423 mtcr r3
424 lwz r5, VCPU_GPR(r5)(r4)
425 lwz r6, VCPU_GPR(r6)(r4)
426 lwz r7, VCPU_GPR(r7)(r4)
427 lwz r8, VCPU_GPR(r8)(r4)
428 lwz r3, VCPU_PC(r4)
429 mtsrr0 r3
430 lwz r3, VCPU_MSR(r4)
431 oris r3, r3, KVMPPC_MSR_MASK@h
432 ori r3, r3, KVMPPC_MSR_MASK@l
433 mtsrr1 r3
434 lwz r3, VCPU_GPR(r3)(r4)
435 lwz r4, VCPU_GPR(r4)(r4)
436 rfi
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
new file mode 100644
index 000000000000..a03fe0c80698
--- /dev/null
+++ b/arch/powerpc/kvm/emulate.c
@@ -0,0 +1,760 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/jiffies.h>
21#include <linux/timer.h>
22#include <linux/types.h>
23#include <linux/string.h>
24#include <linux/kvm_host.h>
25
26#include <asm/dcr.h>
27#include <asm/dcr-regs.h>
28#include <asm/time.h>
29#include <asm/byteorder.h>
30#include <asm/kvm_ppc.h>
31
32#include "44x_tlb.h"
33
34/* Instruction decoding */
35static inline unsigned int get_op(u32 inst)
36{
37 return inst >> 26;
38}
39
40static inline unsigned int get_xop(u32 inst)
41{
42 return (inst >> 1) & 0x3ff;
43}
44
45static inline unsigned int get_sprn(u32 inst)
46{
47 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
48}
49
50static inline unsigned int get_dcrn(u32 inst)
51{
52 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
53}
54
55static inline unsigned int get_rt(u32 inst)
56{
57 return (inst >> 21) & 0x1f;
58}
59
60static inline unsigned int get_rs(u32 inst)
61{
62 return (inst >> 21) & 0x1f;
63}
64
65static inline unsigned int get_ra(u32 inst)
66{
67 return (inst >> 16) & 0x1f;
68}
69
70static inline unsigned int get_rb(u32 inst)
71{
72 return (inst >> 11) & 0x1f;
73}
74
75static inline unsigned int get_rc(u32 inst)
76{
77 return inst & 0x1;
78}
79
80static inline unsigned int get_ws(u32 inst)
81{
82 return (inst >> 11) & 0x1f;
83}
84
85static inline unsigned int get_d(u32 inst)
86{
87 return inst & 0xffff;
88}
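As a worked example of these decode helpers, consider the standard encoding of "mfmsr r3" (0x7c6000a6); the expected field values below follow the usual PowerPC instruction layout and are easy to check by hand.

	/* Illustrative sketch only, not part of the patch. */
	static void example_decode(void)
	{
		u32 inst = 0x7c6000a6;		/* mfmsr r3 */

		BUG_ON(get_op(inst) != 31);	/* primary opcode  */
		BUG_ON(get_xop(inst) != 83);	/* extended opcode */
		BUG_ON(get_rt(inst) != 3);	/* destination GPR */
	}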
89
90static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
91 const struct tlbe *tlbe)
92{
93 gpa_t gpa;
94
95 if (!get_tlb_v(tlbe))
96 return 0;
97
98 /* Does it match current guest AS? */
99 /* XXX what about IS != DS? */
100 if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
101 return 0;
102
103 gpa = get_tlb_raddr(tlbe);
104 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
105 /* Mapping is not for RAM. */
106 return 0;
107
108 return 1;
109}
110
111static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
112{
113 u64 eaddr;
114 u64 raddr;
115 u64 asid;
116 u32 flags;
117 struct tlbe *tlbe;
118 unsigned int ra;
119 unsigned int rs;
120 unsigned int ws;
121 unsigned int index;
122
123 ra = get_ra(inst);
124 rs = get_rs(inst);
125 ws = get_ws(inst);
126
127 index = vcpu->arch.gpr[ra];
128 if (index >= PPC44x_TLB_SIZE) {
129 printk("%s: index %d\n", __func__, index);
130 kvmppc_dump_vcpu(vcpu);
131 return EMULATE_FAIL;
132 }
133
134 tlbe = &vcpu->arch.guest_tlb[index];
135
136 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
137 if (tlbe->word0 & PPC44x_TLB_VALID) {
138 eaddr = get_tlb_eaddr(tlbe);
139 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
140 kvmppc_mmu_invalidate(vcpu, eaddr, asid);
141 }
142
143 switch (ws) {
144 case PPC44x_TLB_PAGEID:
145 tlbe->tid = vcpu->arch.mmucr & 0xff;
146 tlbe->word0 = vcpu->arch.gpr[rs];
147 break;
148
149 case PPC44x_TLB_XLAT:
150 tlbe->word1 = vcpu->arch.gpr[rs];
151 break;
152
153 case PPC44x_TLB_ATTRIB:
154 tlbe->word2 = vcpu->arch.gpr[rs];
155 break;
156
157 default:
158 return EMULATE_FAIL;
159 }
160
161 if (tlbe_is_host_safe(vcpu, tlbe)) {
162 eaddr = get_tlb_eaddr(tlbe);
163 raddr = get_tlb_raddr(tlbe);
164 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
165 flags = tlbe->word2 & 0xffff;
166
167 /* Create a 4KB mapping on the host. If the guest wanted a
168 * large page, only the first 4KB is mapped here and the rest
169 * are mapped on the fly. */
170 kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
171 }
172
173 return EMULATE_DONE;
174}
175
176static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
177{
178 if (vcpu->arch.tcr & TCR_DIE) {
179 /* The decrementer ticks at the same rate as the timebase, so
180 * that's how we convert the guest DEC value to the number of
181 * host ticks. */
182 unsigned long nr_jiffies;
183
184 nr_jiffies = vcpu->arch.dec / tb_ticks_per_jiffy;
185 mod_timer(&vcpu->arch.dec_timer,
186 get_jiffies_64() + nr_jiffies);
187 } else {
188 del_timer(&vcpu->arch.dec_timer);
189 }
190}
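A small worked example of the conversion done in kvmppc_emulate_dec(); the 400 MHz timebase and HZ=250 figures are assumptions chosen for round numbers, not values taken from the patch.

	/* Illustrative sketch only, not part of the patch. */
	static unsigned long example_dec_to_jiffies(void)
	{
		unsigned long ticks_per_jiffy = 400000000UL / 250;	/* 1600000 */
		unsigned long guest_dec = 8000000UL;

		return guest_dec / ticks_per_jiffy;			/* 5 jiffies */
	}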
191
192static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
193{
194 vcpu->arch.pc = vcpu->arch.srr0;
195 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
196}
197
198/* XXX to do:
199 * lhax
200 * lhaux
201 * lswx
202 * lswi
203 * stswx
204 * stswi
205 * lha
206 * lhau
207 * lmw
208 * stmw
209 *
210 * XXX is_bigendian should depend on MMU mapping or MSR[LE]
211 */
212int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
213{
214 u32 inst = vcpu->arch.last_inst;
215 u32 ea;
216 int ra;
217 int rb;
218 int rc;
219 int rs;
220 int rt;
221 int sprn;
222 int dcrn;
223 enum emulation_result emulated = EMULATE_DONE;
224 int advance = 1;
225
226 switch (get_op(inst)) {
227 case 3: /* trap */
228 printk("trap!\n");
229 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
230 advance = 0;
231 break;
232
233 case 19:
234 switch (get_xop(inst)) {
235 case 50: /* rfi */
236 kvmppc_emul_rfi(vcpu);
237 advance = 0;
238 break;
239
240 default:
241 emulated = EMULATE_FAIL;
242 break;
243 }
244 break;
245
246 case 31:
247 switch (get_xop(inst)) {
248
249 case 83: /* mfmsr */
250 rt = get_rt(inst);
251 vcpu->arch.gpr[rt] = vcpu->arch.msr;
252 break;
253
254 case 87: /* lbzx */
255 rt = get_rt(inst);
256 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
257 break;
258
259 case 131: /* wrtee */
260 rs = get_rs(inst);
261 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
262 | (vcpu->arch.gpr[rs] & MSR_EE);
263 break;
264
265 case 146: /* mtmsr */
266 rs = get_rs(inst);
267 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
268 break;
269
270 case 163: /* wrteei */
271 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
272 | (inst & MSR_EE);
273 break;
274
275 case 215: /* stbx */
276 rs = get_rs(inst);
277 emulated = kvmppc_handle_store(run, vcpu,
278 vcpu->arch.gpr[rs],
279 1, 1);
280 break;
281
282 case 247: /* stbux */
283 rs = get_rs(inst);
284 ra = get_ra(inst);
285 rb = get_rb(inst);
286
287 ea = vcpu->arch.gpr[rb];
288 if (ra)
289 ea += vcpu->arch.gpr[ra];
290
291 emulated = kvmppc_handle_store(run, vcpu,
292 vcpu->arch.gpr[rs],
293 1, 1);
294 vcpu->arch.gpr[ra] = ea;
295 break;
296
297 case 279: /* lhzx */
298 rt = get_rt(inst);
299 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
300 break;
301
302 case 311: /* lhzux */
303 rt = get_rt(inst);
304 ra = get_ra(inst);
305 rb = get_rb(inst);
306
307 ea = vcpu->arch.gpr[rb];
308 if (ra)
309 ea += vcpu->arch.gpr[ra];
310
311 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
312 vcpu->arch.gpr[ra] = ea;
313 break;
314
315 case 323: /* mfdcr */
316 dcrn = get_dcrn(inst);
317 rt = get_rt(inst);
318
319 /* The guest may access CPR0 registers to determine the timebase
320 * frequency, and it must know the real host frequency because it
321 * can directly access the timebase registers.
322 *
323 * It would be possible to emulate those accesses in userspace,
324 * but userspace can really only figure out the end frequency.
325 * We could decompose that into the factors that compute it, but
326 * that's tricky math, and it's easier to just report the real
327 * CPR0 values.
328 */
329 switch (dcrn) {
330 case DCRN_CPR0_CONFIG_ADDR:
331 vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
332 break;
333 case DCRN_CPR0_CONFIG_DATA:
334 local_irq_disable();
335 mtdcr(DCRN_CPR0_CONFIG_ADDR,
336 vcpu->arch.cpr0_cfgaddr);
337 vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
338 local_irq_enable();
339 break;
340 default:
341 run->dcr.dcrn = dcrn;
342 run->dcr.data = 0;
343 run->dcr.is_write = 0;
344 vcpu->arch.io_gpr = rt;
345 vcpu->arch.dcr_needed = 1;
346 emulated = EMULATE_DO_DCR;
347 }
348
349 break;
350
351 case 339: /* mfspr */
352 sprn = get_sprn(inst);
353 rt = get_rt(inst);
354
355 switch (sprn) {
356 case SPRN_SRR0:
357 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
358 case SPRN_SRR1:
359 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
360 case SPRN_MMUCR:
361 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
362 case SPRN_PID:
363 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
364 case SPRN_IVPR:
365 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
366 case SPRN_CCR0:
367 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
368 case SPRN_CCR1:
369 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
370 case SPRN_PVR:
371 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
372 case SPRN_DEAR:
373 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
374 case SPRN_ESR:
375 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
376 case SPRN_DBCR0:
377 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
378 case SPRN_DBCR1:
379 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
380
381 /* Note: mftb and TBRL/TBWL are user-accessible, so
382 * the guest can always access the real TB anyways.
383 * In fact, we probably will never see these traps. */
384 case SPRN_TBWL:
385 vcpu->arch.gpr[rt] = mftbl(); break;
386 case SPRN_TBWU:
387 vcpu->arch.gpr[rt] = mftbu(); break;
388
389 case SPRN_SPRG0:
390 vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break;
391 case SPRN_SPRG1:
392 vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break;
393 case SPRN_SPRG2:
394 vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break;
395 case SPRN_SPRG3:
396 vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break;
397 /* Note: SPRG4-7 are user-readable, so we don't get
398 * a trap. */
399
400 case SPRN_IVOR0:
401 vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
402 case SPRN_IVOR1:
403 vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
404 case SPRN_IVOR2:
405 vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
406 case SPRN_IVOR3:
407 vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
408 case SPRN_IVOR4:
409 vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
410 case SPRN_IVOR5:
411 vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
412 case SPRN_IVOR6:
413 vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
414 case SPRN_IVOR7:
415 vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
416 case SPRN_IVOR8:
417 vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
418 case SPRN_IVOR9:
419 vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
420 case SPRN_IVOR10:
421 vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
422 case SPRN_IVOR11:
423 vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
424 case SPRN_IVOR12:
425 vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
426 case SPRN_IVOR13:
427 vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
428 case SPRN_IVOR14:
429 vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
430 case SPRN_IVOR15:
431 vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
432
433 default:
434 printk("mfspr: unknown spr %x\n", sprn);
435 vcpu->arch.gpr[rt] = 0;
436 break;
437 }
438 break;
439
440 case 407: /* sthx */
441 rs = get_rs(inst);
442 ra = get_ra(inst);
443 rb = get_rb(inst);
444
445 emulated = kvmppc_handle_store(run, vcpu,
446 vcpu->arch.gpr[rs],
447 2, 1);
448 break;
449
450 case 439: /* sthux */
451 rs = get_rs(inst);
452 ra = get_ra(inst);
453 rb = get_rb(inst);
454
455 ea = vcpu->arch.gpr[rb];
456 if (ra)
457 ea += vcpu->arch.gpr[ra];
458
459 emulated = kvmppc_handle_store(run, vcpu,
460 vcpu->arch.gpr[rs],
461 2, 1);
462 vcpu->arch.gpr[ra] = ea;
463 break;
464
465 case 451: /* mtdcr */
466 dcrn = get_dcrn(inst);
467 rs = get_rs(inst);
468
469 /* emulate some access in kernel */
470 switch (dcrn) {
471 case DCRN_CPR0_CONFIG_ADDR:
472 vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
473 break;
474 default:
475 run->dcr.dcrn = dcrn;
476 run->dcr.data = vcpu->arch.gpr[rs];
477 run->dcr.is_write = 1;
478 vcpu->arch.dcr_needed = 1;
479 emulated = EMULATE_DO_DCR;
480 }
481
482 break;
483
484 case 467: /* mtspr */
485 sprn = get_sprn(inst);
486 rs = get_rs(inst);
487 switch (sprn) {
488 case SPRN_SRR0:
489 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
490 case SPRN_SRR1:
491 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
492 case SPRN_MMUCR:
493 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
494 case SPRN_PID:
495 vcpu->arch.pid = vcpu->arch.gpr[rs]; break;
496 case SPRN_CCR0:
497 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
498 case SPRN_CCR1:
499 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
500 case SPRN_DEAR:
501 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
502 case SPRN_ESR:
503 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
504 case SPRN_DBCR0:
505 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
506 case SPRN_DBCR1:
507 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
508
509 /* XXX We need to context-switch the timebase for
510 * watchdog and FIT. */
511 case SPRN_TBWL: break;
512 case SPRN_TBWU: break;
513
514 case SPRN_DEC:
515 vcpu->arch.dec = vcpu->arch.gpr[rs];
516 kvmppc_emulate_dec(vcpu);
517 break;
518
519 case SPRN_TSR:
520 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
521
522 case SPRN_TCR:
523 vcpu->arch.tcr = vcpu->arch.gpr[rs];
524 kvmppc_emulate_dec(vcpu);
525 break;
526
527 case SPRN_SPRG0:
528 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
529 case SPRN_SPRG1:
530 vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break;
531 case SPRN_SPRG2:
532 vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break;
533 case SPRN_SPRG3:
534 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
535
536 /* Note: SPRG4-7 are user-readable. These values are
537 * loaded into the real SPRGs when resuming the
538 * guest. */
539 case SPRN_SPRG4:
540 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
541 case SPRN_SPRG5:
542 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
543 case SPRN_SPRG6:
544 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
545 case SPRN_SPRG7:
546 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
547
548 case SPRN_IVPR:
549 vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
550 case SPRN_IVOR0:
551 vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
552 case SPRN_IVOR1:
553 vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
554 case SPRN_IVOR2:
555 vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
556 case SPRN_IVOR3:
557 vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
558 case SPRN_IVOR4:
559 vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
560 case SPRN_IVOR5:
561 vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
562 case SPRN_IVOR6:
563 vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
564 case SPRN_IVOR7:
565 vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
566 case SPRN_IVOR8:
567 vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
568 case SPRN_IVOR9:
569 vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
570 case SPRN_IVOR10:
571 vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
572 case SPRN_IVOR11:
573 vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
574 case SPRN_IVOR12:
575 vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
576 case SPRN_IVOR13:
577 vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
578 case SPRN_IVOR14:
579 vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
580 case SPRN_IVOR15:
581 vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
582
583 default:
584 printk("mtspr: unknown spr %x\n", sprn);
585 emulated = EMULATE_FAIL;
586 break;
587 }
588 break;
589
590 case 470: /* dcbi */
591 /* Do nothing. The guest is performing dcbi because
592 * hardware DMA is not snooped by the dcache, but
593 * emulated DMA either goes through the dcache as
594 * normal writes, or the host kernel has handled dcache
595 * coherence. */
596 break;
597
598 case 534: /* lwbrx */
599 rt = get_rt(inst);
600 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
601 break;
602
603 case 566: /* tlbsync */
604 break;
605
606 case 662: /* stwbrx */
607 rs = get_rs(inst);
608 ra = get_ra(inst);
609 rb = get_rb(inst);
610
611 emulated = kvmppc_handle_store(run, vcpu,
612 vcpu->arch.gpr[rs],
613 4, 0);
614 break;
615
616 case 978: /* tlbwe */
617 emulated = kvmppc_emul_tlbwe(vcpu, inst);
618 break;
619
620 case 914: { /* tlbsx */
621 int index;
622 unsigned int as = get_mmucr_sts(vcpu);
623 unsigned int pid = get_mmucr_stid(vcpu);
624
625 rt = get_rt(inst);
626 ra = get_ra(inst);
627 rb = get_rb(inst);
628 rc = get_rc(inst);
629
630 ea = vcpu->arch.gpr[rb];
631 if (ra)
632 ea += vcpu->arch.gpr[ra];
633
634 index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
635 if (rc) {
636 if (index < 0)
637 vcpu->arch.cr &= ~0x20000000;
638 else
639 vcpu->arch.cr |= 0x20000000;
640 }
641 vcpu->arch.gpr[rt] = index;
642
643 }
644 break;
645
646 case 790: /* lhbrx */
647 rt = get_rt(inst);
648 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
649 break;
650
651 case 918: /* sthbrx */
652 rs = get_rs(inst);
653 ra = get_ra(inst);
654 rb = get_rb(inst);
655
656 emulated = kvmppc_handle_store(run, vcpu,
657 vcpu->arch.gpr[rs],
658 2, 0);
659 break;
660
661 case 966: /* iccci */
662 break;
663
664 default:
665 printk("unknown: op %d xop %d\n", get_op(inst),
666 get_xop(inst));
667 emulated = EMULATE_FAIL;
668 break;
669 }
670 break;
671
672 case 32: /* lwz */
673 rt = get_rt(inst);
674 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
675 break;
676
677 case 33: /* lwzu */
678 ra = get_ra(inst);
679 rt = get_rt(inst);
680 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
681 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
682 break;
683
684 case 34: /* lbz */
685 rt = get_rt(inst);
686 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
687 break;
688
689 case 35: /* lbzu */
690 ra = get_ra(inst);
691 rt = get_rt(inst);
692 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
693 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
694 break;
695
696 case 36: /* stw */
697 rs = get_rs(inst);
698 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
699 4, 1);
700 break;
701
702 case 37: /* stwu */
703 ra = get_ra(inst);
704 rs = get_rs(inst);
705 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
706 4, 1);
707 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
708 break;
709
710 case 38: /* stb */
711 rs = get_rs(inst);
712 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
713 1, 1);
714 break;
715
716 case 39: /* stbu */
717 ra = get_ra(inst);
718 rs = get_rs(inst);
719 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
720 1, 1);
721 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
722 break;
723
724 case 40: /* lhz */
725 rt = get_rt(inst);
726 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
727 break;
728
729 case 41: /* lhzu */
730 ra = get_ra(inst);
731 rt = get_rt(inst);
732 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
733 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
734 break;
735
736 case 44: /* sth */
737 rs = get_rs(inst);
738 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
739 2, 1);
740 break;
741
742 case 45: /* sthu */
743 ra = get_ra(inst);
744 rs = get_rs(inst);
745 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
746 2, 1);
747 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
748 break;
749
750 default:
751 printk("unknown op %d\n", get_op(inst));
752 emulated = EMULATE_FAIL;
753 break;
754 }
755
756 if (advance)
757 vcpu->arch.pc += 4; /* Advance past emulated instruction. */
758
759 return emulated;
760}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
new file mode 100644
index 000000000000..bad40bd2d3ac
--- /dev/null
+++ b/arch/powerpc/kvm/powerpc.c
@@ -0,0 +1,436 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19 */
20
21#include <linux/errno.h>
22#include <linux/err.h>
23#include <linux/kvm_host.h>
24#include <linux/module.h>
25#include <linux/vmalloc.h>
26#include <linux/fs.h>
27#include <asm/cputable.h>
28#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h>
30
31
32gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
33{
34 return gfn;
35}
36
37int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
38{
39 /* XXX implement me */
40 return 0;
41}
42
43int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
44{
45 return 1;
46}
47
48
49int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
50{
51 enum emulation_result er;
52 int r;
53
54 er = kvmppc_emulate_instruction(run, vcpu);
55 switch (er) {
56 case EMULATE_DONE:
57 /* Future optimization: only reload non-volatiles if they were
58 * actually modified. */
59 r = RESUME_GUEST_NV;
60 break;
61 case EMULATE_DO_MMIO:
62 run->exit_reason = KVM_EXIT_MMIO;
63 /* We must reload nonvolatiles because "update" load/store
64 * instructions modify register state. */
65 /* Future optimization: only reload non-volatiles if they were
66 * actually modified. */
67 r = RESUME_HOST_NV;
68 break;
69 case EMULATE_FAIL:
70 /* XXX Deliver Program interrupt to guest. */
71 printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
72 vcpu->arch.last_inst);
73 r = RESUME_HOST;
74 break;
75 default:
76 BUG();
77 }
78
79 return r;
80}
81
82void kvm_arch_hardware_enable(void *garbage)
83{
84}
85
86void kvm_arch_hardware_disable(void *garbage)
87{
88}
89
90int kvm_arch_hardware_setup(void)
91{
92 return 0;
93}
94
95void kvm_arch_hardware_unsetup(void)
96{
97}
98
99void kvm_arch_check_processor_compat(void *rtn)
100{
101 int r;
102
103 if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
104 r = 0;
105 else
106 r = -ENOTSUPP;
107
108 *(int *)rtn = r;
109}
110
111struct kvm *kvm_arch_create_vm(void)
112{
113 struct kvm *kvm;
114
115 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
116 if (!kvm)
117 return ERR_PTR(-ENOMEM);
118
119 return kvm;
120}
121
122static void kvmppc_free_vcpus(struct kvm *kvm)
123{
124 unsigned int i;
125
126 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
127 if (kvm->vcpus[i]) {
128 kvm_arch_vcpu_free(kvm->vcpus[i]);
129 kvm->vcpus[i] = NULL;
130 }
131 }
132}
133
134void kvm_arch_destroy_vm(struct kvm *kvm)
135{
136 kvmppc_free_vcpus(kvm);
137 kvm_free_physmem(kvm);
138 kfree(kvm);
139}
140
141int kvm_dev_ioctl_check_extension(long ext)
142{
143 int r;
144
145 switch (ext) {
146 case KVM_CAP_USER_MEMORY:
147 r = 1;
148 break;
149 default:
150 r = 0;
151 break;
152 }
153 return r;
154
155}
156
157long kvm_arch_dev_ioctl(struct file *filp,
158 unsigned int ioctl, unsigned long arg)
159{
160 return -EINVAL;
161}
162
163int kvm_arch_set_memory_region(struct kvm *kvm,
164 struct kvm_userspace_memory_region *mem,
165 struct kvm_memory_slot old,
166 int user_alloc)
167{
168 return 0;
169}
170
171struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
172{
173 struct kvm_vcpu *vcpu;
174 int err;
175
176 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
177 if (!vcpu) {
178 err = -ENOMEM;
179 goto out;
180 }
181
182 err = kvm_vcpu_init(vcpu, kvm, id);
183 if (err)
184 goto free_vcpu;
185
186 return vcpu;
187
188free_vcpu:
189 kmem_cache_free(kvm_vcpu_cache, vcpu);
190out:
191 return ERR_PTR(err);
192}
193
194void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
195{
196 kvm_vcpu_uninit(vcpu);
197 kmem_cache_free(kvm_vcpu_cache, vcpu);
198}
199
200void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
201{
202 kvm_arch_vcpu_free(vcpu);
203}
204
205int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
206{
207 unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
208
209 return test_bit(priority, &vcpu->arch.pending_exceptions);
210}
211
212static void kvmppc_decrementer_func(unsigned long data)
213{
214 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
215
216 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
217}
218
219int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
220{
221 setup_timer(&vcpu->arch.dec_timer, kvmppc_decrementer_func,
222 (unsigned long)vcpu);
223
224 return 0;
225}
226
227void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
228{
229}
230
231void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
232{
233}
234
235void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
236{
237}
238
239void decache_vcpus_on_cpu(int cpu)
240{
241}
242
243int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
244 struct kvm_debug_guest *dbg)
245{
246 return -ENOTSUPP;
247}
248
249static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
250 struct kvm_run *run)
251{
252 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
253 *gpr = run->dcr.data;
254}
255
256static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
257 struct kvm_run *run)
258{
259 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
260
261 if (run->mmio.len > sizeof(*gpr)) {
262 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
263 return;
264 }
265
266 if (vcpu->arch.mmio_is_bigendian) {
267 switch (run->mmio.len) {
268 case 4: *gpr = *(u32 *)run->mmio.data; break;
269 case 2: *gpr = *(u16 *)run->mmio.data; break;
270 case 1: *gpr = *(u8 *)run->mmio.data; break;
271 }
272 } else {
273 /* Convert BE data from userland back to LE. */
274 switch (run->mmio.len) {
275 case 4: *gpr = ld_le32((u32 *)run->mmio.data); break;
276 case 2: *gpr = ld_le16((u16 *)run->mmio.data); break;
277 case 1: *gpr = *(u8 *)run->mmio.data; break;
278 }
279 }
280}
281
282int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
283 unsigned int rt, unsigned int bytes, int is_bigendian)
284{
285 if (bytes > sizeof(run->mmio.data)) {
286 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
287 bytes);
288 }
289
290 run->mmio.phys_addr = vcpu->arch.paddr_accessed;
291 run->mmio.len = bytes;
292 run->mmio.is_write = 0;
293
294 vcpu->arch.io_gpr = rt;
295 vcpu->arch.mmio_is_bigendian = is_bigendian;
296 vcpu->mmio_needed = 1;
297 vcpu->mmio_is_write = 0;
298
299 return EMULATE_DO_MMIO;
300}
301
302int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
303 u32 val, unsigned int bytes, int is_bigendian)
304{
305 void *data = run->mmio.data;
306
307 if (bytes > sizeof(run->mmio.data)) {
308 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
309 bytes);
310 }
311
312 run->mmio.phys_addr = vcpu->arch.paddr_accessed;
313 run->mmio.len = bytes;
314 run->mmio.is_write = 1;
315 vcpu->mmio_needed = 1;
316 vcpu->mmio_is_write = 1;
317
318 /* Store the value at the lowest bytes in 'data'. */
319 if (is_bigendian) {
320 switch (bytes) {
321 case 4: *(u32 *)data = val; break;
322 case 2: *(u16 *)data = val; break;
323 case 1: *(u8 *)data = val; break;
324 }
325 } else {
326 /* Store LE value into 'data'. */
327 switch (bytes) {
328 case 4: st_le32(data, val); break;
329 case 2: st_le16(data, val); break;
330 case 1: *(u8 *)data = val; break;
331 }
332 }
333
334 return EMULATE_DO_MMIO;
335}
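A quick sketch of how the two branches above lay out a 2-byte store of 0x1234 in run->mmio.data on the (big-endian) PowerPC host; the value is arbitrary and the helper is purely illustrative.

	/* Illustrative sketch only, not part of the patch. */
	static void example_store_byteorder(void)
	{
		u8 data[2];

		*(u16 *)data = 0x1234;		/* is_bigendian path: 0x12 0x34 */
		st_le16((u16 *)data, 0x1234);	/* byte-reversed path: 0x34 0x12 */
	}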
336
337int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
338{
339 int r;
340 sigset_t sigsaved;
341
342 if (vcpu->sigset_active)
343 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
344
345 if (vcpu->mmio_needed) {
346 if (!vcpu->mmio_is_write)
347 kvmppc_complete_mmio_load(vcpu, run);
348 vcpu->mmio_needed = 0;
349 } else if (vcpu->arch.dcr_needed) {
350 if (!vcpu->arch.dcr_is_write)
351 kvmppc_complete_dcr_load(vcpu, run);
352 vcpu->arch.dcr_needed = 0;
353 }
354
355 kvmppc_check_and_deliver_interrupts(vcpu);
356
357 local_irq_disable();
358 kvm_guest_enter();
359 r = __kvmppc_vcpu_run(run, vcpu);
360 kvm_guest_exit();
361 local_irq_enable();
362
363 if (vcpu->sigset_active)
364 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
365
366 return r;
367}
368
369int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
370{
371 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
372 return 0;
373}
374
375int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
376 struct kvm_mp_state *mp_state)
377{
378 return -EINVAL;
379}
380
381int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
382 struct kvm_mp_state *mp_state)
383{
384 return -EINVAL;
385}
386
387long kvm_arch_vcpu_ioctl(struct file *filp,
388 unsigned int ioctl, unsigned long arg)
389{
390 struct kvm_vcpu *vcpu = filp->private_data;
391 void __user *argp = (void __user *)arg;
392 long r;
393
394 switch (ioctl) {
395 case KVM_INTERRUPT: {
396 struct kvm_interrupt irq;
397 r = -EFAULT;
398 if (copy_from_user(&irq, argp, sizeof(irq)))
399 goto out;
400 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
401 break;
402 }
403 default:
404 r = -EINVAL;
405 }
406
407out:
408 return r;
409}
410
411int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
412{
413 return -ENOTSUPP;
414}
415
416long kvm_arch_vm_ioctl(struct file *filp,
417 unsigned int ioctl, unsigned long arg)
418{
419 long r;
420
421 switch (ioctl) {
422 default:
423 r = -EINVAL;
424 }
425
426 return r;
427}
428
429int kvm_arch_init(void *opaque)
430{
431 return 0;
432}
433
434void kvm_arch_exit(void)
435{
436}
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ada249bf9779..ce10e2b1b902 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -202,7 +202,7 @@ adjust_total_lowmem(void)
 	cam_max_size = max_lowmem_size;
 
 	/* adjust lowmem size to max_lowmem_size */
-	ram = min(max_lowmem_size, total_lowmem);
+	ram = min(max_lowmem_size, (phys_addr_t)total_lowmem);
 
 	/* Calculate CAM values */
 	__cam0 = 1UL << 2 * (__ilog2(ram) / 2);
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index e10d76a860d3..ddeaf9e38ad5 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -191,7 +191,7 @@ _GLOBAL(add_hash_page)
 	add	r3,r3,r0		/* note create_hpte trims to 24 bits */
 
 #ifdef CONFIG_SMP
-	rlwinm	r8,r1,0,0,18		/* use cpu number to make tag */
+	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT) /* use cpu number to make tag */
 	lwz	r8,TI_CPU(r8)		/* to go in mmu_hash_lock */
 	oris	r8,r8,12
 #endif /* CONFIG_SMP */
@@ -526,7 +526,7 @@ _GLOBAL(flush_hash_pages)
 #ifdef CONFIG_SMP
 	addis	r9,r7,mmu_hash_lock@ha
 	addi	r9,r9,mmu_hash_lock@l
-	rlwinm	r8,r1,0,0,18
+	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT)
 	add	r8,r8,r7
 	lwz	r8,TI_CPU(r8)
 	oris	r8,r8,9
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 47325f23c51f..1952b4d3fa7f 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -59,7 +59,10 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long total_memory;
 unsigned long total_lowmem;
 
-phys_addr_t memstart_addr;
+phys_addr_t memstart_addr = (phys_addr_t)~0ull;
+EXPORT_SYMBOL(memstart_addr);
+phys_addr_t kernstart_addr;
+EXPORT_SYMBOL(kernstart_addr);
 phys_addr_t lowmem_end_addr;
 
 int boot_mapsize;
@@ -68,14 +71,6 @@ unsigned long agp_special_page;
 EXPORT_SYMBOL(agp_special_page);
 #endif
 
-#ifdef CONFIG_HIGHMEM
-pte_t *kmap_pte;
-pgprot_t kmap_prot;
-
-EXPORT_SYMBOL(kmap_prot);
-EXPORT_SYMBOL(kmap_pte);
-#endif
-
 void MMU_init(void);
 
 /* XXX should be in current.h -- paulus */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 698bd000f98b..c5ac532a0161 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -72,7 +72,8 @@
 #warning TASK_SIZE is smaller than it needs to be.
 #endif
 
-phys_addr_t memstart_addr;
+phys_addr_t memstart_addr = ~0;
+phys_addr_t kernstart_addr;
 
 void free_initmem(void)
 {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 16def4dcff6d..5ccb579b81e4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -45,6 +45,7 @@
 #include <asm/tlb.h>
 #include <asm/sections.h>
 #include <asm/vdso.h>
+#include <asm/fixmap.h>
 
 #include "mmu_decl.h"
 
@@ -57,6 +58,20 @@ int init_bootmem_done;
 int mem_init_done;
 unsigned long memory_limit;
 
+#ifdef CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+
+static inline pte_t *virt_to_kpte(unsigned long vaddr)
+{
+	return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
+			vaddr), vaddr), vaddr);
+}
+#endif
+
 int page_is_ram(unsigned long pfn)
 {
 	unsigned long paddr = (pfn << PAGE_SHIFT);
@@ -216,7 +231,7 @@ void __init do_init_bootmem(void)
216 unsigned long total_pages; 231 unsigned long total_pages;
217 int boot_mapsize; 232 int boot_mapsize;
218 233
219 max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; 234 max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
220 total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; 235 total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
221#ifdef CONFIG_HIGHMEM 236#ifdef CONFIG_HIGHMEM
222 total_pages = total_lowmem >> PAGE_SHIFT; 237 total_pages = total_lowmem >> PAGE_SHIFT;
@@ -232,7 +247,8 @@ void __init do_init_bootmem(void)
232 247
233 start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); 248 start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
234 249
235 boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); 250 min_low_pfn = MEMORY_START >> PAGE_SHIFT;
251 boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
236 252
237 /* Add active regions with valid PFNs */ 253 /* Add active regions with valid PFNs */
238 for (i = 0; i < lmb.memory.cnt; i++) { 254 for (i = 0; i < lmb.memory.cnt; i++) {
@@ -310,14 +326,19 @@ void __init paging_init(void)
310 unsigned long top_of_ram = lmb_end_of_DRAM(); 326 unsigned long top_of_ram = lmb_end_of_DRAM();
311 unsigned long max_zone_pfns[MAX_NR_ZONES]; 327 unsigned long max_zone_pfns[MAX_NR_ZONES];
312 328
329#ifdef CONFIG_PPC32
330 unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
331 unsigned long end = __fix_to_virt(FIX_HOLE);
332
333 for (; v < end; v += PAGE_SIZE)
334 map_page(v, 0, 0); /* XXX gross */
335#endif
336
313#ifdef CONFIG_HIGHMEM 337#ifdef CONFIG_HIGHMEM
314 map_page(PKMAP_BASE, 0, 0); /* XXX gross */ 338 map_page(PKMAP_BASE, 0, 0); /* XXX gross */
315 pkmap_page_table = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k 339 pkmap_page_table = virt_to_kpte(PKMAP_BASE);
316 (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE), PKMAP_BASE); 340
317 map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */ 341 kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
318 kmap_pte = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k
319 (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN),
320 KMAP_FIX_BEGIN);
321 kmap_prot = PAGE_KERNEL; 342 kmap_prot = PAGE_KERNEL;
322#endif /* CONFIG_HIGHMEM */ 343#endif /* CONFIG_HIGHMEM */
323 344
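Editorial note: in the paging_init() hunk above, kmap_pte is now taken from __fix_to_virt(FIX_KMAP_BEGIN) through the new virt_to_kpte() helper, and the PPC32 branch pre-maps the whole fixmap window. A hedged sketch of the index-to-address arithmetic this relies on; the formula below is the conventional top-down one (one page per index) and the FIXADDR_TOP value is taken from the pgtable_32.c hunk further down, so treat it as illustrative rather than authoritative:

    #include <assert.h>

    #define PAGE_SHIFT   12
    #define PAGE_SIZE    (1UL << PAGE_SHIFT)

    /* illustrative value; the pgtable_32.c hunk below initialises the
     * real FIXADDR_TOP variable to 0xfffff000 */
    #define FIXADDR_TOP  0xfffff000UL

    /* conventional fixmap mapping: index 0 is the top page, higher
     * indices sit at successively lower virtual addresses */
    #define __fix_to_virt(idx)  (FIXADDR_TOP - ((idx) << PAGE_SHIFT))

    int main(void)
    {
            assert(__fix_to_virt(0) == 0xfffff000UL);
            assert(__fix_to_virt(1) == 0xffffe000UL);
            /* the paging_init() loop above walks from
             * __fix_to_virt(__end_of_fixed_addresses - 1) up to
             * __fix_to_virt(FIX_HOLE), one PAGE_SIZE step at a time */
            return 0;
    }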
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 1efd631211ef..dc704da363eb 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -18,6 +18,7 @@
18#include <linux/cpu.h> 18#include <linux/cpu.h>
19#include <linux/notifier.h> 19#include <linux/notifier.h>
20#include <linux/lmb.h> 20#include <linux/lmb.h>
21#include <linux/of.h>
21#include <asm/sparsemem.h> 22#include <asm/sparsemem.h>
22#include <asm/prom.h> 23#include <asm/prom.h>
23#include <asm/system.h> 24#include <asm/system.h>
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 64c44bcc68de..80d1babb230d 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -29,6 +29,7 @@
29 29
30#include <asm/pgtable.h> 30#include <asm/pgtable.h>
31#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
32#include <asm/fixmap.h>
32#include <asm/io.h> 33#include <asm/io.h>
33 34
34#include "mmu_decl.h" 35#include "mmu_decl.h"
@@ -387,3 +388,25 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
387 change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); 388 change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
388} 389}
389#endif /* CONFIG_DEBUG_PAGEALLOC */ 390#endif /* CONFIG_DEBUG_PAGEALLOC */
391
392static int fixmaps;
393unsigned long FIXADDR_TOP = 0xfffff000;
394EXPORT_SYMBOL(FIXADDR_TOP);
395
396void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
397{
398 unsigned long address = __fix_to_virt(idx);
399
400 if (idx >= __end_of_fixed_addresses) {
401 BUG();
402 return;
403 }
404
405 map_page(address, phys, flags);
406 fixmaps++;
407}
408
409void __this_fixmap_does_not_exist(void)
410{
411 WARN_ON(1);
412}
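Editorial note: __set_fixmap() above just validates the index and calls map_page() on the slot's fixed virtual address. A hypothetical usage sketch; map_example_page, FIX_EXAMPLE and example_phys are invented names and not part of this patch:

    /* hypothetical caller, for illustration only */
    #include <asm/fixmap.h>
    #include <asm/pgtable.h>

    static void __init map_example_page(phys_addr_t example_phys, pgprot_t prot)
    {
            void *va;

            /* bind the compile-time slot FIX_EXAMPLE to example_phys */
            __set_fixmap(FIX_EXAMPLE, example_phys, prot);

            /* the virtual address is a constant derived from the index */
            va = (void *)__fix_to_virt(FIX_EXAMPLE);
            (void)va;
    }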
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f38c50b4ce56..87454c526973 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -45,7 +45,6 @@ source "arch/powerpc/platforms/powermac/Kconfig"
45source "arch/powerpc/platforms/prep/Kconfig" 45source "arch/powerpc/platforms/prep/Kconfig"
46source "arch/powerpc/platforms/maple/Kconfig" 46source "arch/powerpc/platforms/maple/Kconfig"
47source "arch/powerpc/platforms/pasemi/Kconfig" 47source "arch/powerpc/platforms/pasemi/Kconfig"
48source "arch/powerpc/platforms/celleb/Kconfig"
49source "arch/powerpc/platforms/ps3/Kconfig" 48source "arch/powerpc/platforms/ps3/Kconfig"
50source "arch/powerpc/platforms/cell/Kconfig" 49source "arch/powerpc/platforms/cell/Kconfig"
51source "arch/powerpc/platforms/8xx/Kconfig" 50source "arch/powerpc/platforms/8xx/Kconfig"
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 5fc7fac10e93..f7efaa925a13 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -220,8 +220,8 @@ config SMP
220 If you don't know what to do here, say N. 220 If you don't know what to do here, say N.
221 221
222config NR_CPUS 222config NR_CPUS
223 int "Maximum number of CPUs (2-128)" 223 int "Maximum number of CPUs (2-1024)"
224 range 2 128 224 range 2 1024
225 depends on SMP 225 depends on SMP
226 default "32" if PPC64 226 default "32" if PPC64
227 default "4" 227 default "4"
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index a984894466d9..423a0234dc31 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -24,5 +24,4 @@ obj-$(CONFIG_PPC_MAPLE) += maple/
24obj-$(CONFIG_PPC_PASEMI) += pasemi/ 24obj-$(CONFIG_PPC_PASEMI) += pasemi/
25obj-$(CONFIG_PPC_CELL) += cell/ 25obj-$(CONFIG_PPC_CELL) += cell/
26obj-$(CONFIG_PPC_PS3) += ps3/ 26obj-$(CONFIG_PPC_PS3) += ps3/
27obj-$(CONFIG_PPC_CELLEB) += celleb/
28obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ 27obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 2f169991896d..3959fcfe731c 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -25,6 +25,19 @@ config PPC_IBM_CELL_BLADE
25 select PPC_UDBG_16550 25 select PPC_UDBG_16550
26 select UDBG_RTAS_CONSOLE 26 select UDBG_RTAS_CONSOLE
27 27
28config PPC_CELLEB
29 bool "Toshiba's Cell Reference Set 'Celleb' Architecture"
30 depends on PPC_MULTIPLATFORM && PPC64
31 select PPC_CELL
32 select PPC_CELL_NATIVE
33 select PPC_RTAS
34 select PPC_INDIRECT_IO
35 select PPC_OF_PLATFORM_PCI
36 select HAS_TXX9_SERIAL
37 select PPC_UDBG_BEAT
38 select USB_OHCI_BIG_ENDIAN_MMIO
39 select USB_EHCI_BIG_ENDIAN_MMIO
40
28menu "Cell Broadband Engine options" 41menu "Cell Broadband Engine options"
29 depends on PPC_CELL 42 depends on PPC_CELL
30 43
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index c89964c6fb1f..c2a7e4e5ddf9 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -1,6 +1,7 @@
1obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \ 1obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \
2 cbe_regs.o spider-pic.o \ 2 cbe_regs.o spider-pic.o \
3 pervasive.o pmu.o io-workarounds.o 3 pervasive.o pmu.o io-workarounds.o \
4 spider-pci.o
4obj-$(CONFIG_CBE_RAS) += ras.o 5obj-$(CONFIG_CBE_RAS) += ras.o
5 6
6obj-$(CONFIG_CBE_THERM) += cbe_thermal.o 7obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
@@ -26,3 +27,20 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \
26 spufs/ 27 spufs/
27 28
28obj-$(CONFIG_PCI_MSI) += axon_msi.o 29obj-$(CONFIG_PCI_MSI) += axon_msi.o
30
31
32# celleb stuff
33ifeq ($(CONFIG_PPC_CELLEB),y)
34obj-y += celleb_setup.o \
35 celleb_pci.o celleb_scc_epci.o \
36 celleb_scc_pciex.o \
37 celleb_scc_uhc.o \
38 io-workarounds.o spider-pci.o \
39 beat.o beat_htab.o beat_hvCall.o \
40 beat_interrupt.o beat_iommu.o
41
42obj-$(CONFIG_SMP) += beat_smp.o
43obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o
44obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o
45obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o
46endif
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index d95e71dee91f..c39f5c225f2e 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -123,7 +123,7 @@ static struct axon_msic *find_msi_translator(struct pci_dev *dev)
123 return NULL; 123 return NULL;
124 } 124 }
125 125
126 for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { 126 for (; dn; dn = of_get_next_parent(dn)) {
127 ph = of_get_property(dn, "msi-translator", NULL); 127 ph = of_get_property(dn, "msi-translator", NULL);
128 if (ph) 128 if (ph)
129 break; 129 break;
@@ -169,7 +169,7 @@ static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type)
169 169
170static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) 170static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
171{ 171{
172 struct device_node *dn, *tmp; 172 struct device_node *dn;
173 struct msi_desc *entry; 173 struct msi_desc *entry;
174 int len; 174 int len;
175 const u32 *prop; 175 const u32 *prop;
@@ -182,7 +182,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
182 182
183 entry = list_first_entry(&dev->msi_list, struct msi_desc, list); 183 entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
184 184
185 for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { 185 for (; dn; dn = of_get_next_parent(dn)) {
186 if (entry->msi_attrib.is_64) { 186 if (entry->msi_attrib.is_64) {
187 prop = of_get_property(dn, "msi-address-64", &len); 187 prop = of_get_property(dn, "msi-address-64", &len);
188 if (prop) 188 if (prop)
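Editorial note: both loops in axon_msi.c now walk up the device tree with of_get_next_parent(), which folds the earlier three-step idiom (take a reference on the parent, drop the reference on the current node, advance) into one call. A rough sketch of the equivalence, assuming the usual device-tree refcounting helpers; this is not the kernel's implementation, only the pattern being replaced:

    #include <linux/of.h>

    /* what each loop step did before the change */
    static struct device_node *step_old(struct device_node *dn)
    {
            struct device_node *tmp = of_get_parent(dn); /* takes a ref on the parent */
            of_node_put(dn);                             /* drops the ref on the child */
            return tmp;
    }

    /* what of_get_next_parent() provides: the same "walk up and release
     * the node you came from" step as a single helper */
    static struct device_node *step_new(struct device_node *dn)
    {
            return of_get_next_parent(dn);
    }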
diff --git a/arch/powerpc/platforms/celleb/beat.c b/arch/powerpc/platforms/cell/beat.c
index b64b171f245b..48c690ea65da 100644
--- a/arch/powerpc/platforms/celleb/beat.c
+++ b/arch/powerpc/platforms/cell/beat.c
@@ -33,7 +33,7 @@
33 33
34#include "beat_wrapper.h" 34#include "beat_wrapper.h"
35#include "beat.h" 35#include "beat.h"
36#include "interrupt.h" 36#include "beat_interrupt.h"
37 37
38static int beat_pm_poweroff_flag; 38static int beat_pm_poweroff_flag;
39 39
diff --git a/arch/powerpc/platforms/celleb/beat.h b/arch/powerpc/platforms/cell/beat.h
index 32c8efcedc80..32c8efcedc80 100644
--- a/arch/powerpc/platforms/celleb/beat.h
+++ b/arch/powerpc/platforms/cell/beat.h
diff --git a/arch/powerpc/platforms/celleb/htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 81467ff055c8..81467ff055c8 100644
--- a/arch/powerpc/platforms/celleb/htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
diff --git a/arch/powerpc/platforms/celleb/hvCall.S b/arch/powerpc/platforms/cell/beat_hvCall.S
index 74c817448948..74c817448948 100644
--- a/arch/powerpc/platforms/celleb/hvCall.S
+++ b/arch/powerpc/platforms/cell/beat_hvCall.S
diff --git a/arch/powerpc/platforms/celleb/interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index 69562a867876..192a93509372 100644
--- a/arch/powerpc/platforms/celleb/interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -26,7 +26,7 @@
26 26
27#include <asm/machdep.h> 27#include <asm/machdep.h>
28 28
29#include "interrupt.h" 29#include "beat_interrupt.h"
30#include "beat_wrapper.h" 30#include "beat_wrapper.h"
31 31
32#define MAX_IRQS NR_IRQS 32#define MAX_IRQS NR_IRQS
diff --git a/arch/powerpc/platforms/celleb/interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
index b470fd0051f1..b470fd0051f1 100644
--- a/arch/powerpc/platforms/celleb/interrupt.h
+++ b/arch/powerpc/platforms/cell/beat_interrupt.h
diff --git a/arch/powerpc/platforms/celleb/iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
index 93b0efddd658..93b0efddd658 100644
--- a/arch/powerpc/platforms/celleb/iommu.c
+++ b/arch/powerpc/platforms/cell/beat_iommu.c
diff --git a/arch/powerpc/platforms/celleb/smp.c b/arch/powerpc/platforms/cell/beat_smp.c
index a7631250aeb4..26efc204c47f 100644
--- a/arch/powerpc/platforms/celleb/smp.c
+++ b/arch/powerpc/platforms/cell/beat_smp.c
@@ -37,7 +37,7 @@
37#include <asm/machdep.h> 37#include <asm/machdep.h>
38#include <asm/udbg.h> 38#include <asm/udbg.h>
39 39
40#include "interrupt.h" 40#include "beat_interrupt.h"
41 41
42#ifdef DEBUG 42#ifdef DEBUG
43#define DBG(fmt...) udbg_printf(fmt) 43#define DBG(fmt...) udbg_printf(fmt)
diff --git a/arch/powerpc/platforms/celleb/spu_priv1.c b/arch/powerpc/platforms/cell/beat_spu_priv1.c
index bcc17f7fe8ad..bcc17f7fe8ad 100644
--- a/arch/powerpc/platforms/celleb/spu_priv1.c
+++ b/arch/powerpc/platforms/cell/beat_spu_priv1.c
diff --git a/arch/powerpc/platforms/celleb/beat_syscall.h b/arch/powerpc/platforms/cell/beat_syscall.h
index 8580dc7e1798..8580dc7e1798 100644
--- a/arch/powerpc/platforms/celleb/beat_syscall.h
+++ b/arch/powerpc/platforms/cell/beat_syscall.h
diff --git a/arch/powerpc/platforms/celleb/udbg_beat.c b/arch/powerpc/platforms/cell/beat_udbg.c
index 6b418f6b6175..6b418f6b6175 100644
--- a/arch/powerpc/platforms/celleb/udbg_beat.c
+++ b/arch/powerpc/platforms/cell/beat_udbg.c
diff --git a/arch/powerpc/platforms/celleb/beat_wrapper.h b/arch/powerpc/platforms/cell/beat_wrapper.h
index b47dfda48d06..b47dfda48d06 100644
--- a/arch/powerpc/platforms/celleb/beat_wrapper.h
+++ b/arch/powerpc/platforms/cell/beat_wrapper.h
diff --git a/arch/powerpc/platforms/celleb/pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index 51b390d34e4d..f39a3b2a1667 100644
--- a/arch/powerpc/platforms/celleb/pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -37,12 +37,11 @@
37#include <asm/io.h> 37#include <asm/io.h>
38#include <asm/irq.h> 38#include <asm/irq.h>
39#include <asm/prom.h> 39#include <asm/prom.h>
40#include <asm/machdep.h>
41#include <asm/pci-bridge.h> 40#include <asm/pci-bridge.h>
42#include <asm/ppc-pci.h> 41#include <asm/ppc-pci.h>
43 42
44#include "pci.h" 43#include "io-workarounds.h"
45#include "interrupt.h" 44#include "celleb_pci.h"
46 45
47#define MAX_PCI_DEVICES 32 46#define MAX_PCI_DEVICES 32
48#define MAX_PCI_FUNCTIONS 8 47#define MAX_PCI_FUNCTIONS 8
@@ -190,7 +189,7 @@ static int celleb_fake_pci_read_config(struct pci_bus *bus,
190 189
191 190
192static int celleb_fake_pci_write_config(struct pci_bus *bus, 191static int celleb_fake_pci_write_config(struct pci_bus *bus,
193 unsigned int devfn, int where, int size, u32 val) 192 unsigned int devfn, int where, int size, u32 val)
194{ 193{
195 char *config; 194 char *config;
196 struct device_node *node; 195 struct device_node *node;
@@ -457,33 +456,42 @@ static int __init celleb_setup_fake_pci(struct device_node *dev,
457 return 0; 456 return 0;
458} 457}
459 458
460void __init fake_pci_workaround_init(struct pci_controller *phb) 459static struct celleb_phb_spec celleb_fake_pci_spec __initdata = {
461{ 460 .setup = celleb_setup_fake_pci,
462 /** 461};
463 * We will add fake pci bus to scc_pci_bus for the purpose to improve
464 * I/O Macro performance. But device-tree and device drivers
465 * are not ready to use address with a token.
466 */
467
468 /* celleb_pci_add_one(phb, NULL); */
469}
470 462
471static struct of_device_id celleb_phb_match[] __initdata = { 463static struct of_device_id celleb_phb_match[] __initdata = {
472 { 464 {
473 .name = "pci-pseudo", 465 .name = "pci-pseudo",
474 .data = celleb_setup_fake_pci, 466 .data = &celleb_fake_pci_spec,
475 }, { 467 }, {
476 .name = "epci", 468 .name = "epci",
477 .data = celleb_setup_epci, 469 .data = &celleb_epci_spec,
470 }, {
471 .name = "pcie",
472 .data = &celleb_pciex_spec,
478 }, { 473 }, {
479 }, 474 },
480}; 475};
481 476
477static int __init celleb_io_workaround_init(struct pci_controller *phb,
478 struct celleb_phb_spec *phb_spec)
479{
480 if (phb_spec->ops) {
481 iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init,
482 phb_spec->iowa_data);
483 io_workaround_init();
484 }
485
486 return 0;
487}
488
482int __init celleb_setup_phb(struct pci_controller *phb) 489int __init celleb_setup_phb(struct pci_controller *phb)
483{ 490{
484 struct device_node *dev = phb->dn; 491 struct device_node *dev = phb->dn;
485 const struct of_device_id *match; 492 const struct of_device_id *match;
486 int (*setup_func)(struct device_node *, struct pci_controller *); 493 struct celleb_phb_spec *phb_spec;
494 int rc;
487 495
488 match = of_match_node(celleb_phb_match, dev); 496 match = of_match_node(celleb_phb_match, dev);
489 if (!match) 497 if (!match)
@@ -492,8 +500,12 @@ int __init celleb_setup_phb(struct pci_controller *phb)
492 phb_set_bus_ranges(dev, phb); 500 phb_set_bus_ranges(dev, phb);
493 phb->buid = 1; 501 phb->buid = 1;
494 502
495 setup_func = match->data; 503 phb_spec = match->data;
496 return (*setup_func)(dev, phb); 504 rc = (*phb_spec->setup)(dev, phb);
505 if (rc)
506 return 1;
507
508 return celleb_io_workaround_init(phb, phb_spec);
497} 509}
498 510
499int celleb_pci_probe_mode(struct pci_bus *bus) 511int celleb_pci_probe_mode(struct pci_bus *bus)
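Editorial note: celleb_phb_match now carries a struct celleb_phb_spec per node name instead of a bare setup callback, so celleb_setup_phb() can run the controller-specific setup and, when ops is set, hand the controller to the generic I/O-workaround layer. A hypothetical sketch of what one more controller entry would look like under this scheme; every name below is invented for illustration:

    /* hypothetical extension of celleb_phb_match, not part of this patch */
    static int __init celleb_setup_examplepci(struct device_node *node,
                                              struct pci_controller *hose)
    {
            /* controller-specific init would go here */
            return 0;
    }

    static struct celleb_phb_spec celleb_examplepci_spec __initdata = {
            .setup     = celleb_setup_examplepci,
            .ops       = NULL,              /* no MMIO/PIO workaround needed */
            .iowa_init = NULL,
            .iowa_data = NULL,
    };

    /* the corresponding celleb_phb_match entry would then be:
     *      { .name = "example-pci", .data = &celleb_examplepci_spec },
     */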
diff --git a/arch/powerpc/platforms/celleb/pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
index 5d5544ffeddb..4cba1523ec50 100644
--- a/arch/powerpc/platforms/celleb/pci.h
+++ b/arch/powerpc/platforms/cell/celleb_pci.h
@@ -27,16 +27,19 @@
27#include <asm/prom.h> 27#include <asm/prom.h>
28#include <asm/ppc-pci.h> 28#include <asm/ppc-pci.h>
29 29
30#include "io-workarounds.h"
31
32struct celleb_phb_spec {
33 int (*setup)(struct device_node *, struct pci_controller *);
34 struct ppc_pci_io *ops;
35 int (*iowa_init)(struct iowa_bus *, void *);
36 void *iowa_data;
37};
38
30extern int celleb_setup_phb(struct pci_controller *); 39extern int celleb_setup_phb(struct pci_controller *);
31extern int celleb_pci_probe_mode(struct pci_bus *); 40extern int celleb_pci_probe_mode(struct pci_bus *);
32 41
33extern int celleb_setup_epci(struct device_node *, struct pci_controller *); 42extern struct celleb_phb_spec celleb_epci_spec;
34 43extern struct celleb_phb_spec celleb_pciex_spec;
35extern void *celleb_dummy_page_va;
36extern int __init celleb_pci_workaround_init(void);
37extern void __init celleb_pci_add_one(struct pci_controller *,
38 void (*)(struct pci_controller *));
39extern void fake_pci_workaround_init(struct pci_controller *);
40extern void epci_workaround_init(struct pci_controller *);
41 44
42#endif /* _CELLEB_PCI_H */ 45#endif /* _CELLEB_PCI_H */
diff --git a/arch/powerpc/platforms/celleb/scc.h b/arch/powerpc/platforms/cell/celleb_scc.h
index 6be1542a6e66..b596a711c348 100644
--- a/arch/powerpc/platforms/celleb/scc.h
+++ b/arch/powerpc/platforms/cell/celleb_scc.h
@@ -125,6 +125,93 @@
125/* bits for SCC_EPCI_CNTOPT */ 125/* bits for SCC_EPCI_CNTOPT */
126#define SCC_EPCI_CNTOPT_O2PMB 0x00000002 126#define SCC_EPCI_CNTOPT_O2PMB 0x00000002
127 127
128/* SCC PCIEXC SMMIO registers */
129#define PEXCADRS 0x000
130#define PEXCWDATA 0x004
131#define PEXCRDATA 0x008
132#define PEXDADRS 0x010
133#define PEXDCMND 0x014
134#define PEXDWDATA 0x018
135#define PEXDRDATA 0x01c
136#define PEXREQID 0x020
137#define PEXTIDMAP 0x024
138#define PEXINTMASK 0x028
139#define PEXINTSTS 0x02c
140#define PEXAERRMASK 0x030
141#define PEXAERRSTS 0x034
142#define PEXPRERRMASK 0x040
143#define PEXPRERRSTS 0x044
144#define PEXPRERRID01 0x048
145#define PEXPRERRID23 0x04c
146#define PEXVDMASK 0x050
147#define PEXVDSTS 0x054
148#define PEXRCVCPLIDA 0x060
149#define PEXLENERRIDA 0x068
150#define PEXPHYPLLST 0x070
151#define PEXDMRDEN0 0x100
152#define PEXDMRDADR0 0x104
153#define PEXDMRDENX 0x110
154#define PEXDMRDADRX 0x114
155#define PEXECMODE 0xf00
156#define PEXMAEA(n) (0xf50 + (8 * n))
157#define PEXMAEC(n) (0xf54 + (8 * n))
158#define PEXCCRCTRL 0xff0
159
160/* SCC PCIEXC bits and shifts for PEXCADRS */
161#define PEXCADRS_BYTE_EN_SHIFT 20
162#define PEXCADRS_CMD_SHIFT 16
163#define PEXCADRS_CMD_READ (0xa << PEXCADRS_CMD_SHIFT)
164#define PEXCADRS_CMD_WRITE (0xb << PEXCADRS_CMD_SHIFT)
165
166/* SCC PCIEXC shifts for PEXDADRS */
167#define PEXDADRS_BUSNO_SHIFT 20
168#define PEXDADRS_DEVNO_SHIFT 15
169#define PEXDADRS_FUNCNO_SHIFT 12
170
171/* SCC PCIEXC bits and shifts for PEXDCMND */
172#define PEXDCMND_BYTE_EN_SHIFT 4
173#define PEXDCMND_IO_READ 0x2
174#define PEXDCMND_IO_WRITE 0x3
175#define PEXDCMND_CONFIG_READ 0xa
176#define PEXDCMND_CONFIG_WRITE 0xb
177
178/* SCC PCIEXC bits for PEXPHYPLLST */
179#define PEXPHYPLLST_PEXPHYAPLLST 0x00000001
180
181/* SCC PCIEXC bits for PEXECMODE */
182#define PEXECMODE_ALL_THROUGH 0x00000000
183#define PEXECMODE_ALL_8BIT 0x00550155
184#define PEXECMODE_ALL_16BIT 0x00aa02aa
185
186/* SCC PCIEXC bits for PEXCCRCTRL */
187#define PEXCCRCTRL_PEXIPCOREEN 0x00040000
188#define PEXCCRCTRL_PEXIPCONTEN 0x00020000
189#define PEXCCRCTRL_PEXPHYPLLEN 0x00010000
190#define PEXCCRCTRL_PCIEXCAOCKEN 0x00000100
191
192/* SCC PCIEXC port configuration registers */
193#define PEXTCERRCHK 0x21c
194#define PEXTAMAPB0 0x220
195#define PEXTAMAPL0 0x224
196#define PEXTAMAPB(n) (PEXTAMAPB0 + 8 * (n))
197#define PEXTAMAPL(n) (PEXTAMAPL0 + 8 * (n))
198#define PEXCHVC0P 0x500
199#define PEXCHVC0NP 0x504
200#define PEXCHVC0C 0x508
201#define PEXCDVC0P 0x50c
202#define PEXCDVC0NP 0x510
203#define PEXCDVC0C 0x514
204#define PEXCHVCXP 0x518
205#define PEXCHVCXNP 0x51c
206#define PEXCHVCXC 0x520
207#define PEXCDVCXP 0x524
208#define PEXCDVCXNP 0x528
209#define PEXCDVCXC 0x52c
210#define PEXCTTRG 0x530
211#define PEXTSCTRL 0x700
212#define PEXTSSTS 0x704
213#define PEXSKPCTRL 0x708
214
128/* UHC registers */ 215/* UHC registers */
129#define SCC_UHC_CKRCTRL 0xff0 216#define SCC_UHC_CKRCTRL 0xff0
130#define SCC_UHC_ECMODE 0xf00 217#define SCC_UHC_ECMODE 0xf00
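Editorial note: the PCIEXC block above adds a few parameterised register macros laid out as base + 8 * n pairs. A tiny standalone check of that offset arithmetic, with the values copied verbatim from the defines:

    #include <assert.h>

    #define PEXTAMAPB0      0x220
    #define PEXTAMAPL0      0x224
    #define PEXTAMAPB(n)    (PEXTAMAPB0 + 8 * (n))
    #define PEXTAMAPL(n)    (PEXTAMAPL0 + 8 * (n))
    #define PEXMAEA(n)      (0xf50 + (8 * n))
    #define PEXMAEC(n)      (0xf54 + (8 * n))

    int main(void)
    {
            assert(PEXTAMAPB(2) == 0x230);
            assert(PEXTAMAPL(2) == 0x234);
            assert(PEXMAEA(3) == 0xf68);
            assert(PEXMAEC(3) == 0xf6c);
            return 0;
    }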
diff --git a/arch/powerpc/platforms/celleb/scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c
index a999b393f6f6..08c285b10e30 100644
--- a/arch/powerpc/platforms/celleb/scc_epci.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c
@@ -30,23 +30,17 @@
30#include <asm/io.h> 30#include <asm/io.h>
31#include <asm/irq.h> 31#include <asm/irq.h>
32#include <asm/prom.h> 32#include <asm/prom.h>
33#include <asm/machdep.h>
34#include <asm/pci-bridge.h> 33#include <asm/pci-bridge.h>
35#include <asm/ppc-pci.h> 34#include <asm/ppc-pci.h>
36 35
37#include "scc.h" 36#include "celleb_scc.h"
38#include "pci.h" 37#include "celleb_pci.h"
39#include "interrupt.h"
40 38
41#define MAX_PCI_DEVICES 32 39#define MAX_PCI_DEVICES 32
42#define MAX_PCI_FUNCTIONS 8 40#define MAX_PCI_FUNCTIONS 8
43 41
44#define iob() __asm__ __volatile__("eieio; sync":::"memory") 42#define iob() __asm__ __volatile__("eieio; sync":::"memory")
45 43
46struct epci_private {
47 dma_addr_t dummy_page_da;
48};
49
50static inline PCI_IO_ADDR celleb_epci_get_epci_base( 44static inline PCI_IO_ADDR celleb_epci_get_epci_base(
51 struct pci_controller *hose) 45 struct pci_controller *hose)
52{ 46{
@@ -71,42 +65,6 @@ static inline PCI_IO_ADDR celleb_epci_get_epci_cfg(
71 return hose->cfg_data; 65 return hose->cfg_data;
72} 66}
73 67
74static void scc_epci_dummy_read(struct pci_controller *hose)
75{
76 PCI_IO_ADDR epci_base;
77 u32 val;
78
79 epci_base = celleb_epci_get_epci_base(hose);
80
81 val = in_be32(epci_base + SCC_EPCI_WATRP);
82 iosync();
83
84 return;
85}
86
87void __init epci_workaround_init(struct pci_controller *hose)
88{
89 PCI_IO_ADDR epci_base;
90 PCI_IO_ADDR reg;
91 struct epci_private *private = hose->private_data;
92
93 BUG_ON(!private);
94
95 private->dummy_page_da = dma_map_single(hose->parent,
96 celleb_dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE);
97 if (private->dummy_page_da == DMA_ERROR_CODE) {
98 printk(KERN_ERR "EPCI: dummy read disabled. "
99 "Map dummy page failed.\n");
100 return;
101 }
102
103 celleb_pci_add_one(hose, scc_epci_dummy_read);
104 epci_base = celleb_epci_get_epci_base(hose);
105
106 reg = epci_base + SCC_EPCI_DUMYRADR;
107 out_be32(reg, private->dummy_page_da);
108}
109
110static inline void clear_and_disable_master_abort_interrupt( 68static inline void clear_and_disable_master_abort_interrupt(
111 struct pci_controller *hose) 69 struct pci_controller *hose)
112{ 70{
@@ -151,10 +109,8 @@ static int celleb_epci_check_abort(struct pci_controller *hose,
151 return PCIBIOS_SUCCESSFUL; 109 return PCIBIOS_SUCCESSFUL;
152} 110}
153 111
154static PCI_IO_ADDR celleb_epci_make_config_addr( 112static PCI_IO_ADDR celleb_epci_make_config_addr(struct pci_bus *bus,
155 struct pci_bus *bus, 113 struct pci_controller *hose, unsigned int devfn, int where)
156 struct pci_controller *hose,
157 unsigned int devfn, int where)
158{ 114{
159 PCI_IO_ADDR addr; 115 PCI_IO_ADDR addr;
160 116
@@ -425,8 +381,8 @@ static int __init celleb_epci_init(struct pci_controller *hose)
425 return 0; 381 return 0;
426} 382}
427 383
428int __init celleb_setup_epci(struct device_node *node, 384static int __init celleb_setup_epci(struct device_node *node,
429 struct pci_controller *hose) 385 struct pci_controller *hose)
430{ 386{
431 struct resource r; 387 struct resource r;
432 388
@@ -450,8 +406,7 @@ int __init celleb_setup_epci(struct device_node *node,
450 if (!hose->cfg_addr) 406 if (!hose->cfg_addr)
451 goto error; 407 goto error;
452 pr_debug("EPCI: cfg_addr map 0x%016lx->0x%016lx + 0x%016lx\n", 408 pr_debug("EPCI: cfg_addr map 0x%016lx->0x%016lx + 0x%016lx\n",
453 r.start, (unsigned long)hose->cfg_addr, 409 r.start, (unsigned long)hose->cfg_addr, (r.end - r.start + 1));
454 (r.end - r.start + 1));
455 410
456 if (of_address_to_resource(node, 2, &r)) 411 if (of_address_to_resource(node, 2, &r))
457 goto error; 412 goto error;
@@ -459,14 +414,7 @@ int __init celleb_setup_epci(struct device_node *node,
459 if (!hose->cfg_data) 414 if (!hose->cfg_data)
460 goto error; 415 goto error;
461 pr_debug("EPCI: cfg_data map 0x%016lx->0x%016lx + 0x%016lx\n", 416 pr_debug("EPCI: cfg_data map 0x%016lx->0x%016lx + 0x%016lx\n",
462 r.start, (unsigned long)hose->cfg_data, 417 r.start, (unsigned long)hose->cfg_data, (r.end - r.start + 1));
463 (r.end - r.start + 1));
464
465 hose->private_data = kzalloc(sizeof(struct epci_private), GFP_KERNEL);
466 if (hose->private_data == NULL) {
467 printk(KERN_ERR "EPCI: no memory for private data.\n");
468 goto error;
469 }
470 418
471 hose->ops = &celleb_epci_ops; 419 hose->ops = &celleb_epci_ops;
472 celleb_epci_init(hose); 420 celleb_epci_init(hose);
@@ -474,8 +422,6 @@ int __init celleb_setup_epci(struct device_node *node,
474 return 0; 422 return 0;
475 423
476error: 424error:
477 kfree(hose->private_data);
478
479 if (hose->cfg_addr) 425 if (hose->cfg_addr)
480 iounmap(hose->cfg_addr); 426 iounmap(hose->cfg_addr);
481 427
@@ -483,3 +429,10 @@ error:
483 iounmap(hose->cfg_data); 429 iounmap(hose->cfg_data);
484 return 1; 430 return 1;
485} 431}
432
433struct celleb_phb_spec celleb_epci_spec __initdata = {
434 .setup = celleb_setup_epci,
435 .ops = &spiderpci_ops,
436 .iowa_init = &spiderpci_iowa_init,
437 .iowa_data = (void *)0,
438};
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
new file mode 100644
index 000000000000..ab24d94baab6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -0,0 +1,547 @@
1/*
2 * Support for Celleb PCI-Express.
3 *
4 * (C) Copyright 2007-2008 TOSHIBA CORPORATION
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21#undef DEBUG
22
23#include <linux/kernel.h>
24#include <linux/pci.h>
25#include <linux/string.h>
26#include <linux/init.h>
27#include <linux/bootmem.h>
28#include <linux/delay.h>
29#include <linux/interrupt.h>
30
31#include <asm/io.h>
32#include <asm/irq.h>
33#include <asm/iommu.h>
34#include <asm/byteorder.h>
35
36#include "celleb_scc.h"
37#include "celleb_pci.h"
38
39#define PEX_IN(base, off) in_be32((void *)(base) + (off))
40#define PEX_OUT(base, off, data) out_be32((void *)(base) + (off), (data))
41
42static void scc_pciex_io_flush(struct iowa_bus *bus)
43{
44 (void)PEX_IN(bus->phb->cfg_addr, PEXDMRDEN0);
45}
46
47/*
48 * Memory space access to device on PCIEX
49 */
50#define PCIEX_MMIO_READ(name, ret) \
51static ret scc_pciex_##name(const PCI_IO_ADDR addr) \
52{ \
53 ret val = __do_##name(addr); \
54 scc_pciex_io_flush(iowa_mem_find_bus(addr)); \
55 return val; \
56}
57
58#define PCIEX_MMIO_READ_STR(name) \
59static void scc_pciex_##name(const PCI_IO_ADDR addr, void *buf, \
60 unsigned long count) \
61{ \
62 __do_##name(addr, buf, count); \
63 scc_pciex_io_flush(iowa_mem_find_bus(addr)); \
64}
65
66PCIEX_MMIO_READ(readb, u8)
67PCIEX_MMIO_READ(readw, u16)
68PCIEX_MMIO_READ(readl, u32)
69PCIEX_MMIO_READ(readq, u64)
70PCIEX_MMIO_READ(readw_be, u16)
71PCIEX_MMIO_READ(readl_be, u32)
72PCIEX_MMIO_READ(readq_be, u64)
73PCIEX_MMIO_READ_STR(readsb)
74PCIEX_MMIO_READ_STR(readsw)
75PCIEX_MMIO_READ_STR(readsl)
76
77static void scc_pciex_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
78 unsigned long n)
79{
80 __do_memcpy_fromio(dest, src, n);
81 scc_pciex_io_flush(iowa_mem_find_bus(src));
82}
83
84/*
85 * I/O port access to devices on PCIEX.
86 */
87
88static inline unsigned long get_bus_address(struct pci_controller *phb,
89 unsigned long port)
90{
91 return port - ((unsigned long)(phb->io_base_virt) - _IO_BASE);
92}
93
94static u32 scc_pciex_read_port(struct pci_controller *phb,
95 unsigned long port, int size)
96{
97 unsigned int byte_enable;
98 unsigned int cmd, shift;
99 unsigned long addr;
100 u32 data, ret;
101
102 BUG_ON(((port & 0x3ul) + size) > 4);
103
104 addr = get_bus_address(phb, port);
105 shift = addr & 0x3ul;
106 byte_enable = ((1 << size) - 1) << shift;
107 cmd = PEXDCMND_IO_READ | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
108 PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
109 PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
110 data = PEX_IN(phb->cfg_addr, PEXDRDATA);
111 ret = (data >> (shift * 8)) & (0xFFFFFFFF >> ((4 - size) * 8));
112
113 pr_debug("PCIEX:PIO READ:port=0x%lx, addr=0x%lx, size=%d, be=%x,"
114 " cmd=%x, data=%x, ret=%x\n", port, addr, size, byte_enable,
115 cmd, data, ret);
116
117 return ret;
118}
119
120static void scc_pciex_write_port(struct pci_controller *phb,
121 unsigned long port, int size, u32 val)
122{
123 unsigned int byte_enable;
124 unsigned int cmd, shift;
125 unsigned long addr;
126 u32 data;
127
128 BUG_ON(((port & 0x3ul) + size) > 4);
129
130 addr = get_bus_address(phb, port);
131 shift = addr & 0x3ul;
132 byte_enable = ((1 << size) - 1) << shift;
133 cmd = PEXDCMND_IO_WRITE | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
134 data = (val & (0xFFFFFFFF >> (4 - size) * 8)) << (shift * 8);
135 PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
136 PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
137 PEX_OUT(phb->cfg_addr, PEXDWDATA, data);
138
139 pr_debug("PCIEX:PIO WRITE:port=0x%lx, addr=%lx, size=%d, val=%x,"
140 " be=%x, cmd=%x, data=%x\n", port, addr, size, val,
141 byte_enable, cmd, data);
142}
143
144static u8 __scc_pciex_inb(struct pci_controller *phb, unsigned long port)
145{
146 return (u8)scc_pciex_read_port(phb, port, 1);
147}
148
149static u16 __scc_pciex_inw(struct pci_controller *phb, unsigned long port)
150{
151 u32 data;
152 if ((port & 0x3ul) < 3)
153 data = scc_pciex_read_port(phb, port, 2);
154 else {
155 u32 d1 = scc_pciex_read_port(phb, port, 1);
156 u32 d2 = scc_pciex_read_port(phb, port + 1, 1);
157 data = d1 | (d2 << 8);
158 }
159 return (u16)data;
160}
161
162static u32 __scc_pciex_inl(struct pci_controller *phb, unsigned long port)
163{
164 unsigned int mod = port & 0x3ul;
165 u32 data;
166 if (mod == 0)
167 data = scc_pciex_read_port(phb, port, 4);
168 else {
169 u32 d1 = scc_pciex_read_port(phb, port, 4 - mod);
170 u32 d2 = scc_pciex_read_port(phb, port + 1, mod);
171 data = d1 | (d2 << (mod * 8));
172 }
173 return data;
174}
175
176static void __scc_pciex_outb(struct pci_controller *phb,
177 u8 val, unsigned long port)
178{
179 scc_pciex_write_port(phb, port, 1, (u32)val);
180}
181
182static void __scc_pciex_outw(struct pci_controller *phb,
183 u16 val, unsigned long port)
184{
185 if ((port & 0x3ul) < 3)
186 scc_pciex_write_port(phb, port, 2, (u32)val);
187 else {
188 u32 d1 = val & 0x000000FF;
189 u32 d2 = (val & 0x0000FF00) >> 8;
190 scc_pciex_write_port(phb, port, 1, d1);
191 scc_pciex_write_port(phb, port + 1, 1, d2);
192 }
193}
194
195static void __scc_pciex_outl(struct pci_controller *phb,
196 u32 val, unsigned long port)
197{
198 unsigned int mod = port & 0x3ul;
199 if (mod == 0)
200 scc_pciex_write_port(phb, port, 4, val);
201 else {
202 u32 d1 = val & (0xFFFFFFFFul >> (mod * 8));
203 u32 d2 = val >> ((4 - mod) * 8);
204 scc_pciex_write_port(phb, port, 4 - mod, d1);
205 scc_pciex_write_port(phb, port + 1, mod, d2);
206 }
207}
208
209#define PCIEX_PIO_FUNC(size, name) \
210static u##size scc_pciex_in##name(unsigned long port) \
211{ \
212 struct iowa_bus *bus = iowa_pio_find_bus(port); \
213 u##size data = __scc_pciex_in##name(bus->phb, port); \
214 scc_pciex_io_flush(bus); \
215 return data; \
216} \
217static void scc_pciex_ins##name(unsigned long p, void *b, unsigned long c) \
218{ \
219 struct iowa_bus *bus = iowa_pio_find_bus(p); \
220 u##size *dst = b; \
221 for (; c != 0; c--, dst++) \
222 *dst = cpu_to_le##size(__scc_pciex_in##name(bus->phb, p)); \
223 scc_pciex_io_flush(bus); \
224} \
225static void scc_pciex_out##name(u##size val, unsigned long port) \
226{ \
227 struct iowa_bus *bus = iowa_pio_find_bus(port); \
228 __scc_pciex_out##name(bus->phb, val, port); \
229} \
230static void scc_pciex_outs##name(unsigned long p, const void *b, \
231 unsigned long c) \
232{ \
233 struct iowa_bus *bus = iowa_pio_find_bus(p); \
234 const u##size *src = b; \
235 for (; c != 0; c--, src++) \
236 __scc_pciex_out##name(bus->phb, le##size##_to_cpu(*src), p); \
237}
238#define cpu_to_le8(x) (x)
239#define le8_to_cpu(x) (x)
240PCIEX_PIO_FUNC(8, b)
241PCIEX_PIO_FUNC(16, w)
242PCIEX_PIO_FUNC(32, l)
243
244static struct ppc_pci_io scc_pciex_ops = {
245 .readb = scc_pciex_readb,
246 .readw = scc_pciex_readw,
247 .readl = scc_pciex_readl,
248 .readq = scc_pciex_readq,
249 .readw_be = scc_pciex_readw_be,
250 .readl_be = scc_pciex_readl_be,
251 .readq_be = scc_pciex_readq_be,
252 .readsb = scc_pciex_readsb,
253 .readsw = scc_pciex_readsw,
254 .readsl = scc_pciex_readsl,
255 .memcpy_fromio = scc_pciex_memcpy_fromio,
256 .inb = scc_pciex_inb,
257 .inw = scc_pciex_inw,
258 .inl = scc_pciex_inl,
259 .outb = scc_pciex_outb,
260 .outw = scc_pciex_outw,
261 .outl = scc_pciex_outl,
262 .insb = scc_pciex_insb,
263 .insw = scc_pciex_insw,
264 .insl = scc_pciex_insl,
265 .outsb = scc_pciex_outsb,
266 .outsw = scc_pciex_outsw,
267 .outsl = scc_pciex_outsl,
268};
269
270static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data)
271{
272 dma_addr_t dummy_page_da;
273 void *dummy_page_va;
274
275 dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
276 if (!dummy_page_va) {
277 pr_err("PCIEX:Alloc dummy_page_va failed\n");
278 return -1;
279 }
280
281 dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va,
282 PAGE_SIZE, DMA_FROM_DEVICE);
283 if (dma_mapping_error(dummy_page_da)) {
284 pr_err("PCIEX:Map dummy page failed.\n");
285 kfree(dummy_page_va);
286 return -1;
287 }
288
289 PEX_OUT(bus->phb->cfg_addr, PEXDMRDADR0, dummy_page_da);
290
291 return 0;
292}
293
294/*
295 * config space access
296 */
297#define MK_PEXDADRS(bus_no, dev_no, func_no, addr) \
298 ((uint32_t)(((addr) & ~0x3UL) | \
299 ((bus_no) << PEXDADRS_BUSNO_SHIFT) | \
300 ((dev_no) << PEXDADRS_DEVNO_SHIFT) | \
301 ((func_no) << PEXDADRS_FUNCNO_SHIFT)))
302
303#define MK_PEXDCMND_BYTE_EN(addr, size) \
304 ((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT)
305#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size))
306
307static uint32_t config_read_pciex_dev(unsigned int *base,
308 uint64_t bus_no, uint64_t dev_no, uint64_t func_no,
309 uint64_t off, uint64_t size)
310{
311 uint32_t ret;
312 uint32_t addr, cmd;
313
314 addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
315 cmd = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, size);
316 PEX_OUT(base, PEXDADRS, addr);
317 PEX_OUT(base, PEXDCMND, cmd);
318 ret = (PEX_IN(base, PEXDRDATA)
319 >> ((off & (4-size)) * 8)) & ((0x1 << (size * 8)) - 1);
320 return ret;
321}
322
323static void config_write_pciex_dev(unsigned int *base, uint64_t bus_no,
324 uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size,
325 uint32_t data)
326{
327 uint32_t addr, cmd;
328
329 addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
330 cmd = MK_PEXDCMND(PEXDCMND_CONFIG_WRITE, off, size);
331 PEX_OUT(base, PEXDADRS, addr);
332 PEX_OUT(base, PEXDCMND, cmd);
333 PEX_OUT(base, PEXDWDATA,
334 (data & ((0x1 << (size * 8)) - 1)) << ((off & (4-size)) * 8));
335}
336
337#define MK_PEXCADRS_BYTE_EN(off, len) \
338 ((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT)
339#define MK_PEXCADRS(cmd, addr, size) \
340 ((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3))
341static uint32_t config_read_pciex_rc(unsigned int *base,
342 uint32_t where, uint32_t size)
343{
344 PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size));
345 return (PEX_IN(base, PEXCRDATA)
346 >> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1);
347}
348
349static void config_write_pciex_rc(unsigned int *base, uint32_t where,
350 uint32_t size, uint32_t val)
351{
352 uint32_t data;
353
354 data = (val & ((0x1 << (size * 8)) - 1)) << ((where & (4 - size)) * 8);
355 PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_WRITE, where, size));
356 PEX_OUT(base, PEXCWDATA, data);
357}
358
359/* Interfaces */
360/* Note: Work-around
361 * On SCC PCIEXC, one device is seen on all 32 dev_no.
 362 * As SCC PCIEXC can have only one device on the bus, we look at only one dev_no.
363 * (dev_no = 1)
364 */
365static int scc_pciex_read_config(struct pci_bus *bus, unsigned int devfn,
366 int where, int size, unsigned int *val)
367{
368 struct device_node *dn;
369 struct pci_controller *phb;
370
371 dn = bus->sysdata;
372 phb = pci_find_hose_for_OF_device(dn);
373
374 if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) {
375 *val = ~0;
376 return PCIBIOS_DEVICE_NOT_FOUND;
377 }
378
379 if (bus->number == 0 && PCI_SLOT(devfn) == 0)
380 *val = config_read_pciex_rc(phb->cfg_addr, where, size);
381 else
382 *val = config_read_pciex_dev(phb->cfg_addr, bus->number,
383 PCI_SLOT(devfn), PCI_FUNC(devfn), where, size);
384
385 return PCIBIOS_SUCCESSFUL;
386}
387
388static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn,
389 int where, int size, unsigned int val)
390{
391 struct device_node *dn;
392 struct pci_controller *phb;
393
394 dn = bus->sysdata;
395 phb = pci_find_hose_for_OF_device(dn);
396
397 if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1)
398 return PCIBIOS_DEVICE_NOT_FOUND;
399
400 if (bus->number == 0 && PCI_SLOT(devfn) == 0)
401 config_write_pciex_rc(phb->cfg_addr, where, size, val);
402 else
403 config_write_pciex_dev(phb->cfg_addr, bus->number,
404 PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
405 return PCIBIOS_SUCCESSFUL;
406}
407
408static struct pci_ops scc_pciex_pci_ops = {
409 scc_pciex_read_config,
410 scc_pciex_write_config,
411};
412
413static void pciex_clear_intr_all(unsigned int *base)
414{
415 PEX_OUT(base, PEXAERRSTS, 0xffffffff);
416 PEX_OUT(base, PEXPRERRSTS, 0xffffffff);
417 PEX_OUT(base, PEXINTSTS, 0xffffffff);
418}
419
420#if 0
421static void pciex_disable_intr_all(unsigned int *base)
422{
423 PEX_OUT(base, PEXINTMASK, 0x0);
424 PEX_OUT(base, PEXAERRMASK, 0x0);
425 PEX_OUT(base, PEXPRERRMASK, 0x0);
426 PEX_OUT(base, PEXVDMASK, 0x0);
427}
428#endif
429
430static void pciex_enable_intr_all(unsigned int *base)
431{
432 PEX_OUT(base, PEXINTMASK, 0x0000e7f1);
433 PEX_OUT(base, PEXAERRMASK, 0x03ff01ff);
434 PEX_OUT(base, PEXPRERRMASK, 0x0001010f);
435 PEX_OUT(base, PEXVDMASK, 0x00000001);
436}
437
438static void pciex_check_status(unsigned int *base)
439{
440 uint32_t err = 0;
441 uint32_t intsts, aerr, prerr, rcvcp, lenerr;
442 uint32_t maea, maec;
443
444 intsts = PEX_IN(base, PEXINTSTS);
445 aerr = PEX_IN(base, PEXAERRSTS);
446 prerr = PEX_IN(base, PEXPRERRSTS);
447 rcvcp = PEX_IN(base, PEXRCVCPLIDA);
448 lenerr = PEX_IN(base, PEXLENERRIDA);
449
450 if (intsts || aerr || prerr || rcvcp || lenerr)
451 err = 1;
452
453 pr_info("PCEXC interrupt!!\n");
454 pr_info("PEXINTSTS :0x%08x\n", intsts);
455 pr_info("PEXAERRSTS :0x%08x\n", aerr);
456 pr_info("PEXPRERRSTS :0x%08x\n", prerr);
457 pr_info("PEXRCVCPLIDA :0x%08x\n", rcvcp);
458 pr_info("PEXLENERRIDA :0x%08x\n", lenerr);
459
460 /* print detail of Protection Error */
461 if (intsts & 0x00004000) {
462 uint32_t i, n;
463 for (i = 0; i < 4; i++) {
464 n = 1 << i;
465 if (prerr & n) {
466 maea = PEX_IN(base, PEXMAEA(i));
467 maec = PEX_IN(base, PEXMAEC(i));
468 pr_info("PEXMAEC%d :0x%08x\n", i, maec);
469 pr_info("PEXMAEA%d :0x%08x\n", i, maea);
470 }
471 }
472 }
473
474 if (err)
475 pciex_clear_intr_all(base);
476}
477
478static irqreturn_t pciex_handle_internal_irq(int irq, void *dev_id)
479{
480 struct pci_controller *phb = dev_id;
481
482 pr_debug("PCIEX:pciex_handle_internal_irq(irq=%d)\n", irq);
483
484 BUG_ON(phb->cfg_addr == NULL);
485
486 pciex_check_status(phb->cfg_addr);
487
488 return IRQ_HANDLED;
489}
490
491static __init int celleb_setup_pciex(struct device_node *node,
492 struct pci_controller *phb)
493{
494 struct resource r;
495 struct of_irq oirq;
496 int virq;
497
498 /* SMMIO registers; used inside this file */
499 if (of_address_to_resource(node, 0, &r)) {
500 pr_err("PCIEXC:Failed to get config resource.\n");
501 return 1;
502 }
503 phb->cfg_addr = ioremap(r.start, r.end - r.start + 1);
504 if (!phb->cfg_addr) {
505 pr_err("PCIEXC:Failed to remap SMMIO region.\n");
506 return 1;
507 }
508
 509 /* Don't use cfg_data; the cmd and data regs sit next to the address reg */
510 phb->cfg_data = NULL;
511
512 /* set pci_ops */
513 phb->ops = &scc_pciex_pci_ops;
514
515 /* internal interrupt handler */
516 if (of_irq_map_one(node, 1, &oirq)) {
517 pr_err("PCIEXC:Failed to map irq\n");
518 goto error;
519 }
520 virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
521 oirq.size);
522 if (request_irq(virq, pciex_handle_internal_irq,
523 IRQF_DISABLED, "pciex", (void *)phb)) {
524 pr_err("PCIEXC:Failed to request irq\n");
525 goto error;
526 }
527
528 /* enable all interrupts */
529 pciex_clear_intr_all(phb->cfg_addr);
530 pciex_enable_intr_all(phb->cfg_addr);
531 /* MSI: TBD */
532
533 return 0;
534
535error:
536 phb->cfg_data = NULL;
537 if (phb->cfg_addr)
538 iounmap(phb->cfg_addr);
539 phb->cfg_addr = NULL;
540 return 1;
541}
542
543struct celleb_phb_spec celleb_pciex_spec __initdata = {
544 .setup = celleb_setup_pciex,
545 .ops = &scc_pciex_ops,
546 .iowa_init = &scc_pciex_iowa_init,
547};
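Editorial note: all port I/O in the new file funnels through scc_pciex_read_port()/scc_pciex_write_port(), which express a 1/2/4-byte access as one aligned 32-bit transaction plus a byte-enable mask and then shift the requested bytes back out. A standalone user-space check of just that shift/mask arithmetic, lifted from scc_pciex_read_port():

    #include <assert.h>
    #include <stdint.h>

    /* mirror of the masking in scc_pciex_read_port(): the hardware
     * returns a full 32-bit word in PEXDRDATA; the caller wants "size"
     * bytes starting at (addr & 3). */
    static uint32_t extract(uint32_t word, unsigned long addr, int size)
    {
            unsigned int shift = addr & 0x3ul;

            return (word >> (shift * 8)) & (0xFFFFFFFFu >> ((4 - size) * 8));
    }

    int main(void)
    {
            uint32_t word = 0xAABBCCDD;     /* as read back from PEXDRDATA */

            assert(extract(word, 0x1000, 1) == 0xDD);    /* lowest byte */
            assert(extract(word, 0x1001, 2) == 0xBBCC);  /* bytes 1-2 */
            assert(extract(word, 0x1000, 4) == 0xAABBCCDD);
            return 0;
    }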
diff --git a/arch/powerpc/platforms/celleb/scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c
index 3a16c5b3c464..3a16c5b3c464 100644
--- a/arch/powerpc/platforms/celleb/scc_sio.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c
diff --git a/arch/powerpc/platforms/celleb/scc_uhc.c b/arch/powerpc/platforms/cell/celleb_scc_uhc.c
index cb4307994087..d63b720bfe3a 100644
--- a/arch/powerpc/platforms/celleb/scc_uhc.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_uhc.c
@@ -25,7 +25,7 @@
25#include <asm/io.h> 25#include <asm/io.h>
26#include <asm/machdep.h> 26#include <asm/machdep.h>
27 27
28#include "scc.h" 28#include "celleb_scc.h"
29 29
30#define UHC_RESET_WAIT_MAX 10000 30#define UHC_RESET_WAIT_MAX 10000
31 31
diff --git a/arch/powerpc/platforms/celleb/setup.c b/arch/powerpc/platforms/cell/celleb_setup.c
index f27ae1e3fb58..b11cb30decb2 100644
--- a/arch/powerpc/platforms/celleb/setup.c
+++ b/arch/powerpc/platforms/cell/celleb_setup.c
@@ -56,13 +56,13 @@
56#include <asm/rtas.h> 56#include <asm/rtas.h>
57#include <asm/cell-regs.h> 57#include <asm/cell-regs.h>
58 58
59#include "interrupt.h" 59#include "beat_interrupt.h"
60#include "beat_wrapper.h" 60#include "beat_wrapper.h"
61#include "beat.h" 61#include "beat.h"
62#include "pci.h" 62#include "celleb_pci.h"
63#include "../cell/interrupt.h" 63#include "interrupt.h"
64#include "../cell/pervasive.h" 64#include "pervasive.h"
65#include "../cell/ras.h" 65#include "ras.h"
66 66
67static char celleb_machine_type[128] = "Celleb"; 67static char celleb_machine_type[128] = "Celleb";
68 68
@@ -114,8 +114,6 @@ static int __init celleb_publish_devices(void)
114 /* Publish OF platform devices for southbridge IOs */ 114 /* Publish OF platform devices for southbridge IOs */
115 of_platform_bus_probe(NULL, celleb_bus_ids, NULL); 115 of_platform_bus_probe(NULL, celleb_bus_ids, NULL);
116 116
117 celleb_pci_workaround_init();
118
119 return 0; 117 return 0;
120} 118}
121machine_device_initcall(celleb_beat, celleb_publish_devices); 119machine_device_initcall(celleb_beat, celleb_publish_devices);
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c
index 979d4b67efb4..3b84e8be314c 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ b/arch/powerpc/platforms/cell/io-workarounds.c
@@ -1,6 +1,9 @@
1/* 1/*
2 * Support PCI IO workaround
3 *
2 * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org> 4 * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
3 * IBM, Corp. 5 * IBM, Corp.
6 * (C) Copyright 2007-2008 TOSHIBA CORPORATION
4 * 7 *
5 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -9,335 +12,174 @@
9#undef DEBUG 12#undef DEBUG
10 13
11#include <linux/kernel.h> 14#include <linux/kernel.h>
12#include <linux/mm.h> 15
13#include <linux/pci.h>
14#include <asm/io.h> 16#include <asm/io.h>
15#include <asm/machdep.h> 17#include <asm/machdep.h>
16#include <asm/pci-bridge.h> 18#include <asm/pgtable.h>
17#include <asm/ppc-pci.h> 19#include <asm/ppc-pci.h>
18 20
21#include "io-workarounds.h"
19 22
20#define SPIDER_PCI_REG_BASE 0xd000 23#define IOWA_MAX_BUS 8
21#define SPIDER_PCI_VCI_CNTL_STAT 0x0110
22#define SPIDER_PCI_DUMMY_READ 0x0810
23#define SPIDER_PCI_DUMMY_READ_BASE 0x0814
24 24
25/* Undefine that to re-enable bogus prefetch 25static struct iowa_bus iowa_busses[IOWA_MAX_BUS];
26 * 26static unsigned int iowa_bus_count;
27 * Without that workaround, the chip will do bogus prefetch past
28 * page boundary from system memory. This setting will disable that,
29 * though the documentation is unclear as to the consequences of doing
30 * so, either purely performances, or possible misbehaviour... It's not
31 * clear wether the chip can handle unaligned accesses at all without
32 * prefetching enabled.
33 *
34 * For now, things appear to be behaving properly with that prefetching
35 * disabled and IDE, possibly because IDE isn't doing any unaligned
36 * access.
37 */
38#define SPIDER_DISABLE_PREFETCH
39 27
40#define MAX_SPIDERS 3 28static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
29{
30 int i, j;
31 struct resource *res;
32 unsigned long vstart, vend;
41 33
42static struct spider_pci_bus { 34 for (i = 0; i < iowa_bus_count; i++) {
43 void __iomem *regs; 35 struct iowa_bus *bus = &iowa_busses[i];
44 unsigned long mmio_start; 36 struct pci_controller *phb = bus->phb;
45 unsigned long mmio_end;
46 unsigned long pio_vstart;
47 unsigned long pio_vend;
48} spider_pci_busses[MAX_SPIDERS];
49static int spider_pci_count;
50 37
51static struct spider_pci_bus *spider_pci_find(unsigned long vaddr, 38 if (vaddr) {
52 unsigned long paddr) 39 vstart = (unsigned long)phb->io_base_virt;
53{ 40 vend = vstart + phb->pci_io_size - 1;
54 int i; 41 if ((vaddr >= vstart) && (vaddr <= vend))
55 42 return bus;
56 for (i = 0; i < spider_pci_count; i++) { 43 }
57 struct spider_pci_bus *bus = &spider_pci_busses[i]; 44
58 if (paddr && paddr >= bus->mmio_start && paddr < bus->mmio_end) 45 if (paddr)
59 return bus; 46 for (j = 0; j < 3; j++) {
60 if (vaddr && vaddr >= bus->pio_vstart && vaddr < bus->pio_vend) 47 res = &phb->mem_resources[j];
61 return bus; 48 if (paddr >= res->start && paddr <= res->end)
49 return bus;
50 }
62 } 51 }
52
63 return NULL; 53 return NULL;
64} 54}
65 55
66static void spider_io_flush(const volatile void __iomem *addr) 56struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
67{ 57{
68 struct spider_pci_bus *bus; 58 struct iowa_bus *bus;
69 int token; 59 int token;
70 60
71 /* Get platform token (set by ioremap) from address */
72 token = PCI_GET_ADDR_TOKEN(addr); 61 token = PCI_GET_ADDR_TOKEN(addr);
73 62
74 /* Fast path if we have a non-0 token, it indicates which bus we 63 if (token && token <= iowa_bus_count)
75 * are on. 64 bus = &iowa_busses[token - 1];
76 *
77 * If the token is 0, that means either that the ioremap was done
78 * before we initialized this layer, or it's a PIO operation. We
79 * fallback to a low path in this case. Hopefully, internal devices
80 * which are ioremap'ed early should use in_XX/out_XX functions
81 * instead of the PCI ones and thus not suffer from the slowdown.
82 *
83 * Also note that currently, the workaround will not work for areas
84 * that are not mapped with PTEs (bolted in the hash table). This
85 * is the case for ioremaps done very early at boot (before
86 * mem_init_done) and includes the mapping of the ISA IO space.
87 *
88 * Fortunately, none of the affected devices is expected to do DMA
89 * and thus there should be no problem in practice.
90 *
91 * In order to improve performances, we only do the PTE search for
92 * addresses falling in the PHB IO space area. That means it will
93 * not work for hotplug'ed PHBs but those don't exist with Spider.
94 */
95 if (token && token <= spider_pci_count)
96 bus = &spider_pci_busses[token - 1];
97 else { 65 else {
98 unsigned long vaddr, paddr; 66 unsigned long vaddr, paddr;
99 pte_t *ptep; 67 pte_t *ptep;
100 68
101 /* Fixup physical address */
102 vaddr = (unsigned long)PCI_FIX_ADDR(addr); 69 vaddr = (unsigned long)PCI_FIX_ADDR(addr);
70 if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
71 return NULL;
103 72
104 /* Check if it's in allowed range for PIO */
105 if (vaddr < PHB_IO_BASE || vaddr > PHB_IO_END)
106 return;
107
108 /* Try to find a PTE. If not, clear the paddr, we'll do
109 * a vaddr only lookup (PIO only)
110 */
111 ptep = find_linux_pte(init_mm.pgd, vaddr); 73 ptep = find_linux_pte(init_mm.pgd, vaddr);
112 if (ptep == NULL) 74 if (ptep == NULL)
113 paddr = 0; 75 paddr = 0;
114 else 76 else
115 paddr = pte_pfn(*ptep) << PAGE_SHIFT; 77 paddr = pte_pfn(*ptep) << PAGE_SHIFT;
78 bus = iowa_pci_find(vaddr, paddr);
116 79
117 bus = spider_pci_find(vaddr, paddr);
118 if (bus == NULL) 80 if (bus == NULL)
119 return; 81 return NULL;
120 } 82 }
121 83
122 /* Now do the workaround 84 return bus;
123 */
124 (void)in_be32(bus->regs + SPIDER_PCI_DUMMY_READ);
125} 85}
126 86
127static u8 spider_readb(const volatile void __iomem *addr) 87struct iowa_bus *iowa_pio_find_bus(unsigned long port)
128{ 88{
129 u8 val = __do_readb(addr); 89 unsigned long vaddr = (unsigned long)pci_io_base + port;
130 spider_io_flush(addr); 90 return iowa_pci_find(vaddr, 0);
131 return val;
132} 91}
133 92
134static u16 spider_readw(const volatile void __iomem *addr)
135{
136 u16 val = __do_readw(addr);
137 spider_io_flush(addr);
138 return val;
139}
140 93
141static u32 spider_readl(const volatile void __iomem *addr) 94#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \
142{ 95static ret iowa_##name at \
143 u32 val = __do_readl(addr); 96{ \
144 spider_io_flush(addr); 97 struct iowa_bus *bus; \
145 return val; 98 bus = iowa_##space##_find_bus(aa); \
99 if (bus && bus->ops && bus->ops->name) \
100 return bus->ops->name al; \
101 return __do_##name al; \
146} 102}
147 103
148static u64 spider_readq(const volatile void __iomem *addr) 104#define DEF_PCI_AC_NORET(name, at, al, space, aa) \
149{ 105static void iowa_##name at \
150 u64 val = __do_readq(addr); 106{ \
151 spider_io_flush(addr); 107 struct iowa_bus *bus; \
152 return val; 108 bus = iowa_##space##_find_bus(aa); \
109 if (bus && bus->ops && bus->ops->name) { \
110 bus->ops->name al; \
111 return; \
112 } \
113 __do_##name al; \
153} 114}
154 115
155static u16 spider_readw_be(const volatile void __iomem *addr) 116#include <asm/io-defs.h>
156{
157 u16 val = __do_readw_be(addr);
158 spider_io_flush(addr);
159 return val;
160}
161 117
162static u32 spider_readl_be(const volatile void __iomem *addr) 118#undef DEF_PCI_AC_RET
163{ 119#undef DEF_PCI_AC_NORET
164 u32 val = __do_readl_be(addr);
165 spider_io_flush(addr);
166 return val;
167}
168 120
169static u64 spider_readq_be(const volatile void __iomem *addr) 121static struct ppc_pci_io __initdata iowa_pci_io = {
170{
171 u64 val = __do_readq_be(addr);
172 spider_io_flush(addr);
173 return val;
174}
175 122
176static void spider_readsb(const volatile void __iomem *addr, void *buf, 123#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name,
177 unsigned long count) 124#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name,
178{
179 __do_readsb(addr, buf, count);
180 spider_io_flush(addr);
181}
182 125
183static void spider_readsw(const volatile void __iomem *addr, void *buf, 126#include <asm/io-defs.h>
184 unsigned long count)
185{
186 __do_readsw(addr, buf, count);
187 spider_io_flush(addr);
188}
189 127
190static void spider_readsl(const volatile void __iomem *addr, void *buf, 128#undef DEF_PCI_AC_RET
191 unsigned long count) 129#undef DEF_PCI_AC_NORET
192{
193 __do_readsl(addr, buf, count);
194 spider_io_flush(addr);
195}
196
197static void spider_memcpy_fromio(void *dest, const volatile void __iomem *src,
198 unsigned long n)
199{
200 __do_memcpy_fromio(dest, src, n);
201 spider_io_flush(src);
202}
203 130
131};
204 132
205static void __iomem * spider_ioremap(unsigned long addr, unsigned long size, 133static void __iomem *iowa_ioremap(unsigned long addr, unsigned long size,
206 unsigned long flags) 134 unsigned long flags)
207{ 135{
208 struct spider_pci_bus *bus; 136 struct iowa_bus *bus;
209 void __iomem *res = __ioremap(addr, size, flags); 137 void __iomem *res = __ioremap(addr, size, flags);
210 int busno; 138 int busno;
211 139
212 pr_debug("spider_ioremap(0x%lx, 0x%lx, 0x%lx) -> 0x%p\n", 140 bus = iowa_pci_find(0, addr);
213 addr, size, flags, res);
214
215 bus = spider_pci_find(0, addr);
216 if (bus != NULL) { 141 if (bus != NULL) {
217 busno = bus - spider_pci_busses; 142 busno = bus - iowa_busses;
218 pr_debug(" found bus %d, setting token\n", busno);
219 PCI_SET_ADDR_TOKEN(res, busno + 1); 143 PCI_SET_ADDR_TOKEN(res, busno + 1);
220 } 144 }
221 pr_debug(" result=0x%p\n", res);
222
223 return res; 145 return res;
224} 146}
225 147
 226static void __init spider_pci_setup_chip(struct spider_pci_bus *bus) 148/* Register new bus to support workaround */
227{ 149void __init iowa_register_bus(struct pci_controller *phb,
228#ifdef SPIDER_DISABLE_PREFETCH 150 struct ppc_pci_io *ops,
229 u32 val = in_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT); 151 int (*initfunc)(struct iowa_bus *, void *), void *data)
230 pr_debug(" PVCI_Control_Status was 0x%08x\n", val);
231 out_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
232#endif
233
234 /* Configure the dummy address for the workaround */
235 out_be32(bus->regs + SPIDER_PCI_DUMMY_READ_BASE, 0x80000000);
236}
237
238static void __init spider_pci_add_one(struct pci_controller *phb)
239{ 152{
240 struct spider_pci_bus *bus = &spider_pci_busses[spider_pci_count]; 153 struct iowa_bus *bus;
241 struct device_node *np = phb->dn; 154 struct device_node *np = phb->dn;
242 struct resource rsrc;
243 void __iomem *regs;
244 155
245 if (spider_pci_count >= MAX_SPIDERS) { 156 if (iowa_bus_count >= IOWA_MAX_BUS) {
246 printk(KERN_ERR "Too many spider bridges, workarounds" 157 pr_err("IOWA:Too many pci bridges, "
247 " disabled for %s\n", np->full_name); 158 "workarounds disabled for %s\n", np->full_name);
248 return; 159 return;
249 } 160 }
250 161
251 /* Get the registers for the beast */ 162 bus = &iowa_busses[iowa_bus_count];
252 if (of_address_to_resource(np, 0, &rsrc)) { 163 bus->phb = phb;
253 printk(KERN_ERR "Failed to get registers for spider %s" 164 bus->ops = ops;
254 " workarounds disabled\n", np->full_name);
255 return;
256 }
257 165
258 /* Mask out some useless bits in there to get to the base of the 166 if (initfunc)
259 * spider chip 167 if ((*initfunc)(bus, data))
260 */ 168 return;
261 rsrc.start &= ~0xfffffffful;
262
263 /* Map them */
264 regs = ioremap(rsrc.start + SPIDER_PCI_REG_BASE, 0x1000);
265 if (regs == NULL) {
266 printk(KERN_ERR "Failed to map registers for spider %s"
267 " workarounds disabled\n", np->full_name);
268 return;
269 }
270
271 spider_pci_count++;
272
273 /* We assume spiders only have one MMIO resource */
274 bus->mmio_start = phb->mem_resources[0].start;
275 bus->mmio_end = phb->mem_resources[0].end + 1;
276
277 bus->pio_vstart = (unsigned long)phb->io_base_virt;
278 bus->pio_vend = bus->pio_vstart + phb->pci_io_size;
279
280 bus->regs = regs;
281
282 printk(KERN_INFO "PCI: Spider MMIO workaround for %s\n",np->full_name);
283 169
284 pr_debug(" mmio (P) = 0x%016lx..0x%016lx\n", 170 iowa_bus_count++;
285 bus->mmio_start, bus->mmio_end);
286 pr_debug(" pio (V) = 0x%016lx..0x%016lx\n",
287 bus->pio_vstart, bus->pio_vend);
288 pr_debug(" regs (P) = 0x%016lx (V) = 0x%p\n",
289 rsrc.start + SPIDER_PCI_REG_BASE, bus->regs);
290 171
291 spider_pci_setup_chip(bus); 172 pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
292} 173}
293 174
294static struct ppc_pci_io __initdata spider_pci_io = { 175/* enable IO workaround */
295 .readb = spider_readb, 176void __init io_workaround_init(void)
296 .readw = spider_readw,
297 .readl = spider_readl,
298 .readq = spider_readq,
299 .readw_be = spider_readw_be,
300 .readl_be = spider_readl_be,
301 .readq_be = spider_readq_be,
302 .readsb = spider_readsb,
303 .readsw = spider_readsw,
304 .readsl = spider_readsl,
305 .memcpy_fromio = spider_memcpy_fromio,
306};
307
308static int __init spider_pci_workaround_init(void)
309{ 177{
310 struct pci_controller *phb; 178 static int io_workaround_inited;
311
312 /* Find spider bridges. We assume they have been all probed
313 * in setup_arch(). If that was to change, we would need to
314 * update this code to cope with dynamically added busses
315 */
316 list_for_each_entry(phb, &hose_list, list_node) {
317 struct device_node *np = phb->dn;
318 const char *model = of_get_property(np, "model", NULL);
319
320 /* If no model property or name isn't exactly "pci", skip */
321 if (model == NULL || strcmp(np->name, "pci"))
322 continue;
323 /* If model is not "Spider", skip */
324 if (strcmp(model, "Spider"))
325 continue;
326 spider_pci_add_one(phb);
327 }
328
329 /* No Spider PCI found, exit */
330 if (spider_pci_count == 0)
331 return 0;
332 179
333 /* Setup IO callbacks. We only setup MMIO reads. PIO reads will 180 if (io_workaround_inited)
334 * fallback to MMIO reads (though without a token, thus slower) 181 return;
335 */ 182 ppc_pci_io = iowa_pci_io;
336 ppc_pci_io = spider_pci_io; 183 ppc_md.ioremap = iowa_ioremap;
337 184 io_workaround_inited = 1;
338 /* Setup ioremap callback */
339 ppc_md.ioremap = spider_ioremap;
340
341 return 0;
342} 185}
343machine_arch_initcall(cell, spider_pci_workaround_init);
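The new io-workarounds.c above fills iowa_pci_io by redefining DEF_PCI_AC_RET / DEF_PCI_AC_NORET and re-including <asm/io-defs.h>, so every accessor listed in that header gets pointed at its iowa_* hook. A rough sketch of what the preprocessor produces, assuming io-defs.h carries entries of the usual form (the parameter lists here are illustrative, not copied from the header):

/* Hypothetical excerpt of <asm/io-defs.h>: */
DEF_PCI_AC_RET(readb, u8, (const PCI_IO_ADDR addr), (addr), mem, addr)
DEF_PCI_AC_NORET(writeb, (u8 val, PCI_IO_ADDR addr), (val, addr), mem, addr)

/* With the #defines shown in the hunk above in force, including the
 * header inside the initializer expands to roughly: */
static struct ppc_pci_io __initdata iowa_pci_io = {
	.readb	= iowa_readb,
	.writeb	= iowa_writeb,
	/* ...one entry per accessor declared in io-defs.h... */
};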
diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/platforms/cell/io-workarounds.h
new file mode 100644
index 000000000000..79d8ed3d510f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/io-workarounds.h
@@ -0,0 +1,49 @@
1/*
2 * Support PCI IO workaround
3 *
4 * (C) Copyright 2007-2008 TOSHIBA CORPORATION
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21#ifndef _IO_WORKAROUNDS_H
22#define _IO_WORKAROUNDS_H
23
24#include <linux/io.h>
25#include <asm/pci-bridge.h>
26
27/* Bus info */
28struct iowa_bus {
29 struct pci_controller *phb;
30 struct ppc_pci_io *ops;
31 void *private;
32};
33
34void __init io_workaround_init(void);
35void __init iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
36 int (*)(struct iowa_bus *, void *), void *);
37struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
38struct iowa_bus *iowa_pio_find_bus(unsigned long);
39
40extern struct ppc_pci_io spiderpci_ops;
41extern int spiderpci_iowa_init(struct iowa_bus *, void *);
42
43#define SPIDER_PCI_REG_BASE 0xd000
44#define SPIDER_PCI_REG_SIZE 0x1000
45#define SPIDER_PCI_VCI_CNTL_STAT 0x0110
46#define SPIDER_PCI_DUMMY_READ 0x0810
47#define SPIDER_PCI_DUMMY_READ_BASE 0x0814
48
49#endif /* _IO_WORKAROUNDS_H */
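The header above is the whole contract a platform needs: provide a ppc_pci_io table of accessors, optionally a per-bus init function, then register each affected PHB and flip the global hooks on. A minimal sketch of a caller, with all myplat_* names hypothetical (the Cell hook added in the setup.c hunk below is the real in-tree user):

static int __init myplat_iowa_init(struct iowa_bus *bus, void *data)
{
	/* map chip registers, stash them in bus->private, etc. */
	return 0;
}

static struct ppc_pci_io myplat_iowa_ops;	/* dummy-read style accessors */

static void __init myplat_setup_phb(struct pci_controller *phb)
{
	iowa_register_bus(phb, &myplat_iowa_ops, &myplat_iowa_init, NULL);
	io_workaround_init();	/* installs iowa_pci_io and iowa_ioremap once */
}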
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 5c531e8f9f6f..ab721b50fbba 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -57,6 +57,7 @@
57#include "interrupt.h" 57#include "interrupt.h"
58#include "pervasive.h" 58#include "pervasive.h"
59#include "ras.h" 59#include "ras.h"
60#include "io-workarounds.h"
60 61
61#ifdef DEBUG 62#ifdef DEBUG
62#define DBG(fmt...) udbg_printf(fmt) 63#define DBG(fmt...) udbg_printf(fmt)
@@ -117,13 +118,50 @@ static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev)
117} 118}
118DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex); 119DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex);
119 120
121static int __devinit cell_setup_phb(struct pci_controller *phb)
122{
123 const char *model;
124 struct device_node *np;
125
126 int rc = rtas_setup_phb(phb);
127 if (rc)
128 return rc;
129
130 np = phb->dn;
131 model = of_get_property(np, "model", NULL);
132 if (model == NULL || strcmp(np->name, "pci"))
133 return 0;
134
135 /* Setup workarounds for spider */
136 if (strcmp(model, "Spider"))
137 return 0;
138
139 iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
140 (void *)SPIDER_PCI_REG_BASE);
141 io_workaround_init();
142
143 return 0;
144}
145
120static int __init cell_publish_devices(void) 146static int __init cell_publish_devices(void)
121{ 147{
148 struct device_node *root = of_find_node_by_path("/");
149 struct device_node *np;
122 int node; 150 int node;
123 151
124 /* Publish OF platform devices for southbridge IOs */ 152 /* Publish OF platform devices for southbridge IOs */
125 of_platform_bus_probe(NULL, NULL, NULL); 153 of_platform_bus_probe(NULL, NULL, NULL);
126 154
155 /* On spider based blades, we need to manually create the OF
156 * platform devices for the PCI host bridges
157 */
158 for_each_child_of_node(root, np) {
159 if (np->type == NULL || (strcmp(np->type, "pci") != 0 &&
160 strcmp(np->type, "pciex") != 0))
161 continue;
162 of_platform_device_create(np, NULL, NULL);
163 }
164
127 /* There is no device for the MIC memory controller, thus we create 165 /* There is no device for the MIC memory controller, thus we create
128 * a platform device for it to attach the EDAC driver to. 166 * a platform device for it to attach the EDAC driver to.
129 */ 167 */
@@ -132,6 +170,7 @@ static int __init cell_publish_devices(void)
132 continue; 170 continue;
133 platform_device_register_simple("cbe-mic", node, NULL, 0); 171 platform_device_register_simple("cbe-mic", node, NULL, 0);
134 } 172 }
173
135 return 0; 174 return 0;
136} 175}
137machine_subsys_initcall(cell, cell_publish_devices); 176machine_subsys_initcall(cell, cell_publish_devices);
@@ -213,7 +252,7 @@ static void __init cell_setup_arch(void)
213 252
214 /* Find and initialize PCI host bridges */ 253 /* Find and initialize PCI host bridges */
215 init_pci_config_tokens(); 254 init_pci_config_tokens();
216 find_and_init_phbs(); 255
217 cbe_pervasive_init(); 256 cbe_pervasive_init();
218#ifdef CONFIG_DUMMY_CONSOLE 257#ifdef CONFIG_DUMMY_CONSOLE
219 conswitchp = &dummy_con; 258 conswitchp = &dummy_con;
@@ -249,7 +288,7 @@ define_machine(cell) {
249 .calibrate_decr = generic_calibrate_decr, 288 .calibrate_decr = generic_calibrate_decr,
250 .progress = cell_progress, 289 .progress = cell_progress,
251 .init_IRQ = cell_init_irq, 290 .init_IRQ = cell_init_irq,
252 .pci_setup_phb = rtas_setup_phb, 291 .pci_setup_phb = cell_setup_phb,
253#ifdef CONFIG_KEXEC 292#ifdef CONFIG_KEXEC
254 .machine_kexec = default_machine_kexec, 293 .machine_kexec = default_machine_kexec,
255 .machine_kexec_prepare = default_machine_kexec_prepare, 294 .machine_kexec_prepare = default_machine_kexec_prepare,
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
new file mode 100644
index 000000000000..418b605ac35a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -0,0 +1,184 @@
1/*
2 * IO workarounds for PCI on Celleb/Cell platform
3 *
4 * (C) Copyright 2006-2007 TOSHIBA CORPORATION
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21#undef DEBUG
22
23#include <linux/kernel.h>
24#include <linux/of_platform.h>
25#include <linux/io.h>
26
27#include <asm/ppc-pci.h>
28#include <asm/pci-bridge.h>
29
30#include "io-workarounds.h"
31
32#define SPIDER_PCI_DISABLE_PREFETCH
33
34struct spiderpci_iowa_private {
35 void __iomem *regs;
36};
37
38static void spiderpci_io_flush(struct iowa_bus *bus)
39{
40 struct spiderpci_iowa_private *priv;
41 u32 val;
42
43 priv = bus->private;
44 val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
45 iosync();
46}
47
48#define SPIDER_PCI_MMIO_READ(name, ret) \
49static ret spiderpci_##name(const PCI_IO_ADDR addr) \
50{ \
51 ret val = __do_##name(addr); \
52 spiderpci_io_flush(iowa_mem_find_bus(addr)); \
53 return val; \
54}
55
56#define SPIDER_PCI_MMIO_READ_STR(name) \
57static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \
58 unsigned long count) \
59{ \
60 __do_##name(addr, buf, count); \
61 spiderpci_io_flush(iowa_mem_find_bus(addr)); \
62}
63
64SPIDER_PCI_MMIO_READ(readb, u8)
65SPIDER_PCI_MMIO_READ(readw, u16)
66SPIDER_PCI_MMIO_READ(readl, u32)
67SPIDER_PCI_MMIO_READ(readq, u64)
68SPIDER_PCI_MMIO_READ(readw_be, u16)
69SPIDER_PCI_MMIO_READ(readl_be, u32)
70SPIDER_PCI_MMIO_READ(readq_be, u64)
71SPIDER_PCI_MMIO_READ_STR(readsb)
72SPIDER_PCI_MMIO_READ_STR(readsw)
73SPIDER_PCI_MMIO_READ_STR(readsl)
74
75static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
76 unsigned long n)
77{
78 __do_memcpy_fromio(dest, src, n);
79 spiderpci_io_flush(iowa_mem_find_bus(src));
80}
81
82static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
83 void __iomem *regs)
84{
85 void *dummy_page_va;
86 dma_addr_t dummy_page_da;
87
88#ifdef SPIDER_PCI_DISABLE_PREFETCH
89 u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT);
90 pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val);
91 out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
92#endif /* SPIDER_PCI_DISABLE_PREFETCH */
93
94 /* setup dummy read */
95 /*
 96 * On CellBlade, we cannot tell which XDR memory kmalloc() will
 97 * use when it allocates dummy_page_va.
 98 * For best performance, the dummy page should come from the XDR
 99 * that is nearest the spider-pci bridge.
 100 * Ideally we would pick the CBE closest to the spider-pci and
 101 * allocate from its XDR, but there is currently no way to do
 102 * that.
103 *
104 * Celleb does not have this problem, because it has only one XDR.
105 */
106 dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
107 if (!dummy_page_va) {
108 pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n");
109 return -1;
110 }
111
112 dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
113 PAGE_SIZE, DMA_FROM_DEVICE);
114 if (dma_mapping_error(dummy_page_da)) {
 115 pr_err("SPIDER-IOWA:Map dummy page failed.\n");
116 kfree(dummy_page_va);
117 return -1;
118 }
119
120 out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da);
121
122 return 0;
123}
124
125int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
126{
127 void __iomem *regs = NULL;
128 struct spiderpci_iowa_private *priv;
129 struct device_node *np = bus->phb->dn;
130 struct resource r;
131 unsigned long offset = (unsigned long)data;
132
133 pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n",
134 np->full_name);
135
136 priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
137 if (!priv) {
138 pr_err("SPIDERPCI-IOWA:"
139 "Can't allocate struct spiderpci_iowa_private");
140 return -1;
141 }
142
143 if (of_address_to_resource(np, 0, &r)) {
144 pr_err("SPIDERPCI-IOWA:Can't get resource.\n");
145 goto error;
146 }
147
148 regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE);
149 if (!regs) {
150 pr_err("SPIDERPCI-IOWA:ioremap failed.\n");
151 goto error;
152 }
153 priv->regs = regs;
154 bus->private = priv;
155
156 if (spiderpci_pci_setup_chip(bus->phb, regs))
157 goto error;
158
159 return 0;
160
161error:
162 kfree(priv);
163 bus->private = NULL;
164
165 if (regs)
166 iounmap(regs);
167
168 return -1;
169}
170
171struct ppc_pci_io spiderpci_ops = {
172 .readb = spiderpci_readb,
173 .readw = spiderpci_readw,
174 .readl = spiderpci_readl,
175 .readq = spiderpci_readq,
176 .readw_be = spiderpci_readw_be,
177 .readl_be = spiderpci_readl_be,
178 .readq_be = spiderpci_readq_be,
179 .readsb = spiderpci_readsb,
180 .readsw = spiderpci_readsw,
181 .readsl = spiderpci_readsl,
182 .memcpy_fromio = spiderpci_memcpy_fromio,
183};
184
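For readability, here is what one of the SPIDER_PCI_MMIO_READ wrappers generated above expands to; this is a mechanical expansion of the macro for the readl case, not new code:

static u32 spiderpci_readl(const PCI_IO_ADDR addr)
{
	u32 val = __do_readl(addr);
	spiderpci_io_flush(iowa_mem_find_bus(addr));
	return val;
}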
diff --git a/arch/powerpc/platforms/celleb/Kconfig b/arch/powerpc/platforms/celleb/Kconfig
deleted file mode 100644
index 372891edcdd2..000000000000
--- a/arch/powerpc/platforms/celleb/Kconfig
+++ /dev/null
@@ -1,12 +0,0 @@
1config PPC_CELLEB
2 bool "Toshiba's Cell Reference Set 'Celleb' Architecture"
3 depends on PPC_MULTIPLATFORM && PPC64
4 select PPC_CELL
5 select PPC_CELL_NATIVE
6 select PPC_RTAS
7 select PPC_INDIRECT_IO
8 select PPC_OF_PLATFORM_PCI
9 select HAS_TXX9_SERIAL
10 select PPC_UDBG_BEAT
11 select USB_OHCI_BIG_ENDIAN_MMIO
12 select USB_EHCI_BIG_ENDIAN_MMIO
diff --git a/arch/powerpc/platforms/celleb/Makefile b/arch/powerpc/platforms/celleb/Makefile
deleted file mode 100644
index 889d43f715ea..000000000000
--- a/arch/powerpc/platforms/celleb/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
1obj-y += interrupt.o iommu.o setup.o \
2 htab.o beat.o hvCall.o pci.o \
3 scc_epci.o scc_uhc.o \
4 io-workarounds.o
5
6obj-$(CONFIG_SMP) += smp.o
7obj-$(CONFIG_PPC_UDBG_BEAT) += udbg_beat.o
8obj-$(CONFIG_SERIAL_TXX9) += scc_sio.o
9obj-$(CONFIG_SPU_BASE) += spu_priv1.o
diff --git a/arch/powerpc/platforms/celleb/io-workarounds.c b/arch/powerpc/platforms/celleb/io-workarounds.c
deleted file mode 100644
index 423339be1bac..000000000000
--- a/arch/powerpc/platforms/celleb/io-workarounds.c
+++ /dev/null
@@ -1,280 +0,0 @@
1/*
2 * Support for Celleb io workarounds
3 *
4 * (C) Copyright 2006-2007 TOSHIBA CORPORATION
5 *
 6 * This file is based on arch/powerpc/platforms/cell/io-workarounds.c
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23#undef DEBUG
24
25#include <linux/of.h>
26#include <linux/of_device.h>
27#include <linux/irq.h>
28
29#include <asm/io.h>
30#include <asm/prom.h>
31#include <asm/machdep.h>
32#include <asm/pci-bridge.h>
33#include <asm/ppc-pci.h>
34
35#include "pci.h"
36
37#define MAX_CELLEB_PCI_BUS 4
38
39void *celleb_dummy_page_va;
40
41static struct celleb_pci_bus {
42 struct pci_controller *phb;
43 void (*dummy_read)(struct pci_controller *);
44} celleb_pci_busses[MAX_CELLEB_PCI_BUS];
45
46static int celleb_pci_count = 0;
47
48static struct celleb_pci_bus *celleb_pci_find(unsigned long vaddr,
49 unsigned long paddr)
50{
51 int i, j;
52 struct resource *res;
53
54 for (i = 0; i < celleb_pci_count; i++) {
55 struct celleb_pci_bus *bus = &celleb_pci_busses[i];
56 struct pci_controller *phb = bus->phb;
57 if (paddr)
58 for (j = 0; j < 3; j++) {
59 res = &phb->mem_resources[j];
60 if (paddr >= res->start && paddr <= res->end)
61 return bus;
62 }
63 res = &phb->io_resource;
64 if (vaddr && vaddr >= res->start && vaddr <= res->end)
65 return bus;
66 }
67 return NULL;
68}
69
70static void celleb_io_flush(const PCI_IO_ADDR addr)
71{
72 struct celleb_pci_bus *bus;
73 int token;
74
75 token = PCI_GET_ADDR_TOKEN(addr);
76
77 if (token && token <= celleb_pci_count)
78 bus = &celleb_pci_busses[token - 1];
79 else {
80 unsigned long vaddr, paddr;
81 pte_t *ptep;
82
83 vaddr = (unsigned long)PCI_FIX_ADDR(addr);
84 if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
85 return;
86
87 ptep = find_linux_pte(init_mm.pgd, vaddr);
88 if (ptep == NULL)
89 paddr = 0;
90 else
91 paddr = pte_pfn(*ptep) << PAGE_SHIFT;
92 bus = celleb_pci_find(vaddr, paddr);
93
94 if (bus == NULL)
95 return;
96 }
97
98 if (bus->dummy_read)
99 bus->dummy_read(bus->phb);
100}
101
102static u8 celleb_readb(const PCI_IO_ADDR addr)
103{
104 u8 val;
105 val = __do_readb(addr);
106 celleb_io_flush(addr);
107 return val;
108}
109
110static u16 celleb_readw(const PCI_IO_ADDR addr)
111{
112 u16 val;
113 val = __do_readw(addr);
114 celleb_io_flush(addr);
115 return val;
116}
117
118static u32 celleb_readl(const PCI_IO_ADDR addr)
119{
120 u32 val;
121 val = __do_readl(addr);
122 celleb_io_flush(addr);
123 return val;
124}
125
126static u64 celleb_readq(const PCI_IO_ADDR addr)
127{
128 u64 val;
129 val = __do_readq(addr);
130 celleb_io_flush(addr);
131 return val;
132}
133
134static u16 celleb_readw_be(const PCI_IO_ADDR addr)
135{
136 u16 val;
137 val = __do_readw_be(addr);
138 celleb_io_flush(addr);
139 return val;
140}
141
142static u32 celleb_readl_be(const PCI_IO_ADDR addr)
143{
144 u32 val;
145 val = __do_readl_be(addr);
146 celleb_io_flush(addr);
147 return val;
148}
149
150static u64 celleb_readq_be(const PCI_IO_ADDR addr)
151{
152 u64 val;
153 val = __do_readq_be(addr);
154 celleb_io_flush(addr);
155 return val;
156}
157
158static void celleb_readsb(const PCI_IO_ADDR addr,
159 void *buf, unsigned long count)
160{
161 __do_readsb(addr, buf, count);
162 celleb_io_flush(addr);
163}
164
165static void celleb_readsw(const PCI_IO_ADDR addr,
166 void *buf, unsigned long count)
167{
168 __do_readsw(addr, buf, count);
169 celleb_io_flush(addr);
170}
171
172static void celleb_readsl(const PCI_IO_ADDR addr,
173 void *buf, unsigned long count)
174{
175 __do_readsl(addr, buf, count);
176 celleb_io_flush(addr);
177}
178
179static void celleb_memcpy_fromio(void *dest,
180 const PCI_IO_ADDR src,
181 unsigned long n)
182{
183 __do_memcpy_fromio(dest, src, n);
184 celleb_io_flush(src);
185}
186
187static void __iomem *celleb_ioremap(unsigned long addr,
188 unsigned long size,
189 unsigned long flags)
190{
191 struct celleb_pci_bus *bus;
192 void __iomem *res = __ioremap(addr, size, flags);
193 int busno;
194
195 bus = celleb_pci_find(0, addr);
196 if (bus != NULL) {
197 busno = bus - celleb_pci_busses;
198 PCI_SET_ADDR_TOKEN(res, busno + 1);
199 }
200 return res;
201}
202
203static void celleb_iounmap(volatile void __iomem *addr)
204{
205 return __iounmap(PCI_FIX_ADDR(addr));
206}
207
208static struct ppc_pci_io celleb_pci_io __initdata = {
209 .readb = celleb_readb,
210 .readw = celleb_readw,
211 .readl = celleb_readl,
212 .readq = celleb_readq,
213 .readw_be = celleb_readw_be,
214 .readl_be = celleb_readl_be,
215 .readq_be = celleb_readq_be,
216 .readsb = celleb_readsb,
217 .readsw = celleb_readsw,
218 .readsl = celleb_readsl,
219 .memcpy_fromio = celleb_memcpy_fromio,
220};
221
222void __init celleb_pci_add_one(struct pci_controller *phb,
223 void (*dummy_read)(struct pci_controller *))
224{
225 struct celleb_pci_bus *bus = &celleb_pci_busses[celleb_pci_count];
226 struct device_node *np = phb->dn;
227
228 if (celleb_pci_count >= MAX_CELLEB_PCI_BUS) {
229 printk(KERN_ERR "Too many pci bridges, workarounds"
230 " disabled for %s\n", np->full_name);
231 return;
232 }
233
234 celleb_pci_count++;
235
236 bus->phb = phb;
237 bus->dummy_read = dummy_read;
238}
239
240static struct of_device_id celleb_pci_workaround_match[] __initdata = {
241 {
242 .name = "pci-pseudo",
243 .data = fake_pci_workaround_init,
244 }, {
245 .name = "epci",
246 .data = epci_workaround_init,
247 }, {
248 },
249};
250
251int __init celleb_pci_workaround_init(void)
252{
253 struct pci_controller *phb;
254 struct device_node *node;
255 const struct of_device_id *match;
256 void (*init_func)(struct pci_controller *);
257
258 celleb_dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
259 if (!celleb_dummy_page_va) {
260 printk(KERN_ERR "Celleb: dummy read disabled. "
261 "Alloc celleb_dummy_page_va failed\n");
262 return 1;
263 }
264
265 list_for_each_entry(phb, &hose_list, list_node) {
266 node = phb->dn;
267 match = of_match_node(celleb_pci_workaround_match, node);
268
269 if (match) {
270 init_func = match->data;
271 (*init_func)(phb);
272 }
273 }
274
275 ppc_pci_io = celleb_pci_io;
276 ppc_md.ioremap = celleb_ioremap;
277 ppc_md.iounmap = celleb_iounmap;
278
279 return 0;
280}
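Both the deleted celleb code above and the new iowa_ioremap() lean on powerpc's indirect-IO address tokens: PCI_SET_ADDR_TOKEN() tucks a small bus index into otherwise unused high bits of the __iomem cookie, so the flush path can map an MMIO address back to its bridge without the page-table walk fallback seen in celleb_io_flush(). Conceptually it works like the sketch below; the mask and shift values are illustrative, not the real <asm/io.h> ones:

/* Illustrative only - the real values live in <asm/io.h>. */
#define EX_TOKEN_SHIFT	48
#define EX_TOKEN_MASK	(0xfffUL << EX_TOKEN_SHIFT)

static inline void __iomem *ex_set_token(void __iomem *p, unsigned long tok)
{
	unsigned long v = (unsigned long)p & ~EX_TOKEN_MASK;
	return (void __iomem *)(v | (tok << EX_TOKEN_SHIFT));
}

static inline unsigned long ex_get_token(const void __iomem *p)
{
	return ((unsigned long)p & EX_TOKEN_MASK) >> EX_TOKEN_SHIFT;
}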
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index c775cd4b3d6e..8ff330d026ca 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -59,8 +59,33 @@ system_reset_iSeries:
59 andc r4,r4,r5 59 andc r4,r4,r5
60 mtspr SPRN_CTRLT,r4 60 mtspr SPRN_CTRLT,r4
61 61
62/* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */
 63/* In the UP case we'll yield() later, and we will not access the paca anyway */
64#ifdef CONFIG_SMP
621: 651:
63 HMT_LOW 66 HMT_LOW
67 LOAD_REG_IMMEDIATE(r23, __secondary_hold_spinloop)
68 ld r23,0(r23)
69 sync
70 LOAD_REG_IMMEDIATE(r3,current_set)
71 sldi r28,r24,3 /* get current_set[cpu#] */
72 ldx r3,r3,r28
73 addi r1,r3,THREAD_SIZE
74 subi r1,r1,STACK_FRAME_OVERHEAD
75
76 cmpwi 0,r23,0 /* Keep poking the Hypervisor until */
77 bne 2f /* we're released */
78 /* Let the Hypervisor know we are alive */
79 /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
80 lis r3,0x8002
81 rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */
82 li r0,-1 /* r0=-1 indicates a Hypervisor call */
83 sc /* Invoke the hypervisor via a system call */
84 b 1b
85#endif
86
872:
88 HMT_LOW
64#ifdef CONFIG_SMP 89#ifdef CONFIG_SMP
65 lbz r23,PACAPROCSTART(r13) /* Test if this processor 90 lbz r23,PACAPROCSTART(r13) /* Test if this processor
66 * should start */ 91 * should start */
@@ -91,7 +116,7 @@ iSeries_secondary_smp_loop:
91 li r0,-1 /* r0=-1 indicates a Hypervisor call */ 116 li r0,-1 /* r0=-1 indicates a Hypervisor call */
92 sc /* Invoke the hypervisor via a system call */ 117 sc /* Invoke the hypervisor via a system call */
93 mfspr r13,SPRN_SPRG3 /* Put r13 back ???? */ 118 mfspr r13,SPRN_SPRG3 /* Put r13 back ???? */
94 b 1b /* If SMP not configured, secondaries 119 b 2b /* If SMP not configured, secondaries
95 * loop forever */ 120 * loop forever */
96 121
97/*** ISeries-LPAR interrupt handlers ***/ 122/*** ISeries-LPAR interrupt handlers ***/
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index c73379ec9141..1d201782d4e5 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -25,6 +25,7 @@
25#include <linux/syscalls.h> 25#include <linux/syscalls.h>
26#include <linux/ctype.h> 26#include <linux/ctype.h>
27#include <linux/lmb.h> 27#include <linux/lmb.h>
28#include <linux/of.h>
28 29
29#include <asm/prom.h> 30#include <asm/prom.h>
30 31
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 306a9d07491d..07fe5b69b9e2 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -34,3 +34,8 @@ config LPARCFG
34 help 34 help
35 Provide system capacity information via human readable 35 Provide system capacity information via human readable
36 <key word>=<value> pairs through a /proc/ppc64/lparcfg interface. 36 <key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
37
38config PPC_PSERIES_DEBUG
39 depends on PPC_PSERIES && PPC_EARLY_DEBUG
40 bool "Enable extra debug logging in platforms/pseries"
41 default y
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index bdae04bb7a01..bd2593ed28dd 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -2,6 +2,10 @@ ifeq ($(CONFIG_PPC64),y)
2EXTRA_CFLAGS += -mno-minimal-toc 2EXTRA_CFLAGS += -mno-minimal-toc
3endif 3endif
4 4
5ifeq ($(CONFIG_PPC_PSERIES_DEBUG),y)
6EXTRA_CFLAGS += -DDEBUG
7endif
8
5obj-y := lpar.o hvCall.o nvram.o reconfig.o \ 9obj-y := lpar.o hvCall.o nvram.o reconfig.o \
6 setup.o iommu.o ras.o rtasd.o \ 10 setup.o iommu.o ras.o rtasd.o \
7 firmware.o power.o 11 firmware.o power.o
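The DBG() wrappers scattered through platforms/pseries are replaced by pr_debug() in the hunks that follow, and the new CONFIG_PPC_PSERIES_DEBUG option simply makes the Makefile above add -DDEBUG for this directory. As a reminder of why that is enough, pr_debug() in kernels of this era reduces to roughly the following (dynamic debug did not exist yet); this is a sketch of the behaviour, not a verbatim copy of <linux/kernel.h>:

#ifdef DEBUG
#define pr_debug(fmt, ...)	printk(KERN_DEBUG fmt, ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...)	do { } while (0)	/* compiled out */
#endif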
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 550b2f7d2cc1..a3fd56b186e6 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -39,7 +39,6 @@
39#include <asm/ppc-pci.h> 39#include <asm/ppc-pci.h>
40#include <asm/rtas.h> 40#include <asm/rtas.h>
41 41
42#undef DEBUG
43 42
44/** Overview: 43/** Overview:
45 * EEH, or "Extended Error Handling" is a PCI bridge technology for 44 * EEH, or "Extended Error Handling" is a PCI bridge technology for
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index 1e83fcd0df31..ce37040af870 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -28,7 +28,6 @@
28#include <asm/pci-bridge.h> 28#include <asm/pci-bridge.h>
29#include <asm/ppc-pci.h> 29#include <asm/ppc-pci.h>
30 30
31#undef DEBUG
32 31
33/** 32/**
34 * The pci address cache subsystem. This subsystem places 33 * The pci address cache subsystem. This subsystem places
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index b765b7c77b65..9d3a40f45974 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -21,17 +21,11 @@
21 * 2 of the License, or (at your option) any later version. 21 * 2 of the License, or (at your option) any later version.
22 */ 22 */
23 23
24#undef DEBUG
25 24
26#include <asm/firmware.h> 25#include <asm/firmware.h>
27#include <asm/prom.h> 26#include <asm/prom.h>
28#include <asm/udbg.h> 27#include <asm/udbg.h>
29 28
30#ifdef DEBUG
31#define DBG(fmt...) udbg_printf(fmt)
32#else
33#define DBG(fmt...)
34#endif
35 29
36typedef struct { 30typedef struct {
37 unsigned long val; 31 unsigned long val;
@@ -72,7 +66,7 @@ void __init fw_feature_init(const char *hypertas, unsigned long len)
72 const char *s; 66 const char *s;
73 int i; 67 int i;
74 68
75 DBG(" -> fw_feature_init()\n"); 69 pr_debug(" -> fw_feature_init()\n");
76 70
77 for (s = hypertas; s < hypertas + len; s += strlen(s) + 1) { 71 for (s = hypertas; s < hypertas + len; s += strlen(s) + 1) {
78 for (i = 0; i < FIRMWARE_MAX_FEATURES; i++) { 72 for (i = 0; i < FIRMWARE_MAX_FEATURES; i++) {
@@ -88,5 +82,5 @@ void __init fw_feature_init(const char *hypertas, unsigned long len)
88 } 82 }
89 } 83 }
90 84
91 DBG(" <- fw_feature_init()\n"); 85 pr_debug(" <- fw_feature_init()\n");
92} 86}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index a65c76308201..176f1f39d2d5 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -47,7 +47,6 @@
47 47
48#include "plpar_wrappers.h" 48#include "plpar_wrappers.h"
49 49
50#define DBG(fmt...)
51 50
52static void tce_build_pSeries(struct iommu_table *tbl, long index, 51static void tce_build_pSeries(struct iommu_table *tbl, long index,
53 long npages, unsigned long uaddr, 52 long npages, unsigned long uaddr,
@@ -322,7 +321,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
322 321
323 dn = pci_bus_to_OF_node(bus); 322 dn = pci_bus_to_OF_node(bus);
324 323
325 DBG("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name); 324 pr_debug("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name);
326 325
327 if (bus->self) { 326 if (bus->self) {
328 /* This is not a root bus, any setup will be done for the 327 /* This is not a root bus, any setup will be done for the
@@ -347,7 +346,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
347 for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) 346 for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
348 children++; 347 children++;
349 348
350 DBG("Children: %d\n", children); 349 pr_debug("Children: %d\n", children);
351 350
352 /* Calculate amount of DMA window per slot. Each window must be 351 /* Calculate amount of DMA window per slot. Each window must be
353 * a power of two (due to pci_alloc_consistent requirements). 352 * a power of two (due to pci_alloc_consistent requirements).
@@ -361,8 +360,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
361 360
362 while (pci->phb->dma_window_size * children > 0x80000000ul) 361 while (pci->phb->dma_window_size * children > 0x80000000ul)
363 pci->phb->dma_window_size >>= 1; 362 pci->phb->dma_window_size >>= 1;
364 DBG("No ISA/IDE, window size is 0x%lx\n", 363 pr_debug("No ISA/IDE, window size is 0x%lx\n",
365 pci->phb->dma_window_size); 364 pci->phb->dma_window_size);
366 pci->phb->dma_window_base_cur = 0; 365 pci->phb->dma_window_base_cur = 0;
367 366
368 return; 367 return;
@@ -387,8 +386,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
387 while (pci->phb->dma_window_size * children > 0x70000000ul) 386 while (pci->phb->dma_window_size * children > 0x70000000ul)
388 pci->phb->dma_window_size >>= 1; 387 pci->phb->dma_window_size >>= 1;
389 388
390 DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size); 389 pr_debug("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size);
391
392} 390}
393 391
394 392
@@ -401,7 +399,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
401 399
402 dn = pci_bus_to_OF_node(bus); 400 dn = pci_bus_to_OF_node(bus);
403 401
404 DBG("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n", dn->full_name); 402 pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n",
403 dn->full_name);
405 404
406 /* Find nearest ibm,dma-window, walking up the device tree */ 405 /* Find nearest ibm,dma-window, walking up the device tree */
407 for (pdn = dn; pdn != NULL; pdn = pdn->parent) { 406 for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
@@ -411,14 +410,14 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
411 } 410 }
412 411
413 if (dma_window == NULL) { 412 if (dma_window == NULL) {
414 DBG(" no ibm,dma-window property !\n"); 413 pr_debug(" no ibm,dma-window property !\n");
415 return; 414 return;
416 } 415 }
417 416
418 ppci = PCI_DN(pdn); 417 ppci = PCI_DN(pdn);
419 418
420 DBG(" parent is %s, iommu_table: 0x%p\n", 419 pr_debug(" parent is %s, iommu_table: 0x%p\n",
421 pdn->full_name, ppci->iommu_table); 420 pdn->full_name, ppci->iommu_table);
422 421
423 if (!ppci->iommu_table) { 422 if (!ppci->iommu_table) {
424 tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, 423 tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
@@ -426,7 +425,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
426 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window, 425 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window,
427 bus->number); 426 bus->number);
428 ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); 427 ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
429 DBG(" created table: %p\n", ppci->iommu_table); 428 pr_debug(" created table: %p\n", ppci->iommu_table);
430 } 429 }
431 430
432 if (pdn != dn) 431 if (pdn != dn)
@@ -439,7 +438,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
439 struct device_node *dn; 438 struct device_node *dn;
440 struct iommu_table *tbl; 439 struct iommu_table *tbl;
441 440
442 DBG("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); 441 pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));
443 442
444 dn = dev->dev.archdata.of_node; 443 dn = dev->dev.archdata.of_node;
445 444
@@ -450,7 +449,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
450 if (!dev->bus->self) { 449 if (!dev->bus->self) {
451 struct pci_controller *phb = PCI_DN(dn)->phb; 450 struct pci_controller *phb = PCI_DN(dn)->phb;
452 451
453 DBG(" --> first child, no bridge. Allocating iommu table.\n"); 452 pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
454 tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, 453 tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
455 phb->node); 454 phb->node);
456 iommu_table_setparms(phb, dn, tbl); 455 iommu_table_setparms(phb, dn, tbl);
@@ -480,7 +479,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
480 const void *dma_window = NULL; 479 const void *dma_window = NULL;
481 struct pci_dn *pci; 480 struct pci_dn *pci;
482 481
483 DBG("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); 482 pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
484 483
485 /* dev setup for LPAR is a little tricky, since the device tree might 484 /* dev setup for LPAR is a little tricky, since the device tree might
486 * contain the dma-window properties per-device and not neccesarily 485 * contain the dma-window properties per-device and not neccesarily
@@ -489,7 +488,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
489 * already allocated. 488 * already allocated.
490 */ 489 */
491 dn = pci_device_to_OF_node(dev); 490 dn = pci_device_to_OF_node(dev);
492 DBG(" node is %s\n", dn->full_name); 491 pr_debug(" node is %s\n", dn->full_name);
493 492
494 for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; 493 for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
495 pdn = pdn->parent) { 494 pdn = pdn->parent) {
@@ -504,13 +503,13 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
504 pci_name(dev), dn? dn->full_name : "<null>"); 503 pci_name(dev), dn? dn->full_name : "<null>");
505 return; 504 return;
506 } 505 }
507 DBG(" parent is %s\n", pdn->full_name); 506 pr_debug(" parent is %s\n", pdn->full_name);
508 507
509 /* Check for parent == NULL so we don't try to setup the empty EADS 508 /* Check for parent == NULL so we don't try to setup the empty EADS
510 * slots on POWER4 machines. 509 * slots on POWER4 machines.
511 */ 510 */
512 if (dma_window == NULL || pdn->parent == NULL) { 511 if (dma_window == NULL || pdn->parent == NULL) {
513 DBG(" no dma window for device, linking to parent\n"); 512 pr_debug(" no dma window for device, linking to parent\n");
514 dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table; 513 dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table;
515 return; 514 return;
516 } 515 }
@@ -522,9 +521,9 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
522 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window, 521 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window,
523 pci->phb->bus->number); 522 pci->phb->bus->number);
524 pci->iommu_table = iommu_init_table(tbl, pci->phb->node); 523 pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
525 DBG(" created table: %p\n", pci->iommu_table); 524 pr_debug(" created table: %p\n", pci->iommu_table);
526 } else { 525 } else {
527 DBG(" found DMA window, table: %p\n", pci->iommu_table); 526 pr_debug(" found DMA window, table: %p\n", pci->iommu_table);
528 } 527 }
529 528
530 dev->dev.archdata.dma_data = pci->iommu_table; 529 dev->dev.archdata.dma_data = pci->iommu_table;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 9235c469449e..2cbaedb17f3e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -19,7 +19,8 @@
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21
22#undef DEBUG_LOW 22/* Enables debugging of low-level hash table routines - careful! */
23#undef DEBUG
23 24
24#include <linux/kernel.h> 25#include <linux/kernel.h>
25#include <linux/dma-mapping.h> 26#include <linux/dma-mapping.h>
@@ -42,11 +43,6 @@
42#include "plpar_wrappers.h" 43#include "plpar_wrappers.h"
43#include "pseries.h" 44#include "pseries.h"
44 45
45#ifdef DEBUG_LOW
46#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0)
47#else
48#define DBG_LOW(fmt...) do { } while(0)
49#endif
50 46
51/* in hvCall.S */ 47/* in hvCall.S */
52EXPORT_SYMBOL(plpar_hcall); 48EXPORT_SYMBOL(plpar_hcall);
@@ -196,6 +192,8 @@ void __init udbg_init_debug_lpar(void)
196 udbg_putc = udbg_putcLP; 192 udbg_putc = udbg_putcLP;
197 udbg_getc = udbg_getcLP; 193 udbg_getc = udbg_getcLP;
198 udbg_getc_poll = udbg_getc_pollLP; 194 udbg_getc_poll = udbg_getc_pollLP;
195
196 register_early_udbg_console();
199} 197}
200 198
201/* returns 0 if couldn't find or use /chosen/stdout as console */ 199/* returns 0 if couldn't find or use /chosen/stdout as console */
@@ -288,15 +286,15 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
288 unsigned long hpte_v, hpte_r; 286 unsigned long hpte_v, hpte_r;
289 287
290 if (!(vflags & HPTE_V_BOLTED)) 288 if (!(vflags & HPTE_V_BOLTED))
291 DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " 289 pr_debug("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
292 "rflags=%lx, vflags=%lx, psize=%d)\n", 290 "rflags=%lx, vflags=%lx, psize=%d)\n",
293 hpte_group, va, pa, rflags, vflags, psize); 291 hpte_group, va, pa, rflags, vflags, psize);
294 292
295 hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; 293 hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
296 hpte_r = hpte_encode_r(pa, psize) | rflags; 294 hpte_r = hpte_encode_r(pa, psize) | rflags;
297 295
298 if (!(vflags & HPTE_V_BOLTED)) 296 if (!(vflags & HPTE_V_BOLTED))
299 DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); 297 pr_debug(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
300 298
301 /* Now fill in the actual HPTE */ 299 /* Now fill in the actual HPTE */
302 /* Set CEC cookie to 0 */ 300 /* Set CEC cookie to 0 */
@@ -313,7 +311,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
313 lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); 311 lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
314 if (unlikely(lpar_rc == H_PTEG_FULL)) { 312 if (unlikely(lpar_rc == H_PTEG_FULL)) {
315 if (!(vflags & HPTE_V_BOLTED)) 313 if (!(vflags & HPTE_V_BOLTED))
316 DBG_LOW(" full\n"); 314 pr_debug(" full\n");
317 return -1; 315 return -1;
318 } 316 }
319 317
@@ -324,11 +322,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
324 */ 322 */
325 if (unlikely(lpar_rc != H_SUCCESS)) { 323 if (unlikely(lpar_rc != H_SUCCESS)) {
326 if (!(vflags & HPTE_V_BOLTED)) 324 if (!(vflags & HPTE_V_BOLTED))
327 DBG_LOW(" lpar err %d\n", lpar_rc); 325 pr_debug(" lpar err %lu\n", lpar_rc);
328 return -2; 326 return -2;
329 } 327 }
330 if (!(vflags & HPTE_V_BOLTED)) 328 if (!(vflags & HPTE_V_BOLTED))
331 DBG_LOW(" -> slot: %d\n", slot & 7); 329 pr_debug(" -> slot: %lu\n", slot & 7);
332 330
333 /* Because of iSeries, we have to pass down the secondary 331 /* Because of iSeries, we have to pass down the secondary
334 * bucket bit here as well 332 * bucket bit here as well
@@ -420,17 +418,17 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
420 418
421 want_v = hpte_encode_avpn(va, psize, ssize); 419 want_v = hpte_encode_avpn(va, psize, ssize);
422 420
423 DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ", 421 pr_debug(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
424 want_v, slot, flags, psize); 422 want_v, slot, flags, psize);
425 423
426 lpar_rc = plpar_pte_protect(flags, slot, want_v); 424 lpar_rc = plpar_pte_protect(flags, slot, want_v);
427 425
428 if (lpar_rc == H_NOT_FOUND) { 426 if (lpar_rc == H_NOT_FOUND) {
429 DBG_LOW("not found !\n"); 427 pr_debug("not found !\n");
430 return -1; 428 return -1;
431 } 429 }
432 430
433 DBG_LOW("ok\n"); 431 pr_debug("ok\n");
434 432
435 BUG_ON(lpar_rc != H_SUCCESS); 433 BUG_ON(lpar_rc != H_SUCCESS);
436 434
@@ -505,8 +503,8 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
505 unsigned long lpar_rc; 503 unsigned long lpar_rc;
506 unsigned long dummy1, dummy2; 504 unsigned long dummy1, dummy2;
507 505
508 DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d", 506 pr_debug(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
509 slot, va, psize, local); 507 slot, va, psize, local);
510 508
511 want_v = hpte_encode_avpn(va, psize, ssize); 509 want_v = hpte_encode_avpn(va, psize, ssize);
512 lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); 510 lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index a1ab25c7082f..2b548afd1003 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -67,8 +67,6 @@ static int ras_check_exception_token;
67static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); 67static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
68static irqreturn_t ras_error_interrupt(int irq, void *dev_id); 68static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
69 69
70/* #define DEBUG */
71
72 70
73static void request_ras_irqs(struct device_node *np, 71static void request_ras_irqs(struct device_node *np,
74 irq_handler_t handler, 72 irq_handler_t handler,
@@ -237,7 +235,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
237 printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", 235 printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n",
238 *((unsigned long *)&ras_log_buf), status); 236 *((unsigned long *)&ras_log_buf), status);
239 237
240#ifndef DEBUG 238#ifndef DEBUG_RTAS_POWER_OFF
241 /* Don't actually power off when debugging so we can test 239 /* Don't actually power off when debugging so we can test
242 * without actually failing while injecting errors. 240 * without actually failing while injecting errors.
243 * Error data will not be logged to syslog. 241 * Error data will not be logged to syslog.
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index e3078ce41518..befadd4f9524 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -29,11 +29,6 @@
29#include <asm/atomic.h> 29#include <asm/atomic.h>
30#include <asm/machdep.h> 30#include <asm/machdep.h>
31 31
32#if 0
33#define DEBUG(A...) printk(KERN_ERR A)
34#else
35#define DEBUG(A...)
36#endif
37 32
38static DEFINE_SPINLOCK(rtasd_log_lock); 33static DEFINE_SPINLOCK(rtasd_log_lock);
39 34
@@ -198,7 +193,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
198 unsigned long s; 193 unsigned long s;
199 int len = 0; 194 int len = 0;
200 195
201 DEBUG("logging event\n"); 196 pr_debug("rtasd: logging event\n");
202 if (buf == NULL) 197 if (buf == NULL)
203 return; 198 return;
204 199
@@ -409,7 +404,8 @@ static int rtasd(void *unused)
409 daemonize("rtasd"); 404 daemonize("rtasd");
410 405
411 printk(KERN_DEBUG "RTAS daemon started\n"); 406 printk(KERN_DEBUG "RTAS daemon started\n");
412 DEBUG("will sleep for %d milliseconds\n", (30000/rtas_event_scan_rate)); 407 pr_debug("rtasd: will sleep for %d milliseconds\n",
408 (30000 / rtas_event_scan_rate));
413 409
414 /* See if we have any error stored in NVRAM */ 410 /* See if we have any error stored in NVRAM */
415 memset(logdata, 0, rtas_error_log_max); 411 memset(logdata, 0, rtas_error_log_max);
@@ -428,9 +424,9 @@ static int rtasd(void *unused)
428 do_event_scan_all_cpus(1000); 424 do_event_scan_all_cpus(1000);
429 425
430 if (surveillance_timeout != -1) { 426 if (surveillance_timeout != -1) {
431 DEBUG("enabling surveillance\n"); 427 pr_debug("rtasd: enabling surveillance\n");
432 enable_surveillance(surveillance_timeout); 428 enable_surveillance(surveillance_timeout);
433 DEBUG("surveillance enabled\n"); 429 pr_debug("rtasd: surveillance enabled\n");
434 } 430 }
435 431
436 /* Delay should be at least one second since some 432 /* Delay should be at least one second since some
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index e5b0ea870164..bec3803f0618 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -38,9 +38,7 @@
38#define SCANLOG_HWERROR -1 38#define SCANLOG_HWERROR -1
39#define SCANLOG_CONTINUE 1 39#define SCANLOG_CONTINUE 1
40 40
41#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0)
42 41
43static int scanlog_debug;
44static unsigned int ibm_scan_log_dump; /* RTAS token */ 42static unsigned int ibm_scan_log_dump; /* RTAS token */
45static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */ 43static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */
46 44
@@ -86,14 +84,14 @@ static ssize_t scanlog_read(struct file *file, char __user *buf,
86 memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); 84 memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE);
87 spin_unlock(&rtas_data_buf_lock); 85 spin_unlock(&rtas_data_buf_lock);
88 86
89 DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n", 87 pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \
90 status, data[0], data[1], data[2]); 88 "data[2]=%x\n", status, data[0], data[1], data[2]);
91 switch (status) { 89 switch (status) {
92 case SCANLOG_COMPLETE: 90 case SCANLOG_COMPLETE:
93 DEBUG("hit eof\n"); 91 pr_debug("scanlog: hit eof\n");
94 return 0; 92 return 0;
95 case SCANLOG_HWERROR: 93 case SCANLOG_HWERROR:
96 DEBUG("hardware error reading scan log data\n"); 94 pr_debug("scanlog: hardware error reading data\n");
97 return -EIO; 95 return -EIO;
98 case SCANLOG_CONTINUE: 96 case SCANLOG_CONTINUE:
99 /* We may or may not have data yet */ 97 /* We may or may not have data yet */
@@ -110,7 +108,8 @@ static ssize_t scanlog_read(struct file *file, char __user *buf,
110 /* Assume extended busy */ 108 /* Assume extended busy */
111 wait_time = rtas_busy_delay_time(status); 109 wait_time = rtas_busy_delay_time(status);
112 if (!wait_time) { 110 if (!wait_time) {
113 printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status); 111 printk(KERN_ERR "scanlog: unknown error " \
112 "from rtas: %d\n", status);
114 return -EIO; 113 return -EIO;
115 } 114 }
116 } 115 }
@@ -134,15 +133,9 @@ static ssize_t scanlog_write(struct file * file, const char __user * buf,
134 133
135 if (buf) { 134 if (buf) {
136 if (strncmp(stkbuf, "reset", 5) == 0) { 135 if (strncmp(stkbuf, "reset", 5) == 0) {
137 DEBUG("reset scanlog\n"); 136 pr_debug("scanlog: reset scanlog\n");
138 status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); 137 status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0);
139 DEBUG("rtas returns %d\n", status); 138 pr_debug("scanlog: rtas returns %d\n", status);
140 } else if (strncmp(stkbuf, "debugon", 7) == 0) {
141 printk(KERN_ERR "scanlog: debug on\n");
142 scanlog_debug = 1;
143 } else if (strncmp(stkbuf, "debugoff", 8) == 0) {
144 printk(KERN_ERR "scanlog: debug off\n");
145 scanlog_debug = 0;
146 } 139 }
147 } 140 }
148 return count; 141 return count;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f66aa9c3b135..f5d29f5b13c1 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -16,8 +16,6 @@
16 * bootup setup stuff.. 16 * bootup setup stuff..
17 */ 17 */
18 18
19#undef DEBUG
20
21#include <linux/cpu.h> 19#include <linux/cpu.h>
22#include <linux/errno.h> 20#include <linux/errno.h>
23#include <linux/sched.h> 21#include <linux/sched.h>
@@ -70,11 +68,6 @@
70#include "plpar_wrappers.h" 68#include "plpar_wrappers.h"
71#include "pseries.h" 69#include "pseries.h"
72 70
73#ifdef DEBUG
74#define DBG(fmt...) udbg_printf(fmt)
75#else
76#define DBG(fmt...)
77#endif
78 71
79int fwnmi_active; /* TRUE if an FWNMI handler is present */ 72int fwnmi_active; /* TRUE if an FWNMI handler is present */
80 73
@@ -326,7 +319,7 @@ static int pseries_set_xdabr(unsigned long dabr)
326 */ 319 */
327static void __init pSeries_init_early(void) 320static void __init pSeries_init_early(void)
328{ 321{
329 DBG(" -> pSeries_init_early()\n"); 322 pr_debug(" -> pSeries_init_early()\n");
330 323
331 if (firmware_has_feature(FW_FEATURE_LPAR)) 324 if (firmware_has_feature(FW_FEATURE_LPAR))
332 find_udbg_vterm(); 325 find_udbg_vterm();
@@ -338,7 +331,7 @@ static void __init pSeries_init_early(void)
338 331
339 iommu_init_early_pSeries(); 332 iommu_init_early_pSeries();
340 333
341 DBG(" <- pSeries_init_early()\n"); 334 pr_debug(" <- pSeries_init_early()\n");
342} 335}
343 336
344/* 337/*
@@ -383,7 +376,7 @@ static int __init pSeries_probe(void)
383 of_flat_dt_is_compatible(root, "IBM,CBEA")) 376 of_flat_dt_is_compatible(root, "IBM,CBEA"))
384 return 0; 377 return 0;
385 378
386 DBG("pSeries detected, looking for LPAR capability...\n"); 379 pr_debug("pSeries detected, looking for LPAR capability...\n");
387 380
388 /* Now try to figure out if we are running on LPAR */ 381 /* Now try to figure out if we are running on LPAR */
389 of_scan_flat_dt(pSeries_probe_hypertas, NULL); 382 of_scan_flat_dt(pSeries_probe_hypertas, NULL);
@@ -393,8 +386,8 @@ static int __init pSeries_probe(void)
393 else 386 else
394 hpte_init_native(); 387 hpte_init_native();
395 388
396 DBG("Machine is%s LPAR !\n", 389 pr_debug("Machine is%s LPAR !\n",
397 (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); 390 (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");
398 391
399 return 1; 392 return 1;
400} 393}
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index ea4c65917a64..9d8f8c84ab89 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -12,7 +12,6 @@
12 * 2 of the License, or (at your option) any later version. 12 * 2 of the License, or (at your option) any later version.
13 */ 13 */
14 14
15#undef DEBUG
16 15
17#include <linux/kernel.h> 16#include <linux/kernel.h>
18#include <linux/module.h> 17#include <linux/module.h>
@@ -51,12 +50,6 @@
51#include "plpar_wrappers.h" 50#include "plpar_wrappers.h"
52#include "pseries.h" 51#include "pseries.h"
53 52
54#ifdef DEBUG
55#include <asm/udbg.h>
56#define DBG(fmt...) udbg_printf(fmt)
57#else
58#define DBG(fmt...)
59#endif
60 53
61/* 54/*
62 * The primary thread of each non-boot processor is recorded here before 55 * The primary thread of each non-boot processor is recorded here before
@@ -231,7 +224,7 @@ static void __init smp_init_pseries(void)
231{ 224{
232 int i; 225 int i;
233 226
234 DBG(" -> smp_init_pSeries()\n"); 227 pr_debug(" -> smp_init_pSeries()\n");
235 228
236 /* Mark threads which are still spinning in hold loops. */ 229 /* Mark threads which are still spinning in hold loops. */
237 if (cpu_has_feature(CPU_FTR_SMT)) { 230 if (cpu_has_feature(CPU_FTR_SMT)) {
@@ -255,7 +248,7 @@ static void __init smp_init_pseries(void)
255 smp_ops->take_timebase = pSeries_take_timebase; 248 smp_ops->take_timebase = pSeries_take_timebase;
256 } 249 }
257 250
258 DBG(" <- smp_init_pSeries()\n"); 251 pr_debug(" <- smp_init_pSeries()\n");
259} 252}
260 253
261#ifdef CONFIG_MPIC 254#ifdef CONFIG_MPIC
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 43df53c30aa0..ebebc28fe895 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -9,7 +9,6 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#undef DEBUG
13 12
14#include <linux/types.h> 13#include <linux/types.h>
15#include <linux/threads.h> 14#include <linux/threads.h>
diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c
index 047b31027fa6..41af1223e2a0 100644
--- a/arch/powerpc/sysdev/mv64x60_dev.c
+++ b/arch/powerpc/sysdev/mv64x60_dev.c
@@ -338,15 +338,13 @@ static int __init mv64x60_i2c_device_setup(struct device_node *np, int id)
338 338
339 pdata.freq_m = 8; /* default */ 339 pdata.freq_m = 8; /* default */
340 prop = of_get_property(np, "freq_m", NULL); 340 prop = of_get_property(np, "freq_m", NULL);
341 if (!prop) 341 if (prop)
342 return -ENODEV; 342 pdata.freq_m = *prop;
343 pdata.freq_m = *prop;
344 343
 345 pdata.freq_n = 3; /* default */ 344 pdata.freq_n = 3; /* default */
346 prop = of_get_property(np, "freq_n", NULL); 345 prop = of_get_property(np, "freq_n", NULL);
347 if (!prop) 346 if (prop)
348 return -ENODEV; 347 pdata.freq_n = *prop;
349 pdata.freq_n = *prop;
350 348
351 pdata.timeout = 1000; /* default: 1 second */ 349 pdata.timeout = 1000; /* default: 1 second */
352 350
@@ -433,9 +431,13 @@ static int __init mv64x60_device_setup(void)
433 int err; 431 int err;
434 432
435 id = 0; 433 id = 0;
436 for_each_compatible_node(np, "serial", "marvell,mv64360-mpsc") 434 for_each_compatible_node(np, "serial", "marvell,mv64360-mpsc") {
437 if ((err = mv64x60_mpsc_device_setup(np, id++))) 435 err = mv64x60_mpsc_device_setup(np, id++);
438 goto error; 436 if (err)
437 printk(KERN_ERR "Failed to initialize MV64x60 "
438 "serial device %s: error %d.\n",
439 np->full_name, err);
440 }
439 441
440 id = 0; 442 id = 0;
441 id2 = 0; 443 id2 = 0;
@@ -443,38 +445,44 @@ static int __init mv64x60_device_setup(void)
443 pdev = mv64x60_eth_register_shared_pdev(np, id++); 445 pdev = mv64x60_eth_register_shared_pdev(np, id++);
444 if (IS_ERR(pdev)) { 446 if (IS_ERR(pdev)) {
445 err = PTR_ERR(pdev); 447 err = PTR_ERR(pdev);
446 goto error; 448 printk(KERN_ERR "Failed to initialize MV64x60 "
449 "network block %s: error %d.\n",
450 np->full_name, err);
451 continue;
447 } 452 }
448 for_each_child_of_node(np, np2) { 453 for_each_child_of_node(np, np2) {
449 if (!of_device_is_compatible(np2, 454 if (!of_device_is_compatible(np2,
450 "marvell,mv64360-eth")) 455 "marvell,mv64360-eth"))
451 continue; 456 continue;
452 err = mv64x60_eth_device_setup(np2, id2++, pdev); 457 err = mv64x60_eth_device_setup(np2, id2++, pdev);
453 if (err) { 458 if (err)
454 of_node_put(np2); 459 printk(KERN_ERR "Failed to initialize "
455 goto error; 460 "MV64x60 network device %s: "
456 } 461 "error %d.\n",
462 np2->full_name, err);
457 } 463 }
458 } 464 }
459 465
460 id = 0; 466 id = 0;
461 for_each_compatible_node(np, "i2c", "marvell,mv64360-i2c") 467 for_each_compatible_node(np, "i2c", "marvell,mv64360-i2c") {
462 if ((err = mv64x60_i2c_device_setup(np, id++))) 468 err = mv64x60_i2c_device_setup(np, id++);
463 goto error; 469 if (err)
470 printk(KERN_ERR "Failed to initialize MV64x60 I2C "
471 "bus %s: error %d.\n",
472 np->full_name, err);
473 }
464 474
465 /* support up to one watchdog timer */ 475 /* support up to one watchdog timer */
466 np = of_find_compatible_node(np, NULL, "marvell,mv64360-wdt"); 476 np = of_find_compatible_node(np, NULL, "marvell,mv64360-wdt");
467 if (np) { 477 if (np) {
468 if ((err = mv64x60_wdt_device_setup(np, id))) 478 if ((err = mv64x60_wdt_device_setup(np, id)))
469 goto error; 479 printk(KERN_ERR "Failed to initialize MV64x60 "
480 "Watchdog %s: error %d.\n",
481 np->full_name, err);
470 of_node_put(np); 482 of_node_put(np);
471 } 483 }
472 484
473 return 0; 485 return 0;
474
475error:
476 of_node_put(np);
477 return err;
478} 486}
479arch_initcall(mv64x60_device_setup); 487arch_initcall(mv64x60_device_setup);
480 488
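As an aside on the mv64x60_dev.c hunk above: the change replaces the old "goto error" unwind with a log-and-continue pattern, so a failure on one device node is reported but no longer aborts setup of the remaining nodes. A minimal C sketch of that pattern follows; the helper name and compatible string are hypothetical stand-ins, not code from the patch.

#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/prom.h>

/* hypothetical per-node setup helper, standing in for mv64x60_*_device_setup() */
static int __init example_node_setup(struct device_node *np, int id);

static int __init example_bus_setup(void)
{
	struct device_node *np;
	int id = 0;
	int err;

	for_each_compatible_node(np, "serial", "example,uart") {
		err = example_node_setup(np, id++);
		if (err)
			/* report the failure and keep going: later nodes still get set up */
			printk(KERN_ERR "Failed to initialize %s: error %d.\n",
			       np->full_name, err);
	}
	return 0;
}

The design choice is that partial bring-up of the board's devices is preferable to failing the whole arch_initcall when a single node is misconfigured.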
diff --git a/arch/powerpc/sysdev/mv64x60_udbg.c b/arch/powerpc/sysdev/mv64x60_udbg.c
index ccdb3b0418fc..2792dc8b038c 100644
--- a/arch/powerpc/sysdev/mv64x60_udbg.c
+++ b/arch/powerpc/sysdev/mv64x60_udbg.c
@@ -94,7 +94,7 @@ static void mv64x60_udbg_init(void)
94 if (!np) 94 if (!np)
95 return; 95 return;
96 96
97 block_index = of_get_property(np, "block-index", NULL); 97 block_index = of_get_property(np, "cell-index", NULL);
98 if (!block_index) 98 if (!block_index)
99 goto error; 99 goto error;
100 100
diff --git a/arch/ppc/8260_io/fcc_enet.c b/arch/ppc/8260_io/fcc_enet.c
index bcc3aa9d04f3..d38b57e24cee 100644
--- a/arch/ppc/8260_io/fcc_enet.c
+++ b/arch/ppc/8260_io/fcc_enet.c
@@ -165,9 +165,6 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr);
165#ifdef CONFIG_SBC82xx 165#ifdef CONFIG_SBC82xx
166#define F1_RXCLK 9 166#define F1_RXCLK 9
167#define F1_TXCLK 10 167#define F1_TXCLK 10
168#elif defined(CONFIG_ADS8272)
169#define F1_RXCLK 11
170#define F1_TXCLK 10
171#else 168#else
172#define F1_RXCLK 12 169#define F1_RXCLK 12
173#define F1_TXCLK 11 170#define F1_TXCLK 11
@@ -175,13 +172,8 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr);
175 172
176/* FCC2 Clock Source Configuration. There are board specific. 173/* FCC2 Clock Source Configuration. There are board specific.
177 Can only choose from CLK13-16 */ 174 Can only choose from CLK13-16 */
178#ifdef CONFIG_ADS8272
179#define F2_RXCLK 15
180#define F2_TXCLK 16
181#else
182#define F2_RXCLK 13 175#define F2_RXCLK 13
183#define F2_TXCLK 14 176#define F2_TXCLK 14
184#endif
185 177
186/* FCC3 Clock Source Configuration. There are board specific. 178/* FCC3 Clock Source Configuration. There are board specific.
187 Can only choose from CLK13-16 */ 179 Can only choose from CLK13-16 */
@@ -289,10 +281,7 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr);
289/* TQM8260 has MDIO and MDCK on PC30 and PC31 respectively */ 281/* TQM8260 has MDIO and MDCK on PC30 and PC31 respectively */
290#define PC_MDIO ((uint)0x00000002) 282#define PC_MDIO ((uint)0x00000002)
291#define PC_MDCK ((uint)0x00000001) 283#define PC_MDCK ((uint)0x00000001)
292#elif defined(CONFIG_ADS8272) 284#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260)
293#define PC_MDIO ((uint)0x00002000)
294#define PC_MDCK ((uint)0x00001000)
295#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260) || defined(CONFIG_PQ2FADS)
296#define PC_MDIO ((uint)0x00400000) 285#define PC_MDIO ((uint)0x00400000)
297#define PC_MDCK ((uint)0x00200000) 286#define PC_MDCK ((uint)0x00200000)
298#else 287#else
@@ -2118,11 +2107,6 @@ init_fcc_startup(fcc_info_t *fip, struct net_device *dev)
2118 printk("Can't get FCC IRQ %d\n", fip->fc_interrupt); 2107 printk("Can't get FCC IRQ %d\n", fip->fc_interrupt);
2119 2108
2120#ifdef PHY_INTERRUPT 2109#ifdef PHY_INTERRUPT
2121#ifdef CONFIG_ADS8272
2122 if (request_irq(PHY_INTERRUPT, mii_link_interrupt, IRQF_SHARED,
2123 "mii", dev) < 0)
2124 printk(KERN_CRIT "Can't get MII IRQ %d\n", PHY_INTERRUPT);
2125#else
2126 /* Make IRQn edge triggered. This does not work if PHY_INTERRUPT is 2110 /* Make IRQn edge triggered. This does not work if PHY_INTERRUPT is
2127 * on Port C. 2111 * on Port C.
2128 */ 2112 */
@@ -2132,7 +2116,6 @@ init_fcc_startup(fcc_info_t *fip, struct net_device *dev)
2132 if (request_irq(PHY_INTERRUPT, mii_link_interrupt, 0, 2116 if (request_irq(PHY_INTERRUPT, mii_link_interrupt, 0,
2133 "mii", dev) < 0) 2117 "mii", dev) < 0)
2134 printk(KERN_CRIT "Can't get MII IRQ %d\n", PHY_INTERRUPT); 2118 printk(KERN_CRIT "Can't get MII IRQ %d\n", PHY_INTERRUPT);
2135#endif
2136#endif /* PHY_INTERRUPT */ 2119#endif /* PHY_INTERRUPT */
2137 2120
2138 /* Set GFMR to enable Ethernet operating mode. 2121 /* Set GFMR to enable Ethernet operating mode.
diff --git a/arch/ppc/8xx_io/enet.c b/arch/ppc/8xx_io/enet.c
index c6d047ae77ac..5899aea1644b 100644
--- a/arch/ppc/8xx_io/enet.c
+++ b/arch/ppc/8xx_io/enet.c
@@ -946,29 +946,6 @@ static int __init scc_enet_init(void)
946 *((volatile uint *)BCSR1) &= ~BCSR1_ETHEN; 946 *((volatile uint *)BCSR1) &= ~BCSR1_ETHEN;
947#endif 947#endif
948 948
949#ifdef CONFIG_MPC885ADS
950
951 /* Deassert PHY reset and enable the PHY.
952 */
953 {
954 volatile uint __iomem *bcsr = ioremap(BCSR_ADDR, BCSR_SIZE);
955 uint tmp;
956
957 tmp = in_be32(bcsr + 1 /* BCSR1 */);
958 tmp |= BCSR1_ETHEN;
959 out_be32(bcsr + 1, tmp);
960 tmp = in_be32(bcsr + 4 /* BCSR4 */);
961 tmp |= BCSR4_ETH10_RST;
962 out_be32(bcsr + 4, tmp);
963 iounmap(bcsr);
964 }
965
966 /* On MPC885ADS SCC ethernet PHY defaults to the full duplex mode
967 * upon reset. SCC is set to half duplex by default. So this
968 * inconsistency should be better fixed by the software.
969 */
970#endif
971
972 dev->base_addr = (unsigned long)ep; 949 dev->base_addr = (unsigned long)ep;
973#if 0 950#if 0
974 dev->name = "CPM_ENET"; 951 dev->name = "CPM_ENET";
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index abc877faf123..0f1863ed9c1c 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -372,22 +372,6 @@ config MPC8XXFADS
372 bool "FADS" 372 bool "FADS"
373 select FADS 373 select FADS
374 374
375config MPC86XADS
376 bool "MPC86XADS"
377 help
378 MPC86x Application Development System by Freescale Semiconductor.
379 The MPC86xADS is meant to serve as a platform for s/w and h/w
380 development around the MPC86X processor families.
381 select FADS
382
383config MPC885ADS
384 bool "MPC885ADS"
385 help
386 Freescale Semiconductor MPC885 Application Development System (ADS).
387 Also known as DUET.
388 The MPC885ADS is meant to serve as a platform for s/w and h/w
389 development around the MPC885 processor family.
390
391config TQM823L 375config TQM823L
392 bool "TQM823L" 376 bool "TQM823L"
393 help 377 help
@@ -479,53 +463,6 @@ config WINCEPT
479 463
480endchoice 464endchoice
481 465
482menu "Freescale Ethernet driver platform-specific options"
483 depends on FS_ENET
484
485 config MPC8xx_SECOND_ETH
486 bool "Second Ethernet channel"
487 depends on (MPC885ADS || MPC86XADS)
488 default y
489 help
490 This enables support for second Ethernet on MPC885ADS and MPC86xADS boards.
491 The latter will use SCC1, for 885ADS you can select it below.
492
493 choice
494 prompt "Second Ethernet channel"
495 depends on MPC8xx_SECOND_ETH
496 default MPC8xx_SECOND_ETH_FEC2
497
498 config MPC8xx_SECOND_ETH_FEC2
499 bool "FEC2"
500 depends on MPC885ADS
501 help
502 Enable FEC2 to serve as 2-nd Ethernet channel. Note that SMC2
503 (often 2-nd UART) will not work if this is enabled.
504
505 config MPC8xx_SECOND_ETH_SCC1
506 bool "SCC1"
507 depends on MPC86XADS
508 select MPC8xx_SCC_ENET_FIXED
509 help
510 Enable SCC1 to serve as 2-nd Ethernet channel. Note that SMC1
511 (often 1-nd UART) will not work if this is enabled.
512
513 config MPC8xx_SECOND_ETH_SCC3
514 bool "SCC3"
515 depends on MPC885ADS
516 help
517 Enable SCC3 to serve as 2-nd Ethernet channel. Note that SMC1
518 (often 1-nd UART) will not work if this is enabled.
519
520 endchoice
521
522 config MPC8xx_SCC_ENET_FIXED
523 depends on MPC8xx_SECOND_ETH_SCC
524 default n
525 bool "Use fixed MII-less mode for SCC Ethernet"
526
527endmenu
528
529choice 466choice
530 prompt "Machine Type" 467 prompt "Machine Type"
531 depends on 6xx 468 depends on 6xx
@@ -666,9 +603,6 @@ config TQM8260
666 End of Life: not yet :-) 603 End of Life: not yet :-)
667 URL: <http://www.denx.de/PDF/TQM82xx_SPEC_Rev005.pdf> 604 URL: <http://www.denx.de/PDF/TQM82xx_SPEC_Rev005.pdf>
668 605
669config ADS8272
670 bool "ADS8272"
671
672config PQ2FADS 606config PQ2FADS
673 bool "Freescale-PQ2FADS" 607 bool "Freescale-PQ2FADS"
674 help 608 help
@@ -698,11 +632,6 @@ config EV64360
698 platform. 632 platform.
699endchoice 633endchoice
700 634
701config PQ2ADS
702 bool
703 depends on ADS8272
704 default y
705
706config TQM8xxL 635config TQM8xxL
707 bool 636 bool
708 depends on 8xx && (TQM823L || TQM850L || FPS850L || TQM855L || TQM860L) 637 depends on 8xx && (TQM823L || TQM850L || FPS850L || TQM855L || TQM860L)
@@ -725,15 +654,6 @@ config 8260
725 this option means that you wish to build a kernel for a machine with 654 this option means that you wish to build a kernel for a machine with
726 an 8260 class CPU. 655 an 8260 class CPU.
727 656
728config 8272
729 bool
730 depends on 6xx
731 default y if ADS8272
732 select 8260
733 help
734 The MPC8272 CPM has a different internal dpram setup than other CPM2
735 devices
736
737config CPM1 657config CPM1
738 bool 658 bool
739 depends on 8xx 659 depends on 8xx
@@ -1069,7 +989,7 @@ config PCI_8260
1069 989
1070config 8260_PCI9 990config 8260_PCI9
1071 bool "Enable workaround for MPC826x erratum PCI 9" 991 bool "Enable workaround for MPC826x erratum PCI 9"
1072 depends on PCI_8260 && !ADS8272 992 depends on PCI_8260
1073 default y 993 default y
1074 994
1075choice 995choice
diff --git a/arch/ppc/configs/ads8272_defconfig b/arch/ppc/configs/ads8272_defconfig
deleted file mode 100644
index 6619f9118b00..000000000000
--- a/arch/ppc/configs/ads8272_defconfig
+++ /dev/null
@@ -1,930 +0,0 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.21-rc5
4# Wed Apr 4 20:55:16 2007
5#
6CONFIG_MMU=y
7CONFIG_GENERIC_HARDIRQS=y
8CONFIG_RWSEM_XCHGADD_ALGORITHM=y
9CONFIG_ARCH_HAS_ILOG2_U32=y
10# CONFIG_ARCH_HAS_ILOG2_U64 is not set
11CONFIG_GENERIC_HWEIGHT=y
12CONFIG_GENERIC_CALIBRATE_DELAY=y
13CONFIG_PPC=y
14CONFIG_PPC32=y
15CONFIG_GENERIC_NVRAM=y
16CONFIG_GENERIC_FIND_NEXT_BIT=y
17CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
18CONFIG_ARCH_MAY_HAVE_PC_FDC=y
19CONFIG_GENERIC_BUG=y
20CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
21
22#
23# Code maturity level options
24#
25CONFIG_EXPERIMENTAL=y
26CONFIG_BROKEN_ON_SMP=y
27CONFIG_INIT_ENV_ARG_LIMIT=32
28
29#
30# General setup
31#
32CONFIG_LOCALVERSION=""
33CONFIG_LOCALVERSION_AUTO=y
34CONFIG_SWAP=y
35CONFIG_SYSVIPC=y
36# CONFIG_IPC_NS is not set
37CONFIG_SYSVIPC_SYSCTL=y
38# CONFIG_POSIX_MQUEUE is not set
39# CONFIG_BSD_PROCESS_ACCT is not set
40# CONFIG_TASKSTATS is not set
41# CONFIG_UTS_NS is not set
42# CONFIG_AUDIT is not set
43# CONFIG_IKCONFIG is not set
44CONFIG_SYSFS_DEPRECATED=y
45# CONFIG_RELAY is not set
46CONFIG_BLK_DEV_INITRD=y
47CONFIG_INITRAMFS_SOURCE=""
48# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
49CONFIG_SYSCTL=y
50CONFIG_EMBEDDED=y
51CONFIG_SYSCTL_SYSCALL=y
52# CONFIG_KALLSYMS is not set
53# CONFIG_HOTPLUG is not set
54CONFIG_PRINTK=y
55CONFIG_BUG=y
56CONFIG_ELF_CORE=y
57CONFIG_BASE_FULL=y
58CONFIG_FUTEX=y
59# CONFIG_EPOLL is not set
60CONFIG_SHMEM=y
61CONFIG_SLAB=y
62CONFIG_VM_EVENT_COUNTERS=y
63CONFIG_RT_MUTEXES=y
64# CONFIG_TINY_SHMEM is not set
65CONFIG_BASE_SMALL=0
66# CONFIG_SLOB is not set
67
68#
69# Loadable module support
70#
71# CONFIG_MODULES is not set
72
73#
74# Block layer
75#
76CONFIG_BLOCK=y
77# CONFIG_LBD is not set
78# CONFIG_BLK_DEV_IO_TRACE is not set
79# CONFIG_LSF is not set
80
81#
82# IO Schedulers
83#
84CONFIG_IOSCHED_NOOP=y
85CONFIG_IOSCHED_AS=y
86CONFIG_IOSCHED_DEADLINE=y
87CONFIG_IOSCHED_CFQ=y
88# CONFIG_DEFAULT_AS is not set
89# CONFIG_DEFAULT_DEADLINE is not set
90CONFIG_DEFAULT_CFQ=y
91# CONFIG_DEFAULT_NOOP is not set
92CONFIG_DEFAULT_IOSCHED="cfq"
93
94#
95# Processor
96#
97CONFIG_6xx=y
98# CONFIG_40x is not set
99# CONFIG_44x is not set
100# CONFIG_8xx is not set
101# CONFIG_E200 is not set
102# CONFIG_E500 is not set
103CONFIG_PPC_FPU=y
104# CONFIG_PPC_DCR_NATIVE is not set
105# CONFIG_KEXEC is not set
106# CONFIG_CPU_FREQ is not set
107# CONFIG_WANT_EARLY_SERIAL is not set
108CONFIG_EMBEDDEDBOOT=y
109CONFIG_PPC_STD_MMU=y
110
111#
112# Platform options
113#
114
115#
116# Freescale Ethernet driver platform-specific options
117#
118# CONFIG_PPC_PREP is not set
119# CONFIG_APUS is not set
120# CONFIG_KATANA is not set
121# CONFIG_WILLOW is not set
122# CONFIG_CPCI690 is not set
123# CONFIG_POWERPMC250 is not set
124# CONFIG_CHESTNUT is not set
125# CONFIG_SPRUCE is not set
126# CONFIG_HDPU is not set
127# CONFIG_EV64260 is not set
128# CONFIG_LOPEC is not set
129# CONFIG_MVME5100 is not set
130# CONFIG_PPLUS is not set
131# CONFIG_PRPMC750 is not set
132# CONFIG_PRPMC800 is not set
133# CONFIG_SANDPOINT is not set
134# CONFIG_RADSTONE_PPC7D is not set
135# CONFIG_PAL4 is not set
136# CONFIG_EST8260 is not set
137# CONFIG_SBC82xx is not set
138# CONFIG_SBS8260 is not set
139# CONFIG_RPX8260 is not set
140# CONFIG_TQM8260 is not set
141CONFIG_ADS8272=y
142# CONFIG_PQ2FADS is not set
143# CONFIG_LITE5200 is not set
144# CONFIG_MPC834x_SYS is not set
145# CONFIG_EV64360 is not set
146CONFIG_PQ2ADS=y
147CONFIG_8260=y
148CONFIG_8272=y
149CONFIG_CPM2=y
150# CONFIG_PC_KEYBOARD is not set
151# CONFIG_SMP is not set
152# CONFIG_HIGHMEM is not set
153CONFIG_ARCH_POPULATES_NODE_MAP=y
154# CONFIG_HZ_100 is not set
155CONFIG_HZ_250=y
156# CONFIG_HZ_300 is not set
157# CONFIG_HZ_1000 is not set
158CONFIG_HZ=250
159CONFIG_PREEMPT_NONE=y
160# CONFIG_PREEMPT_VOLUNTARY is not set
161# CONFIG_PREEMPT is not set
162CONFIG_SELECT_MEMORY_MODEL=y
163CONFIG_FLATMEM_MANUAL=y
164# CONFIG_DISCONTIGMEM_MANUAL is not set
165# CONFIG_SPARSEMEM_MANUAL is not set
166CONFIG_FLATMEM=y
167CONFIG_FLAT_NODE_MEM_MAP=y
168# CONFIG_SPARSEMEM_STATIC is not set
169CONFIG_SPLIT_PTLOCK_CPUS=4
170# CONFIG_RESOURCES_64BIT is not set
171CONFIG_ZONE_DMA_FLAG=1
172CONFIG_BINFMT_ELF=y
173# CONFIG_BINFMT_MISC is not set
174# CONFIG_CMDLINE_BOOL is not set
175# CONFIG_PM is not set
176CONFIG_SECCOMP=y
177CONFIG_ISA_DMA_API=y
178
179#
180# Bus options
181#
182CONFIG_ZONE_DMA=y
183# CONFIG_PPC_I8259 is not set
184CONFIG_PPC_INDIRECT_PCI=y
185CONFIG_PCI=y
186CONFIG_PCI_DOMAINS=y
187CONFIG_PCI_8260=y
188
189#
190# PCCARD (PCMCIA/CardBus) support
191#
192
193#
194# Advanced setup
195#
196# CONFIG_ADVANCED_OPTIONS is not set
197
198#
199# Default settings for advanced configuration options are used
200#
201CONFIG_HIGHMEM_START=0xfe000000
202CONFIG_LOWMEM_SIZE=0x30000000
203CONFIG_KERNEL_START=0xc0000000
204CONFIG_TASK_SIZE=0x80000000
205CONFIG_BOOT_LOAD=0x00400000
206
207#
208# Networking
209#
210CONFIG_NET=y
211
212#
213# Networking options
214#
215# CONFIG_NETDEBUG is not set
216CONFIG_PACKET=y
217# CONFIG_PACKET_MMAP is not set
218CONFIG_UNIX=y
219CONFIG_XFRM=y
220# CONFIG_XFRM_USER is not set
221# CONFIG_XFRM_SUB_POLICY is not set
222# CONFIG_XFRM_MIGRATE is not set
223# CONFIG_NET_KEY is not set
224CONFIG_INET=y
225CONFIG_IP_MULTICAST=y
226# CONFIG_IP_ADVANCED_ROUTER is not set
227CONFIG_IP_FIB_HASH=y
228CONFIG_IP_PNP=y
229CONFIG_IP_PNP_DHCP=y
230CONFIG_IP_PNP_BOOTP=y
231# CONFIG_IP_PNP_RARP is not set
232# CONFIG_NET_IPIP is not set
233# CONFIG_NET_IPGRE is not set
234# CONFIG_IP_MROUTE is not set
235# CONFIG_ARPD is not set
236CONFIG_SYN_COOKIES=y
237# CONFIG_INET_AH is not set
238# CONFIG_INET_ESP is not set
239# CONFIG_INET_IPCOMP is not set
240# CONFIG_INET_XFRM_TUNNEL is not set
241# CONFIG_INET_TUNNEL is not set
242CONFIG_INET_XFRM_MODE_TRANSPORT=y
243CONFIG_INET_XFRM_MODE_TUNNEL=y
244CONFIG_INET_XFRM_MODE_BEET=y
245CONFIG_INET_DIAG=y
246CONFIG_INET_TCP_DIAG=y
247# CONFIG_TCP_CONG_ADVANCED is not set
248CONFIG_TCP_CONG_CUBIC=y
249CONFIG_DEFAULT_TCP_CONG="cubic"
250# CONFIG_TCP_MD5SIG is not set
251# CONFIG_IPV6 is not set
252# CONFIG_INET6_XFRM_TUNNEL is not set
253# CONFIG_INET6_TUNNEL is not set
254# CONFIG_NETWORK_SECMARK is not set
255# CONFIG_NETFILTER is not set
256
257#
258# DCCP Configuration (EXPERIMENTAL)
259#
260# CONFIG_IP_DCCP is not set
261
262#
263# SCTP Configuration (EXPERIMENTAL)
264#
265# CONFIG_IP_SCTP is not set
266
267#
268# TIPC Configuration (EXPERIMENTAL)
269#
270# CONFIG_TIPC is not set
271# CONFIG_ATM is not set
272# CONFIG_BRIDGE is not set
273# CONFIG_VLAN_8021Q is not set
274# CONFIG_DECNET is not set
275# CONFIG_LLC2 is not set
276# CONFIG_IPX is not set
277# CONFIG_ATALK is not set
278# CONFIG_X25 is not set
279# CONFIG_LAPB is not set
280# CONFIG_ECONET is not set
281# CONFIG_WAN_ROUTER is not set
282
283#
284# QoS and/or fair queueing
285#
286# CONFIG_NET_SCHED is not set
287
288#
289# Network testing
290#
291# CONFIG_NET_PKTGEN is not set
292# CONFIG_HAMRADIO is not set
293# CONFIG_IRDA is not set
294# CONFIG_BT is not set
295# CONFIG_IEEE80211 is not set
296
297#
298# Device Drivers
299#
300
301#
302# Generic Driver Options
303#
304CONFIG_STANDALONE=y
305CONFIG_PREVENT_FIRMWARE_BUILD=y
306# CONFIG_SYS_HYPERVISOR is not set
307
308#
309# Connector - unified userspace <-> kernelspace linker
310#
311# CONFIG_CONNECTOR is not set
312
313#
314# Memory Technology Devices (MTD)
315#
316# CONFIG_MTD is not set
317
318#
319# Parallel port support
320#
321# CONFIG_PARPORT is not set
322
323#
324# Plug and Play support
325#
326# CONFIG_PNPACPI is not set
327
328#
329# Block devices
330#
331# CONFIG_BLK_DEV_FD is not set
332# CONFIG_BLK_CPQ_DA is not set
333# CONFIG_BLK_CPQ_CISS_DA is not set
334# CONFIG_BLK_DEV_DAC960 is not set
335# CONFIG_BLK_DEV_UMEM is not set
336# CONFIG_BLK_DEV_COW_COMMON is not set
337CONFIG_BLK_DEV_LOOP=y
338# CONFIG_BLK_DEV_CRYPTOLOOP is not set
339# CONFIG_BLK_DEV_NBD is not set
340# CONFIG_BLK_DEV_SX8 is not set
341CONFIG_BLK_DEV_RAM=y
342CONFIG_BLK_DEV_RAM_COUNT=16
343CONFIG_BLK_DEV_RAM_SIZE=32768
344CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
345# CONFIG_CDROM_PKTCDVD is not set
346# CONFIG_ATA_OVER_ETH is not set
347
348#
349# Misc devices
350#
351# CONFIG_SGI_IOC4 is not set
352# CONFIG_TIFM_CORE is not set
353
354#
355# ATA/ATAPI/MFM/RLL support
356#
357# CONFIG_IDE is not set
358
359#
360# SCSI device support
361#
362# CONFIG_RAID_ATTRS is not set
363# CONFIG_SCSI is not set
364# CONFIG_SCSI_NETLINK is not set
365
366#
367# Serial ATA (prod) and Parallel ATA (experimental) drivers
368#
369# CONFIG_ATA is not set
370
371#
372# Multi-device support (RAID and LVM)
373#
374# CONFIG_MD is not set
375
376#
377# Fusion MPT device support
378#
379# CONFIG_FUSION is not set
380
381#
382# IEEE 1394 (FireWire) support
383#
384# CONFIG_IEEE1394 is not set
385
386#
387# I2O device support
388#
389# CONFIG_I2O is not set
390
391#
392# Macintosh device drivers
393#
394# CONFIG_MAC_EMUMOUSEBTN is not set
395# CONFIG_WINDFARM is not set
396
397#
398# Network device support
399#
400CONFIG_NETDEVICES=y
401# CONFIG_DUMMY is not set
402# CONFIG_BONDING is not set
403# CONFIG_EQUALIZER is not set
404# CONFIG_TUN is not set
405
406#
407# ARCnet devices
408#
409# CONFIG_ARCNET is not set
410
411#
412# PHY device support
413#
414CONFIG_PHYLIB=y
415
416#
417# MII PHY device drivers
418#
419# CONFIG_MARVELL_PHY is not set
420CONFIG_DAVICOM_PHY=y
421# CONFIG_QSEMI_PHY is not set
422# CONFIG_LXT_PHY is not set
423# CONFIG_CICADA_PHY is not set
424# CONFIG_VITESSE_PHY is not set
425# CONFIG_SMSC_PHY is not set
426# CONFIG_BROADCOM_PHY is not set
427# CONFIG_FIXED_PHY is not set
428
429#
430# Ethernet (10 or 100Mbit)
431#
432CONFIG_NET_ETHERNET=y
433CONFIG_MII=y
434# CONFIG_HAPPYMEAL is not set
435# CONFIG_SUNGEM is not set
436# CONFIG_CASSINI is not set
437# CONFIG_NET_VENDOR_3COM is not set
438
439#
440# Tulip family network device support
441#
442# CONFIG_NET_TULIP is not set
443# CONFIG_HP100 is not set
444# CONFIG_NET_PCI is not set
445CONFIG_FS_ENET=y
446# CONFIG_FS_ENET_HAS_SCC is not set
447CONFIG_FS_ENET_HAS_FCC=y
448
449#
450# Ethernet (1000 Mbit)
451#
452# CONFIG_ACENIC is not set
453# CONFIG_DL2K is not set
454# CONFIG_E1000 is not set
455# CONFIG_NS83820 is not set
456# CONFIG_HAMACHI is not set
457# CONFIG_YELLOWFIN is not set
458# CONFIG_R8169 is not set
459# CONFIG_SIS190 is not set
460# CONFIG_SKGE is not set
461# CONFIG_SKY2 is not set
462# CONFIG_SK98LIN is not set
463# CONFIG_TIGON3 is not set
464# CONFIG_BNX2 is not set
465# CONFIG_QLA3XXX is not set
466# CONFIG_ATL1 is not set
467
468#
469# Ethernet (10000 Mbit)
470#
471# CONFIG_CHELSIO_T1 is not set
472# CONFIG_CHELSIO_T3 is not set
473# CONFIG_IXGB is not set
474# CONFIG_S2IO is not set
475# CONFIG_MYRI10GE is not set
476# CONFIG_NETXEN_NIC is not set
477
478#
479# Token Ring devices
480#
481# CONFIG_TR is not set
482
483#
484# Wireless LAN (non-hamradio)
485#
486# CONFIG_NET_RADIO is not set
487
488#
489# Wan interfaces
490#
491# CONFIG_WAN is not set
492# CONFIG_FDDI is not set
493# CONFIG_HIPPI is not set
494# CONFIG_PPP is not set
495# CONFIG_SLIP is not set
496# CONFIG_SHAPER is not set
497# CONFIG_NETCONSOLE is not set
498# CONFIG_NETPOLL is not set
499# CONFIG_NET_POLL_CONTROLLER is not set
500
501#
502# ISDN subsystem
503#
504# CONFIG_ISDN is not set
505
506#
507# Telephony Support
508#
509# CONFIG_PHONE is not set
510
511#
512# Input device support
513#
514CONFIG_INPUT=y
515# CONFIG_INPUT_FF_MEMLESS is not set
516
517#
518# Userland interfaces
519#
520# CONFIG_INPUT_MOUSEDEV is not set
521# CONFIG_INPUT_JOYDEV is not set
522# CONFIG_INPUT_TSDEV is not set
523# CONFIG_INPUT_EVDEV is not set
524# CONFIG_INPUT_EVBUG is not set
525
526#
527# Input Device Drivers
528#
529# CONFIG_INPUT_KEYBOARD is not set
530# CONFIG_INPUT_MOUSE is not set
531# CONFIG_INPUT_JOYSTICK is not set
532# CONFIG_INPUT_TOUCHSCREEN is not set
533# CONFIG_INPUT_MISC is not set
534
535#
536# Hardware I/O ports
537#
538# CONFIG_SERIO is not set
539# CONFIG_GAMEPORT is not set
540
541#
542# Character devices
543#
544# CONFIG_VT is not set
545# CONFIG_SERIAL_NONSTANDARD is not set
546
547#
548# Serial drivers
549#
550# CONFIG_SERIAL_8250 is not set
551
552#
553# Non-8250 serial port support
554#
555# CONFIG_SERIAL_UARTLITE is not set
556CONFIG_SERIAL_CORE=y
557CONFIG_SERIAL_CORE_CONSOLE=y
558CONFIG_SERIAL_CPM=y
559CONFIG_SERIAL_CPM_CONSOLE=y
560CONFIG_SERIAL_CPM_SCC1=y
561# CONFIG_SERIAL_CPM_SCC2 is not set
562# CONFIG_SERIAL_CPM_SCC3 is not set
563CONFIG_SERIAL_CPM_SCC4=y
564# CONFIG_SERIAL_CPM_SMC1 is not set
565# CONFIG_SERIAL_CPM_SMC2 is not set
566# CONFIG_SERIAL_JSM is not set
567CONFIG_UNIX98_PTYS=y
568CONFIG_LEGACY_PTYS=y
569CONFIG_LEGACY_PTY_COUNT=256
570
571#
572# IPMI
573#
574# CONFIG_IPMI_HANDLER is not set
575
576#
577# Watchdog Cards
578#
579# CONFIG_WATCHDOG is not set
580CONFIG_HW_RANDOM=y
581# CONFIG_NVRAM is not set
582CONFIG_GEN_RTC=y
583# CONFIG_GEN_RTC_X is not set
584# CONFIG_DTLK is not set
585# CONFIG_R3964 is not set
586# CONFIG_APPLICOM is not set
587# CONFIG_AGP is not set
588# CONFIG_DRM is not set
589# CONFIG_RAW_DRIVER is not set
590
591#
592# TPM devices
593#
594# CONFIG_TCG_TPM is not set
595
596#
597# I2C support
598#
599# CONFIG_I2C is not set
600
601#
602# SPI support
603#
604# CONFIG_SPI is not set
605# CONFIG_SPI_MASTER is not set
606
607#
608# Dallas's 1-wire bus
609#
610# CONFIG_W1 is not set
611
612#
613# Hardware Monitoring support
614#
615CONFIG_HWMON=y
616# CONFIG_HWMON_VID is not set
617# CONFIG_SENSORS_ABITUGURU is not set
618# CONFIG_SENSORS_F71805F is not set
619# CONFIG_SENSORS_PC87427 is not set
620# CONFIG_SENSORS_VT1211 is not set
621# CONFIG_HWMON_DEBUG_CHIP is not set
622
623#
624# Multifunction device drivers
625#
626# CONFIG_MFD_SM501 is not set
627
628#
629# Multimedia devices
630#
631# CONFIG_VIDEO_DEV is not set
632
633#
634# Digital Video Broadcasting Devices
635#
636# CONFIG_DVB is not set
637
638#
639# Graphics support
640#
641# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
642# CONFIG_FB is not set
643# CONFIG_FB_IBM_GXT4500 is not set
644
645#
646# Sound
647#
648# CONFIG_SOUND is not set
649
650#
651# HID Devices
652#
653CONFIG_HID=y
654# CONFIG_HID_DEBUG is not set
655
656#
657# USB support
658#
659CONFIG_USB_ARCH_HAS_HCD=y
660CONFIG_USB_ARCH_HAS_OHCI=y
661CONFIG_USB_ARCH_HAS_EHCI=y
662# CONFIG_USB is not set
663
664#
665# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
666#
667
668#
669# USB Gadget Support
670#
671# CONFIG_USB_GADGET is not set
672
673#
674# MMC/SD Card support
675#
676# CONFIG_MMC is not set
677
678#
679# LED devices
680#
681# CONFIG_NEW_LEDS is not set
682
683#
684# LED drivers
685#
686
687#
688# LED Triggers
689#
690
691#
692# InfiniBand support
693#
694# CONFIG_INFINIBAND is not set
695
696#
697# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
698#
699
700#
701# Real Time Clock
702#
703# CONFIG_RTC_CLASS is not set
704
705#
706# DMA Engine support
707#
708# CONFIG_DMA_ENGINE is not set
709
710#
711# DMA Clients
712#
713
714#
715# DMA Devices
716#
717
718#
719# Auxiliary Display support
720#
721
722#
723# Virtualization
724#
725
726#
727# File systems
728#
729CONFIG_EXT2_FS=y
730# CONFIG_EXT2_FS_XATTR is not set
731# CONFIG_EXT2_FS_XIP is not set
732CONFIG_EXT3_FS=y
733CONFIG_EXT3_FS_XATTR=y
734# CONFIG_EXT3_FS_POSIX_ACL is not set
735# CONFIG_EXT3_FS_SECURITY is not set
736# CONFIG_EXT4DEV_FS is not set
737CONFIG_JBD=y
738# CONFIG_JBD_DEBUG is not set
739CONFIG_FS_MBCACHE=y
740# CONFIG_REISERFS_FS is not set
741# CONFIG_JFS_FS is not set
742CONFIG_FS_POSIX_ACL=y
743# CONFIG_XFS_FS is not set
744# CONFIG_GFS2_FS is not set
745# CONFIG_OCFS2_FS is not set
746# CONFIG_MINIX_FS is not set
747# CONFIG_ROMFS_FS is not set
748CONFIG_INOTIFY=y
749CONFIG_INOTIFY_USER=y
750# CONFIG_QUOTA is not set
751CONFIG_DNOTIFY=y
752# CONFIG_AUTOFS_FS is not set
753# CONFIG_AUTOFS4_FS is not set
754# CONFIG_FUSE_FS is not set
755
756#
757# CD-ROM/DVD Filesystems
758#
759# CONFIG_ISO9660_FS is not set
760# CONFIG_UDF_FS is not set
761
762#
763# DOS/FAT/NT Filesystems
764#
765# CONFIG_MSDOS_FS is not set
766# CONFIG_VFAT_FS is not set
767# CONFIG_NTFS_FS is not set
768
769#
770# Pseudo filesystems
771#
772CONFIG_PROC_FS=y
773CONFIG_PROC_KCORE=y
774CONFIG_PROC_SYSCTL=y
775CONFIG_SYSFS=y
776CONFIG_TMPFS=y
777# CONFIG_TMPFS_POSIX_ACL is not set
778# CONFIG_HUGETLB_PAGE is not set
779CONFIG_RAMFS=y
780# CONFIG_CONFIGFS_FS is not set
781
782#
783# Miscellaneous filesystems
784#
785# CONFIG_ADFS_FS is not set
786# CONFIG_AFFS_FS is not set
787# CONFIG_HFS_FS is not set
788# CONFIG_HFSPLUS_FS is not set
789# CONFIG_BEFS_FS is not set
790# CONFIG_BFS_FS is not set
791# CONFIG_EFS_FS is not set
792# CONFIG_CRAMFS is not set
793# CONFIG_VXFS_FS is not set
794# CONFIG_HPFS_FS is not set
795# CONFIG_QNX4FS_FS is not set
796# CONFIG_SYSV_FS is not set
797# CONFIG_UFS_FS is not set
798
799#
800# Network File Systems
801#
802CONFIG_NFS_FS=y
803CONFIG_NFS_V3=y
804CONFIG_NFS_V3_ACL=y
805CONFIG_NFS_V4=y
806# CONFIG_NFS_DIRECTIO is not set
807# CONFIG_NFSD is not set
808CONFIG_ROOT_NFS=y
809CONFIG_LOCKD=y
810CONFIG_LOCKD_V4=y
811CONFIG_NFS_ACL_SUPPORT=y
812CONFIG_NFS_COMMON=y
813CONFIG_SUNRPC=y
814CONFIG_SUNRPC_GSS=y
815CONFIG_RPCSEC_GSS_KRB5=y
816# CONFIG_RPCSEC_GSS_SPKM3 is not set
817# CONFIG_SMB_FS is not set
818# CONFIG_CIFS is not set
819# CONFIG_NCP_FS is not set
820# CONFIG_CODA_FS is not set
821# CONFIG_AFS_FS is not set
822# CONFIG_9P_FS is not set
823
824#
825# Partition Types
826#
827CONFIG_PARTITION_ADVANCED=y
828# CONFIG_ACORN_PARTITION is not set
829# CONFIG_OSF_PARTITION is not set
830# CONFIG_AMIGA_PARTITION is not set
831# CONFIG_ATARI_PARTITION is not set
832# CONFIG_MAC_PARTITION is not set
833# CONFIG_MSDOS_PARTITION is not set
834# CONFIG_LDM_PARTITION is not set
835# CONFIG_SGI_PARTITION is not set
836# CONFIG_ULTRIX_PARTITION is not set
837# CONFIG_SUN_PARTITION is not set
838# CONFIG_KARMA_PARTITION is not set
839# CONFIG_EFI_PARTITION is not set
840
841#
842# Native Language Support
843#
844# CONFIG_NLS is not set
845
846#
847# Distributed Lock Manager
848#
849# CONFIG_DLM is not set
850# CONFIG_SCC_ENET is not set
851# CONFIG_FEC_ENET is not set
852
853#
854# CPM2 Options
855#
856
857#
858# Library routines
859#
860# CONFIG_CRC_CCITT is not set
861# CONFIG_CRC16 is not set
862# CONFIG_CRC32 is not set
863# CONFIG_LIBCRC32C is not set
864CONFIG_PLIST=y
865CONFIG_HAS_IOMEM=y
866CONFIG_HAS_IOPORT=y
867# CONFIG_PROFILING is not set
868
869#
870# Kernel hacking
871#
872# CONFIG_PRINTK_TIME is not set
873CONFIG_ENABLE_MUST_CHECK=y
874# CONFIG_MAGIC_SYSRQ is not set
875# CONFIG_UNUSED_SYMBOLS is not set
876# CONFIG_DEBUG_FS is not set
877# CONFIG_HEADERS_CHECK is not set
878# CONFIG_DEBUG_KERNEL is not set
879CONFIG_LOG_BUF_SHIFT=14
880# CONFIG_DEBUG_BUGVERBOSE is not set
881# CONFIG_KGDB_CONSOLE is not set
882
883#
884# Security options
885#
886# CONFIG_KEYS is not set
887# CONFIG_SECURITY is not set
888
889#
890# Cryptographic options
891#
892CONFIG_CRYPTO=y
893CONFIG_CRYPTO_ALGAPI=y
894CONFIG_CRYPTO_BLKCIPHER=y
895CONFIG_CRYPTO_MANAGER=y
896# CONFIG_CRYPTO_HMAC is not set
897# CONFIG_CRYPTO_XCBC is not set
898# CONFIG_CRYPTO_NULL is not set
899# CONFIG_CRYPTO_MD4 is not set
900CONFIG_CRYPTO_MD5=y
901# CONFIG_CRYPTO_SHA1 is not set
902# CONFIG_CRYPTO_SHA256 is not set
903# CONFIG_CRYPTO_SHA512 is not set
904# CONFIG_CRYPTO_WP512 is not set
905# CONFIG_CRYPTO_TGR192 is not set
906# CONFIG_CRYPTO_GF128MUL is not set
907CONFIG_CRYPTO_ECB=y
908CONFIG_CRYPTO_CBC=y
909CONFIG_CRYPTO_PCBC=y
910# CONFIG_CRYPTO_LRW is not set
911CONFIG_CRYPTO_DES=y
912# CONFIG_CRYPTO_FCRYPT is not set
913# CONFIG_CRYPTO_BLOWFISH is not set
914# CONFIG_CRYPTO_TWOFISH is not set
915# CONFIG_CRYPTO_SERPENT is not set
916# CONFIG_CRYPTO_AES is not set
917# CONFIG_CRYPTO_CAST5 is not set
918# CONFIG_CRYPTO_CAST6 is not set
919# CONFIG_CRYPTO_TEA is not set
920# CONFIG_CRYPTO_ARC4 is not set
921# CONFIG_CRYPTO_KHAZAD is not set
922# CONFIG_CRYPTO_ANUBIS is not set
923# CONFIG_CRYPTO_DEFLATE is not set
924# CONFIG_CRYPTO_MICHAEL_MIC is not set
925# CONFIG_CRYPTO_CRC32C is not set
926# CONFIG_CRYPTO_CAMELLIA is not set
927
928#
929# Hardware crypto devices
930#
diff --git a/arch/ppc/configs/mpc86x_ads_defconfig b/arch/ppc/configs/mpc86x_ads_defconfig
deleted file mode 100644
index f63c6f59d68a..000000000000
--- a/arch/ppc/configs/mpc86x_ads_defconfig
+++ /dev/null
@@ -1,633 +0,0 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.12-rc4
4# Tue Jun 14 13:36:35 2005
5#
6CONFIG_MMU=y
7CONFIG_GENERIC_HARDIRQS=y
8CONFIG_RWSEM_XCHGADD_ALGORITHM=y
9CONFIG_GENERIC_CALIBRATE_DELAY=y
10CONFIG_HAVE_DEC_LOCK=y
11CONFIG_PPC=y
12CONFIG_PPC32=y
13CONFIG_GENERIC_NVRAM=y
14CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
15
16#
17# Code maturity level options
18#
19CONFIG_EXPERIMENTAL=y
20# CONFIG_CLEAN_COMPILE is not set
21CONFIG_BROKEN=y
22CONFIG_BROKEN_ON_SMP=y
23CONFIG_INIT_ENV_ARG_LIMIT=32
24
25#
26# General setup
27#
28CONFIG_LOCALVERSION=""
29# CONFIG_SWAP is not set
30CONFIG_SYSVIPC=y
31# CONFIG_POSIX_MQUEUE is not set
32# CONFIG_BSD_PROCESS_ACCT is not set
33CONFIG_SYSCTL=y
34# CONFIG_AUDIT is not set
35# CONFIG_HOTPLUG is not set
36CONFIG_KOBJECT_UEVENT=y
37# CONFIG_IKCONFIG is not set
38CONFIG_EMBEDDED=y
39# CONFIG_KALLSYMS is not set
40CONFIG_PRINTK=y
41CONFIG_BUG=y
42# CONFIG_BASE_FULL is not set
43CONFIG_FUTEX=y
44# CONFIG_EPOLL is not set
45# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
46# CONFIG_SHMEM is not set
47CONFIG_CC_ALIGN_FUNCTIONS=0
48CONFIG_CC_ALIGN_LABELS=0
49CONFIG_CC_ALIGN_LOOPS=0
50CONFIG_CC_ALIGN_JUMPS=0
51CONFIG_TINY_SHMEM=y
52CONFIG_BASE_SMALL=1
53
54#
55# Loadable module support
56#
57CONFIG_MODULES=y
58# CONFIG_MODULE_UNLOAD is not set
59CONFIG_OBSOLETE_MODPARM=y
60# CONFIG_MODVERSIONS is not set
61# CONFIG_MODULE_SRCVERSION_ALL is not set
62# CONFIG_KMOD is not set
63
64#
65# Processor
66#
67# CONFIG_6xx is not set
68# CONFIG_40x is not set
69# CONFIG_44x is not set
70# CONFIG_POWER3 is not set
71# CONFIG_POWER4 is not set
72CONFIG_8xx=y
73# CONFIG_E500 is not set
74# CONFIG_MATH_EMULATION is not set
75# CONFIG_CPU_FREQ is not set
76CONFIG_EMBEDDEDBOOT=y
77# CONFIG_PM is not set
78CONFIG_NOT_COHERENT_CACHE=y
79
80#
81# Platform options
82#
83CONFIG_FADS=y
84# CONFIG_RPXLITE is not set
85# CONFIG_RPXCLASSIC is not set
86# CONFIG_BSEIP is not set
87# CONFIG_MPC8XXFADS is not set
88CONFIG_MPC86XADS=y
89# CONFIG_TQM823L is not set
90# CONFIG_TQM850L is not set
91# CONFIG_TQM855L is not set
92# CONFIG_TQM860L is not set
93# CONFIG_FPS850L is not set
94# CONFIG_SPD823TS is not set
95# CONFIG_IVMS8 is not set
96# CONFIG_IVML24 is not set
97# CONFIG_SM850 is not set
98# CONFIG_HERMES_PRO is not set
99# CONFIG_IP860 is not set
100# CONFIG_LWMON is not set
101# CONFIG_PCU_E is not set
102# CONFIG_CCM is not set
103# CONFIG_LANTEC is not set
104# CONFIG_MBX is not set
105# CONFIG_WINCEPT is not set
106# CONFIG_SMP is not set
107# CONFIG_PREEMPT is not set
108# CONFIG_HIGHMEM is not set
109CONFIG_BINFMT_ELF=y
110# CONFIG_BINFMT_MISC is not set
111# CONFIG_CMDLINE_BOOL is not set
112CONFIG_ISA_DMA_API=y
113
114#
115# Bus options
116#
117# CONFIG_PCI is not set
118# CONFIG_PCI_DOMAINS is not set
119# CONFIG_PCI_QSPAN is not set
120
121#
122# PCCARD (PCMCIA/CardBus) support
123#
124# CONFIG_PCCARD is not set
125
126#
127# Advanced setup
128#
129# CONFIG_ADVANCED_OPTIONS is not set
130
131#
132# Default settings for advanced configuration options are used
133#
134CONFIG_HIGHMEM_START=0xfe000000
135CONFIG_LOWMEM_SIZE=0x30000000
136CONFIG_KERNEL_START=0xc0000000
137CONFIG_TASK_SIZE=0x80000000
138CONFIG_CONSISTENT_START=0xff100000
139CONFIG_CONSISTENT_SIZE=0x00200000
140CONFIG_BOOT_LOAD=0x00400000
141
142#
143# Device Drivers
144#
145
146#
147# Generic Driver Options
148#
149# CONFIG_STANDALONE is not set
150CONFIG_PREVENT_FIRMWARE_BUILD=y
151# CONFIG_FW_LOADER is not set
152
153#
154# Memory Technology Devices (MTD)
155#
156# CONFIG_MTD is not set
157
158#
159# Parallel port support
160#
161# CONFIG_PARPORT is not set
162
163#
164# Plug and Play support
165#
166
167#
168# Block devices
169#
170# CONFIG_BLK_DEV_FD is not set
171# CONFIG_BLK_DEV_COW_COMMON is not set
172CONFIG_BLK_DEV_LOOP=y
173# CONFIG_BLK_DEV_CRYPTOLOOP is not set
174# CONFIG_BLK_DEV_NBD is not set
175# CONFIG_BLK_DEV_RAM is not set
176CONFIG_BLK_DEV_RAM_COUNT=16
177CONFIG_INITRAMFS_SOURCE=""
178# CONFIG_LBD is not set
179# CONFIG_CDROM_PKTCDVD is not set
180
181#
182# IO Schedulers
183#
184CONFIG_IOSCHED_NOOP=y
185CONFIG_IOSCHED_AS=y
186CONFIG_IOSCHED_DEADLINE=y
187CONFIG_IOSCHED_CFQ=y
188# CONFIG_ATA_OVER_ETH is not set
189
190#
191# ATA/ATAPI/MFM/RLL support
192#
193# CONFIG_IDE is not set
194
195#
196# SCSI device support
197#
198# CONFIG_SCSI is not set
199
200#
201# Multi-device support (RAID and LVM)
202#
203# CONFIG_MD is not set
204
205#
206# Fusion MPT device support
207#
208
209#
210# IEEE 1394 (FireWire) support
211#
212# CONFIG_IEEE1394 is not set
213
214#
215# I2O device support
216#
217
218#
219# Macintosh device drivers
220#
221
222#
223# Networking support
224#
225CONFIG_NET=y
226
227#
228# Networking options
229#
230CONFIG_PACKET=y
231# CONFIG_PACKET_MMAP is not set
232CONFIG_UNIX=y
233# CONFIG_NET_KEY is not set
234CONFIG_INET=y
235# CONFIG_IP_MULTICAST is not set
236# CONFIG_IP_ADVANCED_ROUTER is not set
237CONFIG_IP_PNP=y
238CONFIG_IP_PNP_DHCP=y
239# CONFIG_IP_PNP_BOOTP is not set
240# CONFIG_IP_PNP_RARP is not set
241# CONFIG_NET_IPIP is not set
242# CONFIG_NET_IPGRE is not set
243# CONFIG_ARPD is not set
244# CONFIG_SYN_COOKIES is not set
245# CONFIG_INET_AH is not set
246# CONFIG_INET_ESP is not set
247# CONFIG_INET_IPCOMP is not set
248# CONFIG_INET_TUNNEL is not set
249CONFIG_IP_TCPDIAG=y
250# CONFIG_IP_TCPDIAG_IPV6 is not set
251CONFIG_IPV6=m
252# CONFIG_IPV6_PRIVACY is not set
253# CONFIG_INET6_AH is not set
254# CONFIG_INET6_ESP is not set
255# CONFIG_INET6_IPCOMP is not set
256# CONFIG_INET6_TUNNEL is not set
257# CONFIG_IPV6_TUNNEL is not set
258# CONFIG_NETFILTER is not set
259
260#
261# SCTP Configuration (EXPERIMENTAL)
262#
263# CONFIG_IP_SCTP is not set
264# CONFIG_ATM is not set
265# CONFIG_BRIDGE is not set
266# CONFIG_VLAN_8021Q is not set
267# CONFIG_DECNET is not set
268# CONFIG_LLC2 is not set
269# CONFIG_IPX is not set
270# CONFIG_ATALK is not set
271# CONFIG_X25 is not set
272# CONFIG_LAPB is not set
273# CONFIG_NET_DIVERT is not set
274# CONFIG_ECONET is not set
275# CONFIG_WAN_ROUTER is not set
276
277#
278# QoS and/or fair queueing
279#
280# CONFIG_NET_SCHED is not set
281# CONFIG_NET_CLS_ROUTE is not set
282
283#
284# Network testing
285#
286# CONFIG_NET_PKTGEN is not set
287# CONFIG_NETPOLL is not set
288# CONFIG_NET_POLL_CONTROLLER is not set
289# CONFIG_HAMRADIO is not set
290# CONFIG_IRDA is not set
291# CONFIG_BT is not set
292CONFIG_NETDEVICES=y
293# CONFIG_DUMMY is not set
294# CONFIG_BONDING is not set
295# CONFIG_EQUALIZER is not set
296# CONFIG_TUN is not set
297
298#
299# Ethernet (10 or 100Mbit)
300#
301CONFIG_NET_ETHERNET=y
302# CONFIG_MII is not set
303# CONFIG_OAKNET is not set
304
305#
306# Ethernet (1000 Mbit)
307#
308
309#
310# Ethernet (10000 Mbit)
311#
312
313#
314# Token Ring devices
315#
316
317#
318# Wireless LAN (non-hamradio)
319#
320# CONFIG_NET_RADIO is not set
321
322#
323# Wan interfaces
324#
325# CONFIG_WAN is not set
326# CONFIG_PPP is not set
327# CONFIG_SLIP is not set
328# CONFIG_SHAPER is not set
329# CONFIG_NETCONSOLE is not set
330
331#
332# ISDN subsystem
333#
334# CONFIG_ISDN is not set
335
336#
337# Telephony Support
338#
339# CONFIG_PHONE is not set
340
341#
342# Input device support
343#
344# CONFIG_INPUT is not set
345
346#
347# Hardware I/O ports
348#
349# CONFIG_SERIO is not set
350# CONFIG_GAMEPORT is not set
351CONFIG_SOUND_GAMEPORT=y
352
353#
354# Character devices
355#
356# CONFIG_VT is not set
357# CONFIG_SERIAL_NONSTANDARD is not set
358
359#
360# Serial drivers
361#
362# CONFIG_SERIAL_8250 is not set
363
364#
365# Non-8250 serial port support
366#
367CONFIG_SERIAL_CORE=y
368CONFIG_SERIAL_CORE_CONSOLE=y
369CONFIG_SERIAL_CPM=y
370CONFIG_SERIAL_CPM_CONSOLE=y
371# CONFIG_SERIAL_CPM_SCC1 is not set
372# CONFIG_SERIAL_CPM_SCC2 is not set
373# CONFIG_SERIAL_CPM_SCC3 is not set
374# CONFIG_SERIAL_CPM_SCC4 is not set
375CONFIG_SERIAL_CPM_SMC1=y
376# CONFIG_SERIAL_CPM_SMC2 is not set
377CONFIG_UNIX98_PTYS=y
378# CONFIG_LEGACY_PTYS is not set
379
380#
381# IPMI
382#
383# CONFIG_IPMI_HANDLER is not set
384
385#
386# Watchdog Cards
387#
388# CONFIG_WATCHDOG is not set
389# CONFIG_NVRAM is not set
390# CONFIG_GEN_RTC is not set
391# CONFIG_DTLK is not set
392# CONFIG_R3964 is not set
393
394#
395# Ftape, the floppy tape device driver
396#
397# CONFIG_AGP is not set
398# CONFIG_DRM is not set
399# CONFIG_RAW_DRIVER is not set
400
401#
402# TPM devices
403#
404
405#
406# I2C support
407#
408# CONFIG_I2C is not set
409
410#
411# Dallas's 1-wire bus
412#
413# CONFIG_W1 is not set
414
415#
416# Misc devices
417#
418
419#
420# Multimedia devices
421#
422# CONFIG_VIDEO_DEV is not set
423
424#
425# Digital Video Broadcasting Devices
426#
427# CONFIG_DVB is not set
428
429#
430# Graphics support
431#
432# CONFIG_FB is not set
433
434#
435# Sound
436#
437# CONFIG_SOUND is not set
438
439#
440# USB support
441#
442# CONFIG_USB_ARCH_HAS_HCD is not set
443# CONFIG_USB_ARCH_HAS_OHCI is not set
444
445#
446# USB Gadget Support
447#
448# CONFIG_USB_GADGET is not set
449
450#
451# MMC/SD Card support
452#
453# CONFIG_MMC is not set
454
455#
456# InfiniBand support
457#
458# CONFIG_INFINIBAND is not set
459
460#
461# File systems
462#
463# CONFIG_EXT2_FS is not set
464CONFIG_EXT3_FS=y
465# CONFIG_EXT3_FS_XATTR is not set
466CONFIG_JBD=y
467# CONFIG_JBD_DEBUG is not set
468# CONFIG_REISERFS_FS is not set
469# CONFIG_JFS_FS is not set
470
471#
472# XFS support
473#
474# CONFIG_XFS_FS is not set
475# CONFIG_MINIX_FS is not set
476# CONFIG_ROMFS_FS is not set
477# CONFIG_QUOTA is not set
478# CONFIG_DNOTIFY is not set
479# CONFIG_AUTOFS_FS is not set
480# CONFIG_AUTOFS4_FS is not set
481
482#
483# CD-ROM/DVD Filesystems
484#
485# CONFIG_ISO9660_FS is not set
486# CONFIG_UDF_FS is not set
487
488#
489# DOS/FAT/NT Filesystems
490#
491# CONFIG_MSDOS_FS is not set
492# CONFIG_VFAT_FS is not set
493# CONFIG_NTFS_FS is not set
494
495#
496# Pseudo filesystems
497#
498CONFIG_PROC_FS=y
499CONFIG_PROC_KCORE=y
500CONFIG_SYSFS=y
501# CONFIG_DEVFS_FS is not set
502# CONFIG_DEVPTS_FS_XATTR is not set
503# CONFIG_TMPFS is not set
504# CONFIG_HUGETLBFS is not set
505# CONFIG_HUGETLB_PAGE is not set
506CONFIG_RAMFS=y
507
508#
509# Miscellaneous filesystems
510#
511# CONFIG_ADFS_FS is not set
512# CONFIG_AFFS_FS is not set
513# CONFIG_HFS_FS is not set
514# CONFIG_HFSPLUS_FS is not set
515# CONFIG_BEFS_FS is not set
516# CONFIG_BFS_FS is not set
517# CONFIG_EFS_FS is not set
518# CONFIG_CRAMFS is not set
519# CONFIG_VXFS_FS is not set
520# CONFIG_HPFS_FS is not set
521# CONFIG_QNX4FS_FS is not set
522# CONFIG_SYSV_FS is not set
523# CONFIG_UFS_FS is not set
524
525#
526# Network File Systems
527#
528CONFIG_NFS_FS=y
529CONFIG_NFS_V3=y
530CONFIG_NFS_V4=y
531# CONFIG_NFS_DIRECTIO is not set
532# CONFIG_NFSD is not set
533CONFIG_ROOT_NFS=y
534CONFIG_LOCKD=y
535CONFIG_LOCKD_V4=y
536CONFIG_SUNRPC=y
537CONFIG_SUNRPC_GSS=y
538CONFIG_RPCSEC_GSS_KRB5=y
539# CONFIG_RPCSEC_GSS_SPKM3 is not set
540# CONFIG_SMB_FS is not set
541# CONFIG_CIFS is not set
542# CONFIG_NCP_FS is not set
543# CONFIG_CODA_FS is not set
544# CONFIG_AFS_FS is not set
545
546#
547# Partition Types
548#
549# CONFIG_PARTITION_ADVANCED is not set
550CONFIG_MSDOS_PARTITION=y
551
552#
553# Native Language Support
554#
555# CONFIG_NLS is not set
556
557#
558# MPC8xx CPM Options
559#
560CONFIG_SCC_ENET=y
561CONFIG_SCC1_ENET=y
562# CONFIG_SCC2_ENET is not set
563# CONFIG_SCC3_ENET is not set
564# CONFIG_FEC_ENET is not set
565# CONFIG_ENET_BIG_BUFFERS is not set
566
567#
568# Generic MPC8xx Options
569#
570# CONFIG_8xx_COPYBACK is not set
571# CONFIG_8xx_CPU6 is not set
572CONFIG_NO_UCODE_PATCH=y
573# CONFIG_USB_SOF_UCODE_PATCH is not set
574# CONFIG_I2C_SPI_UCODE_PATCH is not set
575# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set
576
577#
578# Library routines
579#
580# CONFIG_CRC_CCITT is not set
581# CONFIG_CRC32 is not set
582# CONFIG_LIBCRC32C is not set
583
584#
585# Profiling support
586#
587# CONFIG_PROFILING is not set
588
589#
590# Kernel hacking
591#
592# CONFIG_PRINTK_TIME is not set
593# CONFIG_DEBUG_KERNEL is not set
594CONFIG_LOG_BUF_SHIFT=14
595
596#
597# Security options
598#
599# CONFIG_KEYS is not set
600# CONFIG_SECURITY is not set
601
602#
603# Cryptographic options
604#
605CONFIG_CRYPTO=y
606# CONFIG_CRYPTO_HMAC is not set
607# CONFIG_CRYPTO_NULL is not set
608# CONFIG_CRYPTO_MD4 is not set
609CONFIG_CRYPTO_MD5=y
610# CONFIG_CRYPTO_SHA1 is not set
611# CONFIG_CRYPTO_SHA256 is not set
612# CONFIG_CRYPTO_SHA512 is not set
613# CONFIG_CRYPTO_WP512 is not set
614# CONFIG_CRYPTO_TGR192 is not set
615CONFIG_CRYPTO_DES=y
616# CONFIG_CRYPTO_BLOWFISH is not set
617# CONFIG_CRYPTO_TWOFISH is not set
618# CONFIG_CRYPTO_SERPENT is not set
619# CONFIG_CRYPTO_AES is not set
620# CONFIG_CRYPTO_CAST5 is not set
621# CONFIG_CRYPTO_CAST6 is not set
622# CONFIG_CRYPTO_TEA is not set
623# CONFIG_CRYPTO_ARC4 is not set
624# CONFIG_CRYPTO_KHAZAD is not set
625# CONFIG_CRYPTO_ANUBIS is not set
626# CONFIG_CRYPTO_DEFLATE is not set
627# CONFIG_CRYPTO_MICHAEL_MIC is not set
628# CONFIG_CRYPTO_CRC32C is not set
629# CONFIG_CRYPTO_TEST is not set
630
631#
632# Hardware crypto devices
633#
diff --git a/arch/ppc/configs/mpc885ads_defconfig b/arch/ppc/configs/mpc885ads_defconfig
deleted file mode 100644
index 016f94d9325f..000000000000
--- a/arch/ppc/configs/mpc885ads_defconfig
+++ /dev/null
@@ -1,622 +0,0 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.12-rc6
4# Thu Jun 9 21:17:29 2005
5#
6CONFIG_MMU=y
7CONFIG_GENERIC_HARDIRQS=y
8CONFIG_RWSEM_XCHGADD_ALGORITHM=y
9CONFIG_GENERIC_CALIBRATE_DELAY=y
10CONFIG_HAVE_DEC_LOCK=y
11CONFIG_PPC=y
12CONFIG_PPC32=y
13CONFIG_GENERIC_NVRAM=y
14CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
15
16#
17# Code maturity level options
18#
19CONFIG_EXPERIMENTAL=y
20# CONFIG_CLEAN_COMPILE is not set
21CONFIG_BROKEN=y
22CONFIG_BROKEN_ON_SMP=y
23CONFIG_INIT_ENV_ARG_LIMIT=32
24
25#
26# General setup
27#
28CONFIG_LOCALVERSION=""
29# CONFIG_SWAP is not set
30CONFIG_SYSVIPC=y
31# CONFIG_POSIX_MQUEUE is not set
32# CONFIG_BSD_PROCESS_ACCT is not set
33CONFIG_SYSCTL=y
34# CONFIG_AUDIT is not set
35CONFIG_HOTPLUG=y
36CONFIG_KOBJECT_UEVENT=y
37# CONFIG_IKCONFIG is not set
38CONFIG_EMBEDDED=y
39# CONFIG_KALLSYMS is not set
40CONFIG_PRINTK=y
41CONFIG_BUG=y
42CONFIG_BASE_FULL=y
43CONFIG_FUTEX=y
44# CONFIG_EPOLL is not set
45# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
46CONFIG_SHMEM=y
47CONFIG_CC_ALIGN_FUNCTIONS=0
48CONFIG_CC_ALIGN_LABELS=0
49CONFIG_CC_ALIGN_LOOPS=0
50CONFIG_CC_ALIGN_JUMPS=0
51# CONFIG_TINY_SHMEM is not set
52CONFIG_BASE_SMALL=0
53
54#
55# Loadable module support
56#
57# CONFIG_MODULES is not set
58
59#
60# Processor
61#
62# CONFIG_6xx is not set
63# CONFIG_40x is not set
64# CONFIG_44x is not set
65# CONFIG_POWER3 is not set
66# CONFIG_POWER4 is not set
67CONFIG_8xx=y
68# CONFIG_E500 is not set
69# CONFIG_MATH_EMULATION is not set
70# CONFIG_CPU_FREQ is not set
71CONFIG_EMBEDDEDBOOT=y
72# CONFIG_PM is not set
73CONFIG_NOT_COHERENT_CACHE=y
74
75#
76# Platform options
77#
78# CONFIG_RPXLITE is not set
79# CONFIG_RPXCLASSIC is not set
80# CONFIG_BSEIP is not set
81# CONFIG_FADS is not set
82CONFIG_MPC885ADS=y
83# CONFIG_TQM823L is not set
84# CONFIG_TQM850L is not set
85# CONFIG_TQM855L is not set
86# CONFIG_TQM860L is not set
87# CONFIG_FPS850L is not set
88# CONFIG_SPD823TS is not set
89# CONFIG_IVMS8 is not set
90# CONFIG_IVML24 is not set
91# CONFIG_SM850 is not set
92# CONFIG_HERMES_PRO is not set
93# CONFIG_IP860 is not set
94# CONFIG_LWMON is not set
95# CONFIG_PCU_E is not set
96# CONFIG_CCM is not set
97# CONFIG_LANTEC is not set
98# CONFIG_MBX is not set
99# CONFIG_WINCEPT is not set
100# CONFIG_SMP is not set
101# CONFIG_PREEMPT is not set
102# CONFIG_HIGHMEM is not set
103CONFIG_BINFMT_ELF=y
104# CONFIG_BINFMT_MISC is not set
105# CONFIG_CMDLINE_BOOL is not set
106CONFIG_ISA_DMA_API=y
107
108#
109# Bus options
110#
111# CONFIG_PCI is not set
112# CONFIG_PCI_DOMAINS is not set
113# CONFIG_PCI_QSPAN is not set
114
115#
116# PCCARD (PCMCIA/CardBus) support
117#
118# CONFIG_PCCARD is not set
119
120#
121# Advanced setup
122#
123# CONFIG_ADVANCED_OPTIONS is not set
124
125#
126# Default settings for advanced configuration options are used
127#
128CONFIG_HIGHMEM_START=0xfe000000
129CONFIG_LOWMEM_SIZE=0x30000000
130CONFIG_KERNEL_START=0xc0000000
131CONFIG_TASK_SIZE=0x80000000
132CONFIG_CONSISTENT_START=0xff100000
133CONFIG_CONSISTENT_SIZE=0x00200000
134CONFIG_BOOT_LOAD=0x00400000
135
136#
137# Device Drivers
138#
139
140#
141# Generic Driver Options
142#
143CONFIG_STANDALONE=y
144CONFIG_PREVENT_FIRMWARE_BUILD=y
145# CONFIG_FW_LOADER is not set
146
147#
148# Memory Technology Devices (MTD)
149#
150# CONFIG_MTD is not set
151
152#
153# Parallel port support
154#
155# CONFIG_PARPORT is not set
156
157#
158# Plug and Play support
159#
160
161#
162# Block devices
163#
164# CONFIG_BLK_DEV_FD is not set
165# CONFIG_BLK_DEV_COW_COMMON is not set
166# CONFIG_BLK_DEV_LOOP is not set
167# CONFIG_BLK_DEV_NBD is not set
168# CONFIG_BLK_DEV_RAM is not set
169CONFIG_BLK_DEV_RAM_COUNT=16
170CONFIG_INITRAMFS_SOURCE=""
171# CONFIG_LBD is not set
172# CONFIG_CDROM_PKTCDVD is not set
173
174#
175# IO Schedulers
176#
177CONFIG_IOSCHED_NOOP=y
178# CONFIG_IOSCHED_AS is not set
179# CONFIG_IOSCHED_DEADLINE is not set
180# CONFIG_IOSCHED_CFQ is not set
181# CONFIG_ATA_OVER_ETH is not set
182
183#
184# ATA/ATAPI/MFM/RLL support
185#
186# CONFIG_IDE is not set
187
188#
189# SCSI device support
190#
191# CONFIG_SCSI is not set
192
193#
194# Multi-device support (RAID and LVM)
195#
196# CONFIG_MD is not set
197
198#
199# Fusion MPT device support
200#
201
202#
203# IEEE 1394 (FireWire) support
204#
205# CONFIG_IEEE1394 is not set
206
207#
208# I2O device support
209#
210
211#
212# Macintosh device drivers
213#
214
215#
216# Networking support
217#
218CONFIG_NET=y
219
220#
221# Networking options
222#
223CONFIG_PACKET=y
224# CONFIG_PACKET_MMAP is not set
225CONFIG_UNIX=y
226# CONFIG_NET_KEY is not set
227CONFIG_INET=y
228# CONFIG_IP_MULTICAST is not set
229# CONFIG_IP_ADVANCED_ROUTER is not set
230CONFIG_IP_PNP=y
231CONFIG_IP_PNP_DHCP=y
232CONFIG_IP_PNP_BOOTP=y
233# CONFIG_IP_PNP_RARP is not set
234# CONFIG_NET_IPIP is not set
235# CONFIG_NET_IPGRE is not set
236# CONFIG_ARPD is not set
237# CONFIG_SYN_COOKIES is not set
238# CONFIG_INET_AH is not set
239# CONFIG_INET_ESP is not set
240# CONFIG_INET_IPCOMP is not set
241# CONFIG_INET_TUNNEL is not set
242CONFIG_IP_TCPDIAG=y
243# CONFIG_IP_TCPDIAG_IPV6 is not set
244# CONFIG_IPV6 is not set
245# CONFIG_NETFILTER is not set
246
247#
248# SCTP Configuration (EXPERIMENTAL)
249#
250# CONFIG_IP_SCTP is not set
251# CONFIG_ATM is not set
252# CONFIG_BRIDGE is not set
253# CONFIG_VLAN_8021Q is not set
254# CONFIG_DECNET is not set
255# CONFIG_LLC2 is not set
256# CONFIG_IPX is not set
257# CONFIG_ATALK is not set
258# CONFIG_X25 is not set
259# CONFIG_LAPB is not set
260# CONFIG_NET_DIVERT is not set
261# CONFIG_ECONET is not set
262# CONFIG_WAN_ROUTER is not set
263
264#
265# QoS and/or fair queueing
266#
267# CONFIG_NET_SCHED is not set
268# CONFIG_NET_CLS_ROUTE is not set
269
270#
271# Network testing
272#
273# CONFIG_NET_PKTGEN is not set
274# CONFIG_NETPOLL is not set
275# CONFIG_NET_POLL_CONTROLLER is not set
276# CONFIG_HAMRADIO is not set
277# CONFIG_IRDA is not set
278# CONFIG_BT is not set
279CONFIG_NETDEVICES=y
280# CONFIG_DUMMY is not set
281# CONFIG_BONDING is not set
282# CONFIG_EQUALIZER is not set
283# CONFIG_TUN is not set
284
285#
286# Ethernet (10 or 100Mbit)
287#
288CONFIG_NET_ETHERNET=y
289CONFIG_MII=y
290# CONFIG_OAKNET is not set
291
292#
293# Ethernet (1000 Mbit)
294#
295
296#
297# Ethernet (10000 Mbit)
298#
299
300#
301# Token Ring devices
302#
303
304#
305# Wireless LAN (non-hamradio)
306#
307# CONFIG_NET_RADIO is not set
308
309#
310# Wan interfaces
311#
312# CONFIG_WAN is not set
313CONFIG_PPP=y
314# CONFIG_PPP_MULTILINK is not set
315# CONFIG_PPP_FILTER is not set
316CONFIG_PPP_ASYNC=y
317CONFIG_PPP_SYNC_TTY=y
318CONFIG_PPP_DEFLATE=y
319# CONFIG_PPP_BSDCOMP is not set
320# CONFIG_PPPOE is not set
321# CONFIG_SLIP is not set
322# CONFIG_SHAPER is not set
323# CONFIG_NETCONSOLE is not set
324
325#
326# ISDN subsystem
327#
328# CONFIG_ISDN is not set
329
330#
331# Telephony Support
332#
333# CONFIG_PHONE is not set
334
335#
336# Input device support
337#
338# CONFIG_INPUT is not set
339
340#
341# Hardware I/O ports
342#
343# CONFIG_SERIO is not set
344# CONFIG_GAMEPORT is not set
345
346#
347# Character devices
348#
349# CONFIG_VT is not set
350# CONFIG_SERIAL_NONSTANDARD is not set
351
352#
353# Serial drivers
354#
355# CONFIG_SERIAL_8250 is not set
356
357#
358# Non-8250 serial port support
359#
360CONFIG_SERIAL_CORE=y
361CONFIG_SERIAL_CORE_CONSOLE=y
362CONFIG_SERIAL_CPM=y
363CONFIG_SERIAL_CPM_CONSOLE=y
364# CONFIG_SERIAL_CPM_SCC1 is not set
365# CONFIG_SERIAL_CPM_SCC2 is not set
366# CONFIG_SERIAL_CPM_SCC3 is not set
367# CONFIG_SERIAL_CPM_SCC4 is not set
368CONFIG_SERIAL_CPM_SMC1=y
369CONFIG_SERIAL_CPM_SMC2=y
370CONFIG_UNIX98_PTYS=y
371# CONFIG_LEGACY_PTYS is not set
372
373#
374# IPMI
375#
376# CONFIG_IPMI_HANDLER is not set
377
378#
379# Watchdog Cards
380#
381# CONFIG_WATCHDOG is not set
382# CONFIG_NVRAM is not set
383# CONFIG_GEN_RTC is not set
384# CONFIG_DTLK is not set
385# CONFIG_R3964 is not set
386
387#
388# Ftape, the floppy tape device driver
389#
390# CONFIG_AGP is not set
391# CONFIG_DRM is not set
392# CONFIG_RAW_DRIVER is not set
393
394#
395# TPM devices
396#
397
398#
399# I2C support
400#
401# CONFIG_I2C is not set
402
403#
404# Dallas's 1-wire bus
405#
406# CONFIG_W1 is not set
407
408#
409# Misc devices
410#
411
412#
413# Multimedia devices
414#
415# CONFIG_VIDEO_DEV is not set
416
417#
418# Digital Video Broadcasting Devices
419#
420# CONFIG_DVB is not set
421
422#
423# Graphics support
424#
425# CONFIG_FB is not set
426
427#
428# Sound
429#
430# CONFIG_SOUND is not set
431
432#
433# USB support
434#
435# CONFIG_USB_ARCH_HAS_HCD is not set
436# CONFIG_USB_ARCH_HAS_OHCI is not set
437
438#
439# USB Gadget Support
440#
441# CONFIG_USB_GADGET is not set
442
443#
444# MMC/SD Card support
445#
446# CONFIG_MMC is not set
447
448#
449# InfiniBand support
450#
451# CONFIG_INFINIBAND is not set
452
453#
454# File systems
455#
456CONFIG_EXT2_FS=y
457CONFIG_EXT2_FS_XATTR=y
458# CONFIG_EXT2_FS_POSIX_ACL is not set
459# CONFIG_EXT2_FS_SECURITY is not set
460CONFIG_EXT3_FS=y
461CONFIG_EXT3_FS_XATTR=y
462# CONFIG_EXT3_FS_POSIX_ACL is not set
463# CONFIG_EXT3_FS_SECURITY is not set
464CONFIG_JBD=y
465# CONFIG_JBD_DEBUG is not set
466CONFIG_FS_MBCACHE=y
467# CONFIG_REISERFS_FS is not set
468# CONFIG_JFS_FS is not set
469
470#
471# XFS support
472#
473# CONFIG_XFS_FS is not set
474# CONFIG_MINIX_FS is not set
475# CONFIG_ROMFS_FS is not set
476# CONFIG_QUOTA is not set
477# CONFIG_DNOTIFY is not set
478# CONFIG_AUTOFS_FS is not set
479# CONFIG_AUTOFS4_FS is not set
480
481#
482# CD-ROM/DVD Filesystems
483#
484# CONFIG_ISO9660_FS is not set
485# CONFIG_UDF_FS is not set
486
487#
488# DOS/FAT/NT Filesystems
489#
490# CONFIG_MSDOS_FS is not set
491# CONFIG_VFAT_FS is not set
492# CONFIG_NTFS_FS is not set
493
494#
495# Pseudo filesystems
496#
497CONFIG_PROC_FS=y
498# CONFIG_PROC_KCORE is not set
499CONFIG_SYSFS=y
500# CONFIG_DEVFS_FS is not set
501# CONFIG_DEVPTS_FS_XATTR is not set
502# CONFIG_TMPFS is not set
503# CONFIG_HUGETLBFS is not set
504# CONFIG_HUGETLB_PAGE is not set
505CONFIG_RAMFS=y
506
507#
508# Miscellaneous filesystems
509#
510# CONFIG_ADFS_FS is not set
511# CONFIG_AFFS_FS is not set
512# CONFIG_HFS_FS is not set
513# CONFIG_HFSPLUS_FS is not set
514# CONFIG_BEFS_FS is not set
515# CONFIG_BFS_FS is not set
516# CONFIG_EFS_FS is not set
517# CONFIG_CRAMFS is not set
518# CONFIG_VXFS_FS is not set
519# CONFIG_HPFS_FS is not set
520# CONFIG_QNX4FS_FS is not set
521# CONFIG_SYSV_FS is not set
522# CONFIG_UFS_FS is not set
523
524#
525# Network File Systems
526#
527CONFIG_NFS_FS=y
528# CONFIG_NFS_V3 is not set
529# CONFIG_NFS_V4 is not set
530# CONFIG_NFS_DIRECTIO is not set
531# CONFIG_NFSD is not set
532CONFIG_ROOT_NFS=y
533CONFIG_LOCKD=y
534CONFIG_SUNRPC=y
535# CONFIG_RPCSEC_GSS_KRB5 is not set
536# CONFIG_RPCSEC_GSS_SPKM3 is not set
537# CONFIG_SMB_FS is not set
538# CONFIG_CIFS is not set
539# CONFIG_NCP_FS is not set
540# CONFIG_CODA_FS is not set
541# CONFIG_AFS_FS is not set
542
543#
544# Partition Types
545#
546CONFIG_PARTITION_ADVANCED=y
547# CONFIG_ACORN_PARTITION is not set
548# CONFIG_OSF_PARTITION is not set
549# CONFIG_AMIGA_PARTITION is not set
550# CONFIG_ATARI_PARTITION is not set
551# CONFIG_MAC_PARTITION is not set
552CONFIG_MSDOS_PARTITION=y
553# CONFIG_BSD_DISKLABEL is not set
554# CONFIG_MINIX_SUBPARTITION is not set
555# CONFIG_SOLARIS_X86_PARTITION is not set
556# CONFIG_UNIXWARE_DISKLABEL is not set
557# CONFIG_LDM_PARTITION is not set
558# CONFIG_SGI_PARTITION is not set
559# CONFIG_ULTRIX_PARTITION is not set
560# CONFIG_SUN_PARTITION is not set
561# CONFIG_EFI_PARTITION is not set
562
563#
564# Native Language Support
565#
566# CONFIG_NLS is not set
567
568#
569# MPC8xx CPM Options
570#
571CONFIG_SCC_ENET=y
572# CONFIG_SCC1_ENET is not set
573# CONFIG_SCC2_ENET is not set
574CONFIG_SCC3_ENET=y
575# CONFIG_FEC_ENET is not set
576# CONFIG_ENET_BIG_BUFFERS is not set
577
578#
579# Generic MPC8xx Options
580#
581CONFIG_8xx_COPYBACK=y
582CONFIG_8xx_CPU6=y
583CONFIG_NO_UCODE_PATCH=y
584# CONFIG_USB_SOF_UCODE_PATCH is not set
585# CONFIG_I2C_SPI_UCODE_PATCH is not set
586# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set
587
588#
589# Library routines
590#
591CONFIG_CRC_CCITT=y
592# CONFIG_CRC32 is not set
593# CONFIG_LIBCRC32C is not set
594CONFIG_ZLIB_INFLATE=y
595CONFIG_ZLIB_DEFLATE=y
596
597#
598# Profiling support
599#
600# CONFIG_PROFILING is not set
601
602#
603# Kernel hacking
604#
605# CONFIG_PRINTK_TIME is not set
606# CONFIG_DEBUG_KERNEL is not set
607CONFIG_LOG_BUF_SHIFT=14
608
609#
610# Security options
611#
612# CONFIG_KEYS is not set
613# CONFIG_SECURITY is not set
614
615#
616# Cryptographic options
617#
618# CONFIG_CRYPTO is not set
619
620#
621# Hardware crypto devices
622#
diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c
index 2ba659f401be..d9036ef0b658 100644
--- a/arch/ppc/kernel/ppc_ksyms.c
+++ b/arch/ppc/kernel/ppc_ksyms.c
@@ -88,6 +88,7 @@ EXPORT_SYMBOL(strncpy);
88EXPORT_SYMBOL(strcat); 88EXPORT_SYMBOL(strcat);
89EXPORT_SYMBOL(strlen); 89EXPORT_SYMBOL(strlen);
90EXPORT_SYMBOL(strcmp); 90EXPORT_SYMBOL(strcmp);
91EXPORT_SYMBOL(strncmp);
91 92
92EXPORT_SYMBOL(csum_partial); 93EXPORT_SYMBOL(csum_partial);
93EXPORT_SYMBOL(csum_partial_copy_generic); 94EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/ppc/lib/string.S b/arch/ppc/lib/string.S
index 84ed33ab4c2d..927253bfc826 100644
--- a/arch/ppc/lib/string.S
+++ b/arch/ppc/lib/string.S
@@ -121,6 +121,20 @@ _GLOBAL(strcmp)
 	beq	1b
 	blr
 
+_GLOBAL(strncmp)
+	PPC_LCMPI r5,0
+	beqlr
+	mtctr	r5
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r3,1(r5)
+	cmpwi	1,r3,0
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3
+	beqlr	1
+	bdnzt	eq,1b
+	blr
+
 _GLOBAL(strlen)
 	addi	r4,r3,-1
 1:	lbzu	r0,1(r4)
diff --git a/arch/ppc/platforms/Makefile b/arch/ppc/platforms/Makefile
index 40f53fbe6d35..6260231987cb 100644
--- a/arch/ppc/platforms/Makefile
+++ b/arch/ppc/platforms/Makefile
@@ -4,7 +4,6 @@
 
 obj-$(CONFIG_PPC_PREP) += prep_pci.o prep_setup.o
 obj-$(CONFIG_PREP_RESIDUAL) += residual.o
-obj-$(CONFIG_PQ2ADS) += pq2ads.o
 obj-$(CONFIG_TQM8260) += tqm8260_setup.o
 obj-$(CONFIG_CPCI690) += cpci690.o
 obj-$(CONFIG_EV64260) += ev64260.o
@@ -24,6 +23,3 @@ obj-$(CONFIG_SBC82xx) += sbc82xx.o
 obj-$(CONFIG_SPRUCE) += spruce.o
 obj-$(CONFIG_LITE5200) += lite5200.o
 obj-$(CONFIG_EV64360) += ev64360.o
-obj-$(CONFIG_MPC86XADS) += mpc866ads_setup.o
-obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o
-obj-$(CONFIG_ADS8272) += mpc8272ads_setup.o
diff --git a/arch/ppc/platforms/fads.h b/arch/ppc/platforms/fads.h
index 2f9f0f60e3f7..5219366667b3 100644
--- a/arch/ppc/platforms/fads.h
+++ b/arch/ppc/platforms/fads.h
@@ -22,29 +22,6 @@
 
 #include <asm/ppcboot.h>
 
-#if defined(CONFIG_MPC86XADS)
-
-#define BOARD_CHIP_NAME "MPC86X"
-
-/* U-Boot maps BCSR to 0xff080000 */
-#define BCSR_ADDR ((uint)0xff080000)
-
-/* MPC86XADS has one more CPLD and an additional BCSR.
- */
-#define CFG_PHYDEV_ADDR ((uint)0xff0a0000)
-#define BCSR5 ((uint)(CFG_PHYDEV_ADDR + 0x300))
-
-#define BCSR5_T1_RST 0x10
-#define BCSR5_ATM155_RST 0x08
-#define BCSR5_ATM25_RST 0x04
-#define BCSR5_MII1_EN 0x02
-#define BCSR5_MII1_RST 0x01
-
-/* There is no PHY link change interrupt */
-#define PHY_INTERRUPT (-1)
-
-#else /* FADS */
-
 /* Memory map is configured by the PROM startup.
  * I tried to follow the FADS manual, although the startup PROM
  * dictates this and we simply have to move some of the physical
@@ -55,8 +32,6 @@
 /* PHY link change interrupt */
 #define PHY_INTERRUPT SIU_IRQ2
 
-#endif /* CONFIG_MPC86XADS */
-
 #define BCSR_SIZE ((uint)(64 * 1024))
 #define BCSR0 ((uint)(BCSR_ADDR + 0x00))
 #define BCSR1 ((uint)(BCSR_ADDR + 0x04))
diff --git a/arch/ppc/platforms/mpc8272ads_setup.c b/arch/ppc/platforms/mpc8272ads_setup.c
deleted file mode 100644
index 47f4b38edb5f..000000000000
--- a/arch/ppc/platforms/mpc8272ads_setup.c
+++ /dev/null
@@ -1,367 +0,0 @@
1/*
2 * arch/ppc/platforms/mpc8272ads_setup.c
3 *
4 * MPC82xx Board-specific PlatformDevice descriptions
5 *
6 * 2005 (c) MontaVista Software, Inc.
7 * Vitaly Bordug <vbordug@ru.mvista.com>
8 *
9 * This file is licensed under the terms of the GNU General Public License
10 * version 2. This program is licensed "as is" without any warranty of any
11 * kind, whether express or implied.
12 */
13
14
15#include <linux/init.h>
16#include <linux/module.h>
17#include <linux/device.h>
18#include <linux/ioport.h>
19#include <linux/fs_enet_pd.h>
20#include <linux/platform_device.h>
21#include <linux/phy.h>
22
23#include <asm/io.h>
24#include <asm/mpc8260.h>
25#include <asm/cpm2.h>
26#include <asm/immap_cpm2.h>
27#include <asm/irq.h>
28#include <asm/ppc_sys.h>
29#include <asm/ppcboot.h>
30#include <linux/fs_uart_pd.h>
31
32#include "pq2ads_pd.h"
33
34static void init_fcc1_ioports(struct fs_platform_info*);
35static void init_fcc2_ioports(struct fs_platform_info*);
36static void init_scc1_uart_ioports(struct fs_uart_platform_info*);
37static void init_scc4_uart_ioports(struct fs_uart_platform_info*);
38
39static struct fs_uart_platform_info mpc8272_uart_pdata[] = {
40 [fsid_scc1_uart] = {
41 .init_ioports = init_scc1_uart_ioports,
42 .fs_no = fsid_scc1_uart,
43 .brg = 1,
44 .tx_num_fifo = 4,
45 .tx_buf_size = 32,
46 .rx_num_fifo = 4,
47 .rx_buf_size = 32,
48 },
49 [fsid_scc4_uart] = {
50 .init_ioports = init_scc4_uart_ioports,
51 .fs_no = fsid_scc4_uart,
52 .brg = 4,
53 .tx_num_fifo = 4,
54 .tx_buf_size = 32,
55 .rx_num_fifo = 4,
56 .rx_buf_size = 32,
57 },
58};
59
60static struct fs_mii_bb_platform_info m82xx_mii_bb_pdata = {
61 .mdio_dat.bit = 18,
62 .mdio_dir.bit = 18,
63 .mdc_dat.bit = 19,
64 .delay = 1,
65};
66
67static struct fs_platform_info mpc82xx_enet_pdata[] = {
68 [fsid_fcc1] = {
69 .fs_no = fsid_fcc1,
70 .cp_page = CPM_CR_FCC1_PAGE,
71 .cp_block = CPM_CR_FCC1_SBLOCK,
72
73 .clk_trx = (PC_F1RXCLK | PC_F1TXCLK),
74 .clk_route = CMX1_CLK_ROUTE,
75 .clk_mask = CMX1_CLK_MASK,
76 .init_ioports = init_fcc1_ioports,
77
78 .mem_offset = FCC1_MEM_OFFSET,
79
80 .rx_ring = 32,
81 .tx_ring = 32,
82 .rx_copybreak = 240,
83 .use_napi = 0,
84 .napi_weight = 17,
85 .bus_id = "0:00",
86 },
87 [fsid_fcc2] = {
88 .fs_no = fsid_fcc2,
89 .cp_page = CPM_CR_FCC2_PAGE,
90 .cp_block = CPM_CR_FCC2_SBLOCK,
91 .clk_trx = (PC_F2RXCLK | PC_F2TXCLK),
92 .clk_route = CMX2_CLK_ROUTE,
93 .clk_mask = CMX2_CLK_MASK,
94 .init_ioports = init_fcc2_ioports,
95
96 .mem_offset = FCC2_MEM_OFFSET,
97
98 .rx_ring = 32,
99 .tx_ring = 32,
100 .rx_copybreak = 240,
101 .use_napi = 0,
102 .napi_weight = 17,
103 .bus_id = "0:03",
104 },
105};
106
107static void init_fcc1_ioports(struct fs_platform_info* pdata)
108{
109 struct io_port *io;
110 u32 tempval;
111 cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
112 u32 *bcsr = ioremap(BCSR_ADDR+4, sizeof(u32));
113
114 io = &immap->im_ioport;
115
116 /* Enable the PHY */
117 clrbits32(bcsr, BCSR1_FETHIEN);
118 setbits32(bcsr, BCSR1_FETH_RST);
119
120 /* FCC1 pins are on port A/C. */
121 /* Configure port A and C pins for FCC1 Ethernet. */
122
123 tempval = in_be32(&io->iop_pdira);
124 tempval &= ~PA1_DIRA0;
125 tempval |= PA1_DIRA1;
126 out_be32(&io->iop_pdira, tempval);
127
128 tempval = in_be32(&io->iop_psora);
129 tempval &= ~PA1_PSORA0;
130 tempval |= PA1_PSORA1;
131 out_be32(&io->iop_psora, tempval);
132
133 setbits32(&io->iop_ppara,PA1_DIRA0 | PA1_DIRA1);
134
135 /* Alter clocks */
136 tempval = PC_F1TXCLK|PC_F1RXCLK;
137
138 clrbits32(&io->iop_psorc, tempval);
139 clrbits32(&io->iop_pdirc, tempval);
140 setbits32(&io->iop_pparc, tempval);
141
142 clrbits32(&immap->im_cpmux.cmx_fcr, CMX1_CLK_MASK);
143 setbits32(&immap->im_cpmux.cmx_fcr, CMX1_CLK_ROUTE);
144 iounmap(bcsr);
145 iounmap(immap);
146}
147
148static void init_fcc2_ioports(struct fs_platform_info* pdata)
149{
150 cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
151 u32 *bcsr = ioremap(BCSR_ADDR+12, sizeof(u32));
152
153 struct io_port *io;
154 u32 tempval;
155
156 immap = cpm2_immr;
157
158 io = &immap->im_ioport;
159
160 /* Enable the PHY */
161 clrbits32(bcsr, BCSR3_FETHIEN2);
162 setbits32(bcsr, BCSR3_FETH2_RST);
163
164 /* FCC2 pins are on port B/C. */
165 /* Configure port B and C pins for FCC2 Ethernet. */
166
167 tempval = in_be32(&io->iop_pdirb);
168 tempval &= ~PB2_DIRB0;
169 tempval |= PB2_DIRB1;
170 out_be32(&io->iop_pdirb, tempval);
171
172 tempval = in_be32(&io->iop_psorb);
173 tempval &= ~PB2_PSORB0;
174 tempval |= PB2_PSORB1;
175 out_be32(&io->iop_psorb, tempval);
176
177 setbits32(&io->iop_pparb,PB2_DIRB0 | PB2_DIRB1);
178
179 tempval = PC_F2RXCLK|PC_F2TXCLK;
180
181 /* Alter clocks */
182 clrbits32(&io->iop_psorc,tempval);
183 clrbits32(&io->iop_pdirc,tempval);
184 setbits32(&io->iop_pparc,tempval);
185
186 clrbits32(&immap->im_cpmux.cmx_fcr, CMX2_CLK_MASK);
187 setbits32(&immap->im_cpmux.cmx_fcr, CMX2_CLK_ROUTE);
188
189 iounmap(bcsr);
190 iounmap(immap);
191}
192
193
194static void __init mpc8272ads_fixup_enet_pdata(struct platform_device *pdev,
195 int idx)
196{
197 bd_t* bi = (void*)__res;
198 int fs_no = fsid_fcc1+pdev->id-1;
199
200 if(fs_no >= ARRAY_SIZE(mpc82xx_enet_pdata)) {
201 return;
202 }
203
204 mpc82xx_enet_pdata[fs_no].dpram_offset=
205 (u32)cpm2_immr->im_dprambase;
206 mpc82xx_enet_pdata[fs_no].fcc_regs_c =
207 (u32)cpm2_immr->im_fcc_c;
208 memcpy(&mpc82xx_enet_pdata[fs_no].macaddr,bi->bi_enetaddr,6);
209
210 /* prevent duplicate MAC addresses */
211 if(fs_no == fsid_fcc2)
212 mpc82xx_enet_pdata[fs_no].macaddr[5] ^= 1;
213
214 pdev->dev.platform_data = &mpc82xx_enet_pdata[fs_no];
215}
216
217static void mpc8272ads_fixup_uart_pdata(struct platform_device *pdev,
218 int idx)
219{
220 bd_t *bd = (bd_t *) __res;
221 struct fs_uart_platform_info *pinfo;
222 int num = ARRAY_SIZE(mpc8272_uart_pdata);
223 int id = fs_uart_id_scc2fsid(idx);
224
225 /* no need to alter anything if console */
226 if ((id < num) && (!pdev->dev.platform_data)) {
227 pinfo = &mpc8272_uart_pdata[id];
228 pinfo->uart_clk = bd->bi_intfreq;
229 pdev->dev.platform_data = pinfo;
230 }
231}
232
233static void init_scc1_uart_ioports(struct fs_uart_platform_info* pdata)
234{
235 cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
236
237 /* SCC1 is only on port D */
238 setbits32(&immap->im_ioport.iop_ppard,0x00000003);
239 clrbits32(&immap->im_ioport.iop_psord,0x00000001);
240 setbits32(&immap->im_ioport.iop_psord,0x00000002);
241 clrbits32(&immap->im_ioport.iop_pdird,0x00000001);
242 setbits32(&immap->im_ioport.iop_pdird,0x00000002);
243
244 /* Wire BRG1 to SCC1 */
245 clrbits32(&immap->im_cpmux.cmx_scr,0x00ffffff);
246
247 iounmap(immap);
248}
249
250static void init_scc4_uart_ioports(struct fs_uart_platform_info* pdata)
251{
252 cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
253
254 setbits32(&immap->im_ioport.iop_ppard,0x00000600);
255 clrbits32(&immap->im_ioport.iop_psord,0x00000600);
256 clrbits32(&immap->im_ioport.iop_pdird,0x00000200);
257 setbits32(&immap->im_ioport.iop_pdird,0x00000400);
258
259 /* Wire BRG4 to SCC4 */
260 clrbits32(&immap->im_cpmux.cmx_scr,0x000000ff);
261 setbits32(&immap->im_cpmux.cmx_scr,0x0000001b);
262
263 iounmap(immap);
264}
265
266static void __init mpc8272ads_fixup_mdio_pdata(struct platform_device *pdev,
267 int idx)
268{
269 m82xx_mii_bb_pdata.irq[0] = PHY_INTERRUPT;
270 m82xx_mii_bb_pdata.irq[1] = PHY_POLL;
271 m82xx_mii_bb_pdata.irq[2] = PHY_POLL;
272 m82xx_mii_bb_pdata.irq[3] = PHY_INTERRUPT;
273 m82xx_mii_bb_pdata.irq[31] = PHY_POLL;
274
275
276 m82xx_mii_bb_pdata.mdio_dat.offset =
277 (u32)&cpm2_immr->im_ioport.iop_pdatc;
278
279 m82xx_mii_bb_pdata.mdio_dir.offset =
280 (u32)&cpm2_immr->im_ioport.iop_pdirc;
281
282 m82xx_mii_bb_pdata.mdc_dat.offset =
283 (u32)&cpm2_immr->im_ioport.iop_pdatc;
284
285
286 pdev->dev.platform_data = &m82xx_mii_bb_pdata;
287}
288
289static int mpc8272ads_platform_notify(struct device *dev)
290{
291 static const struct platform_notify_dev_map dev_map[] = {
292 {
293 .bus_id = "fsl-cpm-fcc",
294 .rtn = mpc8272ads_fixup_enet_pdata,
295 },
296 {
297 .bus_id = "fsl-cpm-scc:uart",
298 .rtn = mpc8272ads_fixup_uart_pdata,
299 },
300 {
301 .bus_id = "fsl-bb-mdio",
302 .rtn = mpc8272ads_fixup_mdio_pdata,
303 },
304 {
305 .bus_id = NULL
306 }
307 };
308 platform_notify_map(dev_map,dev);
309
310 return 0;
311
312}
313
314int __init mpc8272ads_init(void)
315{
316 printk(KERN_NOTICE "mpc8272ads: Init\n");
317
318 platform_notify = mpc8272ads_platform_notify;
319
320 ppc_sys_device_initfunc();
321
322 ppc_sys_device_disable_all();
323 ppc_sys_device_enable(MPC82xx_CPM_FCC1);
324 ppc_sys_device_enable(MPC82xx_CPM_FCC2);
325
326 /* to be ready for console, let's attach pdata here */
327#ifdef CONFIG_SERIAL_CPM_SCC1
328 ppc_sys_device_setfunc(MPC82xx_CPM_SCC1, PPC_SYS_FUNC_UART);
329 ppc_sys_device_enable(MPC82xx_CPM_SCC1);
330
331#endif
332
333#ifdef CONFIG_SERIAL_CPM_SCC4
334 ppc_sys_device_setfunc(MPC82xx_CPM_SCC4, PPC_SYS_FUNC_UART);
335 ppc_sys_device_enable(MPC82xx_CPM_SCC4);
336#endif
337
338 ppc_sys_device_enable(MPC82xx_MDIO_BB);
339
340 return 0;
341}
342
343/*
344 To avoid confusion, console selection is deliberately crude:
345 index 0 selects SCC1 and index 1 selects SCC4
346 */
347struct platform_device* early_uart_get_pdev(int index)
348{
349 bd_t *bd = (bd_t *) __res;
350 struct fs_uart_platform_info *pinfo;
351
352 struct platform_device* pdev = NULL;
353 if(index) { /*assume SCC4 here*/
354 pdev = &ppc_sys_platform_devices[MPC82xx_CPM_SCC4];
355 pinfo = &mpc8272_uart_pdata[fsid_scc4_uart];
356 } else { /*over SCC1*/
357 pdev = &ppc_sys_platform_devices[MPC82xx_CPM_SCC1];
358 pinfo = &mpc8272_uart_pdata[fsid_scc1_uart];
359 }
360
361 pinfo->uart_clk = bd->bi_intfreq;
362 pdev->dev.platform_data = pinfo;
363 ppc_sys_fixup_mem_resource(pdev, CPM_MAP_ADDR);
364 return NULL;
365}
366
367arch_initcall(mpc8272ads_init);
diff --git a/arch/ppc/platforms/mpc885ads.h b/arch/ppc/platforms/mpc885ads.h
deleted file mode 100644
index d3bbbb3c9a1f..000000000000
--- a/arch/ppc/platforms/mpc885ads.h
+++ /dev/null
@@ -1,93 +0,0 @@
1/*
2 * A collection of structures, addresses, and values associated with
3 * the Freescale MPC885ADS board.
4 * Copied from the FADS stuff.
5 *
6 * Author: MontaVista Software, Inc.
7 * source@mvista.com
8 *
9 * 2005 (c) MontaVista Software, Inc. This file is licensed under the
10 * terms of the GNU General Public License version 2. This program is licensed
11 * "as is" without any warranty of any kind, whether express or implied.
12 */
13
14#ifdef __KERNEL__
15#ifndef __ASM_MPC885ADS_H__
16#define __ASM_MPC885ADS_H__
17
18
19#include <asm/ppcboot.h>
20
21/* U-Boot maps BCSR to 0xff080000 */
22#define BCSR_ADDR ((uint)0xff080000)
23#define BCSR_SIZE ((uint)32)
24#define BCSR0 ((uint)(BCSR_ADDR + 0x00))
25#define BCSR1 ((uint)(BCSR_ADDR + 0x04))
26#define BCSR2 ((uint)(BCSR_ADDR + 0x08))
27#define BCSR3 ((uint)(BCSR_ADDR + 0x0c))
28#define BCSR4 ((uint)(BCSR_ADDR + 0x10))
29
30#define CFG_PHYDEV_ADDR ((uint)0xff0a0000)
31#define BCSR5 ((uint)(CFG_PHYDEV_ADDR + 0x300))
32
33#define IMAP_ADDR ((uint)0xff000000)
34#define IMAP_SIZE ((uint)(64 * 1024))
35
36#define PCMCIA_MEM_ADDR ((uint)0xff020000)
37#define PCMCIA_MEM_SIZE ((uint)(64 * 1024))
38
39/* Bits of interest in the BCSRs.
40 */
41#define BCSR1_ETHEN ((uint)0x20000000)
42#define BCSR1_IRDAEN ((uint)0x10000000)
43#define BCSR1_RS232EN_1 ((uint)0x01000000)
44#define BCSR1_PCCEN ((uint)0x00800000)
45#define BCSR1_PCCVCC0 ((uint)0x00400000)
46#define BCSR1_PCCVPP0 ((uint)0x00200000)
47#define BCSR1_PCCVPP1 ((uint)0x00100000)
48#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1)
49#define BCSR1_RS232EN_2 ((uint)0x00040000)
50#define BCSR1_PCCVCC1 ((uint)0x00010000)
51#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1)
52
53#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/
54#define BCSR4_USB_LO_SPD ((uint)0x04000000)
55#define BCSR4_USB_VCC ((uint)0x02000000)
56#define BCSR4_USB_FULL_SPD ((uint)0x00040000)
57#define BCSR4_USB_EN ((uint)0x00020000)
58
59#define BCSR5_MII2_EN 0x40
60#define BCSR5_MII2_RST 0x20
61#define BCSR5_T1_RST 0x10
62#define BCSR5_ATM155_RST 0x08
63#define BCSR5_ATM25_RST 0x04
64#define BCSR5_MII1_EN 0x02
65#define BCSR5_MII1_RST 0x01
66
67/* Interrupt level assignments */
68#define PHY_INTERRUPT SIU_IRQ7 /* PHY link change interrupt */
69#define SIU_INT_FEC1 SIU_LEVEL1 /* FEC1 interrupt */
70#define SIU_INT_FEC2 SIU_LEVEL3 /* FEC2 interrupt */
71#define FEC_INTERRUPT SIU_INT_FEC1 /* FEC interrupt */
72
73/* We don't use the 8259 */
74#define NR_8259_INTS 0
75
76/* CPM Ethernet through SCC3 */
77#define PA_ENET_RXD ((ushort)0x0040)
78#define PA_ENET_TXD ((ushort)0x0080)
79#define PE_ENET_TCLK ((uint)0x00004000)
80#define PE_ENET_RCLK ((uint)0x00008000)
81#define PE_ENET_TENA ((uint)0x00000010)
82#define PC_ENET_CLSN ((ushort)0x0400)
83#define PC_ENET_RENA ((ushort)0x0800)
84
85/* Control bits in the SICR to route TCLK (CLK5) and RCLK (CLK6) to
86 * SCC3. Also, make sure GR3 (bit 8) and SC3 (bit 9) are zero */
87#define SICR_ENET_MASK ((uint)0x00ff0000)
88#define SICR_ENET_CLKRT ((uint)0x002c0000)
89
90#define BOARD_CHIP_NAME "MPC885"
91
92#endif /* __ASM_MPC885ADS_H__ */
93#endif /* __KERNEL__ */
diff --git a/arch/ppc/platforms/mpc885ads_setup.c b/arch/ppc/platforms/mpc885ads_setup.c
deleted file mode 100644
index ba06cc08cdab..000000000000
--- a/arch/ppc/platforms/mpc885ads_setup.c
+++ /dev/null
@@ -1,476 +0,0 @@
1/*arch/ppc/platforms/mpc885ads_setup.c
2 *
3 * Platform setup for the Freescale mpc885ads board
4 *
5 * Vitaly Bordug <vbordug@ru.mvista.com>
6 *
7 * Copyright 2005 MontaVista Software Inc.
8 *
9 * This file is licensed under the terms of the GNU General Public License
10 * version 2. This program is licensed "as is" without any warranty of any
11 * kind, whether express or implied.
12 */
13
14#include <linux/init.h>
15#include <linux/module.h>
16#include <linux/param.h>
17#include <linux/string.h>
18#include <linux/ioport.h>
19#include <linux/device.h>
20
21#include <linux/fs_enet_pd.h>
22#include <linux/fs_uart_pd.h>
23#include <linux/mii.h>
24
25#include <asm/delay.h>
26#include <asm/io.h>
27#include <asm/machdep.h>
28#include <asm/page.h>
29#include <asm/processor.h>
30#include <asm/system.h>
31#include <asm/time.h>
32#include <asm/ppcboot.h>
33#include <asm/8xx_immap.h>
34#include <asm/cpm1.h>
35#include <asm/ppc_sys.h>
36
37extern unsigned char __res[];
38static void setup_smc1_ioports(struct fs_uart_platform_info*);
39static void setup_smc2_ioports(struct fs_uart_platform_info*);
40
41static struct fs_mii_fec_platform_info mpc8xx_mdio_fec_pdata;
42static void setup_fec1_ioports(struct fs_platform_info*);
43static void setup_fec2_ioports(struct fs_platform_info*);
44static void setup_scc3_ioports(struct fs_platform_info*);
45
46static struct fs_uart_platform_info mpc885_uart_pdata[] = {
47 [fsid_smc1_uart] = {
48 .brg = 1,
49 .fs_no = fsid_smc1_uart,
50 .init_ioports = setup_smc1_ioports,
51 .tx_num_fifo = 4,
52 .tx_buf_size = 32,
53 .rx_num_fifo = 4,
54 .rx_buf_size = 32,
55 },
56 [fsid_smc2_uart] = {
57 .brg = 2,
58 .fs_no = fsid_smc2_uart,
59 .init_ioports = setup_smc2_ioports,
60 .tx_num_fifo = 4,
61 .tx_buf_size = 32,
62 .rx_num_fifo = 4,
63 .rx_buf_size = 32,
64 },
65};
66
67static struct fs_platform_info mpc8xx_enet_pdata[] = {
68 [fsid_fec1] = {
69 .rx_ring = 128,
70 .tx_ring = 16,
71 .rx_copybreak = 240,
72
73 .use_napi = 1,
74 .napi_weight = 17,
75
76 .init_ioports = setup_fec1_ioports,
77
78 .bus_id = "0:00",
79 .has_phy = 1,
80 },
81 [fsid_fec2] = {
82 .rx_ring = 128,
83 .tx_ring = 16,
84 .rx_copybreak = 240,
85
86 .use_napi = 1,
87 .napi_weight = 17,
88
89 .init_ioports = setup_fec2_ioports,
90
91 .bus_id = "0:01",
92 .has_phy = 1,
93 },
94 [fsid_scc3] = {
95 .rx_ring = 64,
96 .tx_ring = 8,
97 .rx_copybreak = 240,
98
99 .use_napi = 1,
100 .napi_weight = 17,
101
102 .init_ioports = setup_scc3_ioports,
103#ifdef CONFIG_FIXED_MII_10_FDX
104 .bus_id = "fixed@100:1",
105#else
106 .bus_id = "0:02",
107 #endif
108 },
109};
110
111void __init board_init(void)
112{
113 cpm8xx_t *cp = cpmp;
114 unsigned int *bcsr_io;
115
116#ifdef CONFIG_FS_ENET
117 immap_t *immap = (immap_t *) IMAP_ADDR;
118#endif
119 bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
120
121 if (bcsr_io == NULL) {
122 printk(KERN_CRIT "Could not remap BCSR\n");
123 return;
124 }
125#ifdef CONFIG_SERIAL_CPM_SMC1
126 cp->cp_simode &= ~(0xe0000000 >> 17); /* brg1 */
127 clrbits32(bcsr_io, BCSR1_RS232EN_1);
128 cp->cp_smc[0].smc_smcm |= (SMCM_RX | SMCM_TX);
129 cp->cp_smc[0].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN);
130#else
131 setbits32(bcsr_io,BCSR1_RS232EN_1);
132 cp->cp_smc[0].smc_smcmr = 0;
133 cp->cp_smc[0].smc_smce = 0;
134#endif
135
136#ifdef CONFIG_SERIAL_CPM_SMC2
137 cp->cp_simode &= ~(0xe0000000 >> 1);
138 cp->cp_simode |= (0x20000000 >> 1); /* brg2 */
139 clrbits32(bcsr_io,BCSR1_RS232EN_2);
140 cp->cp_smc[1].smc_smcm |= (SMCM_RX | SMCM_TX);
141 cp->cp_smc[1].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN);
142#else
143 setbits32(bcsr_io,BCSR1_RS232EN_2);
144 cp->cp_smc[1].smc_smcmr = 0;
145 cp->cp_smc[1].smc_smce = 0;
146#endif
147 iounmap(bcsr_io);
148
149#ifdef CONFIG_FS_ENET
150 /* use MDC for MII (common) */
151 setbits16(&immap->im_ioport.iop_pdpar, 0x0080);
152 clrbits16(&immap->im_ioport.iop_pddir, 0x0080);
153 bcsr_io = ioremap(BCSR5, sizeof(unsigned long));
154 clrbits32(bcsr_io,BCSR5_MII1_EN);
155 clrbits32(bcsr_io,BCSR5_MII1_RST);
156#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
157 clrbits32(bcsr_io,BCSR5_MII2_EN);
158 clrbits32(bcsr_io,BCSR5_MII2_RST);
159#endif
160 iounmap(bcsr_io);
161#endif
162}
163
164static void setup_fec1_ioports(struct fs_platform_info* pdata)
165{
166 immap_t *immap = (immap_t *) IMAP_ADDR;
167
168 /* configure FEC1 pins */
169 setbits16(&immap->im_ioport.iop_papar, 0xf830);
170 setbits16(&immap->im_ioport.iop_padir, 0x0830);
171 clrbits16(&immap->im_ioport.iop_padir, 0xf000);
172 setbits32(&immap->im_cpm.cp_pbpar, 0x00001001);
173
174 clrbits32(&immap->im_cpm.cp_pbdir, 0x00001001);
175 setbits16(&immap->im_ioport.iop_pcpar, 0x000c);
176 clrbits16(&immap->im_ioport.iop_pcdir, 0x000c);
177 setbits32(&immap->im_cpm.cp_pepar, 0x00000003);
178
179 setbits32(&immap->im_cpm.cp_pedir, 0x00000003);
180 clrbits32(&immap->im_cpm.cp_peso, 0x00000003);
181 clrbits32(&immap->im_cpm.cp_cptr, 0x00000100);
182}
183
184static void setup_fec2_ioports(struct fs_platform_info* pdata)
185{
186 immap_t *immap = (immap_t *) IMAP_ADDR;
187
188 /* configure FEC2 pins */
189 setbits32(&immap->im_cpm.cp_pepar, 0x0003fffc);
190 setbits32(&immap->im_cpm.cp_pedir, 0x0003fffc);
191 clrbits32(&immap->im_cpm.cp_peso, 0x000087fc);
192 setbits32(&immap->im_cpm.cp_peso, 0x00037800);
193 clrbits32(&immap->im_cpm.cp_cptr, 0x00000080);
194}
195
196static void setup_scc3_ioports(struct fs_platform_info* pdata)
197{
198 immap_t *immap = (immap_t *) IMAP_ADDR;
199 unsigned *bcsr_io;
200
201 bcsr_io = ioremap(BCSR_ADDR, BCSR_SIZE);
202
203 if (bcsr_io == NULL) {
204 printk(KERN_CRIT "Could not remap BCSR\n");
205 return;
206 }
207
208 /* Enable the PHY.
209 */
210 clrbits32(bcsr_io+4, BCSR4_ETH10_RST);
211 udelay(1000);
212 setbits32(bcsr_io+4, BCSR4_ETH10_RST);
213 /* Configure port A pins for Txd and Rxd.
214 */
215 setbits16(&immap->im_ioport.iop_papar, PA_ENET_RXD | PA_ENET_TXD);
216 clrbits16(&immap->im_ioport.iop_padir, PA_ENET_RXD | PA_ENET_TXD);
217
218 /* Configure port C pins to enable CLSN and RENA.
219 */
220 clrbits16(&immap->im_ioport.iop_pcpar, PC_ENET_CLSN | PC_ENET_RENA);
221 clrbits16(&immap->im_ioport.iop_pcdir, PC_ENET_CLSN | PC_ENET_RENA);
222 setbits16(&immap->im_ioport.iop_pcso, PC_ENET_CLSN | PC_ENET_RENA);
223
224 /* Configure port E for TCLK and RCLK.
225 */
226 setbits32(&immap->im_cpm.cp_pepar, PE_ENET_TCLK | PE_ENET_RCLK);
227 clrbits32(&immap->im_cpm.cp_pepar, PE_ENET_TENA);
228 clrbits32(&immap->im_cpm.cp_pedir,
229 PE_ENET_TCLK | PE_ENET_RCLK | PE_ENET_TENA);
230 clrbits32(&immap->im_cpm.cp_peso, PE_ENET_TCLK | PE_ENET_RCLK);
231 setbits32(&immap->im_cpm.cp_peso, PE_ENET_TENA);
232
233 /* Configure Serial Interface clock routing.
234 * First, clear all SCC bits to zero, then set the ones we want.
235 */
236 clrbits32(&immap->im_cpm.cp_sicr, SICR_ENET_MASK);
237 setbits32(&immap->im_cpm.cp_sicr, SICR_ENET_CLKRT);
238
239 /* Disable Rx and Tx. SMC1 should be stopped if SCC3 ethernet is used.
240 */
241 immap->im_cpm.cp_smc[0].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN);
242 /* On the MPC885ADS the SCC ethernet PHY is initialized to full duplex mode
243 * by the hardware after reset, while the SCC ethernet controller supports only half duplex.
244 * This mismatch of modes causes a lot of carrier-lost errors.
245 */
246
247 /* In the original SCC enet driver the following code is placed at
248 the end of the initialization */
249 setbits32(&immap->im_cpm.cp_pepar, PE_ENET_TENA);
250 clrbits32(&immap->im_cpm.cp_pedir, PE_ENET_TENA);
251 setbits32(&immap->im_cpm.cp_peso, PE_ENET_TENA);
252
253 setbits32(bcsr_io+4, BCSR1_ETHEN);
254 iounmap(bcsr_io);
255}
256
257static int mac_count = 0;
258
259static void mpc885ads_fixup_enet_pdata(struct platform_device *pdev, int fs_no)
260{
261 struct fs_platform_info *fpi;
262 bd_t *bd = (bd_t *) __res;
263 char *e;
264 int i;
265
266 if(fs_no >= ARRAY_SIZE(mpc8xx_enet_pdata)) {
267 printk(KERN_ERR"No network-suitable #%d device on bus", fs_no);
268 return;
269 }
270
271 fpi = &mpc8xx_enet_pdata[fs_no];
272
273 switch (fs_no) {
274 case fsid_fec1:
275 fpi->init_ioports = &setup_fec1_ioports;
276 break;
277 case fsid_fec2:
278 fpi->init_ioports = &setup_fec2_ioports;
279 break;
280 case fsid_scc3:
281 fpi->init_ioports = &setup_scc3_ioports;
282 break;
283 default:
284 printk(KERN_WARNING "Device %s is not supported!\n", pdev->name);
285 return;
286 }
287
288 pdev->dev.platform_data = fpi;
289 fpi->fs_no = fs_no;
290
291 e = (unsigned char *)&bd->bi_enetaddr;
292 for (i = 0; i < 6; i++)
293 fpi->macaddr[i] = *e++;
294
295 fpi->macaddr[5] += mac_count++;
296
297}
298
299static void mpc885ads_fixup_fec_enet_pdata(struct platform_device *pdev,
300 int idx)
301{
302 /* This is for FEC devices only */
303 if (!pdev || !pdev->name || (!strstr(pdev->name, "fsl-cpm-fec")))
304 return;
305 mpc885ads_fixup_enet_pdata(pdev, fsid_fec1 + pdev->id - 1);
306}
307
308static void __init mpc885ads_fixup_scc_enet_pdata(struct platform_device *pdev,
309 int idx)
310{
311 /* This is for SCC devices only */
312 if (!pdev || !pdev->name || (!strstr(pdev->name, "fsl-cpm-scc")))
313 return;
314
315 mpc885ads_fixup_enet_pdata(pdev, fsid_scc1 + pdev->id - 1);
316}
317
318static void setup_smc1_ioports(struct fs_uart_platform_info* pdata)
319{
320 immap_t *immap = (immap_t *) IMAP_ADDR;
321 unsigned *bcsr_io;
322 unsigned int iobits = 0x000000c0;
323
324 bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
325
326 if (bcsr_io == NULL) {
327 printk(KERN_CRIT "Could not remap BCSR1\n");
328 return;
329 }
330 clrbits32(bcsr_io,BCSR1_RS232EN_1);
331 iounmap(bcsr_io);
332
333 setbits32(&immap->im_cpm.cp_pbpar, iobits);
334 clrbits32(&immap->im_cpm.cp_pbdir, iobits);
335 clrbits16(&immap->im_cpm.cp_pbodr, iobits);
336}
337
338static void setup_smc2_ioports(struct fs_uart_platform_info* pdata)
339{
340 immap_t *immap = (immap_t *) IMAP_ADDR;
341 unsigned *bcsr_io;
342 unsigned int iobits = 0x00000c00;
343
344 bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
345
346 if (bcsr_io == NULL) {
347 printk(KERN_CRIT "Could not remap BCSR1\n");
348 return;
349 }
350 clrbits32(bcsr_io,BCSR1_RS232EN_2);
351 iounmap(bcsr_io);
352
353#ifndef CONFIG_SERIAL_CPM_ALT_SMC2
354 setbits32(&immap->im_cpm.cp_pbpar, iobits);
355 clrbits32(&immap->im_cpm.cp_pbdir, iobits);
356 clrbits16(&immap->im_cpm.cp_pbodr, iobits);
357#else
358 setbits16(&immap->im_ioport.iop_papar, iobits);
359 clrbits16(&immap->im_ioport.iop_padir, iobits);
360 clrbits16(&immap->im_ioport.iop_paodr, iobits);
361#endif
362}
363
364static void __init mpc885ads_fixup_uart_pdata(struct platform_device *pdev,
365 int idx)
366{
367 bd_t *bd = (bd_t *) __res;
368 struct fs_uart_platform_info *pinfo;
369 int num = ARRAY_SIZE(mpc885_uart_pdata);
370
371 int id = fs_uart_id_smc2fsid(idx);
372
373 /* no need to alter anything if console */
374 if ((id < num) && (!pdev->dev.platform_data)) {
375 pinfo = &mpc885_uart_pdata[id];
376 pinfo->uart_clk = bd->bi_intfreq;
377 pdev->dev.platform_data = pinfo;
378 }
379}
380
381
382static int mpc885ads_platform_notify(struct device *dev)
383{
384
385 static const struct platform_notify_dev_map dev_map[] = {
386 {
387 .bus_id = "fsl-cpm-fec",
388 .rtn = mpc885ads_fixup_fec_enet_pdata,
389 },
390 {
391 .bus_id = "fsl-cpm-scc",
392 .rtn = mpc885ads_fixup_scc_enet_pdata,
393 },
394 {
395 .bus_id = "fsl-cpm-smc:uart",
396 .rtn = mpc885ads_fixup_uart_pdata
397 },
398 {
399 .bus_id = NULL
400 }
401 };
402
403 platform_notify_map(dev_map,dev);
404
405 return 0;
406}
407
408int __init mpc885ads_init(void)
409{
410 struct fs_mii_fec_platform_info* fmpi;
411 bd_t *bd = (bd_t *) __res;
412
413 printk(KERN_NOTICE "mpc885ads: Init\n");
414
415 platform_notify = mpc885ads_platform_notify;
416
417 ppc_sys_device_initfunc();
418 ppc_sys_device_disable_all();
419
420 ppc_sys_device_enable(MPC8xx_CPM_FEC1);
421
422 ppc_sys_device_enable(MPC8xx_MDIO_FEC);
423 fmpi = ppc_sys_platform_devices[MPC8xx_MDIO_FEC].dev.platform_data =
424 &mpc8xx_mdio_fec_pdata;
425
426 fmpi->mii_speed = ((((bd->bi_intfreq + 4999999) / 2500000) / 2) & 0x3F) << 1;
427
428 /* No PHY interrupt line here */
429 fmpi->irq[0xf] = SIU_IRQ7;
430
431#ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3
432 ppc_sys_device_enable(MPC8xx_CPM_SCC3);
433
434#endif
435#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
436 ppc_sys_device_enable(MPC8xx_CPM_FEC2);
437#endif
438
439#ifdef CONFIG_SERIAL_CPM_SMC1
440 ppc_sys_device_enable(MPC8xx_CPM_SMC1);
441 ppc_sys_device_setfunc(MPC8xx_CPM_SMC1, PPC_SYS_FUNC_UART);
442#endif
443
444#ifdef CONFIG_SERIAL_CPM_SMC2
445 ppc_sys_device_enable(MPC8xx_CPM_SMC2);
446 ppc_sys_device_setfunc(MPC8xx_CPM_SMC2, PPC_SYS_FUNC_UART);
447#endif
448 return 0;
449}
450
451arch_initcall(mpc885ads_init);
452
453/*
454 To avoid confusion, console selection is deliberately crude:
455 index 0 selects SMC1 and index 1 selects SMC2
456 */
457struct platform_device* early_uart_get_pdev(int index)
458{
459 bd_t *bd = (bd_t *) __res;
460 struct fs_uart_platform_info *pinfo;
461
462 struct platform_device* pdev = NULL;
463 if(index) { /*assume SMC2 here*/
464 pdev = &ppc_sys_platform_devices[MPC8xx_CPM_SMC2];
465 pinfo = &mpc885_uart_pdata[1];
466 } else { /*over SMC1*/
467 pdev = &ppc_sys_platform_devices[MPC8xx_CPM_SMC1];
468 pinfo = &mpc885_uart_pdata[0];
469 }
470
471 pinfo->uart_clk = bd->bi_intfreq;
472 pdev->dev.platform_data = pinfo;
473 ppc_sys_fixup_mem_resource(pdev, IMAP_ADDR);
474 return NULL;
475}
476
diff --git a/arch/ppc/platforms/pq2ads.c b/arch/ppc/platforms/pq2ads.c
deleted file mode 100644
index 7fc2e02f5246..000000000000
--- a/arch/ppc/platforms/pq2ads.c
+++ /dev/null
@@ -1,53 +0,0 @@
1/*
2 * PQ2ADS platform support
3 *
4 * Author: Kumar Gala <galak@kernel.crashing.org>
5 * Derived from: est8260_setup.c by Allen Curtis
6 *
7 * Copyright 2004 Freescale Semiconductor, Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2 of the License, or (at your
12 * option) any later version.
13 */
14
15#include <linux/init.h>
16
17#include <asm/io.h>
18#include <asm/mpc8260.h>
19#include <asm/cpm2.h>
20#include <asm/immap_cpm2.h>
21
22void __init
23m82xx_board_setup(void)
24{
25 cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
26 u32 *bcsr = ioremap(BCSR_ADDR+4, sizeof(u32));
27
28 /* Enable the 2nd UART port */
29 clrbits32(bcsr, BCSR1_RS232_EN2);
30
31#ifdef CONFIG_SERIAL_CPM_SCC1
32 clrbits32((u32*)&immap->im_scc[0].scc_sccm, UART_SCCM_TX | UART_SCCM_RX);
33 clrbits32((u32*)&immap->im_scc[0].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
34#endif
35
36#ifdef CONFIG_SERIAL_CPM_SCC2
37 clrbits32((u32*)&immap->im_scc[1].scc_sccm, UART_SCCM_TX | UART_SCCM_RX);
38 clrbits32((u32*)&immap->im_scc[1].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
39#endif
40
41#ifdef CONFIG_SERIAL_CPM_SCC3
42 clrbits32((u32*)&immap->im_scc[2].scc_sccm, UART_SCCM_TX | UART_SCCM_RX);
43 clrbits32((u32*)&immap->im_scc[2].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
44#endif
45
46#ifdef CONFIG_SERIAL_CPM_SCC4
47 clrbits32((u32*)&immap->im_scc[3].scc_sccm, UART_SCCM_TX | UART_SCCM_RX);
48 clrbits32((u32*)&immap->im_scc[3].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
49#endif
50
51 iounmap(bcsr);
52 iounmap(immap);
53}
diff --git a/arch/ppc/platforms/pq2ads.h b/arch/ppc/platforms/pq2ads.h
deleted file mode 100644
index 2b287f4e0ca3..000000000000
--- a/arch/ppc/platforms/pq2ads.h
+++ /dev/null
@@ -1,94 +0,0 @@
1/*
2 * A collection of structures, addresses, and values associated with
3 * the Motorola MPC8260ADS/MPC8266ADS-PCI boards.
4 * Copied from the RPX-Classic and SBS8260 stuff.
5 *
6 * Copyright (c) 2001 Dan Malek (dan@mvista.com)
7 */
8#ifdef __KERNEL__
9#ifndef __MACH_ADS8260_DEFS
10#define __MACH_ADS8260_DEFS
11
12
13#include <asm/ppcboot.h>
14
15#if defined(CONFIG_ADS8272)
16#define BOARD_CHIP_NAME "8272"
17#endif
18
19/* Memory map is configured by the PROM startup.
20 * We just map a few things we need. The CSR is actually 4 byte-wide
21 * registers that can be accessed as 8-, 16-, or 32-bit values.
22 */
23#define CPM_MAP_ADDR ((uint)0xf0000000)
24#define BCSR_ADDR ((uint)0xf4500000)
25#define BCSR_SIZE ((uint)(32 * 1024))
26
27#define BOOTROM_RESTART_ADDR ((uint)0xff000104)
28
29/* For our show_cpuinfo hooks. */
30#define CPUINFO_VENDOR "Motorola"
31#define CPUINFO_MACHINE "PQ2 ADS PowerPC"
32
33/* The ADS8260 has 16, 32-bit wide control/status registers, accessed
34 * only on word boundaries.
35 * Not all are used (yet), or are interesting to us (yet).
36 */
37
38/* Things of interest in the CSR.
39*/
40#define BCSR0_LED0 ((uint)0x02000000) /* 0 == on */
41#define BCSR0_LED1 ((uint)0x01000000) /* 0 == on */
42#define BCSR1_FETHIEN ((uint)0x08000000) /* 0 == enable */
43#define BCSR1_FETH_RST ((uint)0x04000000) /* 0 == reset */
44#define BCSR1_RS232_EN1 ((uint)0x02000000) /* 0 == enable */
45#define BCSR1_RS232_EN2 ((uint)0x01000000) /* 0 == enable */
46#define BCSR3_FETHIEN2 ((uint)0x10000000) /* 0 == enable */
47#define BCSR3_FETH2_RST ((uint)0x80000000) /* 0 == reset */
48
49#define PHY_INTERRUPT SIU_INT_IRQ7
50
51#ifdef CONFIG_PCI
52/* PCI interrupt controller */
53#define PCI_INT_STAT_REG 0xF8200000
54#define PCI_INT_MASK_REG 0xF8200004
55#define PIRQA (NR_CPM_INTS + 0)
56#define PIRQB (NR_CPM_INTS + 1)
57#define PIRQC (NR_CPM_INTS + 2)
58#define PIRQD (NR_CPM_INTS + 3)
59
60/*
61 * PCI memory map definitions for MPC8266ADS-PCI.
62 *
63 * processor view
64 * local address PCI address target
65 * 0x80000000-0x9FFFFFFF 0x80000000-0x9FFFFFFF PCI mem with prefetch
66 * 0xA0000000-0xBFFFFFFF 0xA0000000-0xBFFFFFFF PCI mem w/o prefetch
67 * 0xF4000000-0xF7FFFFFF 0x00000000-0x03FFFFFF PCI IO
68 *
69 * PCI master view
70 * local address PCI address target
71 * 0x00000000-0x1FFFFFFF 0x00000000-0x1FFFFFFF MPC8266 local memory
72 */
73
74/* All the other PCI memory map definitions reside at syslib/m82xx_pci.h
75 Here we should redefine what is unique for this board */
76#define M82xx_PCI_SLAVE_MEM_LOCAL 0x00000000 /* Local base */
77#define M82xx_PCI_SLAVE_MEM_BUS 0x00000000 /* PCI base */
78#define M82xx_PCI_SLAVE_MEM_SIZE 0x10000000 /* 256 Mb */
79
80#define M82xx_PCI_SLAVE_SEC_WND_SIZE ~(0x40000000 - 1U) /* 2 x 512Mb */
81#define M82xx_PCI_SLAVE_SEC_WND_BASE 0x80000000 /* PCI Memory base */
82
83#if defined(CONFIG_ADS8272)
84#define PCI_INT_TO_SIU SIU_INT_IRQ2
85#elif defined(CONFIG_PQ2FADS)
86#define PCI_INT_TO_SIU SIU_INT_IRQ6
87#else
88#warning PCI Bridge will be without interrupts support
89#endif
90
91#endif /* CONFIG_PCI */
92
93#endif /* __MACH_ADS8260_DEFS */
94#endif /* __KERNEL__ */
diff --git a/arch/ppc/platforms/pq2ads_pd.h b/arch/ppc/platforms/pq2ads_pd.h
deleted file mode 100644
index 672483df8079..000000000000
--- a/arch/ppc/platforms/pq2ads_pd.h
+++ /dev/null
@@ -1,32 +0,0 @@
1#ifndef __PQ2ADS_PD_H
2#define __PQ2ADS_PD_H
3/*
4 * arch/ppc/platforms/82xx/pq2ads_pd.h
5 *
6 * Some defines for MPC82xx board-specific PlatformDevice descriptions
7 *
8 * 2005 (c) MontaVista Software, Inc.
9 * Vitaly Bordug <vbordug@ru.mvista.com>
10 *
11 * This file is licensed under the terms of the GNU General Public License
12 * version 2. This program is licensed "as is" without any warranty of any
13 * kind, whether express or implied.
14 */
15
16/* FCC1 Clock Source Configuration. These can be redefined in the board specific file.
17 Can only choose from CLK9-12 */
18
19#define F1_RXCLK 11
20#define F1_TXCLK 10
21
22/* FCC2 Clock Source Configuration. These can be redefined in the board specific file.
23 Can only choose from CLK13-16 */
24#define F2_RXCLK 15
25#define F2_TXCLK 16
26
27/* FCC3 Clock Source Configuration. These can be redefined in the board specific file.
28 Can only choose from CLK13-16 */
29#define F3_RXCLK 13
30#define F3_TXCLK 14
31
32#endif
diff --git a/arch/ppc/syslib/m8260_setup.c b/arch/ppc/syslib/m8260_setup.c
index 46588fa94381..b40583724de3 100644
--- a/arch/ppc/syslib/m8260_setup.c
+++ b/arch/ppc/syslib/m8260_setup.c
@@ -175,12 +175,6 @@ m8260_init_IRQ(void)
 	 * in case the boot rom changed something on us.
 	 */
 	cpm2_immr->im_intctl.ic_siprr = 0x05309770;
-
-#if defined(CONFIG_PCI) && (defined(CONFIG_ADS8272) || defined(CONFIG_PQ2FADS))
-	/* Initialize stuff for the 82xx CPLD IC and install demux */
-	pq2pci_init_irq();
-#endif
-
 }
 
 /*
diff --git a/arch/ppc/syslib/m82xx_pci.c b/arch/ppc/syslib/m82xx_pci.c
index fe860d52e2e4..657a1c25a2ab 100644
--- a/arch/ppc/syslib/m82xx_pci.c
+++ b/arch/ppc/syslib/m82xx_pci.c
@@ -150,14 +150,6 @@ pq2pci_init_irq(void)
 {
 	int irq;
 	volatile cpm2_map_t *immap = cpm2_immr;
-#if defined CONFIG_ADS8272
-	/* configure chip select for PCI interrupt controller */
-	immap->im_memctl.memc_br3 = PCI_INT_STAT_REG | 0x00001801;
-	immap->im_memctl.memc_or3 = 0xffff8010;
-#elif defined CONFIG_PQ2FADS
-	immap->im_memctl.memc_br8 = PCI_INT_STAT_REG | 0x00001801;
-	immap->im_memctl.memc_or8 = 0xffff8010;
-#endif
 	for (irq = NR_CPM_INTS; irq < NR_CPM_INTS + 4; irq++)
 		irq_desc[irq].chip = &pq2pci_ic;
 
@@ -222,26 +214,6 @@ pq2ads_setup_pci(struct pci_controller *hose)
 	immap->im_memctl.memc_pcibr1 = M82xx_PCI_SEC_WND_BASE | PCIBR_ENABLE;
 #endif
 
-#if defined CONFIG_ADS8272
-	immap->im_siu_conf.siu_82xx.sc_siumcr =
-		(immap->im_siu_conf.siu_82xx.sc_siumcr &
-			~(SIUMCR_BBD | SIUMCR_ESE | SIUMCR_PBSE |
-			SIUMCR_CDIS | SIUMCR_DPPC11 | SIUMCR_L2CPC11 |
-			SIUMCR_LBPC11 | SIUMCR_APPC11 |
-			SIUMCR_CS10PC11 | SIUMCR_BCTLC11 | SIUMCR_MMR11)) |
-			SIUMCR_DPPC11 | SIUMCR_L2CPC01 | SIUMCR_LBPC00 |
-			SIUMCR_APPC10 | SIUMCR_CS10PC00 |
-			SIUMCR_BCTLC00 | SIUMCR_MMR11 ;
-
-#elif defined CONFIG_PQ2FADS
-	/*
-	 * Setting required to enable IRQ1-IRQ7 (SIUMCR [DPPC]),
-	 * and local bus for PCI (SIUMCR [LBPC]).
-	 */
-	immap->im_siu_conf.siu_82xx.sc_siumcr = (immap->im_siu_conf.siu_82xx.sc_siumcr &
-		~(SIUMCR_L2CPC11 | SIUMCR_LBPC11 | SIUMCR_CS10PC11 | SIUMCR_APPC11) |
-		SIUMCR_BBD | SIUMCR_LBPC01 | SIUMCR_DPPC11 | SIUMCR_APPC10);
-#endif
 	/* Enable PCI */
 	immap->im_pci.pci_gcr = cpu_to_le32(PCIGCR_PCI_BUS_EN);
 
@@ -284,12 +256,6 @@ pq2ads_setup_pci(struct pci_controller *hose)
 	immap->im_pci.pci_pibar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_BUS >> PITA_ADDR_SHIFT);
 	immap->im_pci.pci_pitar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_LOCAL>> PITA_ADDR_SHIFT);
 
-#if defined CONFIG_ADS8272
-	/* PCI int highest prio */
-	immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x01236745;
-#elif defined CONFIG_PQ2FADS
-	immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x03124567;
-#endif
 	/* park bus on PCI */
 	immap->im_siu_conf.siu_82xx.sc_ppc_acr = PPC_ACR_BUS_PARK_PCI;
 
@@ -320,10 +286,6 @@ void __init pq2_find_bridges(void)
 	hose->bus_offset = 0;
 	hose->last_busno = 0xff;
 
-#ifdef CONFIG_ADS8272
-	hose->set_cfg_type = 1;
-#endif
-
 	setup_m8260_indirect_pci(hose,
 		(unsigned long)&cpm2_immr->im_pci.pci_cfg_addr,
 		(unsigned long)&cpm2_immr->im_pci.pci_cfg_data);
diff --git a/arch/ppc/syslib/m8xx_setup.c b/arch/ppc/syslib/m8xx_setup.c
index 19749e9bcf91..18da720fc1b0 100644
--- a/arch/ppc/syslib/m8xx_setup.c
+++ b/arch/ppc/syslib/m8xx_setup.c
@@ -141,16 +141,6 @@ m8xx_setup_arch(void)
 #endif
 #endif
 
-#if defined (CONFIG_MPC86XADS) || defined (CONFIG_MPC885ADS)
-#if defined(CONFIG_MTD_PHYSMAP)
-	physmap_configure(binfo->bi_flashstart, binfo->bi_flashsize,
-			  MPC8xxADS_BANK_WIDTH, NULL);
-#ifdef CONFIG_MTD_PARTITIONS
-	physmap_set_partitions(mpc8xxads_partitions, mpc8xxads_part_num);
-#endif /* CONFIG_MTD_PARTITIONS */
-#endif /* CONFIG_MTD_PHYSMAP */
-#endif
-
 	board_init();
 }
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f6a68e178fc5..8f5f02160ffc 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SMP && PREEMPT
 
+config PGSTE
+	bool
+	default y if KVM
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
@@ -69,6 +73,7 @@ config S390
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_KVM if 64BIT
 
 source "init/Kconfig"
 
@@ -515,6 +520,13 @@ config ZFCPDUMP
 	  Select this option if you want to build an zfcpdump enabled kernel.
 	  Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
 
+config S390_GUEST
+bool "s390 guest support (EXPERIMENTAL)"
+	depends on 64BIT && EXPERIMENTAL
+	select VIRTIO
+	select VIRTIO_RING
+	help
+	  Select this option if you want to run the kernel under s390 linux
 endmenu
 
 source "net/Kconfig"
@@ -536,3 +548,5 @@ source "security/Kconfig"
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "arch/s390/kvm/Kconfig"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index f708be367b03..792a4e7743ce 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start
 head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o
 
 core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
-	arch/s390/appldata/ arch/s390/hypfs/
+	arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
 libs-y += arch/s390/lib/
 drivers-y += drivers/s390/
 drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 540a67f979b6..68ec4083bf73 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -144,6 +144,10 @@ static noinline __init void detect_machine_type(void)
 	/* Running on a P/390 ? */
 	if (cpuinfo->cpu_id.machine == 0x7490)
 		machine_flags |= 4;
+
+	/* Running under KVM ? */
+	if (cpuinfo->cpu_id.version == 0xfe)
+		machine_flags |= 64;
 }
 
 #ifdef CONFIG_64BIT
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7141147e6b63..a9d18aafa5f4 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p)
 early_param("ipldelay", early_parse_ipldelay);
 
 #ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
 unsigned int switch_amode = 0;
+#endif
 EXPORT_SYMBOL_GPL(switch_amode);
 
 static void set_amode_and_uaccess(unsigned long user_amode,
@@ -797,9 +801,13 @@ setup_arch(char **cmdline_p)
797 "This machine has an IEEE fpu\n" : 801 "This machine has an IEEE fpu\n" :
798 "This machine has no IEEE fpu\n"); 802 "This machine has no IEEE fpu\n");
799#else /* CONFIG_64BIT */ 803#else /* CONFIG_64BIT */
800 printk((MACHINE_IS_VM) ? 804 if (MACHINE_IS_VM)
801 "We are running under VM (64 bit mode)\n" : 805 printk("We are running under VM (64 bit mode)\n");
802 "We are running native (64 bit mode)\n"); 806 else if (MACHINE_IS_KVM) {
807 printk("We are running under KVM (64 bit mode)\n");
808 add_preferred_console("ttyS", 1, NULL);
809 } else
810 printk("We are running native (64 bit mode)\n");
803#endif /* CONFIG_64BIT */ 811#endif /* CONFIG_64BIT */
804 812
805 /* Save unparsed command line copy for /proc/cmdline */ 813 /* Save unparsed command line copy for /proc/cmdline */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c5f05b3fb2c3..ca90ee3f930e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -110,6 +110,7 @@ void account_system_vtime(struct task_struct *tsk)
 	S390_lowcore.steal_clock -= cputime << 12;
 	account_system_time(tsk, 0, cputime);
 }
+EXPORT_SYMBOL_GPL(account_system_vtime);
 
 static inline void set_vtimer(__u64 expires)
 {
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
new file mode 100644
index 000000000000..1761b74d639b
--- /dev/null
+++ b/arch/s390/kvm/Kconfig
@@ -0,0 +1,46 @@
1#
2# KVM configuration
3#
4config HAVE_KVM
5 bool
6
7menuconfig VIRTUALIZATION
8 bool "Virtualization"
9 default y
10 ---help---
11 Say Y here to get to see options for using your Linux host to run other
12 operating systems inside virtual machines (guests).
13 This option alone does not add any kernel code.
14
15 If you say N, all options in this submenu will be skipped and disabled.
16
17if VIRTUALIZATION
18
19config KVM
20 tristate "Kernel-based Virtual Machine (KVM) support"
21 depends on HAVE_KVM && EXPERIMENTAL
22 select PREEMPT_NOTIFIERS
23 select ANON_INODES
24 select S390_SWITCH_AMODE
25 select PREEMPT
26 ---help---
27 Support hosting paravirtualized guest machines using the SIE
28 virtualization capability on the mainframe. This should work
29 on any 64bit machine.
30
31 This module provides access to the hardware capabilities through
32 a character device node named /dev/kvm.
33
34 To compile this as a module, choose M here: the module
35 will be called kvm.
36
37 If unsure, say N.
38
39config KVM_TRACE
40 bool
41
42# OK, it's a little counter-intuitive to do this, but it puts it neatly under
43# the virtualization menu.
44source drivers/virtio/Kconfig
45
46endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
new file mode 100644
index 000000000000..e5221ec0b8e3
--- /dev/null
+++ b/arch/s390/kvm/Makefile
@@ -0,0 +1,14 @@
1# Makefile for kernel virtual machines on s390
2#
3# Copyright IBM Corp. 2008
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License (version 2 only)
7# as published by the Free Software Foundation.
8
9common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
10
11EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
12
13kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o
14obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
new file mode 100644
index 000000000000..f639a152869f
--- /dev/null
+++ b/arch/s390/kvm/diag.c
@@ -0,0 +1,67 @@
1/*
2 * diag.c - handling diagnose instructions
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#include <linux/kvm.h>
15#include <linux/kvm_host.h>
16#include "kvm-s390.h"
17
18static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
19{
20 VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
21 vcpu->stat.diagnose_44++;
22 vcpu_put(vcpu);
23 schedule();
24 vcpu_load(vcpu);
25 return 0;
26}
27
28static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
29{
30 unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
31 unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff;
32
33 VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
34 switch (subcode) {
35 case 3:
36 vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
37 break;
38 case 4:
39 vcpu->run->s390_reset_flags = 0;
40 break;
41 default:
42 return -ENOTSUPP;
43 }
44
45 atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
46 vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
47 vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
48 vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
49 vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
50 VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx",
51 vcpu->run->s390_reset_flags);
52 return -EREMOTE;
53}
54
55int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
56{
57 int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
58
59 switch (code) {
60 case 0x44:
61 return __diag_time_slice_end(vcpu);
62 case 0x308:
63 return __diag_ipl_functions(vcpu);
64 default:
65 return -ENOTSUPP;
66 }
67}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
new file mode 100644
index 000000000000..4e0633c413f3
--- /dev/null
+++ b/arch/s390/kvm/gaccess.h
@@ -0,0 +1,274 @@
1/*
2 * gaccess.h - access guest memory
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 */
12
13#ifndef __KVM_S390_GACCESS_H
14#define __KVM_S390_GACCESS_H
15
16#include <linux/compiler.h>
17#include <linux/kvm_host.h>
18#include <asm/uaccess.h>
19
20static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
21 u64 guestaddr)
22{
23 u64 prefix = vcpu->arch.sie_block->prefix;
24 u64 origin = vcpu->kvm->arch.guest_origin;
25 u64 memsize = vcpu->kvm->arch.guest_memsize;
26
27 if (guestaddr < 2 * PAGE_SIZE)
28 guestaddr += prefix;
29 else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
30 guestaddr -= prefix;
31
32 if (guestaddr > memsize)
33 return (void __user __force *) ERR_PTR(-EFAULT);
34
35 guestaddr += origin;
36
37 return (void __user *) guestaddr;
38}
39
40static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
41 u64 *result)
42{
43 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
44
45 BUG_ON(guestaddr & 7);
46
47 if (IS_ERR((void __force *) uptr))
48 return PTR_ERR((void __force *) uptr);
49
50 return get_user(*result, (u64 __user *) uptr);
51}
52
53static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
54 u32 *result)
55{
56 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
57
58 BUG_ON(guestaddr & 3);
59
60 if (IS_ERR((void __force *) uptr))
61 return PTR_ERR((void __force *) uptr);
62
63 return get_user(*result, (u32 __user *) uptr);
64}
65
66static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
67 u16 *result)
68{
69 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
70
71 BUG_ON(guestaddr & 1);
72
73 if (IS_ERR(uptr))
74 return PTR_ERR(uptr);
75
76 return get_user(*result, (u16 __user *) uptr);
77}
78
79static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
80 u8 *result)
81{
82 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
83
84 if (IS_ERR((void __force *) uptr))
85 return PTR_ERR((void __force *) uptr);
86
87 return get_user(*result, (u8 __user *) uptr);
88}
89
90static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
91 u64 value)
92{
93 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
94
95 BUG_ON(guestaddr & 7);
96
97 if (IS_ERR((void __force *) uptr))
98 return PTR_ERR((void __force *) uptr);
99
100 return put_user(value, (u64 __user *) uptr);
101}
102
103static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
104 u32 value)
105{
106 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
107
108 BUG_ON(guestaddr & 3);
109
110 if (IS_ERR((void __force *) uptr))
111 return PTR_ERR((void __force *) uptr);
112
113 return put_user(value, (u32 __user *) uptr);
114}
115
116static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
117 u16 value)
118{
119 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
120
121 BUG_ON(guestaddr & 1);
122
123 if (IS_ERR((void __force *) uptr))
124 return PTR_ERR((void __force *) uptr);
125
126 return put_user(value, (u16 __user *) uptr);
127}
128
129static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
130 u8 value)
131{
132 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
133
134 if (IS_ERR((void __force *) uptr))
135 return PTR_ERR((void __force *) uptr);
136
137 return put_user(value, (u8 __user *) uptr);
138}
139
140
141static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest,
142 const void *from, unsigned long n)
143{
144 int rc;
145 unsigned long i;
146 const u8 *data = from;
147
148 for (i = 0; i < n; i++) {
149 rc = put_guest_u8(vcpu, guestdest++, *(data++));
150 if (rc < 0)
151 return rc;
152 }
153 return 0;
154}
155
156static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest,
157 const void *from, unsigned long n)
158{
159 u64 prefix = vcpu->arch.sie_block->prefix;
160 u64 origin = vcpu->kvm->arch.guest_origin;
161 u64 memsize = vcpu->kvm->arch.guest_memsize;
162
163 if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
164 goto slowpath;
165
166 if ((guestdest < prefix) && (guestdest + n > prefix))
167 goto slowpath;
168
169 if ((guestdest < prefix + 2 * PAGE_SIZE)
170 && (guestdest + n > prefix + 2 * PAGE_SIZE))
171 goto slowpath;
172
173 if (guestdest < 2 * PAGE_SIZE)
174 guestdest += prefix;
175 else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
176 guestdest -= prefix;
177
178 if (guestdest + n > memsize)
179 return -EFAULT;
180
181 if (guestdest + n < guestdest)
182 return -EFAULT;
183
184 guestdest += origin;
185
186 return copy_to_user((void __user *) guestdest, from, n);
187slowpath:
188 return __copy_to_guest_slow(vcpu, guestdest, from, n);
189}
190
191static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
192 u64 guestsrc, unsigned long n)
193{
194 int rc;
195 unsigned long i;
196 u8 *data = to;
197
198 for (i = 0; i < n; i++) {
199 rc = get_guest_u8(vcpu, guestsrc++, data++);
200 if (rc < 0)
201 return rc;
202 }
203 return 0;
204}
205
206static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
207 u64 guestsrc, unsigned long n)
208{
209 u64 prefix = vcpu->arch.sie_block->prefix;
210 u64 origin = vcpu->kvm->arch.guest_origin;
211 u64 memsize = vcpu->kvm->arch.guest_memsize;
212
213 if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
214 goto slowpath;
215
216 if ((guestsrc < prefix) && (guestsrc + n > prefix))
217 goto slowpath;
218
219 if ((guestsrc < prefix + 2 * PAGE_SIZE)
220 && (guestsrc + n > prefix + 2 * PAGE_SIZE))
221 goto slowpath;
222
223 if (guestsrc < 2 * PAGE_SIZE)
224 guestsrc += prefix;
225 else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
226 guestsrc -= prefix;
227
228 if (guestsrc + n > memsize)
229 return -EFAULT;
230
231 if (guestsrc + n < guestsrc)
232 return -EFAULT;
233
234 guestsrc += origin;
235
236 return copy_from_user(to, (void __user *) guestsrc, n);
237slowpath:
238 return __copy_from_guest_slow(vcpu, to, guestsrc, n);
239}
240
241static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest,
242 const void *from, unsigned long n)
243{
244 u64 origin = vcpu->kvm->arch.guest_origin;
245 u64 memsize = vcpu->kvm->arch.guest_memsize;
246
247 if (guestdest + n > memsize)
248 return -EFAULT;
249
250 if (guestdest + n < guestdest)
251 return -EFAULT;
252
253 guestdest += origin;
254
255 return copy_to_user((void __user *) guestdest, from, n);
256}
257
258static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
259 u64 guestsrc, unsigned long n)
260{
261 u64 origin = vcpu->kvm->arch.guest_origin;
262 u64 memsize = vcpu->kvm->arch.guest_memsize;
263
264 if (guestsrc + n > memsize)
265 return -EFAULT;
266
267 if (guestsrc + n < guestsrc)
268 return -EFAULT;
269
270 guestsrc += origin;
271
272 return copy_from_user(to, (void __user *) guestsrc, n);
273}
274#endif
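
The fast paths of copy_to_guest() and copy_from_guest() above apply the s390 prefixing rule in software: guest addresses in the first two pages are redirected into the prefix area, addresses inside the prefix pages map back to low core, and the slot origin is then added. A minimal sketch of that translation, with hypothetical names, assuming the straddling cases have already been sent to the byte-wise slow path:

/* Illustration only: mirrors the fast-path address rewrite above. */
static unsigned long guest_real_to_user(unsigned long gaddr,
					unsigned long prefix,
					unsigned long origin)
{
	if (gaddr < 2 * PAGE_SIZE)
		gaddr += prefix;	/* low core lives in the prefix area */
	else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
		gaddr -= prefix;	/* prefix pages map back to low core */
	return gaddr + origin;		/* offset into the userspace mapping */
}
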
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
new file mode 100644
index 000000000000..349581a26103
--- /dev/null
+++ b/arch/s390/kvm/intercept.c
@@ -0,0 +1,216 @@
1/*
2 * intercept.c - in-kernel handling for sie intercepts
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#include <linux/kvm_host.h>
15#include <linux/errno.h>
16#include <linux/pagemap.h>
17
18#include <asm/kvm_host.h>
19
20#include "kvm-s390.h"
21#include "gaccess.h"
22
23static int handle_lctg(struct kvm_vcpu *vcpu)
24{
25 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
26 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
27 int base2 = vcpu->arch.sie_block->ipb >> 28;
28 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
29 ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
30 u64 useraddr;
31 int reg, rc;
32
33 vcpu->stat.instruction_lctg++;
34 if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
35 return -ENOTSUPP;
36
37 useraddr = disp2;
38 if (base2)
39 useraddr += vcpu->arch.guest_gprs[base2];
40
41 reg = reg1;
42
43 VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
44 disp2);
45
46 do {
47 rc = get_guest_u64(vcpu, useraddr,
48 &vcpu->arch.sie_block->gcr[reg]);
49 if (rc == -EFAULT) {
50 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
51 break;
52 }
53 useraddr += 8;
54 if (reg == reg3)
55 break;
56 reg = (reg + 1) % 16;
57 } while (1);
58 return 0;
59}
60
61static int handle_lctl(struct kvm_vcpu *vcpu)
62{
63 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
64 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
65 int base2 = vcpu->arch.sie_block->ipb >> 28;
66 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
67 u64 useraddr;
68 u32 val = 0;
69 int reg, rc;
70
71 vcpu->stat.instruction_lctl++;
72
73 useraddr = disp2;
74 if (base2)
75 useraddr += vcpu->arch.guest_gprs[base2];
76
77 VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
78 disp2);
79
80 reg = reg1;
81 do {
82 rc = get_guest_u32(vcpu, useraddr, &val);
83 if (rc == -EFAULT) {
84 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
85 break;
86 }
87 vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
88 vcpu->arch.sie_block->gcr[reg] |= val;
89 useraddr += 4;
90 if (reg == reg3)
91 break;
92 reg = (reg + 1) % 16;
93 } while (1);
94 return 0;
95}
96
97static intercept_handler_t instruction_handlers[256] = {
98 [0x83] = kvm_s390_handle_diag,
99 [0xae] = kvm_s390_handle_sigp,
100 [0xb2] = kvm_s390_handle_priv,
101 [0xb7] = handle_lctl,
102 [0xeb] = handle_lctg,
103};
104
105static int handle_noop(struct kvm_vcpu *vcpu)
106{
107 switch (vcpu->arch.sie_block->icptcode) {
108 case 0x10:
109 vcpu->stat.exit_external_request++;
110 break;
111 case 0x14:
112 vcpu->stat.exit_external_interrupt++;
113 break;
114 default:
115 break; /* nothing */
116 }
117 return 0;
118}
119
120static int handle_stop(struct kvm_vcpu *vcpu)
121{
122 int rc;
123
124 vcpu->stat.exit_stop_request++;
125 atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
126 spin_lock_bh(&vcpu->arch.local_int.lock);
127 if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
128 vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
129 rc = __kvm_s390_vcpu_store_status(vcpu,
130 KVM_S390_STORE_STATUS_NOADDR);
131 if (rc >= 0)
132 rc = -ENOTSUPP;
133 }
134
135 if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
136 vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
137 VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
138 rc = -ENOTSUPP;
139 } else
140 rc = 0;
141 spin_unlock_bh(&vcpu->arch.local_int.lock);
142 return rc;
143}
144
145static int handle_validity(struct kvm_vcpu *vcpu)
146{
147 int viwhy = vcpu->arch.sie_block->ipb >> 16;
148 vcpu->stat.exit_validity++;
149 if (viwhy == 0x37) {
150 fault_in_pages_writeable((char __user *)
151 vcpu->kvm->arch.guest_origin +
152 vcpu->arch.sie_block->prefix,
153 PAGE_SIZE);
154 return 0;
155 }
156 VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
157 viwhy);
158 return -ENOTSUPP;
159}
160
161static int handle_instruction(struct kvm_vcpu *vcpu)
162{
163 intercept_handler_t handler;
164
165 vcpu->stat.exit_instruction++;
166 handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
167 if (handler)
168 return handler(vcpu);
169 return -ENOTSUPP;
170}
171
172static int handle_prog(struct kvm_vcpu *vcpu)
173{
174 vcpu->stat.exit_program_interruption++;
175 return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
176}
177
178static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
179{
180 int rc, rc2;
181
182 vcpu->stat.exit_instr_and_program++;
183 rc = handle_instruction(vcpu);
184 rc2 = handle_prog(vcpu);
185
186 if (rc == -ENOTSUPP)
187 vcpu->arch.sie_block->icptcode = 0x04;
188 if (rc)
189 return rc;
190 return rc2;
191}
192
193static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
194 [0x00 >> 2] = handle_noop,
195 [0x04 >> 2] = handle_instruction,
196 [0x08 >> 2] = handle_prog,
197 [0x0C >> 2] = handle_instruction_and_prog,
198 [0x10 >> 2] = handle_noop,
199 [0x14 >> 2] = handle_noop,
200 [0x1C >> 2] = kvm_s390_handle_wait,
201 [0x20 >> 2] = handle_validity,
202 [0x28 >> 2] = handle_stop,
203};
204
205int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
206{
207 intercept_handler_t func;
208 u8 code = vcpu->arch.sie_block->icptcode;
209
210 if (code & 3 || code > 0x48)
211 return -ENOTSUPP;
212 func = intercept_funcs[code >> 2];
213 if (func)
214 return func(vcpu);
215 return -ENOTSUPP;
216}
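
handle_lctg() and handle_lctl() above decode the same RS/RSY-style instruction fields from the intercept data: register numbers from sie_block->ipa, base and displacement from sie_block->ipb (handle_lctg() additionally folds in the long-displacement high byte). A hypothetical helper showing that decoding in one place, for illustration only:

/* Sketch only; the handlers above open-code this field extraction. */
struct rs_fields {
	int reg1, reg3, base2, disp2;
};

static void decode_rs(u16 ipa, u32 ipb, struct rs_fields *f)
{
	f->reg1  = (ipa & 0x00f0) >> 4;
	f->reg3  = ipa & 0x000f;
	f->base2 = ipb >> 28;
	f->disp2 = (ipb & 0x0fff0000) >> 16;	/* lctg also adds (ipb & 0xff00) << 4 */
}
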
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
new file mode 100644
index 000000000000..fcd1ed8015c1
--- /dev/null
+++ b/arch/s390/kvm/interrupt.c
@@ -0,0 +1,592 @@
1/*
2 * interrupt.c - handling kvm guest interrupts
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 */
12
13#include <asm/lowcore.h>
14#include <asm/uaccess.h>
15#include <linux/kvm_host.h>
16#include "kvm-s390.h"
17#include "gaccess.h"
18
19static int psw_extint_disabled(struct kvm_vcpu *vcpu)
20{
21 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
22}
23
24static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
25{
26 if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
27 (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
28 (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
29 return 0;
30 return 1;
31}
32
33static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
34 struct interrupt_info *inti)
35{
36 switch (inti->type) {
37 case KVM_S390_INT_EMERGENCY:
38 if (psw_extint_disabled(vcpu))
39 return 0;
40 if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
41 return 1;
42 return 0;
43 case KVM_S390_INT_SERVICE:
44 if (psw_extint_disabled(vcpu))
45 return 0;
46 if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
47 return 1;
48 return 0;
49 case KVM_S390_INT_VIRTIO:
50 if (psw_extint_disabled(vcpu))
51 return 0;
52 if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
53 return 1;
54 return 0;
55 case KVM_S390_PROGRAM_INT:
56 case KVM_S390_SIGP_STOP:
57 case KVM_S390_SIGP_SET_PREFIX:
58 case KVM_S390_RESTART:
59 return 1;
60 default:
61 BUG();
62 }
63 return 0;
64}
65
66static void __set_cpu_idle(struct kvm_vcpu *vcpu)
67{
68 BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
69 atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
70 set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
71}
72
73static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
74{
75 BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
76 atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
77 clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
78}
79
80static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
81{
82 atomic_clear_mask(CPUSTAT_ECALL_PEND |
83 CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
84 &vcpu->arch.sie_block->cpuflags);
85 vcpu->arch.sie_block->lctl = 0x0000;
86}
87
88static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
89{
90 atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
91}
92
93static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
94 struct interrupt_info *inti)
95{
96 switch (inti->type) {
97 case KVM_S390_INT_EMERGENCY:
98 case KVM_S390_INT_SERVICE:
99 case KVM_S390_INT_VIRTIO:
100 if (psw_extint_disabled(vcpu))
101 __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
102 else
103 vcpu->arch.sie_block->lctl |= LCTL_CR0;
104 break;
105 case KVM_S390_SIGP_STOP:
106 __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
107 break;
108 default:
109 BUG();
110 }
111}
112
113static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
114 struct interrupt_info *inti)
115{
116 const unsigned short table[] = { 2, 4, 4, 6 };
117 int rc, exception = 0;
118
119 switch (inti->type) {
120 case KVM_S390_INT_EMERGENCY:
121 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
122 vcpu->stat.deliver_emergency_signal++;
123 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
124 if (rc == -EFAULT)
125 exception = 1;
126
127 rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
128 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
129 if (rc == -EFAULT)
130 exception = 1;
131
132 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
133 __LC_EXT_NEW_PSW, sizeof(psw_t));
134 if (rc == -EFAULT)
135 exception = 1;
136 break;
137
138 case KVM_S390_INT_SERVICE:
139 VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
140 inti->ext.ext_params);
141 vcpu->stat.deliver_service_signal++;
142 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
143 if (rc == -EFAULT)
144 exception = 1;
145
146 rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
147 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
148 if (rc == -EFAULT)
149 exception = 1;
150
151 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
152 __LC_EXT_NEW_PSW, sizeof(psw_t));
153 if (rc == -EFAULT)
154 exception = 1;
155
156 rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
157 if (rc == -EFAULT)
158 exception = 1;
159 break;
160
161 case KVM_S390_INT_VIRTIO:
162 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx",
163 inti->ext.ext_params, inti->ext.ext_params2);
164 vcpu->stat.deliver_virtio_interrupt++;
165 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
166 if (rc == -EFAULT)
167 exception = 1;
168
169 rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00);
170 if (rc == -EFAULT)
171 exception = 1;
172
173 rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
174 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
175 if (rc == -EFAULT)
176 exception = 1;
177
178 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
179 __LC_EXT_NEW_PSW, sizeof(psw_t));
180 if (rc == -EFAULT)
181 exception = 1;
182
183 rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
184 if (rc == -EFAULT)
185 exception = 1;
186
187 rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM,
188 inti->ext.ext_params2);
189 if (rc == -EFAULT)
190 exception = 1;
191 break;
192
193 case KVM_S390_SIGP_STOP:
194 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
195 vcpu->stat.deliver_stop_signal++;
196 __set_intercept_indicator(vcpu, inti);
197 break;
198
199 case KVM_S390_SIGP_SET_PREFIX:
200 VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
201 inti->prefix.address);
202 vcpu->stat.deliver_prefix_signal++;
203 vcpu->arch.sie_block->prefix = inti->prefix.address;
204 vcpu->arch.sie_block->ihcpu = 0xffff;
205 break;
206
207 case KVM_S390_RESTART:
208 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
209 vcpu->stat.deliver_restart_signal++;
210 rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
211 restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
212 if (rc == -EFAULT)
213 exception = 1;
214
215 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
216 offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
217 if (rc == -EFAULT)
218 exception = 1;
219 break;
220
221 case KVM_S390_PROGRAM_INT:
222 VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
223 inti->pgm.code,
224 table[vcpu->arch.sie_block->ipa >> 14]);
225 vcpu->stat.deliver_program_int++;
226 rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
227 if (rc == -EFAULT)
228 exception = 1;
229
230 rc = put_guest_u16(vcpu, __LC_PGM_ILC,
231 table[vcpu->arch.sie_block->ipa >> 14]);
232 if (rc == -EFAULT)
233 exception = 1;
234
235 rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
236 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
237 if (rc == -EFAULT)
238 exception = 1;
239
240 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
241 __LC_PGM_NEW_PSW, sizeof(psw_t));
242 if (rc == -EFAULT)
243 exception = 1;
244 break;
245
246 default:
247 BUG();
248 }
249
250 if (exception) {
251 VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering"
252 " interrupt");
253 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
254 if (inti->type == KVM_S390_PROGRAM_INT) {
255 printk(KERN_WARNING "kvm: recursive program check\n");
256 BUG();
257 }
258 }
259}
260
261static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
262{
263 int rc, exception = 0;
264
265 if (psw_extint_disabled(vcpu))
266 return 0;
267 if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
268 return 0;
269 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
270 if (rc == -EFAULT)
271 exception = 1;
272 rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
273 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
274 if (rc == -EFAULT)
275 exception = 1;
276 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
277 __LC_EXT_NEW_PSW, sizeof(psw_t));
278 if (rc == -EFAULT)
279 exception = 1;
280
281 if (exception) {
282 VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \
283 " ckc interrupt");
284 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
285 return 0;
286 }
287
288 return 1;
289}
290
291int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
292{
293 struct local_interrupt *li = &vcpu->arch.local_int;
294 struct float_interrupt *fi = vcpu->arch.local_int.float_int;
295 struct interrupt_info *inti;
296 int rc = 0;
297
298 if (atomic_read(&li->active)) {
299 spin_lock_bh(&li->lock);
300 list_for_each_entry(inti, &li->list, list)
301 if (__interrupt_is_deliverable(vcpu, inti)) {
302 rc = 1;
303 break;
304 }
305 spin_unlock_bh(&li->lock);
306 }
307
308 if ((!rc) && atomic_read(&fi->active)) {
309 spin_lock_bh(&fi->lock);
310 list_for_each_entry(inti, &fi->list, list)
311 if (__interrupt_is_deliverable(vcpu, inti)) {
312 rc = 1;
313 break;
314 }
315 spin_unlock_bh(&fi->lock);
316 }
317
318 if ((!rc) && (vcpu->arch.sie_block->ckc <
319 get_clock() + vcpu->arch.sie_block->epoch)) {
320 if ((!psw_extint_disabled(vcpu)) &&
321 (vcpu->arch.sie_block->gcr[0] & 0x800ul))
322 rc = 1;
323 }
324
325 return rc;
326}
327
328int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
329{
330 return 0;
331}
332
333int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
334{
335 u64 now, sltime;
336 DECLARE_WAITQUEUE(wait, current);
337
338 vcpu->stat.exit_wait_state++;
339 if (kvm_cpu_has_interrupt(vcpu))
340 return 0;
341
342 if (psw_interrupts_disabled(vcpu)) {
343 VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
344 __unset_cpu_idle(vcpu);
345 return -ENOTSUPP; /* disabled wait */
346 }
347
348 if (psw_extint_disabled(vcpu) ||
349 (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
350 VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
351 goto no_timer;
352 }
353
354 now = get_clock() + vcpu->arch.sie_block->epoch;
355 if (vcpu->arch.sie_block->ckc < now) {
356 __unset_cpu_idle(vcpu);
357 return 0;
358 }
359
360 sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
361
362 vcpu->arch.ckc_timer.expires = jiffies + sltime;
363
364 add_timer(&vcpu->arch.ckc_timer);
365 VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime);
366no_timer:
367 spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
368 spin_lock_bh(&vcpu->arch.local_int.lock);
369 __set_cpu_idle(vcpu);
370 vcpu->arch.local_int.timer_due = 0;
371 add_wait_queue(&vcpu->arch.local_int.wq, &wait);
372 while (list_empty(&vcpu->arch.local_int.list) &&
373 list_empty(&vcpu->arch.local_int.float_int->list) &&
374 (!vcpu->arch.local_int.timer_due) &&
375 !signal_pending(current)) {
376 set_current_state(TASK_INTERRUPTIBLE);
377 spin_unlock_bh(&vcpu->arch.local_int.lock);
378 spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
379 vcpu_put(vcpu);
380 schedule();
381 vcpu_load(vcpu);
382 spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
383 spin_lock_bh(&vcpu->arch.local_int.lock);
384 }
385 __unset_cpu_idle(vcpu);
386 __set_current_state(TASK_RUNNING);
 387 remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
388 spin_unlock_bh(&vcpu->arch.local_int.lock);
389 spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
390 del_timer(&vcpu->arch.ckc_timer);
391 return 0;
392}
393
394void kvm_s390_idle_wakeup(unsigned long data)
395{
396 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
397
398 spin_lock_bh(&vcpu->arch.local_int.lock);
399 vcpu->arch.local_int.timer_due = 1;
400 if (waitqueue_active(&vcpu->arch.local_int.wq))
401 wake_up_interruptible(&vcpu->arch.local_int.wq);
402 spin_unlock_bh(&vcpu->arch.local_int.lock);
403}
404
405
406void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
407{
408 struct local_interrupt *li = &vcpu->arch.local_int;
409 struct float_interrupt *fi = vcpu->arch.local_int.float_int;
410 struct interrupt_info *n, *inti = NULL;
411 int deliver;
412
413 __reset_intercept_indicators(vcpu);
414 if (atomic_read(&li->active)) {
415 do {
416 deliver = 0;
417 spin_lock_bh(&li->lock);
418 list_for_each_entry_safe(inti, n, &li->list, list) {
419 if (__interrupt_is_deliverable(vcpu, inti)) {
420 list_del(&inti->list);
421 deliver = 1;
422 break;
423 }
424 __set_intercept_indicator(vcpu, inti);
425 }
426 if (list_empty(&li->list))
427 atomic_set(&li->active, 0);
428 spin_unlock_bh(&li->lock);
429 if (deliver) {
430 __do_deliver_interrupt(vcpu, inti);
431 kfree(inti);
432 }
433 } while (deliver);
434 }
435
436 if ((vcpu->arch.sie_block->ckc <
437 get_clock() + vcpu->arch.sie_block->epoch))
438 __try_deliver_ckc_interrupt(vcpu);
439
440 if (atomic_read(&fi->active)) {
441 do {
442 deliver = 0;
443 spin_lock_bh(&fi->lock);
444 list_for_each_entry_safe(inti, n, &fi->list, list) {
445 if (__interrupt_is_deliverable(vcpu, inti)) {
446 list_del(&inti->list);
447 deliver = 1;
448 break;
449 }
450 __set_intercept_indicator(vcpu, inti);
451 }
452 if (list_empty(&fi->list))
453 atomic_set(&fi->active, 0);
454 spin_unlock_bh(&fi->lock);
455 if (deliver) {
456 __do_deliver_interrupt(vcpu, inti);
457 kfree(inti);
458 }
459 } while (deliver);
460 }
461}
462
463int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
464{
465 struct local_interrupt *li = &vcpu->arch.local_int;
466 struct interrupt_info *inti;
467
468 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
469 if (!inti)
470 return -ENOMEM;
471
 472 inti->type = KVM_S390_PROGRAM_INT;
473 inti->pgm.code = code;
474
475 VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
476 spin_lock_bh(&li->lock);
477 list_add(&inti->list, &li->list);
478 atomic_set(&li->active, 1);
479 BUG_ON(waitqueue_active(&li->wq));
480 spin_unlock_bh(&li->lock);
481 return 0;
482}
483
484int kvm_s390_inject_vm(struct kvm *kvm,
485 struct kvm_s390_interrupt *s390int)
486{
487 struct local_interrupt *li;
488 struct float_interrupt *fi;
489 struct interrupt_info *inti;
490 int sigcpu;
491
492 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
493 if (!inti)
494 return -ENOMEM;
495
496 switch (s390int->type) {
497 case KVM_S390_INT_VIRTIO:
498 VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx",
499 s390int->parm, s390int->parm64);
500 inti->type = s390int->type;
501 inti->ext.ext_params = s390int->parm;
502 inti->ext.ext_params2 = s390int->parm64;
503 break;
504 case KVM_S390_INT_SERVICE:
505 VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
506 inti->type = s390int->type;
507 inti->ext.ext_params = s390int->parm;
508 break;
509 case KVM_S390_PROGRAM_INT:
510 case KVM_S390_SIGP_STOP:
511 case KVM_S390_INT_EMERGENCY:
512 default:
513 kfree(inti);
514 return -EINVAL;
515 }
516
517 mutex_lock(&kvm->lock);
518 fi = &kvm->arch.float_int;
519 spin_lock_bh(&fi->lock);
520 list_add_tail(&inti->list, &fi->list);
521 atomic_set(&fi->active, 1);
522 sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
523 if (sigcpu == KVM_MAX_VCPUS) {
524 do {
525 sigcpu = fi->next_rr_cpu++;
526 if (sigcpu == KVM_MAX_VCPUS)
527 sigcpu = fi->next_rr_cpu = 0;
528 } while (fi->local_int[sigcpu] == NULL);
529 }
530 li = fi->local_int[sigcpu];
531 spin_lock_bh(&li->lock);
532 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
533 if (waitqueue_active(&li->wq))
534 wake_up_interruptible(&li->wq);
535 spin_unlock_bh(&li->lock);
536 spin_unlock_bh(&fi->lock);
537 mutex_unlock(&kvm->lock);
538 return 0;
539}
540
541int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
542 struct kvm_s390_interrupt *s390int)
543{
544 struct local_interrupt *li;
545 struct interrupt_info *inti;
546
547 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
548 if (!inti)
549 return -ENOMEM;
550
551 switch (s390int->type) {
552 case KVM_S390_PROGRAM_INT:
553 if (s390int->parm & 0xffff0000) {
554 kfree(inti);
555 return -EINVAL;
556 }
557 inti->type = s390int->type;
558 inti->pgm.code = s390int->parm;
559 VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
560 s390int->parm);
561 break;
562 case KVM_S390_SIGP_STOP:
563 case KVM_S390_RESTART:
564 case KVM_S390_SIGP_SET_PREFIX:
565 case KVM_S390_INT_EMERGENCY:
566 VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
567 inti->type = s390int->type;
568 break;
569 case KVM_S390_INT_VIRTIO:
570 case KVM_S390_INT_SERVICE:
571 default:
572 kfree(inti);
573 return -EINVAL;
574 }
575
576 mutex_lock(&vcpu->kvm->lock);
577 li = &vcpu->arch.local_int;
578 spin_lock_bh(&li->lock);
579 if (inti->type == KVM_S390_PROGRAM_INT)
580 list_add(&inti->list, &li->list);
581 else
582 list_add_tail(&inti->list, &li->list);
583 atomic_set(&li->active, 1);
584 if (inti->type == KVM_S390_SIGP_STOP)
585 li->action_bits |= ACTION_STOP_ON_STOP;
586 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
587 if (waitqueue_active(&li->wq))
588 wake_up_interruptible(&vcpu->arch.local_int.wq);
589 spin_unlock_bh(&li->lock);
590 mutex_unlock(&vcpu->kvm->lock);
591 return 0;
592}
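
kvm_s390_inject_vm() above is reached from the KVM_S390_INTERRUPT ioctl on the VM file descriptor (see kvm_arch_vm_ioctl() in kvm-s390.c below). A hedged userspace sketch of injecting a floating virtio interrupt, assuming a VM fd created in the usual way:

/* Sketch only: queue a floating virtio interrupt for the guest. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

int inject_virtio(int vm_fd, __u32 param, __u64 param64)
{
	struct kvm_s390_interrupt irq = {
		.type   = KVM_S390_INT_VIRTIO,
		.parm   = param,	/* delivered via __LC_EXT_PARAMS */
		.parm64 = param64,	/* delivered via __LC_PFAULT_INTPARM */
	};

	return ioctl(vm_fd, KVM_S390_INTERRUPT, &irq);
}
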
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
new file mode 100644
index 000000000000..98d1e73e01f1
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,685 @@
1/*
 2 * kvm-s390.c -- hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 */
14
15#include <linux/compiler.h>
16#include <linux/err.h>
17#include <linux/fs.h>
18#include <linux/init.h>
19#include <linux/kvm.h>
20#include <linux/kvm_host.h>
21#include <linux/module.h>
22#include <linux/slab.h>
23#include <linux/timer.h>
24#include <asm/lowcore.h>
25#include <asm/pgtable.h>
26
27#include "kvm-s390.h"
28#include "gaccess.h"
29
30#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
31
32struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_validity", VCPU_STAT(exit_validity) },
35 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
36 { "exit_external_request", VCPU_STAT(exit_external_request) },
37 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
38 { "exit_instruction", VCPU_STAT(exit_instruction) },
39 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
40 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
41 { "instruction_lctg", VCPU_STAT(instruction_lctg) },
42 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
43 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
44 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
45 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
46 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
47 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
48 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
49 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
50 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
51 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
52 { "instruction_spx", VCPU_STAT(instruction_spx) },
53 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
54 { "instruction_stap", VCPU_STAT(instruction_stap) },
55 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
56 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
57 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
58 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
59 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
60 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
61 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
62 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
63 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
64 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
65 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
66 { "diagnose_44", VCPU_STAT(diagnose_44) },
67 { NULL }
68};
69
70
71/* Section: not file related */
72void kvm_arch_hardware_enable(void *garbage)
73{
74 /* every s390 is virtualization enabled ;-) */
75}
76
77void kvm_arch_hardware_disable(void *garbage)
78{
79}
80
81void decache_vcpus_on_cpu(int cpu)
82{
83}
84
85int kvm_arch_hardware_setup(void)
86{
87 return 0;
88}
89
90void kvm_arch_hardware_unsetup(void)
91{
92}
93
94void kvm_arch_check_processor_compat(void *rtn)
95{
96}
97
98int kvm_arch_init(void *opaque)
99{
100 return 0;
101}
102
103void kvm_arch_exit(void)
104{
105}
106
107/* Section: device related */
108long kvm_arch_dev_ioctl(struct file *filp,
109 unsigned int ioctl, unsigned long arg)
110{
111 if (ioctl == KVM_S390_ENABLE_SIE)
112 return s390_enable_sie();
113 return -EINVAL;
114}
115
116int kvm_dev_ioctl_check_extension(long ext)
117{
118 return 0;
119}
120
121/* Section: vm related */
122/*
123 * Get (and clear) the dirty memory log for a memory slot.
124 */
125int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
126 struct kvm_dirty_log *log)
127{
128 return 0;
129}
130
131long kvm_arch_vm_ioctl(struct file *filp,
132 unsigned int ioctl, unsigned long arg)
133{
134 struct kvm *kvm = filp->private_data;
135 void __user *argp = (void __user *)arg;
136 int r;
137
138 switch (ioctl) {
139 case KVM_S390_INTERRUPT: {
140 struct kvm_s390_interrupt s390int;
141
142 r = -EFAULT;
143 if (copy_from_user(&s390int, argp, sizeof(s390int)))
144 break;
145 r = kvm_s390_inject_vm(kvm, &s390int);
146 break;
147 }
148 default:
149 r = -EINVAL;
150 }
151
152 return r;
153}
154
155struct kvm *kvm_arch_create_vm(void)
156{
157 struct kvm *kvm;
158 int rc;
159 char debug_name[16];
160
161 rc = s390_enable_sie();
162 if (rc)
163 goto out_nokvm;
164
165 rc = -ENOMEM;
166 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
167 if (!kvm)
168 goto out_nokvm;
169
170 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
171 if (!kvm->arch.sca)
172 goto out_nosca;
173
174 sprintf(debug_name, "kvm-%u", current->pid);
175
176 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
177 if (!kvm->arch.dbf)
178 goto out_nodbf;
179
180 spin_lock_init(&kvm->arch.float_int.lock);
181 INIT_LIST_HEAD(&kvm->arch.float_int.list);
182
183 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
184 VM_EVENT(kvm, 3, "%s", "vm created");
185
186 try_module_get(THIS_MODULE);
187
188 return kvm;
189out_nodbf:
190 free_page((unsigned long)(kvm->arch.sca));
191out_nosca:
192 kfree(kvm);
193out_nokvm:
194 return ERR_PTR(rc);
195}
196
197void kvm_arch_destroy_vm(struct kvm *kvm)
198{
199 debug_unregister(kvm->arch.dbf);
200 free_page((unsigned long)(kvm->arch.sca));
201 kfree(kvm);
202 module_put(THIS_MODULE);
203}
204
205/* Section: vcpu related */
206int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
207{
208 return 0;
209}
210
211void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
212{
 213 /* kvm common code refers to this, but doesn't call it */
214 BUG();
215}
216
217void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
218{
219 save_fp_regs(&vcpu->arch.host_fpregs);
220 save_access_regs(vcpu->arch.host_acrs);
221 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
222 restore_fp_regs(&vcpu->arch.guest_fpregs);
223 restore_access_regs(vcpu->arch.guest_acrs);
224
225 if (signal_pending(current))
226 atomic_set_mask(CPUSTAT_STOP_INT,
227 &vcpu->arch.sie_block->cpuflags);
228}
229
230void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
231{
232 save_fp_regs(&vcpu->arch.guest_fpregs);
233 save_access_regs(vcpu->arch.guest_acrs);
234 restore_fp_regs(&vcpu->arch.host_fpregs);
235 restore_access_regs(vcpu->arch.host_acrs);
236}
237
238static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
239{
240 /* this equals initial cpu reset in pop, but we don't switch to ESA */
241 vcpu->arch.sie_block->gpsw.mask = 0UL;
242 vcpu->arch.sie_block->gpsw.addr = 0UL;
243 vcpu->arch.sie_block->prefix = 0UL;
244 vcpu->arch.sie_block->ihcpu = 0xffff;
245 vcpu->arch.sie_block->cputm = 0UL;
246 vcpu->arch.sie_block->ckc = 0UL;
247 vcpu->arch.sie_block->todpr = 0;
248 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
249 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
250 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
251 vcpu->arch.guest_fpregs.fpc = 0;
252 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
253 vcpu->arch.sie_block->gbea = 1;
254}
255
256int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
257{
258 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
259 vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
260 vcpu->arch.sie_block->gmsor = 0x000000000000;
261 vcpu->arch.sie_block->ecb = 2;
262 vcpu->arch.sie_block->eca = 0xC1002001U;
263 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
264 (unsigned long) vcpu);
265 get_cpu_id(&vcpu->arch.cpu_id);
266 vcpu->arch.cpu_id.version = 0xfe;
267 return 0;
268}
269
270struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
271 unsigned int id)
272{
273 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
274 int rc = -ENOMEM;
275
276 if (!vcpu)
277 goto out_nomem;
278
279 vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
280
281 if (!vcpu->arch.sie_block)
282 goto out_free_cpu;
283
284 vcpu->arch.sie_block->icpua = id;
285 BUG_ON(!kvm->arch.sca);
286 BUG_ON(kvm->arch.sca->cpu[id].sda);
287 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
288 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
289 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
290
291 spin_lock_init(&vcpu->arch.local_int.lock);
292 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
293 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
294 spin_lock_bh(&kvm->arch.float_int.lock);
295 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
296 init_waitqueue_head(&vcpu->arch.local_int.wq);
297 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
298 spin_unlock_bh(&kvm->arch.float_int.lock);
299
300 rc = kvm_vcpu_init(vcpu, kvm, id);
301 if (rc)
302 goto out_free_cpu;
303 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
304 vcpu->arch.sie_block);
305
306 try_module_get(THIS_MODULE);
307
308 return vcpu;
309out_free_cpu:
310 kfree(vcpu);
311out_nomem:
312 return ERR_PTR(rc);
313}
314
315void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
316{
317 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
318 free_page((unsigned long)(vcpu->arch.sie_block));
319 kfree(vcpu);
320 module_put(THIS_MODULE);
321}
322
323int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
324{
325 /* kvm common code refers to this, but never calls it */
326 BUG();
327 return 0;
328}
329
330static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
331{
332 vcpu_load(vcpu);
333 kvm_s390_vcpu_initial_reset(vcpu);
334 vcpu_put(vcpu);
335 return 0;
336}
337
338int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
339{
340 vcpu_load(vcpu);
341 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
342 vcpu_put(vcpu);
343 return 0;
344}
345
346int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
347{
348 vcpu_load(vcpu);
349 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
350 vcpu_put(vcpu);
351 return 0;
352}
353
354int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
355 struct kvm_sregs *sregs)
356{
357 vcpu_load(vcpu);
358 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
359 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
360 vcpu_put(vcpu);
361 return 0;
362}
363
364int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
365 struct kvm_sregs *sregs)
366{
367 vcpu_load(vcpu);
368 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
369 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
370 vcpu_put(vcpu);
371 return 0;
372}
373
374int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
375{
376 vcpu_load(vcpu);
377 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
378 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
379 vcpu_put(vcpu);
380 return 0;
381}
382
383int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
384{
385 vcpu_load(vcpu);
386 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
387 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
388 vcpu_put(vcpu);
389 return 0;
390}
391
392static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
393{
394 int rc = 0;
395
396 vcpu_load(vcpu);
397 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
398 rc = -EBUSY;
399 else
400 vcpu->arch.sie_block->gpsw = psw;
401 vcpu_put(vcpu);
402 return rc;
403}
404
405int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
406 struct kvm_translation *tr)
407{
408 return -EINVAL; /* not implemented yet */
409}
410
411int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
412 struct kvm_debug_guest *dbg)
413{
414 return -EINVAL; /* not implemented yet */
415}
416
417int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
418 struct kvm_mp_state *mp_state)
419{
420 return -EINVAL; /* not implemented yet */
421}
422
423int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
424 struct kvm_mp_state *mp_state)
425{
426 return -EINVAL; /* not implemented yet */
427}
428
429static void __vcpu_run(struct kvm_vcpu *vcpu)
430{
431 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
432
433 if (need_resched())
434 schedule();
435
436 vcpu->arch.sie_block->icptcode = 0;
437 local_irq_disable();
438 kvm_guest_enter();
439 local_irq_enable();
440 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
441 atomic_read(&vcpu->arch.sie_block->cpuflags));
442 sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
443 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
444 vcpu->arch.sie_block->icptcode);
445 local_irq_disable();
446 kvm_guest_exit();
447 local_irq_enable();
448
449 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
450}
451
452int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
453{
454 int rc;
455 sigset_t sigsaved;
456
457 vcpu_load(vcpu);
458
459 if (vcpu->sigset_active)
460 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
461
462 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
463
464 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
465
466 switch (kvm_run->exit_reason) {
467 case KVM_EXIT_S390_SIEIC:
468 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
469 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
470 break;
471 case KVM_EXIT_UNKNOWN:
472 case KVM_EXIT_S390_RESET:
473 break;
474 default:
475 BUG();
476 }
477
478 might_sleep();
479
480 do {
481 kvm_s390_deliver_pending_interrupts(vcpu);
482 __vcpu_run(vcpu);
483 rc = kvm_handle_sie_intercept(vcpu);
484 } while (!signal_pending(current) && !rc);
485
486 if (signal_pending(current) && !rc)
487 rc = -EINTR;
488
489 if (rc == -ENOTSUPP) {
490 /* intercept cannot be handled in-kernel, prepare kvm-run */
491 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
492 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
493 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
494 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
495 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
496 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
497 rc = 0;
498 }
499
500 if (rc == -EREMOTE) {
501 /* intercept was handled, but userspace support is needed
502 * kvm_run has been prepared by the handler */
503 rc = 0;
504 }
505
506 if (vcpu->sigset_active)
507 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
508
509 vcpu_put(vcpu);
510
511 vcpu->stat.exit_userspace++;
512 return rc;
513}
514
515static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
516 unsigned long n, int prefix)
517{
518 if (prefix)
519 return copy_to_guest(vcpu, guestdest, from, n);
520 else
521 return copy_to_guest_absolute(vcpu, guestdest, from, n);
522}
523
524/*
525 * store status at address
 526 * we have two special cases:
527 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
528 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
529 */
530int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
531{
532 const unsigned char archmode = 1;
533 int prefix;
534
535 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
536 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
537 return -EFAULT;
538 addr = SAVE_AREA_BASE;
539 prefix = 0;
540 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
541 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
542 return -EFAULT;
543 addr = SAVE_AREA_BASE;
544 prefix = 1;
545 } else
546 prefix = 0;
547
548 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
549 vcpu->arch.guest_fpregs.fprs, 128, prefix))
550 return -EFAULT;
551
552 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
553 vcpu->arch.guest_gprs, 128, prefix))
554 return -EFAULT;
555
556 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
557 &vcpu->arch.sie_block->gpsw, 16, prefix))
558 return -EFAULT;
559
560 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
561 &vcpu->arch.sie_block->prefix, 4, prefix))
562 return -EFAULT;
563
564 if (__guestcopy(vcpu,
565 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
566 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
567 return -EFAULT;
568
569 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
570 &vcpu->arch.sie_block->todpr, 4, prefix))
571 return -EFAULT;
572
573 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
574 &vcpu->arch.sie_block->cputm, 8, prefix))
575 return -EFAULT;
576
577 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
578 &vcpu->arch.sie_block->ckc, 8, prefix))
579 return -EFAULT;
580
581 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
582 &vcpu->arch.guest_acrs, 64, prefix))
583 return -EFAULT;
584
585 if (__guestcopy(vcpu,
586 addr + offsetof(struct save_area_s390x, ctrl_regs),
587 &vcpu->arch.sie_block->gcr, 128, prefix))
588 return -EFAULT;
589 return 0;
590}
591
592static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
593{
594 int rc;
595
596 vcpu_load(vcpu);
597 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
598 vcpu_put(vcpu);
599 return rc;
600}
601
602long kvm_arch_vcpu_ioctl(struct file *filp,
603 unsigned int ioctl, unsigned long arg)
604{
605 struct kvm_vcpu *vcpu = filp->private_data;
606 void __user *argp = (void __user *)arg;
607
608 switch (ioctl) {
609 case KVM_S390_INTERRUPT: {
610 struct kvm_s390_interrupt s390int;
611
612 if (copy_from_user(&s390int, argp, sizeof(s390int)))
613 return -EFAULT;
614 return kvm_s390_inject_vcpu(vcpu, &s390int);
615 }
616 case KVM_S390_STORE_STATUS:
617 return kvm_s390_vcpu_store_status(vcpu, arg);
618 case KVM_S390_SET_INITIAL_PSW: {
619 psw_t psw;
620
621 if (copy_from_user(&psw, argp, sizeof(psw)))
622 return -EFAULT;
623 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
624 }
625 case KVM_S390_INITIAL_RESET:
626 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
627 default:
628 ;
629 }
630 return -EINVAL;
631}
632
633/* Section: memory related */
634int kvm_arch_set_memory_region(struct kvm *kvm,
635 struct kvm_userspace_memory_region *mem,
636 struct kvm_memory_slot old,
637 int user_alloc)
638{
 639 /* A few sanity checks. We accept exactly one memory slot, which has
 640 to start at guest physical zero and has to be backed by userland
 641 memory that starts and ends on a page boundary. The memory in
 642 userland may be fragmented into various different vmas. It is
 643 okay to mmap() and munmap() within this slot at any time after
 644 doing this call */
645
646 if (mem->slot)
647 return -EINVAL;
648
649 if (mem->guest_phys_addr)
650 return -EINVAL;
651
652 if (mem->userspace_addr & (PAGE_SIZE - 1))
653 return -EINVAL;
654
655 if (mem->memory_size & (PAGE_SIZE - 1))
656 return -EINVAL;
657
658 kvm->arch.guest_origin = mem->userspace_addr;
659 kvm->arch.guest_memsize = mem->memory_size;
660
661 /* FIXME: we do want to interrupt running CPUs and update their memory
662 configuration now to avoid race conditions. But hey, changing the
663 memory layout while virtual CPUs are running is usually bad
664 programming practice. */
665
666 return 0;
667}
668
669gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
670{
671 return gfn;
672}
673
674static int __init kvm_s390_init(void)
675{
676 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
677}
678
679static void __exit kvm_s390_exit(void)
680{
681 kvm_exit();
682}
683
684module_init(kvm_s390_init);
685module_exit(kvm_s390_exit);
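
kvm_arch_set_memory_region() above accepts exactly one slot describing all of guest memory, starting at guest physical zero and backed by page-aligned userspace memory. A sketch of a conforming KVM_SET_USER_MEMORY_REGION call from userspace, with hypothetical sizing:

/* Sketch only: register the single memory slot this backend accepts. */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int set_guest_memory(int vm_fd, size_t size)	/* size must be page aligned */
{
	struct kvm_userspace_memory_region region;
	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (mem == MAP_FAILED)
		return -1;

	region.slot            = 0;			/* only slot 0 is accepted */
	region.flags           = 0;
	region.guest_phys_addr = 0;			/* must start at guest zero */
	region.memory_size     = size;
	region.userspace_addr  = (unsigned long) mem;	/* page aligned by mmap() */

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
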
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
new file mode 100644
index 000000000000..3893cf12eacf
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,64 @@
1/*
2 * kvm_s390.h - definition for kvm on s390
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#ifndef ARCH_S390_KVM_S390_H
15#define ARCH_S390_KVM_S390_H
16
17#include <linux/kvm.h>
18#include <linux/kvm_host.h>
19
20typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
21
22int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
23
24#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
25do { \
26 debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
27 d_args); \
28} while (0)
29
30#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
31do { \
32 debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \
33 "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \
34 d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
35 d_args); \
36} while (0)
37
38static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
39{
40 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
41}
42
43int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
44void kvm_s390_idle_wakeup(unsigned long data);
45void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
46int kvm_s390_inject_vm(struct kvm *kvm,
47 struct kvm_s390_interrupt *s390int);
48int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
49 struct kvm_s390_interrupt *s390int);
50int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
51
52/* implemented in priv.c */
53int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
54
55/* implemented in sigp.c */
56int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
57
58/* implemented in kvm-s390.c */
59int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
60 unsigned long addr);
61/* implemented in diag.c */
62int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
63
64#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
new file mode 100644
index 000000000000..1465946325c5
--- /dev/null
+++ b/arch/s390/kvm/priv.c
@@ -0,0 +1,323 @@
1/*
2 * priv.c - handling privileged instructions
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#include <linux/kvm.h>
15#include <linux/errno.h>
16#include <asm/current.h>
17#include <asm/debug.h>
18#include <asm/ebcdic.h>
19#include <asm/sysinfo.h>
20#include "gaccess.h"
21#include "kvm-s390.h"
22
23static int handle_set_prefix(struct kvm_vcpu *vcpu)
24{
25 int base2 = vcpu->arch.sie_block->ipb >> 28;
26 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
27 u64 operand2;
28 u32 address = 0;
29 u8 tmp;
30
31 vcpu->stat.instruction_spx++;
32
33 operand2 = disp2;
34 if (base2)
35 operand2 += vcpu->arch.guest_gprs[base2];
36
37 /* must be word boundary */
38 if (operand2 & 3) {
39 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
40 goto out;
41 }
42
43 /* get the value */
44 if (get_guest_u32(vcpu, operand2, &address)) {
45 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
46 goto out;
47 }
48
49 address = address & 0x7fffe000u;
50
51 /* make sure that the new value is valid memory */
52 if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
53 (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
54 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
55 goto out;
56 }
57
58 vcpu->arch.sie_block->prefix = address;
59 vcpu->arch.sie_block->ihcpu = 0xffff;
60
61 VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
62out:
63 return 0;
64}
65
66static int handle_store_prefix(struct kvm_vcpu *vcpu)
67{
68 int base2 = vcpu->arch.sie_block->ipb >> 28;
69 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
70 u64 operand2;
71 u32 address;
72
73 vcpu->stat.instruction_stpx++;
74 operand2 = disp2;
75 if (base2)
76 operand2 += vcpu->arch.guest_gprs[base2];
77
78 /* must be word boundary */
79 if (operand2 & 3) {
80 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
81 goto out;
82 }
83
84 address = vcpu->arch.sie_block->prefix;
85 address = address & 0x7fffe000u;
86
87 /* get the value */
88 if (put_guest_u32(vcpu, operand2, address)) {
89 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
90 goto out;
91 }
92
93 VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
94out:
95 return 0;
96}
97
98static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
99{
100 int base2 = vcpu->arch.sie_block->ipb >> 28;
101 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
102 u64 useraddr;
103 int rc;
104
105 vcpu->stat.instruction_stap++;
106 useraddr = disp2;
107 if (base2)
108 useraddr += vcpu->arch.guest_gprs[base2];
109
110 if (useraddr & 1) {
111 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
112 goto out;
113 }
114
115 rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
116 if (rc == -EFAULT) {
117 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
118 goto out;
119 }
120
121 VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr);
122out:
123 return 0;
124}
125
126static int handle_skey(struct kvm_vcpu *vcpu)
127{
128 vcpu->stat.instruction_storage_key++;
129 vcpu->arch.sie_block->gpsw.addr -= 4;
130 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
131 return 0;
132}
133
134static int handle_stsch(struct kvm_vcpu *vcpu)
135{
136 vcpu->stat.instruction_stsch++;
137 VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
138 /* condition code 3 */
139 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
140 vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
141 return 0;
142}
143
144static int handle_chsc(struct kvm_vcpu *vcpu)
145{
146 vcpu->stat.instruction_chsc++;
147 VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
148 /* condition code 3 */
149 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
150 vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
151 return 0;
152}
153
154static unsigned int kvm_stfl(void)
155{
156 asm volatile(
157 " .insn s,0xb2b10000,0(0)\n" /* stfl */
158 "0:\n"
159 EX_TABLE(0b, 0b));
160 return S390_lowcore.stfl_fac_list;
161}
162
163static int handle_stfl(struct kvm_vcpu *vcpu)
164{
165 unsigned int facility_list = kvm_stfl();
166 int rc;
167
168 vcpu->stat.instruction_stfl++;
169 facility_list &= ~(1UL<<24); /* no stfle */
170
171 rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
172 &facility_list, sizeof(facility_list));
173 if (rc == -EFAULT)
174 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
175 else
176 VCPU_EVENT(vcpu, 5, "store facility list value %x",
177 facility_list);
178 return 0;
179}
180
181static int handle_stidp(struct kvm_vcpu *vcpu)
182{
183 int base2 = vcpu->arch.sie_block->ipb >> 28;
184 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
185 u64 operand2;
186 int rc;
187
188 vcpu->stat.instruction_stidp++;
189 operand2 = disp2;
190 if (base2)
191 operand2 += vcpu->arch.guest_gprs[base2];
192
193 if (operand2 & 7) {
194 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
195 goto out;
196 }
197
198 rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
199 if (rc == -EFAULT) {
200 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
201 goto out;
202 }
203
204 VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
205out:
206 return 0;
207}
208
209static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
210{
211 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
212 int cpus = 0;
213 int n;
214
215 spin_lock_bh(&fi->lock);
216 for (n = 0; n < KVM_MAX_VCPUS; n++)
217 if (fi->local_int[n])
218 cpus++;
219 spin_unlock_bh(&fi->lock);
220
221 /* deal with other level 3 hypervisors */
222 if (stsi(mem, 3, 2, 2) == -ENOSYS)
223 mem->count = 0;
224 if (mem->count < 8)
225 mem->count++;
226 for (n = mem->count - 1; n > 0 ; n--)
227 memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
228
229 mem->vm[0].cpus_total = cpus;
230 mem->vm[0].cpus_configured = cpus;
231 mem->vm[0].cpus_standby = 0;
232 mem->vm[0].cpus_reserved = 0;
233 mem->vm[0].caf = 1000;
234 memcpy(mem->vm[0].name, "KVMguest", 8);
235 ASCEBC(mem->vm[0].name, 8);
 236 memcpy(mem->vm[0].cpi, "KVM/Linux       ", 16);
237 ASCEBC(mem->vm[0].cpi, 16);
238}
239
240static int handle_stsi(struct kvm_vcpu *vcpu)
241{
242 int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28;
243 int sel1 = vcpu->arch.guest_gprs[0] & 0xff;
244 int sel2 = vcpu->arch.guest_gprs[1] & 0xffff;
245 int base2 = vcpu->arch.sie_block->ipb >> 28;
246 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
247 u64 operand2;
248 unsigned long mem;
249
250 vcpu->stat.instruction_stsi++;
251 VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
252
253 operand2 = disp2;
254 if (base2)
255 operand2 += vcpu->arch.guest_gprs[base2];
256
257 if (operand2 & 0xfff && fc > 0)
258 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
259
260 switch (fc) {
261 case 0:
262 vcpu->arch.guest_gprs[0] = 3 << 28;
263 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
264 return 0;
265 case 1: /* same handling for 1 and 2 */
266 case 2:
267 mem = get_zeroed_page(GFP_KERNEL);
268 if (!mem)
269 goto out_fail;
270 if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS)
271 goto out_mem;
272 break;
273 case 3:
274 if (sel1 != 2 || sel2 != 2)
275 goto out_fail;
276 mem = get_zeroed_page(GFP_KERNEL);
277 if (!mem)
278 goto out_fail;
279 handle_stsi_3_2_2(vcpu, (void *) mem);
280 break;
281 default:
282 goto out_fail;
283 }
284
285 if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
286 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
287 goto out_mem;
288 }
289 free_page(mem);
290 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
291 vcpu->arch.guest_gprs[0] = 0;
292 return 0;
293out_mem:
294 free_page(mem);
295out_fail:
296 /* condition code 3 */
297 vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
298 return 0;
299}
300
301static intercept_handler_t priv_handlers[256] = {
302 [0x02] = handle_stidp,
303 [0x10] = handle_set_prefix,
304 [0x11] = handle_store_prefix,
305 [0x12] = handle_store_cpu_address,
306 [0x29] = handle_skey,
307 [0x2a] = handle_skey,
308 [0x2b] = handle_skey,
309 [0x34] = handle_stsch,
310 [0x5f] = handle_chsc,
311 [0x7d] = handle_stsi,
312 [0xb1] = handle_stfl,
313};
314
315int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
316{
317 intercept_handler_t handler;
318
319 handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
320 if (handler)
321 return handler(vcpu);
322 return -ENOTSUPP;
323}
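
Several handlers above report a condition code by rewriting bits 44-45 of the guest PSW mask, e.g. the CC3 paths in handle_stsch(), handle_chsc() and handle_stsi(). The recurring pattern, factored into a hypothetical helper for clarity:

/* Illustration of the PSW condition-code update open-coded above. */
static void set_guest_cc(struct kvm_vcpu *vcpu, int cc)
{
	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);	/* clear old CC */
	vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;	/* set new CC */
}
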
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
new file mode 100644
index 000000000000..934fd6a885f6
--- /dev/null
+++ b/arch/s390/kvm/sie64a.S
@@ -0,0 +1,47 @@
1/*
2 * sie64a.S - low level sie call
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
11 */
12
13#include <linux/errno.h>
14#include <asm/asm-offsets.h>
15
16SP_R5 = 5 * 8 # offset into stackframe
17SP_R6 = 6 * 8
18
19/*
20 * sie64a calling convention:
21 * %r2 pointer to sie control block
22 * %r3 guest register save area
23 */
24 .globl sie64a
25sie64a:
26 lgr %r5,%r3
27 stmg %r5,%r14,SP_R5(%r15) # save register on entry
28 lgr %r14,%r2 # pointer to sie control block
29 lmg %r0,%r13,0(%r3) # load guest gprs 0-13
30sie_inst:
31 sie 0(%r14)
32 lg %r14,SP_R5(%r15)
33 stmg %r0,%r13,0(%r14) # save guest gprs 0-13
34 lghi %r2,0
35 lmg %r6,%r14,SP_R6(%r15)
36 br %r14
37
38sie_err:
39 lg %r14,SP_R5(%r15)
40 stmg %r0,%r13,0(%r14) # save guest gprs 0-13
41 lghi %r2,-EFAULT
42 lmg %r6,%r14,SP_R6(%r15)
43 br %r14
44
45 .section __ex_table,"a"
46 .quad sie_inst,sie_err
47 .previous
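
Seen from C, sie64a behaves like an ordinary function: %r2 carries the sie control block pointer, %r3 the guest register save area, and the return value is 0 on a normal SIE exit or -EFAULT when the SIE instruction itself faults (the __ex_table entry above redirects such a fault to sie_err). A prototype sketch matching the call in __vcpu_run() in kvm-s390.c; the exact parameter types are an assumption:

/* Hypothetical C prototype for the assembler entry point above. */
struct sie_block;
extern int sie64a(struct sie_block *sie_block, unsigned long *guest_gprs);

/* called as: sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs); */
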
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
new file mode 100644
index 000000000000..0a236acfb5f6
--- /dev/null
+++ b/arch/s390/kvm/sigp.c
@@ -0,0 +1,288 @@
1/*
 2 * sigp.c - handling interprocessor communication
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#include <linux/kvm.h>
15#include <linux/kvm_host.h>
16#include "gaccess.h"
17#include "kvm-s390.h"
18
19/* sigp order codes */
20#define SIGP_SENSE 0x01
21#define SIGP_EXTERNAL_CALL 0x02
22#define SIGP_EMERGENCY 0x03
23#define SIGP_START 0x04
24#define SIGP_STOP 0x05
25#define SIGP_RESTART 0x06
26#define SIGP_STOP_STORE_STATUS 0x09
27#define SIGP_INITIAL_CPU_RESET 0x0b
28#define SIGP_CPU_RESET 0x0c
29#define SIGP_SET_PREFIX 0x0d
30#define SIGP_STORE_STATUS_ADDR 0x0e
31#define SIGP_SET_ARCH 0x12
32
33/* cpu status bits */
34#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL
35#define SIGP_STAT_INCORRECT_STATE 0x00000200UL
36#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL
37#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL
38#define SIGP_STAT_STOPPED 0x00000040UL
39#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL
40#define SIGP_STAT_CHECK_STOP 0x00000010UL
41#define SIGP_STAT_INOPERATIVE 0x00000004UL
42#define SIGP_STAT_INVALID_ORDER 0x00000002UL
43#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL
44
45
46static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
47{
48 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
49 int rc;
50
51 if (cpu_addr >= KVM_MAX_VCPUS)
52 return 3; /* not operational */
53
54 spin_lock_bh(&fi->lock);
55 if (fi->local_int[cpu_addr] == NULL)
56 rc = 3; /* not operational */
57 else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
58 & CPUSTAT_RUNNING) {
59 *reg &= 0xffffffff00000000UL;
60 rc = 1; /* status stored */
61 } else {
62 *reg &= 0xffffffff00000000UL;
63 *reg |= SIGP_STAT_STOPPED;
64 rc = 1; /* status stored */
65 }
66 spin_unlock_bh(&fi->lock);
67
68 VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
69 return rc;
70}
71
72static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
73{
74 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
75 struct local_interrupt *li;
76 struct interrupt_info *inti;
77 int rc;
78
79 if (cpu_addr >= KVM_MAX_VCPUS)
80 return 3; /* not operational */
81
82 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
83 if (!inti)
84 return -ENOMEM;
85
86 inti->type = KVM_S390_INT_EMERGENCY;
87
88 spin_lock_bh(&fi->lock);
89 li = fi->local_int[cpu_addr];
90 if (li == NULL) {
91 rc = 3; /* not operational */
92 kfree(inti);
93 goto unlock;
94 }
95 spin_lock_bh(&li->lock);
96 list_add_tail(&inti->list, &li->list);
97 atomic_set(&li->active, 1);
98 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
99 if (waitqueue_active(&li->wq))
100 wake_up_interruptible(&li->wq);
101 spin_unlock_bh(&li->lock);
102 rc = 0; /* order accepted */
103unlock:
104 spin_unlock_bh(&fi->lock);
105 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
106 return rc;
107}
108
109static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
110{
111 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
112 struct local_interrupt *li;
113 struct interrupt_info *inti;
114 int rc;
115
116 if (cpu_addr >= KVM_MAX_VCPUS)
117 return 3; /* not operational */
118
119 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
120 if (!inti)
121 return -ENOMEM;
122
123 inti->type = KVM_S390_SIGP_STOP;
124
125 spin_lock_bh(&fi->lock);
126 li = fi->local_int[cpu_addr];
127 if (li == NULL) {
128 rc = 3; /* not operational */
129 kfree(inti);
130 goto unlock;
131 }
132 spin_lock_bh(&li->lock);
133 list_add_tail(&inti->list, &li->list);
134 atomic_set(&li->active, 1);
135 atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
136 if (store)
137 li->action_bits |= ACTION_STORE_ON_STOP;
138 li->action_bits |= ACTION_STOP_ON_STOP;
139 if (waitqueue_active(&li->wq))
140 wake_up_interruptible(&li->wq);
141 spin_unlock_bh(&li->lock);
142 rc = 0; /* order accepted */
143unlock:
144 spin_unlock_bh(&fi->lock);
145 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
146 return rc;
147}
148
149static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
150{
151 int rc;
152
153 switch (parameter & 0xff) {
154 case 0:
155 printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
156 " not supported");
157 rc = 3; /* not operational */
158 break;
159 case 1:
160 case 2:
161 rc = 0; /* order accepted */
162 break;
163 default:
164 rc = -ENOTSUPP;
165 }
166 return rc;
167}
168
169static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
170 u64 *reg)
171{
172 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
173 struct local_interrupt *li;
174 struct interrupt_info *inti;
175 int rc;
176 u8 tmp;
177
178 /* make sure that the new value is valid memory */
179 address = address & 0x7fffe000u;
180 if ((copy_from_guest(vcpu, &tmp,
181 (u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
182 (copy_from_guest(vcpu, &tmp, (u64) (address +
183 vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
184 *reg |= SIGP_STAT_INVALID_PARAMETER;
185 return 1; /* invalid parameter */
186 }
187
188 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
189 if (!inti)
190 return 2; /* busy */
191
192 spin_lock_bh(&fi->lock);
193 li = fi->local_int[cpu_addr];
194
195 if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
196 rc = 1; /* incorrect state */
197 *reg &= SIGP_STAT_INCORRECT_STATE;
198 kfree(inti);
199 goto out_fi;
200 }
201
202 spin_lock_bh(&li->lock);
203 /* cpu must be in stopped state */
204 if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
205 rc = 1; /* incorrect state */
206 *reg &= SIGP_STAT_INCORRECT_STATE;
207 kfree(inti);
208 goto out_li;
209 }
210
211 inti->type = KVM_S390_SIGP_SET_PREFIX;
212 inti->prefix.address = address;
213
214 list_add_tail(&inti->list, &li->list);
215 atomic_set(&li->active, 1);
216 if (waitqueue_active(&li->wq))
217 wake_up_interruptible(&li->wq);
218 rc = 0; /* order accepted */
219
220 VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
221out_li:
222 spin_unlock_bh(&li->lock);
223out_fi:
224 spin_unlock_bh(&fi->lock);
225 return rc;
226}
227
228int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
229{
230 int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
231 int r3 = vcpu->arch.sie_block->ipa & 0x000f;
232 int base2 = vcpu->arch.sie_block->ipb >> 28;
233 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
234 u32 parameter;
235 u16 cpu_addr = vcpu->arch.guest_gprs[r3];
236 u8 order_code;
237 int rc;
238
239 order_code = disp2;
240 if (base2)
241 order_code += vcpu->arch.guest_gprs[base2];
242
243 if (r1 % 2)
244 parameter = vcpu->arch.guest_gprs[r1];
245 else
246 parameter = vcpu->arch.guest_gprs[r1 + 1];
247
248 switch (order_code) {
249 case SIGP_SENSE:
250 vcpu->stat.instruction_sigp_sense++;
251 rc = __sigp_sense(vcpu, cpu_addr,
252 &vcpu->arch.guest_gprs[r1]);
253 break;
254 case SIGP_EMERGENCY:
255 vcpu->stat.instruction_sigp_emergency++;
256 rc = __sigp_emergency(vcpu, cpu_addr);
257 break;
258 case SIGP_STOP:
259 vcpu->stat.instruction_sigp_stop++;
260 rc = __sigp_stop(vcpu, cpu_addr, 0);
261 break;
262 case SIGP_STOP_STORE_STATUS:
263 vcpu->stat.instruction_sigp_stop++;
264 rc = __sigp_stop(vcpu, cpu_addr, 1);
265 break;
266 case SIGP_SET_ARCH:
267 vcpu->stat.instruction_sigp_arch++;
268 rc = __sigp_set_arch(vcpu, parameter);
269 break;
270 case SIGP_SET_PREFIX:
271 vcpu->stat.instruction_sigp_prefix++;
272 rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
273 &vcpu->arch.guest_gprs[r1]);
274 break;
275 case SIGP_RESTART:
276 vcpu->stat.instruction_sigp_restart++;
277 /* user space must know about restart */
278 default:
279 return -ENOTSUPP;
280 }
281
282 if (rc < 0)
283 return rc;
284
285 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
286 vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
287 return 0;
288}
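
The last two lines of kvm_s390_handle_sigp() encode the SIGP condition code (0 = order accepted, 1 = status stored, 2 = busy, 3 = not operational) into bits 44-45 of the guest PSW mask, the same 3ul << 44 manipulation used by the privileged-instruction handlers earlier in this patch. A helper shown purely to make the bit layout explicit; it is not part of this patch:

        /* illustrative only: place a 2-bit condition code in PSW mask bits 44-45 */
        static inline void set_guest_cc(struct kvm_vcpu *vcpu, unsigned long cc)
        {
                vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
                vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
        }
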
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fd072013f88c..5c1aea97cd12 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
30#define TABLES_PER_PAGE 4 30#define TABLES_PER_PAGE 4
31#define FRAG_MASK 15UL 31#define FRAG_MASK 15UL
32#define SECOND_HALVES 10UL 32#define SECOND_HALVES 10UL
33
34void clear_table_pgstes(unsigned long *table)
35{
36 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
37 memset(table + 256, 0, PAGE_SIZE/4);
38 clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
39 memset(table + 768, 0, PAGE_SIZE/4);
40}
41
33#else 42#else
34#define ALLOC_ORDER 2 43#define ALLOC_ORDER 2
35#define TABLES_PER_PAGE 2 44#define TABLES_PER_PAGE 2
36#define FRAG_MASK 3UL 45#define FRAG_MASK 3UL
37#define SECOND_HALVES 2UL 46#define SECOND_HALVES 2UL
47
48void clear_table_pgstes(unsigned long *table)
49{
50 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
51 memset(table + 256, 0, PAGE_SIZE/2);
52}
53
38#endif 54#endif
39 55
40unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) 56unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
153 unsigned long *table; 169 unsigned long *table;
154 unsigned long bits; 170 unsigned long bits;
155 171
156 bits = mm->context.noexec ? 3UL : 1UL; 172 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
157 spin_lock(&mm->page_table_lock); 173 spin_lock(&mm->page_table_lock);
158 page = NULL; 174 page = NULL;
159 if (!list_empty(&mm->context.pgtable_list)) { 175 if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
170 pgtable_page_ctor(page); 186 pgtable_page_ctor(page);
171 page->flags &= ~FRAG_MASK; 187 page->flags &= ~FRAG_MASK;
172 table = (unsigned long *) page_to_phys(page); 188 table = (unsigned long *) page_to_phys(page);
173 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); 189 if (mm->context.pgstes)
190 clear_table_pgstes(table);
191 else
192 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
174 spin_lock(&mm->page_table_lock); 193 spin_lock(&mm->page_table_lock);
175 list_add(&page->lru, &mm->context.pgtable_list); 194 list_add(&page->lru, &mm->context.pgtable_list);
176 } 195 }
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
191 struct page *page; 210 struct page *page;
192 unsigned long bits; 211 unsigned long bits;
193 212
194 bits = mm->context.noexec ? 3UL : 1UL; 213 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
195 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 214 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
196 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 215 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
197 spin_lock(&mm->page_table_lock); 216 spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
228 mm->context.noexec = 0; 247 mm->context.noexec = 0;
229 update_mm(mm, tsk); 248 update_mm(mm, tsk);
230} 249}
250
251/*
 252 * switch on pgstes for the current userspace process (for kvm)
253 */
254int s390_enable_sie(void)
255{
256 struct task_struct *tsk = current;
257 struct mm_struct *mm;
258 int rc;
259
260 task_lock(tsk);
261
262 rc = 0;
263 if (tsk->mm->context.pgstes)
264 goto unlock;
265
266 rc = -EINVAL;
267 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
268 tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
269 goto unlock;
270
271 tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
272 mm = dup_mm(tsk);
273 tsk->mm->context.pgstes = 0;
274
275 rc = -ENOMEM;
276 if (!mm)
277 goto unlock;
278 mmput(tsk->mm);
279 tsk->mm = tsk->active_mm = mm;
280 preempt_disable();
281 update_mm(mm, tsk);
282 cpu_set(smp_processor_id(), mm->cpu_vm_mask);
283 preempt_enable();
284 rc = 0;
285unlock:
286 task_unlock(tsk);
287 return rc;
288}
289EXPORT_SYMBOL_GPL(s390_enable_sie);
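
s390_enable_sie() is exported for the KVM module and has to be called while the process is still single threaded and before any AIO contexts exist; the checks above reject an mm with more than one user, a borrowed active_mm, or a non-empty ioctx_list. The pgstes flag is set only for the duration of dup_mm() so that the duplicated page tables are allocated with the pgste extension, after which the copy replaces the original mm. A rough sketch of a caller, with error handling elided and the function name purely illustrative:

        static int example_prepare_for_sie(void)
        {
                int rc;

                /* must run before the caller creates threads or AIO contexts */
                rc = s390_enable_sie();
                if (rc)
                        printk(KERN_WARNING "kvm-s390: cannot enable SIE: %d\n", rc);
                return rc;
        }
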
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 524b88920947..409dd71f2738 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -866,14 +866,21 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
866 void *info = call_data->info; 866 void *info = call_data->info;
867 867
868 clear_softint(1 << irq); 868 clear_softint(1 << irq);
869
870 irq_enter();
871
872 if (!call_data->wait) {
873 /* let initiator proceed after getting data */
874 atomic_inc(&call_data->finished);
875 }
876
877 func(info);
878
879 irq_exit();
880
869 if (call_data->wait) { 881 if (call_data->wait) {
870 /* let initiator proceed only after completion */ 882 /* let initiator proceed only after completion */
871 func(info);
872 atomic_inc(&call_data->finished); 883 atomic_inc(&call_data->finished);
873 } else {
874 /* let initiator proceed after getting data */
875 atomic_inc(&call_data->finished);
876 func(info);
877 } 884 }
878} 885}
879 886
@@ -1032,7 +1039,9 @@ void smp_receive_signal(int cpu)
1032 1039
1033void smp_receive_signal_client(int irq, struct pt_regs *regs) 1040void smp_receive_signal_client(int irq, struct pt_regs *regs)
1034{ 1041{
1042 irq_enter();
1035 clear_softint(1 << irq); 1043 clear_softint(1 << irq);
1044 irq_exit();
1036} 1045}
1037 1046
1038void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) 1047void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
@@ -1040,6 +1049,8 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
1040 struct mm_struct *mm; 1049 struct mm_struct *mm;
1041 unsigned long flags; 1050 unsigned long flags;
1042 1051
1052 irq_enter();
1053
1043 clear_softint(1 << irq); 1054 clear_softint(1 << irq);
1044 1055
1045 /* See if we need to allocate a new TLB context because 1056 /* See if we need to allocate a new TLB context because
@@ -1059,6 +1070,8 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
1059 load_secondary_context(mm); 1070 load_secondary_context(mm);
1060 __flush_tlb_mm(CTX_HWBITS(mm->context), 1071 __flush_tlb_mm(CTX_HWBITS(mm->context),
1061 SECONDARY_CONTEXT); 1072 SECONDARY_CONTEXT);
1073
1074 irq_exit();
1062} 1075}
1063 1076
1064void smp_new_mmu_context_version(void) 1077void smp_new_mmu_context_version(void)
@@ -1217,6 +1230,8 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
1217{ 1230{
1218 clear_softint(1 << irq); 1231 clear_softint(1 << irq);
1219 1232
1233 irq_enter();
1234
1220 preempt_disable(); 1235 preempt_disable();
1221 1236
1222 __asm__ __volatile__("flushw"); 1237 __asm__ __volatile__("flushw");
@@ -1229,6 +1244,8 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
1229 prom_world(0); 1244 prom_world(0);
1230 1245
1231 preempt_enable(); 1246 preempt_enable();
1247
1248 irq_exit();
1232} 1249}
1233 1250
1234/* /proc/profile writes can call this, don't __init it please. */ 1251/* /proc/profile writes can call this, don't __init it please. */
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index 73ed01ba40dc..8d4761f15fa9 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -454,8 +454,8 @@ asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second,
454 err = sys_semget(first, (int)second, (int)third); 454 err = sys_semget(first, (int)second, (int)third);
455 goto out; 455 goto out;
456 case SEMCTL: { 456 case SEMCTL: {
457 err = sys_semctl(first, third, 457 err = sys_semctl(first, second,
458 (int)second | IPC_64, 458 (int)third | IPC_64,
459 (union semun) ptr); 459 (union semun) ptr);
460 goto out; 460 goto out;
461 } 461 }
diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64
index 3fbe69e359ed..5696e7b374b3 100644
--- a/arch/um/Kconfig.x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -1,3 +1,10 @@
1
2menu "Host processor type and features"
3
4source "arch/x86/Kconfig.cpu"
5
6endmenu
7
1config UML_X86 8config UML_X86
2 bool 9 bool
3 default y 10 default y
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index f4bd349d4412..f25c29a12d00 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -14,6 +14,7 @@
14#include "os.h" 14#include "os.h"
15#include "um_malloc.h" 15#include "um_malloc.h"
16#include "user.h" 16#include "user.h"
17#include <linux/limits.h>
17 18
18struct helper_data { 19struct helper_data {
19 void (*pre_exec)(void*); 20 void (*pre_exec)(void*);
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 964dc1a04c37..598b5c1903af 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -6,7 +6,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
6 ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \ 6 ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
7 sys_call_table.o tls.o 7 sys_call_table.o tls.o
8 8
9subarch-obj-y = lib/bitops_32.o lib/semaphore_32.o lib/string_32.o 9subarch-obj-y = lib/semaphore_32.o lib/string_32.o
10subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o 10subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
11subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o 11subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o
12 12
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index 3c22de532088..c8b4cce9cfe1 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -10,7 +10,7 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
10 10
11obj-$(CONFIG_MODULES) += um_module.o 11obj-$(CONFIG_MODULES) += um_module.o
12 12
13subarch-obj-y = lib/bitops_64.o lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o 13subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
14subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o 14subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o
15 15
16ldt-y = ../sys-i386/ldt.o 16ldt-y = ../sys-i386/ldt.o
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87a693cf2bb7..e5790fe9e330 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,7 +23,7 @@ config X86
23 select HAVE_KPROBES 23 select HAVE_KPROBES
24 select HAVE_KRETPROBES 24 select HAVE_KRETPROBES
25 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 25 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
26 select HAVE_ARCH_KGDB 26 select HAVE_ARCH_KGDB if !X86_VOYAGER
27 27
28 28
29config GENERIC_LOCKBREAK 29config GENERIC_LOCKBREAK
@@ -142,6 +142,9 @@ config AUDIT_ARCH
142config ARCH_SUPPORTS_AOUT 142config ARCH_SUPPORTS_AOUT
143 def_bool y 143 def_bool y
144 144
145config ARCH_SUPPORTS_OPTIMIZED_INLINING
146 def_bool y
147
145# Use the generic interrupt handling code in kernel/irq/: 148# Use the generic interrupt handling code in kernel/irq/:
146config GENERIC_HARDIRQS 149config GENERIC_HARDIRQS
147 bool 150 bool
@@ -370,6 +373,25 @@ config VMI
370 at the moment), by linking the kernel to a GPL-ed ROM module 373 at the moment), by linking the kernel to a GPL-ed ROM module
371 provided by the hypervisor. 374 provided by the hypervisor.
372 375
376config KVM_CLOCK
377 bool "KVM paravirtualized clock"
378 select PARAVIRT
379 depends on !(X86_VISWS || X86_VOYAGER)
380 help
381 Turning on this option will allow you to run a paravirtualized clock
382 when running over the KVM hypervisor. Instead of relying on a PIT
 383 (or other timer) emulation by the underlying device model, the host
 384 provides the guest with timing infrastructure such as time of day and
 385 system time.
386
387config KVM_GUEST
388 bool "KVM Guest support"
389 select PARAVIRT
390 depends on !(X86_VISWS || X86_VOYAGER)
391 help
392 This option enables various optimizations for running under the KVM
393 hypervisor.
394
373source "arch/x86/lguest/Kconfig" 395source "arch/x86/lguest/Kconfig"
374 396
375config PARAVIRT 397config PARAVIRT
@@ -1049,9 +1071,9 @@ config MTRR
1049 See <file:Documentation/mtrr.txt> for more information. 1071 See <file:Documentation/mtrr.txt> for more information.
1050 1072
1051config X86_PAT 1073config X86_PAT
1052 def_bool y 1074 bool
1053 prompt "x86 PAT support" 1075 prompt "x86 PAT support"
1054 depends on MTRR && NONPROMISC_DEVMEM 1076 depends on MTRR
1055 help 1077 help
1056 Use PAT attributes to setup page level cache control. 1078 Use PAT attributes to setup page level cache control.
1057 1079
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 57072f2716f9..7ef18b01f0bc 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -21,8 +21,8 @@ config M386
21 21
22 Here are the settings recommended for greatest speed: 22 Here are the settings recommended for greatest speed:
23 - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI 23 - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
24 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels 24 486DLC/DLC2, and UMC 486SX-S. Only "386" kernels will run on a 386
25 will run on a 386 class machine. 25 class machine.
26 - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or 26 - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
27 SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. 27 SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
28 - "586" for generic Pentium CPUs lacking the TSC 28 - "586" for generic Pentium CPUs lacking the TSC
@@ -278,6 +278,11 @@ config GENERIC_CPU
278 278
279endchoice 279endchoice
280 280
281config X86_CPU
282 def_bool y
283 select GENERIC_FIND_FIRST_BIT
284 select GENERIC_FIND_NEXT_BIT
285
281config X86_GENERIC 286config X86_GENERIC
282 bool "Generic x86 support" 287 bool "Generic x86 support"
283 depends on X86_32 288 depends on X86_32
@@ -398,7 +403,7 @@ config X86_TSC
398# generates cmov. 403# generates cmov.
399config X86_CMOV 404config X86_CMOV
400 def_bool y 405 def_bool y
401 depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7) 406 depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64)
402 407
403config X86_MINIMUM_CPU_FAMILY 408config X86_MINIMUM_CPU_FAMILY
404 int 409 int
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 610aaecc19f8..5b1979a45a1e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -5,6 +5,17 @@ config TRACE_IRQFLAGS_SUPPORT
5 5
6source "lib/Kconfig.debug" 6source "lib/Kconfig.debug"
7 7
8config NONPROMISC_DEVMEM
9 bool "Disable promiscuous /dev/mem"
10 help
11 The /dev/mem file by default only allows userspace access to PCI
12 space and the BIOS code and data regions. This is sufficient for
 13 dosemu and X and all common users of /dev/mem. If this option is
 14 disabled, you allow userspace (root) access to all of memory,
 15 including kernel and userspace memory. Accidental access to this is
 16 obviously disastrous, but specific access can be used by people
 17 debugging the kernel.
18
8config EARLY_PRINTK 19config EARLY_PRINTK
9 bool "Early printk" if EMBEDDED 20 bool "Early printk" if EMBEDDED
10 default y 21 default y
@@ -246,3 +257,16 @@ config CPA_DEBUG
246 Do change_page_attr() self-tests every 30 seconds. 257 Do change_page_attr() self-tests every 30 seconds.
247 258
248endmenu 259endmenu
260
261config OPTIMIZE_INLINING
262 bool "Allow gcc to uninline functions marked 'inline'"
263 default y
264 help
265 This option determines if the kernel forces gcc to inline the functions
266 developers have marked 'inline'. Doing so takes away freedom from gcc to
267 do what it thinks is best, which is desirable for the gcc 3.x series of
 268 compilers. The gcc 4.x series has a rewritten inlining algorithm, and
 269 enabling this option will generate a smaller kernel there. Hopefully
 270 this algorithm is so good that allowing gcc 4.x to make the decision
 271 will become the default in the future; until then this option is there
 272 to test gcc for this.
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index b1bdc4c6f9f2..172cf8a98bdd 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -1,7 +1,8 @@
1bootsect 1bootsect
2bzImage 2bzImage
3cpustr.h
4mkcpustr
5offsets.h
3setup 6setup
4setup.bin 7setup.bin
5setup.elf 8setup.elf
6cpustr.h
7mkcpustr
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 6d2df8d61c54..af86e431acfa 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -120,7 +120,7 @@ _start:
120 # Part 2 of the header, from the old setup.S 120 # Part 2 of the header, from the old setup.S
121 121
122 .ascii "HdrS" # header signature 122 .ascii "HdrS" # header signature
123 .word 0x0208 # header version number (>= 0x0105) 123 .word 0x0209 # header version number (>= 0x0105)
124 # or else old loadlin-1.5 will fail) 124 # or else old loadlin-1.5 will fail)
125 .globl realmode_swtch 125 .globl realmode_swtch
126realmode_swtch: .word 0, 0 # default_switch, SETUPSEG 126realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -227,6 +227,10 @@ hardware_subarch_data: .quad 0
227payload_offset: .long input_data 227payload_offset: .long input_data
228payload_length: .long input_data_end-input_data 228payload_length: .long input_data_end-input_data
229 229
230setup_data: .quad 0 # 64-bit physical pointer to
231 # single linked list of
232 # struct setup_data
233
230# End of setup header ##################################################### 234# End of setup header #####################################################
231 235
232 .section ".inittext", "ax" 236 .section ".inittext", "ax"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 3df340b54e57..ad7ddaaff588 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1421,6 +1421,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
1421# CONFIG_DEBUG_VM is not set 1421# CONFIG_DEBUG_VM is not set
1422# CONFIG_DEBUG_LIST is not set 1422# CONFIG_DEBUG_LIST is not set
1423# CONFIG_FRAME_POINTER is not set 1423# CONFIG_FRAME_POINTER is not set
1424CONFIG_OPTIMIZE_INLINING=y
1424# CONFIG_RCU_TORTURE_TEST is not set 1425# CONFIG_RCU_TORTURE_TEST is not set
1425# CONFIG_LKDTM is not set 1426# CONFIG_LKDTM is not set
1426# CONFIG_FAULT_INJECTION is not set 1427# CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index eef98cb00c62..2d6f5b2809d2 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1346,6 +1346,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
1346# CONFIG_DEBUG_VM is not set 1346# CONFIG_DEBUG_VM is not set
1347# CONFIG_DEBUG_LIST is not set 1347# CONFIG_DEBUG_LIST is not set
1348# CONFIG_FRAME_POINTER is not set 1348# CONFIG_FRAME_POINTER is not set
1349CONFIG_OPTIMIZE_INLINING=y
1349# CONFIG_RCU_TORTURE_TEST is not set 1350# CONFIG_RCU_TORTURE_TEST is not set
1350# CONFIG_LKDTM is not set 1351# CONFIG_LKDTM is not set
1351# CONFIG_FAULT_INJECTION is not set 1352# CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 05e155d3fb6c..bbed3a26ce55 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -499,11 +499,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
499 regs->cs = __USER32_CS; 499 regs->cs = __USER32_CS;
500 regs->ss = __USER32_DS; 500 regs->ss = __USER32_DS;
501 501
502 set_fs(USER_DS);
503 regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
504 if (test_thread_flag(TIF_SINGLESTEP))
505 ptrace_notify(SIGTRAP);
506
507#if DEBUG_SIG 502#if DEBUG_SIG
508 printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", 503 printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
509 current->comm, current->pid, frame, regs->ip, frame->pretcode); 504 current->comm, current->pid, frame, regs->ip, frame->pretcode);
@@ -599,11 +594,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
599 regs->cs = __USER32_CS; 594 regs->cs = __USER32_CS;
600 regs->ss = __USER32_DS; 595 regs->ss = __USER32_DS;
601 596
602 set_fs(USER_DS);
603 regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
604 if (test_thread_flag(TIF_SINGLESTEP))
605 ptrace_notify(SIGTRAP);
606
607#if DEBUG_SIG 597#if DEBUG_SIG
608 printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", 598 printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
609 current->comm, current->pid, frame, regs->ip, frame->pretcode); 599 current->comm, current->pid, frame, regs->ip, frame->pretcode);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ae7158bce4d6..b5e329da166c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -430,7 +430,7 @@ ia32_sys_call_table:
430 .quad sys_setuid16 430 .quad sys_setuid16
431 .quad sys_getuid16 431 .quad sys_getuid16
432 .quad compat_sys_stime /* stime */ /* 25 */ 432 .quad compat_sys_stime /* stime */ /* 25 */
433 .quad sys32_ptrace /* ptrace */ 433 .quad compat_sys_ptrace /* ptrace */
434 .quad sys_alarm 434 .quad sys_alarm
435 .quad sys_fstat /* (old)fstat */ 435 .quad sys_fstat /* (old)fstat */
436 .quad sys_pause 436 .quad sys_pause
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 90e092d0af0c..fa19c3819540 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
80obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o 80obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
81 81
82obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o 82obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
83obj-$(CONFIG_KVM_GUEST) += kvm.o
84obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
83obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 85obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
84 86
85ifdef CONFIG_INPUT_PCSPKR 87ifdef CONFIG_INPUT_PCSPKR
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 057ccf1d5ad4..977ed5cdeaa3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -697,10 +697,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
697#define HPET_RESOURCE_NAME_SIZE 9 697#define HPET_RESOURCE_NAME_SIZE 9
698 hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); 698 hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
699 699
700 if (!hpet_res)
701 return 0;
702
703 memset(hpet_res, 0, sizeof(*hpet_res));
704 hpet_res->name = (void *)&hpet_res[1]; 700 hpet_res->name = (void *)&hpet_res[1];
705 hpet_res->flags = IORESOURCE_MEM; 701 hpet_res->flags = IORESOURCE_MEM;
706 snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u", 702 snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u",
diff --git a/arch/x86/kernel/acpi/realmode/.gitignore b/arch/x86/kernel/acpi/realmode/.gitignore
new file mode 100644
index 000000000000..58f1f48a58f8
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/.gitignore
@@ -0,0 +1,3 @@
1wakeup.bin
2wakeup.elf
3wakeup.lds
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index df4099dc1c68..65c7857a90dd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -511,31 +511,30 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
511 unsigned long flags; 511 unsigned long flags;
512 char *vaddr; 512 char *vaddr;
513 int nr_pages = 2; 513 int nr_pages = 2;
514 struct page *pages[2];
515 int i;
514 516
515 BUG_ON(len > sizeof(long)); 517 if (!core_kernel_text((unsigned long)addr)) {
516 BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1)) 518 pages[0] = vmalloc_to_page(addr);
517 - ((long)addr & ~(sizeof(long) - 1))); 519 pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
518 if (kernel_text_address((unsigned long)addr)) {
519 struct page *pages[2] = { virt_to_page(addr),
520 virt_to_page(addr + PAGE_SIZE) };
521 if (!pages[1])
522 nr_pages = 1;
523 vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
524 BUG_ON(!vaddr);
525 local_irq_save(flags);
526 memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
527 local_irq_restore(flags);
528 vunmap(vaddr);
529 } else { 520 } else {
530 /* 521 pages[0] = virt_to_page(addr);
531 * modules are in vmalloc'ed memory, always writable. 522 WARN_ON(!PageReserved(pages[0]));
532 */ 523 pages[1] = virt_to_page(addr + PAGE_SIZE);
533 local_irq_save(flags);
534 memcpy(addr, opcode, len);
535 local_irq_restore(flags);
536 } 524 }
525 BUG_ON(!pages[0]);
526 if (!pages[1])
527 nr_pages = 1;
528 vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
529 BUG_ON(!vaddr);
530 local_irq_save(flags);
531 memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
532 local_irq_restore(flags);
533 vunmap(vaddr);
537 sync_core(); 534 sync_core();
538 /* Could also do a CLFLUSH here to speed up CPU recovery; but 535 /* Could also do a CLFLUSH here to speed up CPU recovery; but
539 that causes hangs on some VIA CPUs. */ 536 that causes hangs on some VIA CPUs. */
537 for (i = 0; i < len; i++)
538 BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
540 return addr; 539 return addr;
541} 540}
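
After this rework text_poke() treats core kernel text and module (vmalloc) text the same way: it resolves the backing pages, maps them writable with vmap(), copies through the alias, and verifies the result byte for byte. A minimal sketch of a call site, with the target address and replacement bytes purely illustrative:

        static void __init example_patch_nops(void *addr)
        {
                /* five single-byte NOPs; text_poke() remaps the target page(s)
                 * writable via vmap(), copies them in, then verifies the copy */
                static const unsigned char nops[5] = { 0x90, 0x90, 0x90, 0x90, 0x90 };

                text_poke(addr, nops, sizeof(nops));
        }
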
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 687208190b06..4b99b1bdeb6c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -451,7 +451,8 @@ void __init setup_boot_APIC_clock(void)
451 } 451 }
452 452
453 /* Calculate the scaled math multiplication factor */ 453 /* Calculate the scaled math multiplication factor */
454 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32); 454 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
455 lapic_clockevent.shift);
455 lapic_clockevent.max_delta_ns = 456 lapic_clockevent.max_delta_ns =
456 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); 457 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
457 lapic_clockevent.min_delta_ns = 458 lapic_clockevent.min_delta_ns =
@@ -902,7 +903,7 @@ void __init init_bsp_APIC(void)
902 apic_write_around(APIC_LVT1, value); 903 apic_write_around(APIC_LVT1, value);
903} 904}
904 905
905void __cpuinit lapic_setup_esr(void) 906static void __cpuinit lapic_setup_esr(void)
906{ 907{
907 unsigned long oldvalue, value, maxlvt; 908 unsigned long oldvalue, value, maxlvt;
908 if (lapic_is_integrated() && !esr_disable) { 909 if (lapic_is_integrated() && !esr_disable) {
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 9e8e5c050c55..5910020c3f24 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -360,7 +360,8 @@ static void __init calibrate_APIC_clock(void)
360 result / 1000 / 1000, result / 1000 % 1000); 360 result / 1000 / 1000, result / 1000 % 1000);
361 361
362 /* Calculate the scaled math multiplication factor */ 362 /* Calculate the scaled math multiplication factor */
363 lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); 363 lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
364 lapic_clockevent.shift);
364 lapic_clockevent.max_delta_ns = 365 lapic_clockevent.max_delta_ns =
365 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); 366 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
366 lapic_clockevent.min_delta_ns = 367 lapic_clockevent.min_delta_ns =
@@ -429,7 +430,7 @@ void __init setup_boot_APIC_clock(void)
429 * set the DUMMY flag again and force the broadcast mode in the 430 * set the DUMMY flag again and force the broadcast mode in the
430 * clockevents layer. 431 * clockevents layer.
431 */ 432 */
432void __cpuinit check_boot_apic_timer_broadcast(void) 433static void __cpuinit check_boot_apic_timer_broadcast(void)
433{ 434{
434 if (!disable_apic_timer || 435 if (!disable_apic_timer ||
435 (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) 436 (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
@@ -834,7 +835,7 @@ void __cpuinit setup_local_APIC(void)
834 preempt_enable(); 835 preempt_enable();
835} 836}
836 837
837void __cpuinit lapic_setup_esr(void) 838static void __cpuinit lapic_setup_esr(void)
838{ 839{
839 unsigned maxlvt = lapic_get_maxlvt(); 840 unsigned maxlvt = lapic_get_maxlvt();
840 841
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f0030a0999c7..e4ea362e8480 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -904,6 +904,7 @@ recalc:
904 original_pm_idle(); 904 original_pm_idle();
905 else 905 else
906 default_idle(); 906 default_idle();
907 local_irq_disable();
907 jiffies_since_last_check = jiffies - last_jiffies; 908 jiffies_since_last_check = jiffies - last_jiffies;
908 if (jiffies_since_last_check > idle_period) 909 if (jiffies_since_last_check > idle_period)
909 goto recalc; 910 goto recalc;
@@ -911,6 +912,8 @@ recalc:
911 912
912 if (apm_idle_done) 913 if (apm_idle_done)
913 apm_do_busy(); 914 apm_do_busy();
915
916 local_irq_enable();
914} 917}
915 918
916/** 919/**
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee7c45235e54..a0c6f8190887 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_X86_32) += cyrix.o
11obj-$(CONFIG_X86_32) += centaur.o 11obj-$(CONFIG_X86_32) += centaur.o
12obj-$(CONFIG_X86_32) += transmeta.o 12obj-$(CONFIG_X86_32) += transmeta.o
13obj-$(CONFIG_X86_32) += intel.o 13obj-$(CONFIG_X86_32) += intel.o
14obj-$(CONFIG_X86_32) += nexgen.o
15obj-$(CONFIG_X86_32) += umc.o 14obj-$(CONFIG_X86_32) += umc.o
16 15
17obj-$(CONFIG_X86_MCE) += mcheck/ 16obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0173065dc3b7..245866828294 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -343,10 +343,4 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = {
343 .c_size_cache = amd_size_cache, 343 .c_size_cache = amd_size_cache,
344}; 344};
345 345
346int __init amd_init_cpu(void)
347{
348 cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
349 return 0;
350}
351
352cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); 346cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 9a699ed03598..e07e8c068ae0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -49,7 +49,7 @@ static int banks;
49static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; 49static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
50static unsigned long notify_user; 50static unsigned long notify_user;
51static int rip_msr; 51static int rip_msr;
52static int mce_bootlog = 1; 52static int mce_bootlog = -1;
53static atomic_t mce_events; 53static atomic_t mce_events;
54 54
55static char trigger[128]; 55static char trigger[128];
@@ -471,13 +471,15 @@ static void mce_init(void *dummy)
471static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) 471static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
472{ 472{
473 /* This should be disabled by the BIOS, but isn't always */ 473 /* This should be disabled by the BIOS, but isn't always */
474 if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { 474 if (c->x86_vendor == X86_VENDOR_AMD) {
475 /* disable GART TBL walk error reporting, which trips off 475 if(c->x86 == 15)
476 incorrectly with the IOMMU & 3ware & Cerberus. */ 476 /* disable GART TBL walk error reporting, which trips off
477 clear_bit(10, &bank[4]); 477 incorrectly with the IOMMU & 3ware & Cerberus. */
478 /* Lots of broken BIOS around that don't clear them 478 clear_bit(10, &bank[4]);
479 by default and leave crap in there. Don't log. */ 479 if(c->x86 <= 17 && mce_bootlog < 0)
480 mce_bootlog = 0; 480 /* Lots of broken BIOS around that don't clear them
481 by default and leave crap in there. Don't log. */
482 mce_bootlog = 0;
481 } 483 }
482 484
483} 485}
diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c
deleted file mode 100644
index 5d5e1c134123..000000000000
--- a/arch/x86/kernel/cpu/nexgen.c
+++ /dev/null
@@ -1,59 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/string.h>
4#include <asm/processor.h>
5
6#include "cpu.h"
7
8/*
9 * Detect a NexGen CPU running without BIOS hypercode new enough
10 * to have CPUID. (Thanks to Herbert Oppmann)
11 */
12
13static int __cpuinit deep_magic_nexgen_probe(void)
14{
15 int ret;
16
17 __asm__ __volatile__ (
18 " movw $0x5555, %%ax\n"
19 " xorw %%dx,%%dx\n"
20 " movw $2, %%cx\n"
21 " divw %%cx\n"
22 " movl $0, %%eax\n"
23 " jnz 1f\n"
24 " movl $1, %%eax\n"
25 "1:\n"
26 : "=a" (ret) : : "cx", "dx");
27 return ret;
28}
29
30static void __cpuinit init_nexgen(struct cpuinfo_x86 *c)
31{
32 c->x86_cache_size = 256; /* A few had 1 MB... */
33}
34
35static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c)
36{
37 /* Detect NexGen with old hypercode */
38 if (deep_magic_nexgen_probe())
39 strcpy(c->x86_vendor_id, "NexGenDriven");
40}
41
42static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
43 .c_vendor = "Nexgen",
44 .c_ident = { "NexGenDriven" },
45 .c_models = {
46 { .vendor = X86_VENDOR_NEXGEN,
47 .family = 5,
48 .model_names = { [1] = "Nx586" }
49 },
50 },
51 .c_init = init_nexgen,
52 .c_identify = nexgen_identify,
53};
54
55int __init nexgen_init_cpu(void)
56{
57 cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
58 return 0;
59}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index b943e10ad814..f9ae93adffe5 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -614,16 +614,6 @@ static struct wd_ops intel_arch_wd_ops __read_mostly = {
614 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, 614 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
615}; 615};
616 616
617static struct wd_ops coreduo_wd_ops = {
618 .reserve = single_msr_reserve,
619 .unreserve = single_msr_unreserve,
620 .setup = setup_intel_arch_watchdog,
621 .rearm = p6_rearm,
622 .stop = single_msr_stop_watchdog,
623 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
624 .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
625};
626
627static void probe_nmi_watchdog(void) 617static void probe_nmi_watchdog(void)
628{ 618{
629 switch (boot_cpu_data.x86_vendor) { 619 switch (boot_cpu_data.x86_vendor) {
@@ -637,8 +627,8 @@ static void probe_nmi_watchdog(void)
637 /* Work around Core Duo (Yonah) errata AE49 where perfctr1 627 /* Work around Core Duo (Yonah) errata AE49 where perfctr1
638 doesn't have a working enable bit. */ 628 doesn't have a working enable bit. */
639 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { 629 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
640 wd_ops = &coreduo_wd_ops; 630 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
641 break; 631 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
642 } 632 }
643 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 633 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
644 wd_ops = &intel_arch_wd_ops; 634 wd_ops = &intel_arch_wd_ops;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2251d0ae9570..268553817909 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -25,6 +25,7 @@
25#include <asm/hpet.h> 25#include <asm/hpet.h>
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/smp.h>
28#include <asm/reboot.h>
28 29
29#include <mach_ipi.h> 30#include <mach_ipi.h>
30 31
@@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void)
117} 118}
118#endif 119#endif
119 120
120void machine_crash_shutdown(struct pt_regs *regs) 121void native_machine_crash_shutdown(struct pt_regs *regs)
121{ 122{
122 /* This function is only called after the system 123 /* This function is only called after the system
123 * has panicked or is otherwise in a critical state. 124 * has panicked or is otherwise in a critical state.
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index cbd42e51cb08..645ee5e32a27 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -84,14 +84,41 @@ void __init reserve_early(unsigned long start, unsigned long end, char *name)
84 strncpy(r->name, name, sizeof(r->name) - 1); 84 strncpy(r->name, name, sizeof(r->name) - 1);
85} 85}
86 86
87void __init early_res_to_bootmem(void) 87void __init free_early(unsigned long start, unsigned long end)
88{
89 struct early_res *r;
90 int i, j;
91
92 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
93 r = &early_res[i];
94 if (start == r->start && end == r->end)
95 break;
96 }
97 if (i >= MAX_EARLY_RES || !early_res[i].end)
98 panic("free_early on not reserved area: %lx-%lx!", start, end);
99
100 for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
101 ;
102
103 memcpy(&early_res[i], &early_res[i + 1],
104 (j - 1 - i) * sizeof(struct early_res));
105
106 early_res[j - 1].end = 0;
107}
108
109void __init early_res_to_bootmem(unsigned long start, unsigned long end)
88{ 110{
89 int i; 111 int i;
112 unsigned long final_start, final_end;
90 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { 113 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
91 struct early_res *r = &early_res[i]; 114 struct early_res *r = &early_res[i];
92 printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, 115 final_start = max(start, r->start);
93 r->start, r->end - 1, r->name); 116 final_end = min(end, r->end);
94 reserve_bootmem_generic(r->start, r->end - r->start); 117 if (final_start >= final_end)
118 continue;
119 printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
120 final_start, final_end - 1, r->name);
121 reserve_bootmem_generic(final_start, final_end - final_start);
95 } 122 }
96} 123}
97 124
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0f8934fc303..2a609dc3271c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,7 @@ restore_nocheck_notrace:
409irq_return: 409irq_return:
410 INTERRUPT_RETURN 410 INTERRUPT_RETURN
411.section .fixup,"ax" 411.section .fixup,"ax"
412iret_exc: 412ENTRY(iret_exc)
413 pushl $0 # no error code 413 pushl $0 # no error code
414 pushl $do_iret_error 414 pushl $do_iret_error
415 jmp error_code 415 jmp error_code
@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
1017ENDPROC(kernel_thread_helper) 1017ENDPROC(kernel_thread_helper)
1018 1018
1019#ifdef CONFIG_XEN 1019#ifdef CONFIG_XEN
1020/* Xen doesn't set %esp to be precisely what the normal sysenter
1021 entrypoint expects, so fix it up before using the normal path. */
1022ENTRY(xen_sysenter_target)
1023 RING0_INT_FRAME
1024 addl $5*4, %esp /* remove xen-provided frame */
1025 jmp sysenter_past_esp
1026
1020ENTRY(xen_hypervisor_callback) 1027ENTRY(xen_hypervisor_callback)
1021 CFI_STARTPROC 1028 CFI_STARTPROC
1022 pushl $0 1029 pushl $0
@@ -1035,8 +1042,9 @@ ENTRY(xen_hypervisor_callback)
1035 cmpl $xen_iret_end_crit,%eax 1042 cmpl $xen_iret_end_crit,%eax
1036 jae 1f 1043 jae 1f
1037 1044
1038 call xen_iret_crit_fixup 1045 jmp xen_iret_crit_fixup
1039 1046
1047ENTRY(xen_do_upcall)
10401: mov %esp, %eax 10481: mov %esp, %eax
1041 call xen_evtchn_do_upcall 1049 call xen_evtchn_do_upcall
1042 jmp ret_from_intr 1050 jmp ret_from_intr
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 9546ef408b92..021624c83583 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
51 else 51 else
52#endif 52#endif
53 53
54 if (cpus_weight(cpu_possible_map) <= 8) 54 if (num_possible_cpus() <= 8)
55 genapic = &apic_flat; 55 genapic = &apic_flat;
56 else 56 else
57 genapic = &apic_physflat; 57 genapic = &apic_physflat;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 993c76773256..e25c57b8aa84 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -11,6 +11,7 @@
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/start_kernel.h> 13#include <linux/start_kernel.h>
14#include <linux/io.h>
14 15
15#include <asm/processor.h> 16#include <asm/processor.h>
16#include <asm/proto.h> 17#include <asm/proto.h>
@@ -22,6 +23,7 @@
22#include <asm/sections.h> 23#include <asm/sections.h>
23#include <asm/kdebug.h> 24#include <asm/kdebug.h>
24#include <asm/e820.h> 25#include <asm/e820.h>
26#include <asm/bios_ebda.h>
25 27
26static void __init zap_identity_mappings(void) 28static void __init zap_identity_mappings(void)
27{ 29{
@@ -49,7 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
49 } 51 }
50} 52}
51 53
52#define BIOS_EBDA_SEGMENT 0x40E
53#define BIOS_LOWMEM_KILOBYTES 0x413 54#define BIOS_LOWMEM_KILOBYTES 0x413
54 55
55/* 56/*
@@ -80,8 +81,7 @@ static void __init reserve_ebda_region(void)
80 lowmem <<= 10; 81 lowmem <<= 10;
81 82
82 /* start of EBDA area */ 83 /* start of EBDA area */
83 ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT); 84 ebda_addr = get_bios_ebda();
84 ebda_addr <<= 4;
85 85
86 /* Fixup: bios puts an EBDA in the top 64K segment */ 86 /* Fixup: bios puts an EBDA in the top 64K segment */
87 /* of conventional memory, but does not adjust lowmem. */ 87 /* of conventional memory, but does not adjust lowmem. */
@@ -101,6 +101,24 @@ static void __init reserve_ebda_region(void)
101 reserve_early(lowmem, 0x100000, "BIOS reserved"); 101 reserve_early(lowmem, 0x100000, "BIOS reserved");
102} 102}
103 103
104static void __init reserve_setup_data(void)
105{
106 struct setup_data *data;
107 unsigned long pa_data;
108 char buf[32];
109
110 if (boot_params.hdr.version < 0x0209)
111 return;
112 pa_data = boot_params.hdr.setup_data;
113 while (pa_data) {
114 data = early_ioremap(pa_data, sizeof(*data));
115 sprintf(buf, "setup data %x", data->type);
116 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
117 pa_data = data->next;
118 early_iounmap(data, sizeof(*data));
119 }
120}
121
104void __init x86_64_start_kernel(char * real_mode_data) 122void __init x86_64_start_kernel(char * real_mode_data)
105{ 123{
106 int i; 124 int i;
@@ -157,6 +175,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
157#endif 175#endif
158 176
159 reserve_ebda_region(); 177 reserve_ebda_region();
178 reserve_setup_data();
160 179
161 /* 180 /*
162 * At this point everything still needed from the boot loader 181 * At this point everything still needed from the boot loader
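
Both reserve_setup_data() above and the kdebugfs nodes further down walk the boot loader's setup_data chain introduced by the header.S change. For reference, each node of that singly linked list has essentially this layout (as declared in asm/bootparam.h; reproduced here from memory as a reminder rather than as new code):

        struct setup_data {
                __u64 next;     /* physical address of the next node, 0 terminates */
                __u32 type;
                __u32 len;      /* length of the payload following this header */
                __u8  data[0];
        };
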
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 36652ea1a265..9007f9ea64ee 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -218,7 +218,7 @@ static void hpet_legacy_clockevent_register(void)
218 hpet_freq = 1000000000000000ULL; 218 hpet_freq = 1000000000000000ULL;
219 do_div(hpet_freq, hpet_period); 219 do_div(hpet_freq, hpet_period);
220 hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, 220 hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
221 NSEC_PER_SEC, 32); 221 NSEC_PER_SEC, hpet_clockevent.shift);
222 /* Calculate the min / max delta */ 222 /* Calculate the min / max delta */
223 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, 223 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
224 &hpet_clockevent); 224 &hpet_clockevent);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 8540abe86ade..c1b5e3ece1f2 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -115,7 +115,8 @@ void __init setup_pit_timer(void)
115 * IO_APIC has been initialized. 115 * IO_APIC has been initialized.
116 */ 116 */
117 pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 117 pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); 118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
119 pit_clockevent.shift);
119 pit_clockevent.max_delta_ns = 120 pit_clockevent.max_delta_ns =
120 clockevent_delta2ns(0x7FFF, &pit_clockevent); 121 clockevent_delta2ns(0x7FFF, &pit_clockevent);
121 pit_clockevent.min_delta_ns = 122 pit_clockevent.min_delta_ns =
@@ -224,7 +225,8 @@ static int __init init_pit_clocksource(void)
224 pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) 225 pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
225 return 0; 226 return 0;
226 227
227 clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); 228 clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
229 clocksource_pit.shift);
228 return clocksource_register(&clocksource_pit); 230 return clocksource_register(&clocksource_pit);
229} 231}
230arch_initcall(init_pit_clocksource); 232arch_initcall(init_pit_clocksource);
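
The local APIC, HPET, and PIT changes above all replace a hard-coded shift (32, or 20 for the PIT clocksource) with the structure's own .shift when computing the multiplication factor. The underlying scaled math, roughly what div_sc() from linux/clockchips.h performs, looks like this sketch:

        /*
         * mult is chosen so that a later ns -> ticks conversion can be done as
         * (ns * mult) >> shift; computing mult against a different shift than
         * the one used for the conversion skews every programmed timer delta.
         */
        static unsigned long example_calc_mult(unsigned long ticks,
                                               unsigned long nsec, int shift)
        {
                u64 tmp = (u64)ticks << shift;

                do_div(tmp, nsec);      /* the same scaled division div_sc() does */
                return (unsigned long)tmp;
        }
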
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 2e2f42074e18..696b8e4e66bb 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2068,7 +2068,7 @@ static void __init setup_nmi(void)
2068 * cycles as some i82489DX-based boards have glue logic that keeps the 2068 * cycles as some i82489DX-based boards have glue logic that keeps the
2069 * 8259A interrupt line asserted until INTA. --macro 2069 * 8259A interrupt line asserted until INTA. --macro
2070 */ 2070 */
2071static inline void unlock_ExtINT_logic(void) 2071static inline void __init unlock_ExtINT_logic(void)
2072{ 2072{
2073 int apic, pin, i; 2073 int apic, pin, i;
2074 struct IO_APIC_route_entry entry0, entry1; 2074 struct IO_APIC_route_entry entry0, entry1;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 9ba11d07920f..ef1a8dfcc529 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1599,7 +1599,7 @@ static void __init setup_nmi(void)
1599 * cycles as some i82489DX-based boards have glue logic that keeps the 1599 * cycles as some i82489DX-based boards have glue logic that keeps the
1600 * 8259A interrupt line asserted until INTA. --macro 1600 * 8259A interrupt line asserted until INTA. --macro
1601 */ 1601 */
1602static inline void unlock_ExtINT_logic(void) 1602static inline void __init unlock_ExtINT_logic(void)
1603{ 1603{
1604 int apic, pin, i; 1604 int apic, pin, i;
1605 struct IO_APIC_route_entry entry0, entry1; 1605 struct IO_APIC_route_entry entry0, entry1;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 6ea67b76a214..00bda7bcda63 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
134 : "=a" (arg1), "=d" (arg2), "=b" (bx) 134 : "=a" (arg1), "=d" (arg2), "=b" (bx)
135 : "0" (irq), "1" (desc), "2" (isp), 135 : "0" (irq), "1" (desc), "2" (isp),
136 "D" (desc->handle_irq) 136 "D" (desc->handle_irq)
137 : "memory", "cc" 137 : "memory", "cc", "ecx"
138 ); 138 );
139 } else 139 } else
140#endif 140#endif
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 73354302fda7..c03205991718 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -6,23 +6,171 @@
6 * 6 *
7 * This file is released under the GPLv2. 7 * This file is released under the GPLv2.
8 */ 8 */
9
10#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/stat.h> 11#include <linux/stat.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/io.h>
14#include <linux/mm.h>
13 15
14#include <asm/setup.h> 16#include <asm/setup.h>
15 17
16#ifdef CONFIG_DEBUG_BOOT_PARAMS 18#ifdef CONFIG_DEBUG_BOOT_PARAMS
19struct setup_data_node {
20 u64 paddr;
21 u32 type;
22 u32 len;
23};
24
25static ssize_t
26setup_data_read(struct file *file, char __user *user_buf, size_t count,
27 loff_t *ppos)
28{
29 struct setup_data_node *node = file->private_data;
30 unsigned long remain;
31 loff_t pos = *ppos;
32 struct page *pg;
33 void *p;
34 u64 pa;
35
36 if (pos < 0)
37 return -EINVAL;
38 if (pos >= node->len)
39 return 0;
40
41 if (count > node->len - pos)
42 count = node->len - pos;
43 pa = node->paddr + sizeof(struct setup_data) + pos;
44 pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
45 if (PageHighMem(pg)) {
46 p = ioremap_cache(pa, count);
47 if (!p)
48 return -ENXIO;
49 } else {
50 p = __va(pa);
51 }
52
53 remain = copy_to_user(user_buf, p, count);
54
55 if (PageHighMem(pg))
56 iounmap(p);
57
58 if (remain)
59 return -EFAULT;
60
61 *ppos = pos + count;
62
63 return count;
64}
65
66static int setup_data_open(struct inode *inode, struct file *file)
67{
68 file->private_data = inode->i_private;
69 return 0;
70}
71
72static const struct file_operations fops_setup_data = {
73 .read = setup_data_read,
74 .open = setup_data_open,
75};
76
77static int __init
78create_setup_data_node(struct dentry *parent, int no,
79 struct setup_data_node *node)
80{
81 struct dentry *d, *type, *data;
82 char buf[16];
83 int error;
84
85 sprintf(buf, "%d", no);
86 d = debugfs_create_dir(buf, parent);
87 if (!d) {
88 error = -ENOMEM;
89 goto err_return;
90 }
91 type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
92 if (!type) {
93 error = -ENOMEM;
94 goto err_dir;
95 }
96 data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
97 if (!data) {
98 error = -ENOMEM;
99 goto err_type;
100 }
101 return 0;
102
103err_type:
104 debugfs_remove(type);
105err_dir:
106 debugfs_remove(d);
107err_return:
108 return error;
109}
110
111static int __init create_setup_data_nodes(struct dentry *parent)
112{
113 struct setup_data_node *node;
114 struct setup_data *data;
115 int error, no = 0;
116 struct dentry *d;
117 struct page *pg;
118 u64 pa_data;
119
120 d = debugfs_create_dir("setup_data", parent);
121 if (!d) {
122 error = -ENOMEM;
123 goto err_return;
124 }
125
126 pa_data = boot_params.hdr.setup_data;
127
128 while (pa_data) {
129 node = kmalloc(sizeof(*node), GFP_KERNEL);
130 if (!node) {
131 error = -ENOMEM;
132 goto err_dir;
133 }
134 pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
135 if (PageHighMem(pg)) {
136 data = ioremap_cache(pa_data, sizeof(*data));
137 if (!data) {
138 error = -ENXIO;
139 goto err_dir;
140 }
141 } else {
142 data = __va(pa_data);
143 }
144
145 node->paddr = pa_data;
146 node->type = data->type;
147 node->len = data->len;
148 error = create_setup_data_node(d, no, node);
149 pa_data = data->next;
150
151 if (PageHighMem(pg))
152 iounmap(data);
153 if (error)
154 goto err_dir;
155 no++;
156 }
157 return 0;
158
159err_dir:
160 debugfs_remove(d);
161err_return:
162 return error;
163}
164
17static struct debugfs_blob_wrapper boot_params_blob = { 165static struct debugfs_blob_wrapper boot_params_blob = {
18 .data = &boot_params, 166 .data = &boot_params,
19 .size = sizeof(boot_params), 167 .size = sizeof(boot_params),
20}; 168};
21 169
22static int __init boot_params_kdebugfs_init(void) 170static int __init boot_params_kdebugfs_init(void)
23{ 171{
24 int error;
25 struct dentry *dbp, *version, *data; 172 struct dentry *dbp, *version, *data;
173 int error;
26 174
27 dbp = debugfs_create_dir("boot_params", NULL); 175 dbp = debugfs_create_dir("boot_params", NULL);
28 if (!dbp) { 176 if (!dbp) {
@@ -41,7 +189,13 @@ static int __init boot_params_kdebugfs_init(void)
41 error = -ENOMEM; 189 error = -ENOMEM;
42 goto err_version; 190 goto err_version;
43 } 191 }
192 error = create_setup_data_nodes(dbp);
193 if (error)
194 goto err_data;
44 return 0; 195 return 0;
196
197err_data:
198 debugfs_remove(data);
45err_version: 199err_version:
46 debugfs_remove(version); 200 debugfs_remove(version);
47err_dir: 201err_dir:
@@ -61,5 +215,4 @@ static int __init arch_kdebugfs_init(void)
61 215
62 return error; 216 return error;
63} 217}
64
65arch_initcall(arch_kdebugfs_init); 218arch_initcall(arch_kdebugfs_init);
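The nodes created above expose each boot setup_data entry as a numbered debugfs directory holding a hex "type" file and a raw "data" file. As a rough usage sketch only — the mount point and the placement of the boot_params directory at the debugfs root are assumptions read off the code above, not something this patch documents — a userspace reader might look like:

/* Hypothetical reader for the debugfs nodes created above; assumes
 * debugfs is mounted at /sys/kernel/debug and that "boot_params" sits
 * at the debugfs root, as boot_params_kdebugfs_init() suggests.
 */
#include <stdio.h>

int main(void)
{
        char path[128];
        unsigned int type;
        FILE *f;
        int i;

        for (i = 0; ; i++) {
                snprintf(path, sizeof(path),
                         "/sys/kernel/debug/boot_params/setup_data/%d/type", i);
                f = fopen(path, "r");
                if (!f)
                        break;          /* no more entries */
                if (fscanf(f, "%x", &type) == 1)
                        printf("setup_data[%d]: type 0x%x\n", i, type);
                fclose(f);
        }
        return 0;
}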
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
new file mode 100644
index 000000000000..8b7a3cf37d2b
--- /dev/null
+++ b/arch/x86/kernel/kvm.c
@@ -0,0 +1,248 @@
1/*
2 * KVM paravirt_ops implementation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 *
18 * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
19 * Copyright IBM Corporation, 2007
20 * Authors: Anthony Liguori <aliguori@us.ibm.com>
21 */
22
23#include <linux/module.h>
24#include <linux/kernel.h>
25#include <linux/kvm_para.h>
26#include <linux/cpu.h>
27#include <linux/mm.h>
28#include <linux/highmem.h>
29#include <linux/hardirq.h>
30
31#define MMU_QUEUE_SIZE 1024
32
33struct kvm_para_state {
34 u8 mmu_queue[MMU_QUEUE_SIZE];
35 int mmu_queue_len;
36 enum paravirt_lazy_mode mode;
37};
38
39static DEFINE_PER_CPU(struct kvm_para_state, para_state);
40
41static struct kvm_para_state *kvm_para_state(void)
42{
43 return &per_cpu(para_state, raw_smp_processor_id());
44}
45
46/*
47 * No need for any "IO delay" on KVM
48 */
49static void kvm_io_delay(void)
50{
51}
52
53static void kvm_mmu_op(void *buffer, unsigned len)
54{
55 int r;
56 unsigned long a1, a2;
57
58 do {
59 a1 = __pa(buffer);
60 a2 = 0; /* on i386 __pa() always returns <4G */
61 r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
62 buffer += r;
63 len -= r;
64 } while (len);
65}
66
67static void mmu_queue_flush(struct kvm_para_state *state)
68{
69 if (state->mmu_queue_len) {
70 kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
71 state->mmu_queue_len = 0;
72 }
73}
74
75static void kvm_deferred_mmu_op(void *buffer, int len)
76{
77 struct kvm_para_state *state = kvm_para_state();
78
79 if (state->mode != PARAVIRT_LAZY_MMU) {
80 kvm_mmu_op(buffer, len);
81 return;
82 }
83 if (state->mmu_queue_len + len > sizeof state->mmu_queue)
84 mmu_queue_flush(state);
85 memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
86 state->mmu_queue_len += len;
87}
88
89static void kvm_mmu_write(void *dest, u64 val)
90{
91 __u64 pte_phys;
92 struct kvm_mmu_op_write_pte wpte;
93
94#ifdef CONFIG_HIGHPTE
95 struct page *page;
96 unsigned long dst = (unsigned long) dest;
97
98 page = kmap_atomic_to_page(dest);
99 pte_phys = page_to_pfn(page);
100 pte_phys <<= PAGE_SHIFT;
101 pte_phys += (dst & ~(PAGE_MASK));
102#else
103 pte_phys = (unsigned long)__pa(dest);
104#endif
105 wpte.header.op = KVM_MMU_OP_WRITE_PTE;
106 wpte.pte_val = val;
107 wpte.pte_phys = pte_phys;
108
109 kvm_deferred_mmu_op(&wpte, sizeof wpte);
110}
111
112/*
113 * We only need to hook operations that are MMU writes. We hook these so that
114 * we can use lazy MMU mode to batch these operations. We could probably
115 * improve the performance of the host code if we used some of the information
116 * here to simplify processing of batched writes.
117 */
118static void kvm_set_pte(pte_t *ptep, pte_t pte)
119{
120 kvm_mmu_write(ptep, pte_val(pte));
121}
122
123static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
124 pte_t *ptep, pte_t pte)
125{
126 kvm_mmu_write(ptep, pte_val(pte));
127}
128
129static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
130{
131 kvm_mmu_write(pmdp, pmd_val(pmd));
132}
133
134#if PAGETABLE_LEVELS >= 3
135#ifdef CONFIG_X86_PAE
136static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
137{
138 kvm_mmu_write(ptep, pte_val(pte));
139}
140
141static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
142 pte_t *ptep, pte_t pte)
143{
144 kvm_mmu_write(ptep, pte_val(pte));
145}
146
147static void kvm_pte_clear(struct mm_struct *mm,
148 unsigned long addr, pte_t *ptep)
149{
150 kvm_mmu_write(ptep, 0);
151}
152
153static void kvm_pmd_clear(pmd_t *pmdp)
154{
155 kvm_mmu_write(pmdp, 0);
156}
157#endif
158
159static void kvm_set_pud(pud_t *pudp, pud_t pud)
160{
161 kvm_mmu_write(pudp, pud_val(pud));
162}
163
164#if PAGETABLE_LEVELS == 4
165static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
166{
167 kvm_mmu_write(pgdp, pgd_val(pgd));
168}
169#endif
170#endif /* PAGETABLE_LEVELS >= 3 */
171
172static void kvm_flush_tlb(void)
173{
174 struct kvm_mmu_op_flush_tlb ftlb = {
175 .header.op = KVM_MMU_OP_FLUSH_TLB,
176 };
177
178 kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
179}
180
181static void kvm_release_pt(u32 pfn)
182{
183 struct kvm_mmu_op_release_pt rpt = {
184 .header.op = KVM_MMU_OP_RELEASE_PT,
185 .pt_phys = (u64)pfn << PAGE_SHIFT,
186 };
187
188 kvm_mmu_op(&rpt, sizeof rpt);
189}
190
191static void kvm_enter_lazy_mmu(void)
192{
193 struct kvm_para_state *state = kvm_para_state();
194
195 paravirt_enter_lazy_mmu();
196 state->mode = paravirt_get_lazy_mode();
197}
198
199static void kvm_leave_lazy_mmu(void)
200{
201 struct kvm_para_state *state = kvm_para_state();
202
203 mmu_queue_flush(state);
204 paravirt_leave_lazy(paravirt_get_lazy_mode());
205 state->mode = paravirt_get_lazy_mode();
206}
207
208static void paravirt_ops_setup(void)
209{
210 pv_info.name = "KVM";
211 pv_info.paravirt_enabled = 1;
212
213 if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
214 pv_cpu_ops.io_delay = kvm_io_delay;
215
216 if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
217 pv_mmu_ops.set_pte = kvm_set_pte;
218 pv_mmu_ops.set_pte_at = kvm_set_pte_at;
219 pv_mmu_ops.set_pmd = kvm_set_pmd;
220#if PAGETABLE_LEVELS >= 3
221#ifdef CONFIG_X86_PAE
222 pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
223 pv_mmu_ops.set_pte_present = kvm_set_pte_present;
224 pv_mmu_ops.pte_clear = kvm_pte_clear;
225 pv_mmu_ops.pmd_clear = kvm_pmd_clear;
226#endif
227 pv_mmu_ops.set_pud = kvm_set_pud;
228#if PAGETABLE_LEVELS == 4
229 pv_mmu_ops.set_pgd = kvm_set_pgd;
230#endif
231#endif
232 pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
233 pv_mmu_ops.release_pte = kvm_release_pt;
234 pv_mmu_ops.release_pmd = kvm_release_pt;
235 pv_mmu_ops.release_pud = kvm_release_pt;
236
237 pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
238 pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
239 }
240}
241
242void __init kvm_guest_init(void)
243{
244 if (!kvm_para_available())
245 return;
246
247 paravirt_ops_setup();
248}
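The comment block above ("We only need to hook operations that are MMU writes...") describes a simple batching scheme: while in lazy MMU mode, writes are appended to a fixed-size per-CPU queue and flushed as a single hypercall when the queue would overflow or when lazy mode is left. A minimal standalone sketch of that pattern, with illustrative names rather than the kernel API:

/* Illustrative sketch of the queue-and-flush pattern behind
 * kvm_deferred_mmu_op()/mmu_queue_flush() above.  emit() stands in for
 * the KVM_HC_MMU_OP hypercall and just swallows the bytes here.
 */
#include <string.h>

#define QUEUE_SIZE 1024

struct op_queue {
        unsigned char buf[QUEUE_SIZE];
        int len;
        int lazy;                       /* batching enabled? */
};

static void emit(const void *ops, int len)
{
        (void)ops;                      /* stand-in: a real guest would hypercall here */
        (void)len;
}

static void queue_flush(struct op_queue *q)
{
        if (q->len) {
                emit(q->buf, q->len);
                q->len = 0;
        }
}

static void queue_op(struct op_queue *q, const void *op, int len)
{
        if (!q->lazy) {
                emit(op, len);          /* not batching: issue immediately */
                return;
        }
        if (q->len + len > (int)sizeof(q->buf))
                queue_flush(q);         /* would overflow: drain first */
        memcpy(q->buf + q->len, op, len);
        q->len += len;
}

Leaving lazy mode maps to a final queue_flush(), which is what kvm_leave_lazy_mmu() above does before calling back into paravirt_leave_lazy().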
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
new file mode 100644
index 000000000000..ddee04043aeb
--- /dev/null
+++ b/arch/x86/kernel/kvmclock.c
@@ -0,0 +1,187 @@
1/* KVM paravirtual clock driver. A clocksource implementation
2 Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#include <linux/clocksource.h>
20#include <linux/kvm_para.h>
21#include <asm/arch_hooks.h>
22#include <asm/msr.h>
23#include <asm/apic.h>
24#include <linux/percpu.h>
25#include <asm/reboot.h>
26
27#define KVM_SCALE 22
28
29static int kvmclock = 1;
30
31static int parse_no_kvmclock(char *arg)
32{
33 kvmclock = 0;
34 return 0;
35}
36early_param("no-kvmclock", parse_no_kvmclock);
37
38/* The hypervisor will put information about time periodically here */
39static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
40#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
41
42static inline u64 kvm_get_delta(u64 last_tsc)
43{
44 int cpu = smp_processor_id();
45 u64 delta = native_read_tsc() - last_tsc;
46 return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
47}
48
49static struct kvm_wall_clock wall_clock;
50static cycle_t kvm_clock_read(void);
51/*
 52 * The wallclock is the time of day when we booted. Some time may have
 53 * elapsed since the hypervisor wrote the data, so we try to account for
 54 * that with the system time.
55 */
56unsigned long kvm_get_wallclock(void)
57{
58 u32 wc_sec, wc_nsec;
59 u64 delta;
60 struct timespec ts;
61 int version, nsec;
62 int low, high;
63
64 low = (int)__pa(&wall_clock);
65 high = ((u64)__pa(&wall_clock) >> 32);
66
67 delta = kvm_clock_read();
68
69 native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
70 do {
71 version = wall_clock.wc_version;
72 rmb();
73 wc_sec = wall_clock.wc_sec;
74 wc_nsec = wall_clock.wc_nsec;
75 rmb();
76 } while ((wall_clock.wc_version != version) || (version & 1));
77
78 delta = kvm_clock_read() - delta;
79 delta += wc_nsec;
80 nsec = do_div(delta, NSEC_PER_SEC);
81 set_normalized_timespec(&ts, wc_sec + delta, nsec);
82 /*
83 * Of all mechanisms of time adjustment I've tested, this one
84 * was the champion!
85 */
86 return ts.tv_sec + 1;
87}
88
89int kvm_set_wallclock(unsigned long now)
90{
91 return 0;
92}
93
94/*
 95 * This is our read_clock function. The host puts a tsc timestamp each time
 96 * it updates the time. Without the tsc adjustment, we can have a situation
 97 * in which a vcpu starts to run earlier (smaller system_time), but probes
 98 * time later (compared to another vcpu), leading to time going backwards.
99 */
100static cycle_t kvm_clock_read(void)
101{
102 u64 last_tsc, now;
103 int cpu;
104
105 preempt_disable();
106 cpu = smp_processor_id();
107
108 last_tsc = get_clock(cpu, tsc_timestamp);
109 now = get_clock(cpu, system_time);
110
111 now += kvm_get_delta(last_tsc);
112 preempt_enable();
113
114 return now;
115}
116static struct clocksource kvm_clock = {
117 .name = "kvm-clock",
118 .read = kvm_clock_read,
119 .rating = 400,
120 .mask = CLOCKSOURCE_MASK(64),
121 .mult = 1 << KVM_SCALE,
122 .shift = KVM_SCALE,
123 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
124};
125
126static int kvm_register_clock(void)
127{
128 int cpu = smp_processor_id();
129 int low, high;
130 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
131 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
132
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
134}
135
136static void kvm_setup_secondary_clock(void)
137{
138 /*
139 * Now that the first cpu already had this clocksource initialized,
140 * we shouldn't fail.
141 */
142 WARN_ON(kvm_register_clock());
143 /* ok, done with our trickery, call native */
144 setup_secondary_APIC_clock();
145}
146
147/*
 148 * After the clock is registered, the host will keep writing to the
 149 * registered memory location. If the guest happens to shut down, this memory
 150 * won't be valid. In cases like kexec, in which you install a new kernel, this
 151 * means a random memory location will keep being written to. So before any
 152 * kind of shutdown from our side, we unregister the clock by writing anything
 153 * that does not have the 'enable' bit set in the MSR.
154 */
155#ifdef CONFIG_KEXEC
156static void kvm_crash_shutdown(struct pt_regs *regs)
157{
158 native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
159 native_machine_crash_shutdown(regs);
160}
161#endif
162
163static void kvm_shutdown(void)
164{
165 native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
166 native_machine_shutdown();
167}
168
169void __init kvmclock_init(void)
170{
171 if (!kvm_para_available())
172 return;
173
174 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
175 if (kvm_register_clock())
176 return;
177 pv_time_ops.get_wallclock = kvm_get_wallclock;
178 pv_time_ops.set_wallclock = kvm_set_wallclock;
179 pv_time_ops.sched_clock = kvm_clock_read;
180 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
181 machine_ops.shutdown = kvm_shutdown;
182#ifdef CONFIG_KEXEC
183 machine_ops.crash_shutdown = kvm_crash_shutdown;
184#endif
185 clocksource_register(&kvm_clock);
186 }
187}
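kvm_get_wallclock() above relies on the hypervisor bumping wc_version around its updates (the version is odd while an update is in flight), so the guest keeps re-reading until it observes an even, unchanged version. A small reader-side sketch of that protocol, with an illustrative layout and a compiler builtin standing in for rmb():

/* Reader-side sketch of the version-retry loop in kvm_get_wallclock().
 * The struct layout is illustrative; __sync_synchronize() stands in for
 * the rmb() barriers that order the field loads against the version reads.
 */
struct wall_clock_sketch {
        volatile unsigned int version;  /* odd while the writer is mid-update */
        volatile unsigned int sec;
        volatile unsigned int nsec;
};

static void read_wall_clock(const struct wall_clock_sketch *wc,
                            unsigned int *sec, unsigned int *nsec)
{
        unsigned int v;

        do {
                v = wc->version;
                __sync_synchronize();
                *sec  = wc->sec;
                *nsec = wc->nsec;
                __sync_synchronize();
        } while (wc->version != v || (v & 1));
}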
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index b402c0f3f192..cfc2648d25ff 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -364,7 +364,8 @@ int __init mfgpt_timer_setup(void)
364 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); 364 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val);
365 365
366 /* Set up the clock event */ 366 /* Set up the clock event */
367 mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32); 367 mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC,
368 mfgpt_clockevent.shift);
368 mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, 369 mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF,
369 &mfgpt_clockevent); 370 &mfgpt_clockevent);
370 mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE, 371 mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE,
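The one-line fix above makes the mult computation use the clockevent's own .shift instead of a literal 32, keeping the pair consistent: the clockevent core converts nanoseconds to device ticks as (ns * mult) >> shift, so mult must be derived with the same shift that will be applied later. A toy calculation with illustrative constants:

/* Sketch of the mult/shift fixed-point scaling behind div_sc(); the
 * frequency and shift values here are illustrative, not authoritative.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* div_sc(freq, NSEC_PER_SEC, shift) effectively computes (freq << shift) / 10^9 */
static uint32_t div_sc_sketch(uint64_t freq, uint64_t nsec, unsigned int shift)
{
        return (uint32_t)((freq << shift) / nsec);
}

int main(void)
{
        uint64_t freq = 14318000;       /* roughly MFGPT_HZ, illustrative */
        unsigned int shift = 32;        /* must match the clockevent's .shift */
        uint32_t mult = div_sc_sketch(freq, NSEC_PER_SEC, shift);
        uint64_t delta_ns = 1000000;    /* 1 ms */

        /* ns -> device ticks, as the clockevent core does with mult/shift */
        printf("1 ms ~= %llu ticks\n",
               (unsigned long long)((delta_ns * mult) >> shift));
        return 0;
}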
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70744e344fa1..3e2c54dc8b29 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -686,13 +686,11 @@ void __init get_smp_config(void)
686static int __init smp_scan_config(unsigned long base, unsigned long length, 686static int __init smp_scan_config(unsigned long base, unsigned long length,
687 unsigned reserve) 687 unsigned reserve)
688{ 688{
689 extern void __bad_mpf_size(void);
690 unsigned int *bp = phys_to_virt(base); 689 unsigned int *bp = phys_to_virt(base);
691 struct intel_mp_floating *mpf; 690 struct intel_mp_floating *mpf;
692 691
693 Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); 692 Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
694 if (sizeof(*mpf) != 16) 693 BUILD_BUG_ON(sizeof(*mpf) != 16);
695 __bad_mpf_size();
696 694
697 while (length > 0) { 695 while (length > 0) {
698 mpf = (struct intel_mp_floating *)bp; 696 mpf = (struct intel_mp_floating *)bp;
@@ -801,7 +799,6 @@ void __init find_smp_config(void)
801#ifdef CONFIG_X86_IO_APIC 799#ifdef CONFIG_X86_IO_APIC
802 800
803#define MP_ISA_BUS 0 801#define MP_ISA_BUS 0
804#define MP_MAX_IOAPIC_PIN 127
805 802
806extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; 803extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
807 804
@@ -820,7 +817,7 @@ static int mp_find_ioapic(int gsi)
820 return -1; 817 return -1;
821} 818}
822 819
823static u8 uniq_ioapic_id(u8 id) 820static u8 __init uniq_ioapic_id(u8 id)
824{ 821{
825#ifdef CONFIG_X86_32 822#ifdef CONFIG_X86_32
826 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && 823 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
@@ -909,14 +906,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
909 intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ 906 intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
910 intsrc.mpc_dstirq = pin; /* INTIN# */ 907 intsrc.mpc_dstirq = pin; /* INTIN# */
911 908
912 Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", 909 MP_intsrc_info(&intsrc);
913 intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
914 (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
915 intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
916
917 mp_irqs[mp_irq_entries] = intsrc;
918 if (++mp_irq_entries == MAX_IRQ_SOURCES)
919 panic("Max # of irq sources exceeded!\n");
920} 910}
921 911
922int es7000_plat; 912int es7000_plat;
@@ -985,23 +975,14 @@ void __init mp_config_acpi_legacy_irqs(void)
985 intsrc.mpc_srcbusirq = i; /* Identity mapped */ 975 intsrc.mpc_srcbusirq = i; /* Identity mapped */
986 intsrc.mpc_dstirq = i; 976 intsrc.mpc_dstirq = i;
987 977
988 Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " 978 MP_intsrc_info(&intsrc);
989 "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
990 (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
991 intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
992 intsrc.mpc_dstirq);
993
994 mp_irqs[mp_irq_entries] = intsrc;
995 if (++mp_irq_entries == MAX_IRQ_SOURCES)
996 panic("Max # of irq sources exceeded!\n");
997 } 979 }
998} 980}
999 981
1000int mp_register_gsi(u32 gsi, int triggering, int polarity) 982int mp_register_gsi(u32 gsi, int triggering, int polarity)
1001{ 983{
1002 int ioapic = -1; 984 int ioapic;
1003 int ioapic_pin = 0; 985 int ioapic_pin;
1004 int idx, bit = 0;
1005#ifdef CONFIG_X86_32 986#ifdef CONFIG_X86_32
1006#define MAX_GSI_NUM 4096 987#define MAX_GSI_NUM 4096
1007#define IRQ_COMPRESSION_START 64 988#define IRQ_COMPRESSION_START 64
@@ -1041,15 +1022,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
1041 * with redundant pin->gsi mappings (but unique PCI devices); 1022 * with redundant pin->gsi mappings (but unique PCI devices);
1042 * we only program the IOAPIC on the first. 1023 * we only program the IOAPIC on the first.
1043 */ 1024 */
1044 bit = ioapic_pin % 32; 1025 if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
1045 idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
1046 if (idx > 3) {
1047 printk(KERN_ERR "Invalid reference to IOAPIC pin " 1026 printk(KERN_ERR "Invalid reference to IOAPIC pin "
1048 "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 1027 "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
1049 ioapic_pin); 1028 ioapic_pin);
1050 return gsi; 1029 return gsi;
1051 } 1030 }
1052 if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { 1031 if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
1053 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", 1032 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
1054 mp_ioapic_routing[ioapic].apic_id, ioapic_pin); 1033 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
1055#ifdef CONFIG_X86_32 1034#ifdef CONFIG_X86_32
@@ -1059,7 +1038,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
1059#endif 1038#endif
1060 } 1039 }
1061 1040
1062 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit); 1041 set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
1063#ifdef CONFIG_X86_32 1042#ifdef CONFIG_X86_32
1064 /* 1043 /*
1065 * For GSI >= 64, use IRQ compression 1044 * For GSI >= 64, use IRQ compression
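The pin_programmed change above swaps open-coded word/bit arithmetic on a u32 array for test_bit()/set_bit() on the pin number directly, bounds-checked against MP_MAX_IOAPIC_PIN. For in-range pins the two forms are equivalent, as this userspace sketch shows; the kernel helpers actually operate on unsigned long bitmaps, so these are stand-ins:

/* Equivalence sketch for the open-coded bitmap math removed above and
 * the test_bit()/set_bit() calls that replace it (userspace stand-ins).
 */
#include <assert.h>
#include <stdint.h>

static int test_bit_sketch(unsigned int nr, const uint32_t *addr)
{
        return (addr[nr / 32] >> (nr % 32)) & 1;
}

static void set_bit_sketch(unsigned int nr, uint32_t *addr)
{
        addr[nr / 32] |= 1U << (nr % 32);
}

int main(void)
{
        uint32_t pin_programmed[4] = { 0 };     /* 4 * 32 = 128 pins, matching the removed idx > 3 check */
        unsigned int pin = 45;
        unsigned int idx = pin / 32, bit = pin % 32;    /* old-style index math */

        set_bit_sketch(pin, pin_programmed);
        assert(((pin_programmed[idx] >> bit) & 1) == test_bit_sketch(pin, pin_programmed));
        return 0;
}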
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 3733412d1357..74f0c5ea2a03 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -366,11 +366,13 @@ struct pv_mmu_ops pv_mmu_ops = {
366 .flush_tlb_single = native_flush_tlb_single, 366 .flush_tlb_single = native_flush_tlb_single,
367 .flush_tlb_others = native_flush_tlb_others, 367 .flush_tlb_others = native_flush_tlb_others,
368 368
369 .alloc_pt = paravirt_nop, 369 .alloc_pte = paravirt_nop,
370 .alloc_pd = paravirt_nop, 370 .alloc_pmd = paravirt_nop,
371 .alloc_pd_clone = paravirt_nop, 371 .alloc_pmd_clone = paravirt_nop,
372 .release_pt = paravirt_nop, 372 .alloc_pud = paravirt_nop,
373 .release_pd = paravirt_nop, 373 .release_pte = paravirt_nop,
374 .release_pmd = paravirt_nop,
375 .release_pud = paravirt_nop,
374 376
375 .set_pte = native_set_pte, 377 .set_pte = native_set_pte,
376 .set_pte_at = native_set_pte_at, 378 .set_pte_at = native_set_pte_at,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 2edee22e9c30..e28ec497e142 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -43,6 +43,7 @@
43#include <asm/system.h> 43#include <asm/system.h>
44#include <asm/dma.h> 44#include <asm/dma.h>
45#include <asm/rio.h> 45#include <asm/rio.h>
46#include <asm/bios_ebda.h>
46 47
47#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT 48#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
48int use_calgary __read_mostly = 1; 49int use_calgary __read_mostly = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3004d716539d..67e9b4a1e89d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -4,6 +4,8 @@
4#include <linux/smp.h> 4#include <linux/smp.h>
5#include <linux/slab.h> 5#include <linux/slab.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/module.h>
8#include <linux/pm.h>
7 9
8struct kmem_cache *task_xstate_cachep; 10struct kmem_cache *task_xstate_cachep;
9 11
@@ -42,3 +44,118 @@ void arch_task_cache_init(void)
42 __alignof__(union thread_xstate), 44 __alignof__(union thread_xstate),
43 SLAB_PANIC, NULL); 45 SLAB_PANIC, NULL);
44} 46}
47
48static void do_nothing(void *unused)
49{
50}
51
52/*
53 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
54 * pm_idle and update to new pm_idle value. Required while changing pm_idle
55 * handler on SMP systems.
56 *
57 * Caller must have changed pm_idle to the new value before the call. Old
58 * pm_idle value will not be used by any CPU after the return of this function.
59 */
60void cpu_idle_wait(void)
61{
62 smp_mb();
63 /* kick all the CPUs so that they exit out of pm_idle */
64 smp_call_function(do_nothing, NULL, 0, 1);
65}
66EXPORT_SYMBOL_GPL(cpu_idle_wait);
67
68/*
69 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
70 * which can obviate IPI to trigger checking of need_resched.
71 * We execute MONITOR against need_resched and enter optimized wait state
72 * through MWAIT. Whenever someone changes need_resched, we would be woken
73 * up from MWAIT (without an IPI).
74 *
75 * New with Core Duo processors, MWAIT can take some hints based on CPU
76 * capability.
77 */
78void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
79{
80 if (!need_resched()) {
81 __monitor((void *)&current_thread_info()->flags, 0, 0);
82 smp_mb();
83 if (!need_resched())
84 __mwait(ax, cx);
85 }
86}
87
88/* Default MONITOR/MWAIT with no hints, used for default C1 state */
89static void mwait_idle(void)
90{
91 if (!need_resched()) {
92 __monitor((void *)&current_thread_info()->flags, 0, 0);
93 smp_mb();
94 if (!need_resched())
95 __sti_mwait(0, 0);
96 else
97 local_irq_enable();
98 } else
99 local_irq_enable();
100}
101
102
103static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
104{
105 if (force_mwait)
106 return 1;
107 /* Any C1 states supported? */
108 return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
109}
110
111/*
112 * On SMP it's slightly faster (but much more power-consuming!)
113 * to poll the ->work.need_resched flag instead of waiting for the
114 * cross-CPU IPI to arrive. Use this option with caution.
115 */
116static void poll_idle(void)
117{
118 local_irq_enable();
119 cpu_relax();
120}
121
122void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
123{
124 static int selected;
125
126 if (selected)
127 return;
128#ifdef CONFIG_X86_SMP
129 if (pm_idle == poll_idle && smp_num_siblings > 1) {
130 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
131 " performance may degrade.\n");
132 }
133#endif
134 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
135 /*
136 * Skip, if setup has overridden idle.
137 * One CPU supports mwait => All CPUs supports mwait
138 */
139 if (!pm_idle) {
140 printk(KERN_INFO "using mwait in idle threads.\n");
141 pm_idle = mwait_idle;
142 }
143 }
144 selected = 1;
145}
146
147static int __init idle_setup(char *str)
148{
149 if (!strcmp(str, "poll")) {
150 printk("using polling idle threads.\n");
151 pm_idle = poll_idle;
152 } else if (!strcmp(str, "mwait"))
153 force_mwait = 1;
154 else
155 return -1;
156
157 boot_option_idle_override = 1;
158 return 0;
159}
160early_param("idle", idle_setup);
161
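The cpu_idle_wait() comment above spells out the contract: the caller stores the new pm_idle first, and the function then makes sure no CPU is still running the old handler by sending a do-nothing cross-call, which pops every CPU out of its current idle routine so it re-reads the pointer. A minimal sketch of that publish-then-kick pattern, with illustrative names rather than the kernel API:

/* Sketch of the "publish new handler, then kick everyone" pattern behind
 * cpu_idle_wait() above.  cross_call_all() is a stand-in for
 * smp_call_function() and only runs the callback locally here.
 */
typedef void (*idle_fn)(void);

static idle_fn volatile pm_idle_sketch;

static void nop_ipi_handler(void *unused)
{
        (void)unused;                   /* the point is the interrupt, not the work */
}

static void cross_call_all(void (*fn)(void *), void *arg)
{
        fn(arg);                        /* stand-in: would IPI every other CPU */
}

static void switch_idle_handler(idle_fn new_idle)
{
        pm_idle_sketch = new_idle;      /* publish the new handler first */
        __sync_synchronize();           /* smp_mb() stand-in */
        /*
         * Kick all CPUs with a do-nothing call: each one returns from its
         * current idle routine and re-reads pm_idle_sketch, so the old
         * handler is no longer referenced once this returns.
         */
        cross_call_all(nop_ipi_handler, 0);
}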
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7adad088e373..f8476dfbb60d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -111,12 +111,10 @@ void default_idle(void)
111 */ 111 */
112 smp_mb(); 112 smp_mb();
113 113
114 local_irq_disable(); 114 if (!need_resched())
115 if (!need_resched()) {
116 safe_halt(); /* enables interrupts racelessly */ 115 safe_halt(); /* enables interrupts racelessly */
117 local_irq_disable(); 116 else
118 } 117 local_irq_enable();
119 local_irq_enable();
120 current_thread_info()->status |= TS_POLLING; 118 current_thread_info()->status |= TS_POLLING;
121 } else { 119 } else {
122 local_irq_enable(); 120 local_irq_enable();
@@ -128,17 +126,6 @@ void default_idle(void)
128EXPORT_SYMBOL(default_idle); 126EXPORT_SYMBOL(default_idle);
129#endif 127#endif
130 128
131/*
132 * On SMP it's slightly faster (but much more power-consuming!)
133 * to poll the ->work.need_resched flag instead of waiting for the
134 * cross-CPU IPI to arrive. Use this option with caution.
135 */
136static void poll_idle(void)
137{
138 local_irq_enable();
139 cpu_relax();
140}
141
142#ifdef CONFIG_HOTPLUG_CPU 129#ifdef CONFIG_HOTPLUG_CPU
143#include <asm/nmi.h> 130#include <asm/nmi.h>
144/* We don't actually take CPU down, just spin without interrupts. */ 131/* We don't actually take CPU down, just spin without interrupts. */
@@ -196,6 +183,7 @@ void cpu_idle(void)
196 if (cpu_is_offline(cpu)) 183 if (cpu_is_offline(cpu))
197 play_dead(); 184 play_dead();
198 185
186 local_irq_disable();
199 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 187 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
200 idle(); 188 idle();
201 } 189 }
@@ -206,104 +194,6 @@ void cpu_idle(void)
206 } 194 }
207} 195}
208 196
209static void do_nothing(void *unused)
210{
211}
212
213/*
214 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
215 * pm_idle and update to new pm_idle value. Required while changing pm_idle
216 * handler on SMP systems.
217 *
218 * Caller must have changed pm_idle to the new value before the call. Old
219 * pm_idle value will not be used by any CPU after the return of this function.
220 */
221void cpu_idle_wait(void)
222{
223 smp_mb();
224 /* kick all the CPUs so that they exit out of pm_idle */
225 smp_call_function(do_nothing, NULL, 0, 1);
226}
227EXPORT_SYMBOL_GPL(cpu_idle_wait);
228
229/*
230 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
231 * which can obviate IPI to trigger checking of need_resched.
232 * We execute MONITOR against need_resched and enter optimized wait state
233 * through MWAIT. Whenever someone changes need_resched, we would be woken
234 * up from MWAIT (without an IPI).
235 *
236 * New with Core Duo processors, MWAIT can take some hints based on CPU
237 * capability.
238 */
239void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
240{
241 if (!need_resched()) {
242 __monitor((void *)&current_thread_info()->flags, 0, 0);
243 smp_mb();
244 if (!need_resched())
245 __sti_mwait(ax, cx);
246 else
247 local_irq_enable();
248 } else
249 local_irq_enable();
250}
251
252/* Default MONITOR/MWAIT with no hints, used for default C1 state */
253static void mwait_idle(void)
254{
255 local_irq_enable();
256 mwait_idle_with_hints(0, 0);
257}
258
259static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
260{
261 if (force_mwait)
262 return 1;
263 /* Any C1 states supported? */
264 return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
265}
266
267void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
268{
269 static int selected;
270
271 if (selected)
272 return;
273#ifdef CONFIG_X86_SMP
274 if (pm_idle == poll_idle && smp_num_siblings > 1) {
275 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
276 " performance may degrade.\n");
277 }
278#endif
279 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
280 /*
281 * Skip, if setup has overridden idle.
282 * One CPU supports mwait => All CPUs supports mwait
283 */
284 if (!pm_idle) {
285 printk(KERN_INFO "using mwait in idle threads.\n");
286 pm_idle = mwait_idle;
287 }
288 }
289 selected = 1;
290}
291
292static int __init idle_setup(char *str)
293{
294 if (!strcmp(str, "poll")) {
295 printk("using polling idle threads.\n");
296 pm_idle = poll_idle;
297 } else if (!strcmp(str, "mwait"))
298 force_mwait = 1;
299 else
300 return -1;
301
302 boot_option_idle_override = 1;
303 return 0;
304}
305early_param("idle", idle_setup);
306
307void __show_registers(struct pt_regs *regs, int all) 197void __show_registers(struct pt_regs *regs, int all)
308{ 198{
309 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; 199 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -550,7 +440,7 @@ static void hard_enable_TSC(void)
550 write_cr4(read_cr4() & ~X86_CR4_TSD); 440 write_cr4(read_cr4() & ~X86_CR4_TSD);
551} 441}
552 442
553void enable_TSC(void) 443static void enable_TSC(void)
554{ 444{
555 preempt_disable(); 445 preempt_disable();
556 if (test_and_clear_thread_flag(TIF_NOTSC)) 446 if (test_and_clear_thread_flag(TIF_NOTSC))
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 891af1a1b48a..e2319f39988b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -106,26 +106,13 @@ void default_idle(void)
106 * test NEED_RESCHED: 106 * test NEED_RESCHED:
107 */ 107 */
108 smp_mb(); 108 smp_mb();
109 local_irq_disable(); 109 if (!need_resched())
110 if (!need_resched()) {
111 safe_halt(); /* enables interrupts racelessly */ 110 safe_halt(); /* enables interrupts racelessly */
112 local_irq_disable(); 111 else
113 } 112 local_irq_enable();
114 local_irq_enable();
115 current_thread_info()->status |= TS_POLLING; 113 current_thread_info()->status |= TS_POLLING;
116} 114}
117 115
118/*
119 * On SMP it's slightly faster (but much more power-consuming!)
120 * to poll the ->need_resched flag instead of waiting for the
121 * cross-CPU IPI to arrive. Use this option with caution.
122 */
123static void poll_idle(void)
124{
125 local_irq_enable();
126 cpu_relax();
127}
128
129#ifdef CONFIG_HOTPLUG_CPU 116#ifdef CONFIG_HOTPLUG_CPU
130DECLARE_PER_CPU(int, cpu_state); 117DECLARE_PER_CPU(int, cpu_state);
131 118
@@ -192,110 +179,6 @@ void cpu_idle(void)
192 } 179 }
193} 180}
194 181
195static void do_nothing(void *unused)
196{
197}
198
199/*
200 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
201 * pm_idle and update to new pm_idle value. Required while changing pm_idle
202 * handler on SMP systems.
203 *
204 * Caller must have changed pm_idle to the new value before the call. Old
205 * pm_idle value will not be used by any CPU after the return of this function.
206 */
207void cpu_idle_wait(void)
208{
209 smp_mb();
210 /* kick all the CPUs so that they exit out of pm_idle */
211 smp_call_function(do_nothing, NULL, 0, 1);
212}
213EXPORT_SYMBOL_GPL(cpu_idle_wait);
214
215/*
216 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
217 * which can obviate IPI to trigger checking of need_resched.
218 * We execute MONITOR against need_resched and enter optimized wait state
219 * through MWAIT. Whenever someone changes need_resched, we would be woken
220 * up from MWAIT (without an IPI).
221 *
222 * New with Core Duo processors, MWAIT can take some hints based on CPU
223 * capability.
224 */
225void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
226{
227 if (!need_resched()) {
228 __monitor((void *)&current_thread_info()->flags, 0, 0);
229 smp_mb();
230 if (!need_resched())
231 __mwait(ax, cx);
232 }
233}
234
235/* Default MONITOR/MWAIT with no hints, used for default C1 state */
236static void mwait_idle(void)
237{
238 if (!need_resched()) {
239 __monitor((void *)&current_thread_info()->flags, 0, 0);
240 smp_mb();
241 if (!need_resched())
242 __sti_mwait(0, 0);
243 else
244 local_irq_enable();
245 } else {
246 local_irq_enable();
247 }
248}
249
250
251static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
252{
253 if (force_mwait)
254 return 1;
255 /* Any C1 states supported? */
256 return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
257}
258
259void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
260{
261 static int selected;
262
263 if (selected)
264 return;
265#ifdef CONFIG_X86_SMP
266 if (pm_idle == poll_idle && smp_num_siblings > 1) {
267 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
268 " performance may degrade.\n");
269 }
270#endif
271 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
272 /*
273 * Skip, if setup has overridden idle.
274 * One CPU supports mwait => All CPUs supports mwait
275 */
276 if (!pm_idle) {
277 printk(KERN_INFO "using mwait in idle threads.\n");
278 pm_idle = mwait_idle;
279 }
280 }
281 selected = 1;
282}
283
284static int __init idle_setup(char *str)
285{
286 if (!strcmp(str, "poll")) {
287 printk("using polling idle threads.\n");
288 pm_idle = poll_idle;
289 } else if (!strcmp(str, "mwait"))
290 force_mwait = 1;
291 else
292 return -1;
293
294 boot_option_idle_override = 1;
295 return 0;
296}
297early_param("idle", idle_setup);
298
299/* Prints also some state that isn't saved in the pt_regs */ 182/* Prints also some state that isn't saved in the pt_regs */
300void __show_regs(struct pt_regs * regs) 183void __show_regs(struct pt_regs * regs)
301{ 184{
@@ -562,7 +445,7 @@ static void hard_enable_TSC(void)
562 write_cr4(read_cr4() & ~X86_CR4_TSD); 445 write_cr4(read_cr4() & ~X86_CR4_TSD);
563} 446}
564 447
565void enable_TSC(void) 448static void enable_TSC(void)
566{ 449{
567 preempt_disable(); 450 preempt_disable();
568 if (test_and_clear_thread_flag(TIF_NOTSC)) 451 if (test_and_clear_thread_flag(TIF_NOTSC))
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 559c1b027417..fb03ef380f0e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1207,97 +1207,16 @@ static int genregs32_set(struct task_struct *target,
1207 return ret; 1207 return ret;
1208} 1208}
1209 1209
1210static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) 1210long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1211 compat_ulong_t caddr, compat_ulong_t cdata)
1211{ 1212{
1212 siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); 1213 unsigned long addr = caddr;
1213 compat_siginfo_t __user *si32 = compat_ptr(data); 1214 unsigned long data = cdata;
1214 siginfo_t ssi;
1215 int ret;
1216
1217 if (request == PTRACE_SETSIGINFO) {
1218 memset(&ssi, 0, sizeof(siginfo_t));
1219 ret = copy_siginfo_from_user32(&ssi, si32);
1220 if (ret)
1221 return ret;
1222 if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
1223 return -EFAULT;
1224 }
1225 ret = sys_ptrace(request, pid, addr, (unsigned long)si);
1226 if (ret)
1227 return ret;
1228 if (request == PTRACE_GETSIGINFO) {
1229 if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
1230 return -EFAULT;
1231 ret = copy_siginfo_to_user32(si32, &ssi);
1232 }
1233 return ret;
1234}
1235
1236asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
1237{
1238 struct task_struct *child;
1239 struct pt_regs *childregs;
1240 void __user *datap = compat_ptr(data); 1215 void __user *datap = compat_ptr(data);
1241 int ret; 1216 int ret;
1242 __u32 val; 1217 __u32 val;
1243 1218
1244 switch (request) { 1219 switch (request) {
1245 case PTRACE_TRACEME:
1246 case PTRACE_ATTACH:
1247 case PTRACE_KILL:
1248 case PTRACE_CONT:
1249 case PTRACE_SINGLESTEP:
1250 case PTRACE_SINGLEBLOCK:
1251 case PTRACE_DETACH:
1252 case PTRACE_SYSCALL:
1253 case PTRACE_OLDSETOPTIONS:
1254 case PTRACE_SETOPTIONS:
1255 case PTRACE_SET_THREAD_AREA:
1256 case PTRACE_GET_THREAD_AREA:
1257#ifdef X86_BTS
1258 case PTRACE_BTS_CONFIG:
1259 case PTRACE_BTS_STATUS:
1260 case PTRACE_BTS_SIZE:
1261 case PTRACE_BTS_GET:
1262 case PTRACE_BTS_CLEAR:
1263 case PTRACE_BTS_DRAIN:
1264#endif
1265 return sys_ptrace(request, pid, addr, data);
1266
1267 default:
1268 return -EINVAL;
1269
1270 case PTRACE_PEEKTEXT:
1271 case PTRACE_PEEKDATA:
1272 case PTRACE_POKEDATA:
1273 case PTRACE_POKETEXT:
1274 case PTRACE_POKEUSR:
1275 case PTRACE_PEEKUSR:
1276 case PTRACE_GETREGS:
1277 case PTRACE_SETREGS:
1278 case PTRACE_SETFPREGS:
1279 case PTRACE_GETFPREGS:
1280 case PTRACE_SETFPXREGS:
1281 case PTRACE_GETFPXREGS:
1282 case PTRACE_GETEVENTMSG:
1283 break;
1284
1285 case PTRACE_SETSIGINFO:
1286 case PTRACE_GETSIGINFO:
1287 return ptrace32_siginfo(request, pid, addr, data);
1288 }
1289
1290 child = ptrace_get_task_struct(pid);
1291 if (IS_ERR(child))
1292 return PTR_ERR(child);
1293
1294 ret = ptrace_check_attach(child, request == PTRACE_KILL);
1295 if (ret < 0)
1296 goto out;
1297
1298 childregs = task_pt_regs(child);
1299
1300 switch (request) {
1301 case PTRACE_PEEKUSR: 1220 case PTRACE_PEEKUSR:
1302 ret = getreg32(child, addr, &val); 1221 ret = getreg32(child, addr, &val);
1303 if (ret == 0) 1222 if (ret == 0)
@@ -1343,12 +1262,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
1343 sizeof(struct user32_fxsr_struct), 1262 sizeof(struct user32_fxsr_struct),
1344 datap); 1263 datap);
1345 1264
1265 case PTRACE_GET_THREAD_AREA:
1266 case PTRACE_SET_THREAD_AREA:
1267 return arch_ptrace(child, request, addr, data);
1268
1346 default: 1269 default:
1347 return compat_ptrace_request(child, request, addr, data); 1270 return compat_ptrace_request(child, request, addr, data);
1348 } 1271 }
1349 1272
1350 out:
1351 put_task_struct(child);
1352 return ret; 1273 return ret;
1353} 1274}
1354 1275
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 19c9386ac118..a4a838306b2c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -8,6 +8,7 @@
8#include <asm/apic.h> 8#include <asm/apic.h>
9#include <asm/desc.h> 9#include <asm/desc.h>
10#include <asm/hpet.h> 10#include <asm/hpet.h>
11#include <asm/pgtable.h>
11#include <asm/reboot_fixups.h> 12#include <asm/reboot_fixups.h>
12#include <asm/reboot.h> 13#include <asm/reboot.h>
13 14
@@ -15,7 +16,6 @@
15# include <linux/dmi.h> 16# include <linux/dmi.h>
16# include <linux/ctype.h> 17# include <linux/ctype.h>
17# include <linux/mc146818rtc.h> 18# include <linux/mc146818rtc.h>
18# include <asm/pgtable.h>
19#else 19#else
20# include <asm/iommu.h> 20# include <asm/iommu.h>
21#endif 21#endif
@@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length)
275 /* Remap the kernel at virtual address zero, as well as offset zero 275 /* Remap the kernel at virtual address zero, as well as offset zero
276 from the kernel segment. This assumes the kernel segment starts at 276 from the kernel segment. This assumes the kernel segment starts at
277 virtual address PAGE_OFFSET. */ 277 virtual address PAGE_OFFSET. */
278 memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, 278 memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
279 sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); 279 sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
280 280
281 /* 281 /*
@@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void)
399 } 399 }
400} 400}
401 401
402static void native_machine_shutdown(void) 402void native_machine_shutdown(void)
403{ 403{
404 /* Stop the cpus and apics */ 404 /* Stop the cpus and apics */
405#ifdef CONFIG_SMP 405#ifdef CONFIG_SMP
@@ -470,7 +470,10 @@ struct machine_ops machine_ops = {
470 .shutdown = native_machine_shutdown, 470 .shutdown = native_machine_shutdown,
471 .emergency_restart = native_machine_emergency_restart, 471 .emergency_restart = native_machine_emergency_restart,
472 .restart = native_machine_restart, 472 .restart = native_machine_restart,
473 .halt = native_machine_halt 473 .halt = native_machine_halt,
474#ifdef CONFIG_KEXEC
475 .crash_shutdown = native_machine_crash_shutdown,
476#endif
474}; 477};
475 478
476void machine_power_off(void) 479void machine_power_off(void)
@@ -498,3 +501,9 @@ void machine_halt(void)
498 machine_ops.halt(); 501 machine_ops.halt();
499} 502}
500 503
504#ifdef CONFIG_KEXEC
505void machine_crash_shutdown(struct pt_regs *regs)
506{
507 machine_ops.crash_shutdown(regs);
508}
509#endif
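The reboot.c hunks above finish routing crash_shutdown through machine_ops, the same function-pointer table that kvmclock.c earlier in this series uses to install kvm_shutdown and kvm_crash_shutdown. Reduced to a toy, the indirection looks like this (names and behaviour are illustrative only):

/* Sketch of the machine_ops-style indirection the hunks above extend
 * with a crash_shutdown hook; the ops and overrides are illustrative.
 */
#include <stdio.h>

struct machine_ops_sketch {
        void (*shutdown)(void);
        void (*crash_shutdown)(void);
};

static void native_shutdown(void)       { puts("native shutdown"); }
static void native_crash_shutdown(void) { puts("native crash shutdown"); }

/* defaults point at the native implementations ... */
static struct machine_ops_sketch mops = {
        .shutdown       = native_shutdown,
        .crash_shutdown = native_crash_shutdown,
};

/* ... and a paravirt guest can override a hook at init time, then chain */
static void kvm_crash_shutdown_sketch(void)
{
        puts("unregister the kvmclock MSR, then fall through to native");
        native_crash_shutdown();
}

int main(void)
{
        mops.crash_shutdown = kvm_crash_shutdown_sketch;  /* guest override */
        mops.crash_shutdown();   /* what machine_crash_shutdown() dispatches to */
        return 0;
}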
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0d1f44ae6eea..c0c68c18a788 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -18,8 +18,6 @@ unsigned disabled_cpus __cpuinitdata;
18unsigned int boot_cpu_physical_apicid = -1U; 18unsigned int boot_cpu_physical_apicid = -1U;
19EXPORT_SYMBOL(boot_cpu_physical_apicid); 19EXPORT_SYMBOL(boot_cpu_physical_apicid);
20 20
21physid_mask_t phys_cpu_present_map;
22
23DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; 21DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
24EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); 22EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
25 23
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 78828b0f604f..2283422af794 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -47,6 +47,7 @@
47#include <linux/pfn.h> 47#include <linux/pfn.h>
48#include <linux/pci.h> 48#include <linux/pci.h>
49#include <linux/init_ohci1394_dma.h> 49#include <linux/init_ohci1394_dma.h>
50#include <linux/kvm_para.h>
50 51
51#include <video/edid.h> 52#include <video/edid.h>
52 53
@@ -389,7 +390,6 @@ unsigned long __init find_max_low_pfn(void)
389 return max_low_pfn; 390 return max_low_pfn;
390} 391}
391 392
392#define BIOS_EBDA_SEGMENT 0x40E
393#define BIOS_LOWMEM_KILOBYTES 0x413 393#define BIOS_LOWMEM_KILOBYTES 0x413
394 394
395/* 395/*
@@ -420,8 +420,7 @@ static void __init reserve_ebda_region(void)
420 lowmem <<= 10; 420 lowmem <<= 10;
421 421
422 /* start of EBDA area */ 422 /* start of EBDA area */
423 ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT); 423 ebda_addr = get_bios_ebda();
424 ebda_addr <<= 4;
425 424
426 /* Fixup: bios puts an EBDA in the top 64K segment */ 425 /* Fixup: bios puts an EBDA in the top 64K segment */
427 /* of conventional memory, but does not adjust lowmem. */ 426 /* of conventional memory, but does not adjust lowmem. */
@@ -442,7 +441,7 @@ static void __init reserve_ebda_region(void)
442} 441}
443 442
444#ifndef CONFIG_NEED_MULTIPLE_NODES 443#ifndef CONFIG_NEED_MULTIPLE_NODES
445void __init setup_bootmem_allocator(void); 444static void __init setup_bootmem_allocator(void);
446static unsigned long __init setup_memory(void) 445static unsigned long __init setup_memory(void)
447{ 446{
448 /* 447 /*
@@ -477,7 +476,7 @@ static unsigned long __init setup_memory(void)
477 return max_low_pfn; 476 return max_low_pfn;
478} 477}
479 478
480void __init zone_sizes_init(void) 479static void __init zone_sizes_init(void)
481{ 480{
482 unsigned long max_zone_pfns[MAX_NR_ZONES]; 481 unsigned long max_zone_pfns[MAX_NR_ZONES];
483 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 482 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
@@ -822,6 +821,10 @@ void __init setup_arch(char **cmdline_p)
822 821
823 max_low_pfn = setup_memory(); 822 max_low_pfn = setup_memory();
824 823
824#ifdef CONFIG_KVM_CLOCK
825 kvmclock_init();
826#endif
827
825#ifdef CONFIG_VMI 828#ifdef CONFIG_VMI
826 /* 829 /*
827 * Must be after max_low_pfn is determined, and before kernel 830 * Must be after max_low_pfn is determined, and before kernel
@@ -829,6 +832,7 @@ void __init setup_arch(char **cmdline_p)
829 */ 832 */
830 vmi_init(); 833 vmi_init();
831#endif 834#endif
835 kvm_guest_init();
832 836
833 /* 837 /*
834 * NOTE: before this point _nobody_ is allowed to allocate 838 * NOTE: before this point _nobody_ is allowed to allocate
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c2ec3dcb6b99..a94fb959a87a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -42,6 +42,7 @@
42#include <linux/ctype.h> 42#include <linux/ctype.h>
43#include <linux/uaccess.h> 43#include <linux/uaccess.h>
44#include <linux/init_ohci1394_dma.h> 44#include <linux/init_ohci1394_dma.h>
45#include <linux/kvm_para.h>
45 46
46#include <asm/mtrr.h> 47#include <asm/mtrr.h>
47#include <asm/uaccess.h> 48#include <asm/uaccess.h>
@@ -116,7 +117,7 @@ extern int root_mountflags;
116 117
117char __initdata command_line[COMMAND_LINE_SIZE]; 118char __initdata command_line[COMMAND_LINE_SIZE];
118 119
119struct resource standard_io_resources[] = { 120static struct resource standard_io_resources[] = {
120 { .name = "dma1", .start = 0x00, .end = 0x1f, 121 { .name = "dma1", .start = 0x00, .end = 0x1f,
121 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 122 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
122 { .name = "pic1", .start = 0x20, .end = 0x21, 123 { .name = "pic1", .start = 0x20, .end = 0x21,
@@ -190,6 +191,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
190 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); 191 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
191 e820_register_active_regions(0, start_pfn, end_pfn); 192 e820_register_active_regions(0, start_pfn, end_pfn);
192 free_bootmem_with_active_regions(0, end_pfn); 193 free_bootmem_with_active_regions(0, end_pfn);
194 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
193 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); 195 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
194} 196}
195#endif 197#endif
@@ -264,6 +266,28 @@ void __attribute__((weak)) __init memory_setup(void)
264 machine_specific_memory_setup(); 266 machine_specific_memory_setup();
265} 267}
266 268
269static void __init parse_setup_data(void)
270{
271 struct setup_data *data;
272 unsigned long pa_data;
273
274 if (boot_params.hdr.version < 0x0209)
275 return;
276 pa_data = boot_params.hdr.setup_data;
277 while (pa_data) {
278 data = early_ioremap(pa_data, PAGE_SIZE);
279 switch (data->type) {
280 default:
281 break;
282 }
283#ifndef CONFIG_DEBUG_BOOT_PARAMS
284 free_early(pa_data, pa_data+sizeof(*data)+data->len);
285#endif
286 pa_data = data->next;
287 early_iounmap(data, PAGE_SIZE);
288 }
289}
290
267/* 291/*
268 * setup_arch - architecture-specific boot-time initializations 292 * setup_arch - architecture-specific boot-time initializations
269 * 293 *
@@ -316,6 +340,8 @@ void __init setup_arch(char **cmdline_p)
316 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); 340 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
317 *cmdline_p = command_line; 341 *cmdline_p = command_line;
318 342
343 parse_setup_data();
344
319 parse_early_param(); 345 parse_early_param();
320 346
321#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT 347#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
@@ -359,6 +385,10 @@ void __init setup_arch(char **cmdline_p)
359 385
360 io_delay_init(); 386 io_delay_init();
361 387
388#ifdef CONFIG_KVM_CLOCK
389 kvmclock_init();
390#endif
391
362#ifdef CONFIG_SMP 392#ifdef CONFIG_SMP
363 /* setup to use the early static init tables during kernel startup */ 393 /* setup to use the early static init tables during kernel startup */
364 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; 394 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
@@ -397,8 +427,6 @@ void __init setup_arch(char **cmdline_p)
397 contig_initmem_init(0, end_pfn); 427 contig_initmem_init(0, end_pfn);
398#endif 428#endif
399 429
400 early_res_to_bootmem();
401
402 dma32_reserve_bootmem(); 430 dma32_reserve_bootmem();
403 431
404#ifdef CONFIG_ACPI_SLEEP 432#ifdef CONFIG_ACPI_SLEEP
@@ -465,6 +493,8 @@ void __init setup_arch(char **cmdline_p)
465 init_apic_mappings(); 493 init_apic_mappings();
466 ioapic_init_mappings(); 494 ioapic_init_mappings();
467 495
496 kvm_guest_init();
497
468 /* 498 /*
469 * We trust e820 completely. No explicit ROM probing in memory. 499 * We trust e820 completely. No explicit ROM probing in memory.
470 */ 500 */
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index f1b117930837..8e05e7f7bd40 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -413,16 +413,6 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
413 regs->ss = __USER_DS; 413 regs->ss = __USER_DS;
414 regs->cs = __USER_CS; 414 regs->cs = __USER_CS;
415 415
416 /*
417 * Clear TF when entering the signal handler, but
418 * notify any tracer that was single-stepping it.
419 * The tracer may want to single-step inside the
420 * handler too.
421 */
422 regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
423 if (test_thread_flag(TIF_SINGLESTEP))
424 ptrace_notify(SIGTRAP);
425
426 return 0; 416 return 0;
427 417
428give_sigsegv: 418give_sigsegv:
@@ -501,16 +491,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
501 regs->ss = __USER_DS; 491 regs->ss = __USER_DS;
502 regs->cs = __USER_CS; 492 regs->cs = __USER_CS;
503 493
504 /*
505 * Clear TF when entering the signal handler, but
506 * notify any tracer that was single-stepping it.
507 * The tracer may want to single-step inside the
508 * handler too.
509 */
510 regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
511 if (test_thread_flag(TIF_SINGLESTEP))
512 ptrace_notify(SIGTRAP);
513
514 return 0; 494 return 0;
515 495
516give_sigsegv: 496give_sigsegv:
@@ -566,6 +546,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
566 if (ret) 546 if (ret)
567 return ret; 547 return ret;
568 548
549 /*
550 * Clear the direction flag as per the ABI for function entry.
551 */
552 regs->flags &= ~X86_EFLAGS_DF;
553
554 /*
555 * Clear TF when entering the signal handler, but
556 * notify any tracer that was single-stepping it.
557 * The tracer may want to single-step inside the
558 * handler too.
559 */
560 regs->flags &= ~X86_EFLAGS_TF;
561 if (test_thread_flag(TIF_SINGLESTEP))
562 ptrace_notify(SIGTRAP);
563
569 spin_lock_irq(&current->sighand->siglock); 564 spin_lock_irq(&current->sighand->siglock);
570 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); 565 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
571 if (!(ka->sa.sa_flags & SA_NODEFER)) 566 if (!(ka->sa.sa_flags & SA_NODEFER))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 827179c5b32a..ccb2a4560c2d 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -285,14 +285,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
285 even if the handler happens to be interrupting 32-bit code. */ 285 even if the handler happens to be interrupting 32-bit code. */
286 regs->cs = __USER_CS; 286 regs->cs = __USER_CS;
287 287
288 /* This, by contrast, has nothing to do with segment registers -
289 see include/asm-x86_64/uaccess.h for details. */
290 set_fs(USER_DS);
291
292 regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
293 if (test_thread_flag(TIF_SINGLESTEP))
294 ptrace_notify(SIGTRAP);
295
296 return 0; 288 return 0;
297 289
298give_sigsegv: 290give_sigsegv:
@@ -380,6 +372,28 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
380 ret = setup_rt_frame(sig, ka, info, oldset, regs); 372 ret = setup_rt_frame(sig, ka, info, oldset, regs);
381 373
382 if (ret == 0) { 374 if (ret == 0) {
375 /*
376 * This has nothing to do with segment registers,
377 * despite the name. This magic affects uaccess.h
378 * macros' behavior. Reset it to the normal setting.
379 */
380 set_fs(USER_DS);
381
382 /*
383 * Clear the direction flag as per the ABI for function entry.
384 */
385 regs->flags &= ~X86_EFLAGS_DF;
386
387 /*
388 * Clear TF when entering the signal handler, but
389 * notify any tracer that was single-stepping it.
390 * The tracer may want to single-step inside the
391 * handler too.
392 */
393 regs->flags &= ~X86_EFLAGS_TF;
394 if (test_thread_flag(TIF_SINGLESTEP))
395 ptrace_notify(SIGTRAP);
396
383 spin_lock_irq(&current->sighand->siglock); 397 spin_lock_irq(&current->sighand->siglock);
384 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); 398 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
385 if (!(ka->sa.sa_flags & SA_NODEFER)) 399 if (!(ka->sa.sa_flags & SA_NODEFER))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6a925394bc7e..04c662ba18f1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -184,7 +184,7 @@ static void unmap_cpu_to_node(int cpu)
184u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 184u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
185 { [0 ... NR_CPUS-1] = BAD_APICID }; 185 { [0 ... NR_CPUS-1] = BAD_APICID };
186 186
187void map_cpu_to_logical_apicid(void) 187static void map_cpu_to_logical_apicid(void)
188{ 188{
189 int cpu = smp_processor_id(); 189 int cpu = smp_processor_id();
190 int apicid = logical_smp_processor_id(); 190 int apicid = logical_smp_processor_id();
@@ -197,7 +197,7 @@ void map_cpu_to_logical_apicid(void)
197 map_cpu_to_node(cpu, node); 197 map_cpu_to_node(cpu, node);
198} 198}
199 199
200void unmap_cpu_to_logical_apicid(int cpu) 200static void unmap_cpu_to_logical_apicid(int cpu)
201{ 201{
202 cpu_2_logical_apicid[cpu] = BAD_APICID; 202 cpu_2_logical_apicid[cpu] = BAD_APICID;
203 unmap_cpu_to_node(cpu); 203 unmap_cpu_to_node(cpu);
@@ -211,7 +211,7 @@ void unmap_cpu_to_logical_apicid(int cpu)
211 * Report back to the Boot Processor. 211 * Report back to the Boot Processor.
212 * Running on AP. 212 * Running on AP.
213 */ 213 */
214void __cpuinit smp_callin(void) 214static void __cpuinit smp_callin(void)
215{ 215{
216 int cpuid, phys_id; 216 int cpuid, phys_id;
217 unsigned long timeout; 217 unsigned long timeout;
@@ -436,7 +436,7 @@ valid_k7:
436#endif 436#endif
437} 437}
438 438
439void __cpuinit smp_checks(void) 439static void __cpuinit smp_checks(void)
440{ 440{
441 if (smp_b_stepping) 441 if (smp_b_stepping)
442 printk(KERN_WARNING "WARNING: SMP operation may be unreliable" 442 printk(KERN_WARNING "WARNING: SMP operation may be unreliable"
@@ -565,7 +565,7 @@ void __init smp_alloc_memory(void)
565} 565}
566#endif 566#endif
567 567
568void impress_friends(void) 568static void impress_friends(void)
569{ 569{
570 int cpu; 570 int cpu;
571 unsigned long bogosum = 0; 571 unsigned long bogosum = 0;
@@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
1039 1039
1040#ifdef CONFIG_X86_32 1040#ifdef CONFIG_X86_32
1041 /* init low mem mapping */ 1041 /* init low mem mapping */
1042 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, 1042 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
1043 min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); 1043 min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
1044 flush_tlb_all(); 1044 flush_tlb_all();
1045#endif 1045#endif
1046 1046
@@ -1058,7 +1058,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
1058 check_tsc_sync_source(cpu); 1058 check_tsc_sync_source(cpu);
1059 local_irq_restore(flags); 1059 local_irq_restore(flags);
1060 1060
1061 while (!cpu_isset(cpu, cpu_online_map)) { 1061 while (!cpu_online(cpu)) {
1062 cpu_relax(); 1062 cpu_relax();
1063 touch_nmi_watchdog(); 1063 touch_nmi_watchdog();
1064 } 1064 }
@@ -1168,7 +1168,7 @@ static void __init smp_cpu_index_default(void)
1168 int i; 1168 int i;
1169 struct cpuinfo_x86 *c; 1169 struct cpuinfo_x86 *c;
1170 1170
1171 for_each_cpu_mask(i, cpu_possible_map) { 1171 for_each_possible_cpu(i) {
1172 c = &cpu_data(i); 1172 c = &cpu_data(i);
1173 /* mark all to hotplug */ 1173 /* mark all to hotplug */
1174 c->cpu_index = NR_CPUS; 1174 c->cpu_index = NR_CPUS;
@@ -1287,7 +1287,7 @@ void cpu_exit_clear(void)
1287} 1287}
1288# endif /* CONFIG_X86_32 */ 1288# endif /* CONFIG_X86_32 */
1289 1289
1290void remove_siblinginfo(int cpu) 1290static void remove_siblinginfo(int cpu)
1291{ 1291{
1292 int sibling; 1292 int sibling;
1293 struct cpuinfo_x86 *c = &cpu_data(cpu); 1293 struct cpuinfo_x86 *c = &cpu_data(cpu);
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index 6878a9c2df5d..ae751094eba9 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -29,6 +29,7 @@
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/init.h> 30#include <linux/init.h>
31#include <asm/io.h> 31#include <asm/io.h>
32#include <asm/bios_ebda.h>
32#include <asm/mach-summit/mach_mpparse.h> 33#include <asm/mach-summit/mach_mpparse.h>
33 34
34static struct rio_table_hdr *rio_table_hdr __initdata; 35static struct rio_table_hdr *rio_table_hdr __initdata;
@@ -140,8 +141,8 @@ void __init setup_summit(void)
140 int i, next_wpeg, next_bus = 0; 141 int i, next_wpeg, next_bus = 0;
141 142
142 /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ 143 /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
143 ptr = *(unsigned short *)phys_to_virt(0x40Eul); 144 ptr = get_bios_ebda();
144 ptr = (unsigned long)phys_to_virt(ptr << 4); 145 ptr = (unsigned long)phys_to_virt(ptr);
145 146
146 rio_table_hdr = NULL; 147 rio_table_hdr = NULL;
147 offset = 0x180; 148 offset = 0x180;
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 1558e513757e..a1f07d793202 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,13 +191,13 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 spin_unlock(&f->tlbstate_lock); 191 spin_unlock(&f->tlbstate_lock);
192} 192}
193 193
194int __cpuinit init_smp_flush(void) 194static int __cpuinit init_smp_flush(void)
195{ 195{
196 int i; 196 int i;
197 197
198 for_each_cpu_mask(i, cpu_possible_map) { 198 for_each_possible_cpu(i)
199 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); 199 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
200 } 200
201 return 0; 201 return 0;
202} 202}
203core_initcall(init_smp_flush); 203core_initcall(init_smp_flush);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 64580679861e..d8ccc3c6552f 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -33,7 +33,7 @@
33 33
34/* We can free up trampoline after bootup if cpu hotplug is not supported. */ 34/* We can free up trampoline after bootup if cpu hotplug is not supported. */
35#ifndef CONFIG_HOTPLUG_CPU 35#ifndef CONFIG_HOTPLUG_CPU
36.section ".init.data","aw",@progbits 36.section ".cpuinit.data","aw",@progbits
37#else 37#else
38.section .rodata,"a",@progbits 38.section .rodata,"a",@progbits
39#endif 39#endif
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 471e694d6713..bde6f63e15d5 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -602,7 +602,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
602DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 602DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
603DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 603DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
604DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) 604DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
605DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) 605DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
606 606
607void __kprobes do_general_protection(struct pt_regs *regs, long error_code) 607void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
608{ 608{
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 12affe1f9bce..956f38927aa7 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page)
320 * pdes need to be zeroed. 320 * pdes need to be zeroed.
321 */ 321 */
322 if (type & VMI_PAGE_CLONE) 322 if (type & VMI_PAGE_CLONE)
323 limit = USER_PTRS_PER_PGD; 323 limit = KERNEL_PGD_BOUNDARY;
324 for (i = 0; i < limit; i++) 324 for (i = 0; i < limit; i++)
325 BUG_ON(ptr[i]); 325 BUG_ON(ptr[i]);
326} 326}
@@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
392} 392}
393#endif 393#endif
394 394
395static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn) 395static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
396{ 396{
397 vmi_set_page_type(pfn, VMI_PAGE_L1); 397 vmi_set_page_type(pfn, VMI_PAGE_L1);
398 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); 398 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
399} 399}
400 400
401static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn) 401static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
402{ 402{
403 /* 403 /*
404 * This call comes in very early, before mem_map is setup. 404 * This call comes in very early, before mem_map is setup.
@@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
409 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); 409 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
410} 410}
411 411
412static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) 412static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
413{ 413{
414 vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); 414 vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
415 vmi_check_page_type(clonepfn, VMI_PAGE_L2); 415 vmi_check_page_type(clonepfn, VMI_PAGE_L2);
416 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); 416 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
417} 417}
418 418
419static void vmi_release_pt(u32 pfn) 419static void vmi_release_pte(u32 pfn)
420{ 420{
421 vmi_ops.release_page(pfn, VMI_PAGE_L1); 421 vmi_ops.release_page(pfn, VMI_PAGE_L1);
422 vmi_set_page_type(pfn, VMI_PAGE_NORMAL); 422 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
423} 423}
424 424
425static void vmi_release_pd(u32 pfn) 425static void vmi_release_pmd(u32 pfn)
426{ 426{
427 vmi_ops.release_page(pfn, VMI_PAGE_L2); 427 vmi_ops.release_page(pfn, VMI_PAGE_L2);
428 vmi_set_page_type(pfn, VMI_PAGE_NORMAL); 428 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
@@ -871,15 +871,15 @@ static inline int __init activate_vmi(void)
871 871
872 vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); 872 vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
873 if (vmi_ops.allocate_page) { 873 if (vmi_ops.allocate_page) {
874 pv_mmu_ops.alloc_pt = vmi_allocate_pt; 874 pv_mmu_ops.alloc_pte = vmi_allocate_pte;
875 pv_mmu_ops.alloc_pd = vmi_allocate_pd; 875 pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
876 pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone; 876 pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
877 } 877 }
878 878
879 vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); 879 vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
880 if (vmi_ops.release_page) { 880 if (vmi_ops.release_page) {
881 pv_mmu_ops.release_pt = vmi_release_pt; 881 pv_mmu_ops.release_pte = vmi_release_pte;
882 pv_mmu_ops.release_pd = vmi_release_pd; 882 pv_mmu_ops.release_pmd = vmi_release_pmd;
883 } 883 }
884 884
885 /* Set linear is needed in all cases */ 885 /* Set linear is needed in all cases */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index edff4c985485..61efa2f7d564 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -216,7 +216,7 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
216 return 0; 216 return 0;
217} 217}
218 218
219long __vsyscall(3) venosys_1(void) 219static long __vsyscall(3) venosys_1(void)
220{ 220{
221 return -ENOSYS; 221 return -ENOSYS;
222} 222}
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 41962e793c0f..8d45fabc5f3b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -19,7 +19,7 @@ if VIRTUALIZATION
19 19
20config KVM 20config KVM
21 tristate "Kernel-based Virtual Machine (KVM) support" 21 tristate "Kernel-based Virtual Machine (KVM) support"
22 depends on HAVE_KVM && EXPERIMENTAL 22 depends on HAVE_KVM
23 select PREEMPT_NOTIFIERS 23 select PREEMPT_NOTIFIERS
24 select ANON_INODES 24 select ANON_INODES
25 ---help--- 25 ---help---
@@ -50,6 +50,17 @@ config KVM_AMD
50 Provides support for KVM on AMD processors equipped with the AMD-V 50 Provides support for KVM on AMD processors equipped with the AMD-V
51 (SVM) extensions. 51 (SVM) extensions.
52 52
53config KVM_TRACE
54 bool "KVM trace support"
55 depends on KVM && MARKERS && SYSFS
56 select RELAY
57 select DEBUG_FS
58 default n
59 ---help---
60 This option allows reading a trace of kvm-related events through
61 relayfs. Note the ABI is not considered stable and will be
62 modified in future updates.
63
53# OK, it's a little counter-intuitive to do this, but it puts it neatly under 64# OK, it's a little counter-intuitive to do this, but it puts it neatly under
54# the virtualization menu. 65# the virtualization menu.
55source drivers/lguest/Kconfig 66source drivers/lguest/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b310784..c97d35c218db 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,10 +3,14 @@
3# 3#
4 4
5common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) 5common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
6ifeq ($(CONFIG_KVM_TRACE),y)
7common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
8endif
6 9
7EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm 10EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
8 11
9kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o 12kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
13 i8254.o
10obj-$(CONFIG_KVM) += kvm.o 14obj-$(CONFIG_KVM) += kvm.o
11kvm-intel-objs = vmx.o 15kvm-intel-objs = vmx.o
12obj-$(CONFIG_KVM_INTEL) += kvm-intel.o 16obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 000000000000..361e31611276
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,611 @@
1/*
2 * 8253/8254 interval timer emulation
3 *
4 * Copyright (c) 2003-2004 Fabrice Bellard
5 * Copyright (c) 2006 Intel Corporation
6 * Copyright (c) 2007 Keir Fraser, XenSource Inc
7 * Copyright (c) 2008 Intel Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a copy
10 * of this software and associated documentation files (the "Software"), to deal
11 * in the Software without restriction, including without limitation the rights
12 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 * copies of the Software, and to permit persons to whom the Software is
14 * furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included in
17 * all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 * THE SOFTWARE.
26 *
27 * Authors:
28 * Sheng Yang <sheng.yang@intel.com>
29 * Based on QEMU and Xen.
30 */
31
32#include <linux/kvm_host.h>
33
34#include "irq.h"
35#include "i8254.h"
36
37#ifndef CONFIG_X86_64
38#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
39#else
40#define mod_64(x, y) ((x) % (y))
41#endif
42
43#define RW_STATE_LSB 1
44#define RW_STATE_MSB 2
45#define RW_STATE_WORD0 3
46#define RW_STATE_WORD1 4
47
48/* Compute with 96 bit intermediate result: (a*b)/c */
49static u64 muldiv64(u64 a, u32 b, u32 c)
50{
51 union {
52 u64 ll;
53 struct {
54 u32 low, high;
55 } l;
56 } u, res;
57 u64 rl, rh;
58
59 u.ll = a;
60 rl = (u64)u.l.low * (u64)b;
61 rh = (u64)u.l.high * (u64)b;
62 rh += (rl >> 32);
63 res.l.high = div64_64(rh, c);
64 res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
65 return res.ll;
66}
67
68static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
69{
70 struct kvm_kpit_channel_state *c =
71 &kvm->arch.vpit->pit_state.channels[channel];
72
73 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
74
75 switch (c->mode) {
76 default:
77 case 0:
78 case 4:
79 /* XXX: just disable/enable counting */
80 break;
81 case 1:
82 case 2:
83 case 3:
84 case 5:
85 /* Restart counting on rising edge. */
86 if (c->gate < val)
87 c->count_load_time = ktime_get();
88 break;
89 }
90
91 c->gate = val;
92}
93
94int pit_get_gate(struct kvm *kvm, int channel)
95{
96 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
97
98 return kvm->arch.vpit->pit_state.channels[channel].gate;
99}
100
101static int pit_get_count(struct kvm *kvm, int channel)
102{
103 struct kvm_kpit_channel_state *c =
104 &kvm->arch.vpit->pit_state.channels[channel];
105 s64 d, t;
106 int counter;
107
108 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
109
110 t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
111 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
112
113 switch (c->mode) {
114 case 0:
115 case 1:
116 case 4:
117 case 5:
118 counter = (c->count - d) & 0xffff;
119 break;
120 case 3:
121 /* XXX: may be incorrect for odd counts */
122 counter = c->count - (mod_64((2 * d), c->count));
123 break;
124 default:
125 counter = c->count - mod_64(d, c->count);
126 break;
127 }
128 return counter;
129}
130
131static int pit_get_out(struct kvm *kvm, int channel)
132{
133 struct kvm_kpit_channel_state *c =
134 &kvm->arch.vpit->pit_state.channels[channel];
135 s64 d, t;
136 int out;
137
138 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
139
140 t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
141 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
142
143 switch (c->mode) {
144 default:
145 case 0:
146 out = (d >= c->count);
147 break;
148 case 1:
149 out = (d < c->count);
150 break;
151 case 2:
152 out = ((mod_64(d, c->count) == 0) && (d != 0));
153 break;
154 case 3:
155 out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
156 break;
157 case 4:
158 case 5:
159 out = (d == c->count);
160 break;
161 }
162
163 return out;
164}
165
166static void pit_latch_count(struct kvm *kvm, int channel)
167{
168 struct kvm_kpit_channel_state *c =
169 &kvm->arch.vpit->pit_state.channels[channel];
170
171 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
172
173 if (!c->count_latched) {
174 c->latched_count = pit_get_count(kvm, channel);
175 c->count_latched = c->rw_mode;
176 }
177}
178
179static void pit_latch_status(struct kvm *kvm, int channel)
180{
181 struct kvm_kpit_channel_state *c =
182 &kvm->arch.vpit->pit_state.channels[channel];
183
184 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
185
186 if (!c->status_latched) {
187 /* TODO: Return NULL COUNT (bit 6). */
188 c->status = ((pit_get_out(kvm, channel) << 7) |
189 (c->rw_mode << 4) |
190 (c->mode << 1) |
191 c->bcd);
192 c->status_latched = 1;
193 }
194}
195
196int __pit_timer_fn(struct kvm_kpit_state *ps)
197{
198 struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
199 struct kvm_kpit_timer *pt = &ps->pit_timer;
200
201 atomic_inc(&pt->pending);
202 smp_mb__after_atomic_inc();
203 /* FIXME: handle case where the guest is in guest mode */
204 if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
205 vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
206 wake_up_interruptible(&vcpu0->wq);
207 }
208
209 pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
210 pt->scheduled = ktime_to_ns(pt->timer.expires);
211
212 return (pt->period == 0 ? 0 : 1);
213}
214
215int pit_has_pending_timer(struct kvm_vcpu *vcpu)
216{
217 struct kvm_pit *pit = vcpu->kvm->arch.vpit;
218
219 if (pit && vcpu->vcpu_id == 0)
220 return atomic_read(&pit->pit_state.pit_timer.pending);
221
222 return 0;
223}
224
225static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
226{
227 struct kvm_kpit_state *ps;
228 int restart_timer = 0;
229
230 ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
231
232 restart_timer = __pit_timer_fn(ps);
233
234 if (restart_timer)
235 return HRTIMER_RESTART;
236 else
237 return HRTIMER_NORESTART;
238}
239
240static void destroy_pit_timer(struct kvm_kpit_timer *pt)
241{
242 pr_debug("pit: execute del timer!\n");
243 hrtimer_cancel(&pt->timer);
244}
245
246static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
247{
248 s64 interval;
249
250 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
251
252 pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
253
254 /* TODO: the new value only takes effect after the counter is retriggered */
255 hrtimer_cancel(&pt->timer);
256 pt->period = (is_period == 0) ? 0 : interval;
257 pt->timer.function = pit_timer_fn;
258 atomic_set(&pt->pending, 0);
259
260 hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
261 HRTIMER_MODE_ABS);
262}
263
264static void pit_load_count(struct kvm *kvm, int channel, u32 val)
265{
266 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
267
268 WARN_ON(!mutex_is_locked(&ps->lock));
269
270 pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
271
272 /*
273 * Although the spec says the state of the 8254 is undefined after power-up,
274 * some operating systems (e.g. Windows XP) rely on the IRQ0 interrupt
275 * while booting up.
276 * So initialize the counter to its maximum rate here rather than leaving it unset.
277 */
278 if (val == 0)
279 val = 0x10000;
280
281 ps->channels[channel].count_load_time = ktime_get();
282 ps->channels[channel].count = val;
283
284 if (channel != 0)
285 return;
286
287 /* Two types of timer:
288 * mode 1 is one-shot, mode 2 is periodic; otherwise delete the timer */
289 switch (ps->channels[0].mode) {
290 case 1:
291 create_pit_timer(&ps->pit_timer, val, 0);
292 break;
293 case 2:
294 create_pit_timer(&ps->pit_timer, val, 1);
295 break;
296 default:
297 destroy_pit_timer(&ps->pit_timer);
298 }
299}
300
301void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
302{
303 mutex_lock(&kvm->arch.vpit->pit_state.lock);
304 pit_load_count(kvm, channel, val);
305 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
306}
307
308static void pit_ioport_write(struct kvm_io_device *this,
309 gpa_t addr, int len, const void *data)
310{
311 struct kvm_pit *pit = (struct kvm_pit *)this->private;
312 struct kvm_kpit_state *pit_state = &pit->pit_state;
313 struct kvm *kvm = pit->kvm;
314 int channel, access;
315 struct kvm_kpit_channel_state *s;
316 u32 val = *(u32 *) data;
317
318 val &= 0xff;
319 addr &= KVM_PIT_CHANNEL_MASK;
320
321 mutex_lock(&pit_state->lock);
322
323 if (val != 0)
324 pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
325 (unsigned int)addr, len, val);
326
327 if (addr == 3) {
328 channel = val >> 6;
329 if (channel == 3) {
330 /* Read-Back Command. */
331 for (channel = 0; channel < 3; channel++) {
332 s = &pit_state->channels[channel];
333 if (val & (2 << channel)) {
334 if (!(val & 0x20))
335 pit_latch_count(kvm, channel);
336 if (!(val & 0x10))
337 pit_latch_status(kvm, channel);
338 }
339 }
340 } else {
341 /* Select Counter <channel>. */
342 s = &pit_state->channels[channel];
343 access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
344 if (access == 0) {
345 pit_latch_count(kvm, channel);
346 } else {
347 s->rw_mode = access;
348 s->read_state = access;
349 s->write_state = access;
350 s->mode = (val >> 1) & 7;
351 if (s->mode > 5)
352 s->mode -= 4;
353 s->bcd = val & 1;
354 }
355 }
356 } else {
357 /* Write Count. */
358 s = &pit_state->channels[addr];
359 switch (s->write_state) {
360 default:
361 case RW_STATE_LSB:
362 pit_load_count(kvm, addr, val);
363 break;
364 case RW_STATE_MSB:
365 pit_load_count(kvm, addr, val << 8);
366 break;
367 case RW_STATE_WORD0:
368 s->write_latch = val;
369 s->write_state = RW_STATE_WORD1;
370 break;
371 case RW_STATE_WORD1:
372 pit_load_count(kvm, addr, s->write_latch | (val << 8));
373 s->write_state = RW_STATE_WORD0;
374 break;
375 }
376 }
377
378 mutex_unlock(&pit_state->lock);
379}
380
381static void pit_ioport_read(struct kvm_io_device *this,
382 gpa_t addr, int len, void *data)
383{
384 struct kvm_pit *pit = (struct kvm_pit *)this->private;
385 struct kvm_kpit_state *pit_state = &pit->pit_state;
386 struct kvm *kvm = pit->kvm;
387 int ret, count;
388 struct kvm_kpit_channel_state *s;
389
390 addr &= KVM_PIT_CHANNEL_MASK;
391 s = &pit_state->channels[addr];
392
393 mutex_lock(&pit_state->lock);
394
395 if (s->status_latched) {
396 s->status_latched = 0;
397 ret = s->status;
398 } else if (s->count_latched) {
399 switch (s->count_latched) {
400 default:
401 case RW_STATE_LSB:
402 ret = s->latched_count & 0xff;
403 s->count_latched = 0;
404 break;
405 case RW_STATE_MSB:
406 ret = s->latched_count >> 8;
407 s->count_latched = 0;
408 break;
409 case RW_STATE_WORD0:
410 ret = s->latched_count & 0xff;
411 s->count_latched = RW_STATE_MSB;
412 break;
413 }
414 } else {
415 switch (s->read_state) {
416 default:
417 case RW_STATE_LSB:
418 count = pit_get_count(kvm, addr);
419 ret = count & 0xff;
420 break;
421 case RW_STATE_MSB:
422 count = pit_get_count(kvm, addr);
423 ret = (count >> 8) & 0xff;
424 break;
425 case RW_STATE_WORD0:
426 count = pit_get_count(kvm, addr);
427 ret = count & 0xff;
428 s->read_state = RW_STATE_WORD1;
429 break;
430 case RW_STATE_WORD1:
431 count = pit_get_count(kvm, addr);
432 ret = (count >> 8) & 0xff;
433 s->read_state = RW_STATE_WORD0;
434 break;
435 }
436 }
437
438 if (len > sizeof(ret))
439 len = sizeof(ret);
440 memcpy(data, (char *)&ret, len);
441
442 mutex_unlock(&pit_state->lock);
443}
444
445static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
446{
447 return ((addr >= KVM_PIT_BASE_ADDRESS) &&
448 (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
449}
450
451static void speaker_ioport_write(struct kvm_io_device *this,
452 gpa_t addr, int len, const void *data)
453{
454 struct kvm_pit *pit = (struct kvm_pit *)this->private;
455 struct kvm_kpit_state *pit_state = &pit->pit_state;
456 struct kvm *kvm = pit->kvm;
457 u32 val = *(u32 *) data;
458
459 mutex_lock(&pit_state->lock);
460 pit_state->speaker_data_on = (val >> 1) & 1;
461 pit_set_gate(kvm, 2, val & 1);
462 mutex_unlock(&pit_state->lock);
463}
464
465static void speaker_ioport_read(struct kvm_io_device *this,
466 gpa_t addr, int len, void *data)
467{
468 struct kvm_pit *pit = (struct kvm_pit *)this->private;
469 struct kvm_kpit_state *pit_state = &pit->pit_state;
470 struct kvm *kvm = pit->kvm;
471 unsigned int refresh_clock;
472 int ret;
473
474 /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
475 refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
476
477 mutex_lock(&pit_state->lock);
478 ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
479 (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
480 if (len > sizeof(ret))
481 len = sizeof(ret);
482 memcpy(data, (char *)&ret, len);
483 mutex_unlock(&pit_state->lock);
484}
485
486static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
487{
488 return (addr == KVM_SPEAKER_BASE_ADDRESS);
489}
490
491void kvm_pit_reset(struct kvm_pit *pit)
492{
493 int i;
494 struct kvm_kpit_channel_state *c;
495
496 mutex_lock(&pit->pit_state.lock);
497 for (i = 0; i < 3; i++) {
498 c = &pit->pit_state.channels[i];
499 c->mode = 0xff;
500 c->gate = (i != 2);
501 pit_load_count(pit->kvm, i, 0);
502 }
503 mutex_unlock(&pit->pit_state.lock);
504
505 atomic_set(&pit->pit_state.pit_timer.pending, 0);
506 pit->pit_state.inject_pending = 1;
507}
508
509struct kvm_pit *kvm_create_pit(struct kvm *kvm)
510{
511 struct kvm_pit *pit;
512 struct kvm_kpit_state *pit_state;
513
514 pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
515 if (!pit)
516 return NULL;
517
518 mutex_init(&pit->pit_state.lock);
519 mutex_lock(&pit->pit_state.lock);
520
521 /* Initialize PIO device */
522 pit->dev.read = pit_ioport_read;
523 pit->dev.write = pit_ioport_write;
524 pit->dev.in_range = pit_in_range;
525 pit->dev.private = pit;
526 kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
527
528 pit->speaker_dev.read = speaker_ioport_read;
529 pit->speaker_dev.write = speaker_ioport_write;
530 pit->speaker_dev.in_range = speaker_in_range;
531 pit->speaker_dev.private = pit;
532 kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
533
534 kvm->arch.vpit = pit;
535 pit->kvm = kvm;
536
537 pit_state = &pit->pit_state;
538 pit_state->pit = pit;
539 hrtimer_init(&pit_state->pit_timer.timer,
540 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
541 mutex_unlock(&pit->pit_state.lock);
542
543 kvm_pit_reset(pit);
544
545 return pit;
546}
547
548void kvm_free_pit(struct kvm *kvm)
549{
550 struct hrtimer *timer;
551
552 if (kvm->arch.vpit) {
553 mutex_lock(&kvm->arch.vpit->pit_state.lock);
554 timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
555 hrtimer_cancel(timer);
556 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
557 kfree(kvm->arch.vpit);
558 }
559}
560
561void __inject_pit_timer_intr(struct kvm *kvm)
562{
563 mutex_lock(&kvm->lock);
564 kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
565 kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
566 kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
567 kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
568 mutex_unlock(&kvm->lock);
569}
570
571void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
572{
573 struct kvm_pit *pit = vcpu->kvm->arch.vpit;
574 struct kvm *kvm = vcpu->kvm;
575 struct kvm_kpit_state *ps;
576
577 if (vcpu && pit) {
578 ps = &pit->pit_state;
579
580 /* Try to inject pending interrupts when:
581 * 1. an interrupt is pending, and
582 * 2. the last interrupt was accepted, or we have waited too long since then */
583 if (atomic_read(&ps->pit_timer.pending) &&
584 (ps->inject_pending ||
585 (jiffies - ps->last_injected_time
586 >= KVM_MAX_PIT_INTR_INTERVAL))) {
587 ps->inject_pending = 0;
588 __inject_pit_timer_intr(kvm);
589 ps->last_injected_time = jiffies;
590 }
591 }
592}
593
594void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
595{
596 struct kvm_arch *arch = &vcpu->kvm->arch;
597 struct kvm_kpit_state *ps;
598
599 if (vcpu && arch->vpit) {
600 ps = &arch->vpit->pit_state;
601 if (atomic_read(&ps->pit_timer.pending) &&
602 (((arch->vpic->pics[0].imr & 1) == 0 &&
603 arch->vpic->pics[0].irq_base == vec) ||
604 (arch->vioapic->redirtbl[0].fields.vector == vec &&
605 arch->vioapic->redirtbl[0].fields.mask != 1))) {
606 ps->inject_pending = 1;
607 atomic_dec(&ps->pit_timer.pending);
608 ps->channels[0].count_load_time = ktime_get();
609 }
610 }
611}
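
As a quick sanity check of the arithmetic above, here is a minimal, self-contained userspace sketch (not part of the patch) of how pit_get_count() converts elapsed time into counter ticks. KVM_PIT_FREQ and the 96-bit muldiv64() split mirror the code above; the values in main() are made up for illustration.

#include <stdint.h>
#include <stdio.h>

#define PIT_FREQ     1193181ULL		/* i8254 input clock, Hz (KVM_PIT_FREQ) */
#define NSEC_PER_SEC 1000000000ULL

/* (a * b) / c with a 96-bit intermediate, mirroring muldiv64() above */
static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
	uint64_t lo = (a & 0xffffffffULL) * b;
	uint64_t hi = (a >> 32) * b + (lo >> 32);

	return ((hi / c) << 32) + (((hi % c) << 32) + (lo & 0xffffffffULL)) / c;
}

int main(void)
{
	uint64_t elapsed_ns = 3000000;		/* 3 ms since the count was loaded */
	uint64_t ticks = muldiv64(elapsed_ns, PIT_FREQ, NSEC_PER_SEC);
	uint32_t count = 0x10000;		/* reload value substituted for val == 0 */

	/* modes 0/1/4/5: the counter simply decrements from the load value */
	printf("ticks elapsed: %llu, counter now: %llu\n",
	       (unsigned long long)ticks,
	       (unsigned long long)((count - ticks) & 0xffff));
	return 0;
}

For 3 ms this prints 3579 elapsed ticks (1193181 Hz * 0.003 s), so the counter has dropped from 0x10000 to 61957, which is what the mode 0/1/4/5 branch of pit_get_count() would report.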
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 000000000000..db25c2a6c8c4
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,63 @@
1#ifndef __I8254_H
2#define __I8254_H
3
4#include "iodev.h"
5
6struct kvm_kpit_timer {
7 struct hrtimer timer;
8 int irq;
9 s64 period; /* unit: ns */
10 s64 scheduled;
11 ktime_t last_update;
12 atomic_t pending;
13};
14
15struct kvm_kpit_channel_state {
16 u32 count; /* can be 65536 */
17 u16 latched_count;
18 u8 count_latched;
19 u8 status_latched;
20 u8 status;
21 u8 read_state;
22 u8 write_state;
23 u8 write_latch;
24 u8 rw_mode;
25 u8 mode;
26 u8 bcd; /* not supported */
27 u8 gate; /* timer start */
28 ktime_t count_load_time;
29};
30
31struct kvm_kpit_state {
32 struct kvm_kpit_channel_state channels[3];
33 struct kvm_kpit_timer pit_timer;
34 u32 speaker_data_on;
35 struct mutex lock;
36 struct kvm_pit *pit;
37 bool inject_pending; /* whether to inject pending interrupts */
38 unsigned long last_injected_time;
39};
40
41struct kvm_pit {
42 unsigned long base_address;
43 struct kvm_io_device dev;
44 struct kvm_io_device speaker_dev;
45 struct kvm *kvm;
46 struct kvm_kpit_state pit_state;
47};
48
49#define KVM_PIT_BASE_ADDRESS 0x40
50#define KVM_SPEAKER_BASE_ADDRESS 0x61
51#define KVM_PIT_MEM_LENGTH 4
52#define KVM_PIT_FREQ 1193181
53#define KVM_MAX_PIT_INTR_INTERVAL (HZ / 100)
54#define KVM_PIT_CHANNEL_MASK 0x3
55
56void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
57void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
58void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
59struct kvm_pit *kvm_create_pit(struct kvm *kvm);
60void kvm_free_pit(struct kvm *kvm);
61void kvm_pit_reset(struct kvm_pit *pit);
62
63#endif
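
For scale, KVM_PIT_FREQ above is the 1193181 Hz i8254 input clock, so the 0x10000 reload value substituted for val == 0 corresponds to a period of roughly 54.9 ms. The standalone snippet below (hypothetical, not part of the patch) decodes a control word the same way pit_ioport_write() does for writes to port 0x43, i.e. KVM_PIT_BASE_ADDRESS + 3.

#include <stdio.h>

/* Decode an i8254 control word into the fields used by the emulation above. */
static void decode_control_word(unsigned char val)
{
	unsigned int channel = val >> 6;		/* 3 means read-back command */
	unsigned int access  = (val >> 4) & 0x3;	/* 0 means latch count */
	unsigned int mode    = (val >> 1) & 0x7;
	unsigned int bcd     = val & 0x1;

	if (mode > 5)			/* modes 6 and 7 alias to 2 and 3 */
		mode -= 4;

	printf("channel=%u access=%u mode=%u bcd=%u\n", channel, access, mode, bcd);
}

int main(void)
{
	/* 0x34: channel 0, lobyte/hibyte access, mode 2 (rate generator), binary */
	decode_control_word(0x34);
	return 0;
}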
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e5714759e97f..ce1f583459b1 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,22 @@
23#include <linux/kvm_host.h> 23#include <linux/kvm_host.h>
24 24
25#include "irq.h" 25#include "irq.h"
26#include "i8254.h"
27
28/*
29 * check if there are pending timer events
30 * to be processed.
31 */
32int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
33{
34 int ret;
35
36 ret = pit_has_pending_timer(vcpu);
37 ret |= apic_has_pending_timer(vcpu);
38
39 return ret;
40}
41EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
26 42
27/* 43/*
28 * check if there is pending interrupt without 44 * check if there is pending interrupt without
@@ -66,6 +82,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
66void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) 82void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
67{ 83{
68 kvm_inject_apic_timer_irqs(vcpu); 84 kvm_inject_apic_timer_irqs(vcpu);
85 kvm_inject_pit_timer_irqs(vcpu);
69 /* TODO: PIT, RTC etc. */ 86 /* TODO: PIT, RTC etc. */
70} 87}
71EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); 88EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +90,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
73void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) 90void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
74{ 91{
75 kvm_apic_timer_intr_post(vcpu, vec); 92 kvm_apic_timer_intr_post(vcpu, vec);
93 kvm_pit_timer_intr_post(vcpu, vec);
76 /* TODO: PIT, RTC etc. */ 94 /* TODO: PIT, RTC etc. */
77} 95}
78EXPORT_SYMBOL_GPL(kvm_timer_intr_post); 96EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index fa5ed5d59b5d..1802134b836f 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -85,4 +85,7 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
85void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 85void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
86void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); 86void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
87 87
88int pit_has_pending_timer(struct kvm_vcpu *vcpu);
89int apic_has_pending_timer(struct kvm_vcpu *vcpu);
90
88#endif 91#endif
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index ecdfe97e4635..65ef0fc2c036 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -39,6 +39,8 @@ struct vcpu_svm {
39 unsigned long host_db_regs[NUM_DB_REGS]; 39 unsigned long host_db_regs[NUM_DB_REGS];
40 unsigned long host_dr6; 40 unsigned long host_dr6;
41 unsigned long host_dr7; 41 unsigned long host_dr7;
42
43 u32 *msrpm;
42}; 44};
43 45
44#endif 46#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 68a6b1511934..57ac4e4c556a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -338,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
338 } else 338 } else
339 apic_clear_vector(vector, apic->regs + APIC_TMR); 339 apic_clear_vector(vector, apic->regs + APIC_TMR);
340 340
341 if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) 341 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
342 kvm_vcpu_kick(vcpu); 342 kvm_vcpu_kick(vcpu);
343 else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) { 343 else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
344 vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 344 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
345 if (waitqueue_active(&vcpu->wq)) 345 if (waitqueue_active(&vcpu->wq))
346 wake_up_interruptible(&vcpu->wq); 346 wake_up_interruptible(&vcpu->wq);
347 } 347 }
@@ -362,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
362 362
363 case APIC_DM_INIT: 363 case APIC_DM_INIT:
364 if (level) { 364 if (level) {
365 if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) 365 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
366 printk(KERN_DEBUG 366 printk(KERN_DEBUG
367 "INIT on a runnable vcpu %d\n", 367 "INIT on a runnable vcpu %d\n",
368 vcpu->vcpu_id); 368 vcpu->vcpu_id);
369 vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED; 369 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
370 kvm_vcpu_kick(vcpu); 370 kvm_vcpu_kick(vcpu);
371 } else { 371 } else {
372 printk(KERN_DEBUG 372 printk(KERN_DEBUG
@@ -379,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
379 case APIC_DM_STARTUP: 379 case APIC_DM_STARTUP:
380 printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", 380 printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
381 vcpu->vcpu_id, vector); 381 vcpu->vcpu_id, vector);
382 if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) { 382 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
383 vcpu->arch.sipi_vector = vector; 383 vcpu->arch.sipi_vector = vector;
384 vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED; 384 vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
385 if (waitqueue_active(&vcpu->wq)) 385 if (waitqueue_active(&vcpu->wq))
386 wake_up_interruptible(&vcpu->wq); 386 wake_up_interruptible(&vcpu->wq);
387 } 387 }
@@ -658,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
658 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" 658 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
659 PRIx64 ", " 659 PRIx64 ", "
660 "timer initial count 0x%x, period %lldns, " 660 "timer initial count 0x%x, period %lldns, "
661 "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__, 661 "expire @ 0x%016" PRIx64 ".\n", __func__,
662 APIC_BUS_CYCLE_NS, ktime_to_ns(now), 662 APIC_BUS_CYCLE_NS, ktime_to_ns(now),
663 apic_get_reg(apic, APIC_TMICT), 663 apic_get_reg(apic, APIC_TMICT),
664 apic->timer.period, 664 apic->timer.period,
@@ -691,7 +691,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
691 /* too common printing */ 691 /* too common printing */
692 if (offset != APIC_EOI) 692 if (offset != APIC_EOI)
693 apic_debug("%s: offset 0x%x with length 0x%x, and value is " 693 apic_debug("%s: offset 0x%x with length 0x%x, and value is "
694 "0x%x\n", __FUNCTION__, offset, len, val); 694 "0x%x\n", __func__, offset, len, val);
695 695
696 offset &= 0xff0; 696 offset &= 0xff0;
697 697
@@ -822,6 +822,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
822 apic_set_tpr(apic, ((cr8 & 0x0f) << 4) 822 apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
823 | (apic_get_reg(apic, APIC_TASKPRI) & 4)); 823 | (apic_get_reg(apic, APIC_TASKPRI) & 4));
824} 824}
825EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr);
825 826
826u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) 827u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
827{ 828{
@@ -869,7 +870,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
869 struct kvm_lapic *apic; 870 struct kvm_lapic *apic;
870 int i; 871 int i;
871 872
872 apic_debug("%s\n", __FUNCTION__); 873 apic_debug("%s\n", __func__);
873 874
874 ASSERT(vcpu); 875 ASSERT(vcpu);
875 apic = vcpu->arch.apic; 876 apic = vcpu->arch.apic;
@@ -907,7 +908,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
907 apic_update_ppr(apic); 908 apic_update_ppr(apic);
908 909
909 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" 910 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
910 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__, 911 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
911 vcpu, kvm_apic_id(apic), 912 vcpu, kvm_apic_id(apic),
912 vcpu->arch.apic_base, apic->base_address); 913 vcpu->arch.apic_base, apic->base_address);
913} 914}
@@ -940,7 +941,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
940 941
941 atomic_inc(&apic->timer.pending); 942 atomic_inc(&apic->timer.pending);
942 if (waitqueue_active(q)) { 943 if (waitqueue_active(q)) {
943 apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 944 apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
944 wake_up_interruptible(q); 945 wake_up_interruptible(q);
945 } 946 }
946 if (apic_lvtt_period(apic)) { 947 if (apic_lvtt_period(apic)) {
@@ -952,6 +953,16 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
952 return result; 953 return result;
953} 954}
954 955
956int apic_has_pending_timer(struct kvm_vcpu *vcpu)
957{
958 struct kvm_lapic *lapic = vcpu->arch.apic;
959
960 if (lapic)
961 return atomic_read(&lapic->timer.pending);
962
963 return 0;
964}
965
955static int __inject_apic_timer_irq(struct kvm_lapic *apic) 966static int __inject_apic_timer_irq(struct kvm_lapic *apic)
956{ 967{
957 int vector; 968 int vector;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e55af12e11b7..2ad6f5481671 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -27,11 +27,22 @@
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/swap.h> 29#include <linux/swap.h>
30#include <linux/hugetlb.h>
31#include <linux/compiler.h>
30 32
31#include <asm/page.h> 33#include <asm/page.h>
32#include <asm/cmpxchg.h> 34#include <asm/cmpxchg.h>
33#include <asm/io.h> 35#include <asm/io.h>
34 36
37/*
38 * When set to true, this variable enables Two-Dimensional Paging, where
39 * the hardware walks two page tables:
40 * 1. guest-virtual to guest-physical
41 * 2. while doing 1., guest-physical to host-physical
42 * If the hardware supports this, shadow paging is not needed.
43 */
44bool tdp_enabled = false;
45
35#undef MMU_DEBUG 46#undef MMU_DEBUG
36 47
37#undef AUDIT 48#undef AUDIT
@@ -101,8 +112,6 @@ static int dbg = 1;
101#define PT_FIRST_AVAIL_BITS_SHIFT 9 112#define PT_FIRST_AVAIL_BITS_SHIFT 9
102#define PT64_SECOND_AVAIL_BITS_SHIFT 52 113#define PT64_SECOND_AVAIL_BITS_SHIFT 52
103 114
104#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
105
106#define VALID_PAGE(x) ((x) != INVALID_PAGE) 115#define VALID_PAGE(x) ((x) != INVALID_PAGE)
107 116
108#define PT64_LEVEL_BITS 9 117#define PT64_LEVEL_BITS 9
@@ -159,6 +168,13 @@ static int dbg = 1;
159#define ACC_USER_MASK PT_USER_MASK 168#define ACC_USER_MASK PT_USER_MASK
160#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) 169#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
161 170
171struct kvm_pv_mmu_op_buffer {
172 void *ptr;
173 unsigned len;
174 unsigned processed;
175 char buf[512] __aligned(sizeof(long));
176};
177
162struct kvm_rmap_desc { 178struct kvm_rmap_desc {
163 u64 *shadow_ptes[RMAP_EXT]; 179 u64 *shadow_ptes[RMAP_EXT];
164 struct kvm_rmap_desc *more; 180 struct kvm_rmap_desc *more;
@@ -200,11 +216,15 @@ static int is_present_pte(unsigned long pte)
200 216
201static int is_shadow_present_pte(u64 pte) 217static int is_shadow_present_pte(u64 pte)
202{ 218{
203 pte &= ~PT_SHADOW_IO_MARK;
204 return pte != shadow_trap_nonpresent_pte 219 return pte != shadow_trap_nonpresent_pte
205 && pte != shadow_notrap_nonpresent_pte; 220 && pte != shadow_notrap_nonpresent_pte;
206} 221}
207 222
223static int is_large_pte(u64 pte)
224{
225 return pte & PT_PAGE_SIZE_MASK;
226}
227
208static int is_writeble_pte(unsigned long pte) 228static int is_writeble_pte(unsigned long pte)
209{ 229{
210 return pte & PT_WRITABLE_MASK; 230 return pte & PT_WRITABLE_MASK;
@@ -215,14 +235,14 @@ static int is_dirty_pte(unsigned long pte)
215 return pte & PT_DIRTY_MASK; 235 return pte & PT_DIRTY_MASK;
216} 236}
217 237
218static int is_io_pte(unsigned long pte) 238static int is_rmap_pte(u64 pte)
219{ 239{
220 return pte & PT_SHADOW_IO_MARK; 240 return is_shadow_present_pte(pte);
221} 241}
222 242
223static int is_rmap_pte(u64 pte) 243static pfn_t spte_to_pfn(u64 pte)
224{ 244{
225 return is_shadow_present_pte(pte); 245 return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
226} 246}
227 247
228static gfn_t pse36_gfn_delta(u32 gpte) 248static gfn_t pse36_gfn_delta(u32 gpte)
@@ -349,16 +369,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
349} 369}
350 370
351/* 371/*
372 * Return the pointer to the largepage write count for a given
373 * gfn, handling slots that are not large page aligned.
374 */
375static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
376{
377 unsigned long idx;
378
379 idx = (gfn / KVM_PAGES_PER_HPAGE) -
380 (slot->base_gfn / KVM_PAGES_PER_HPAGE);
381 return &slot->lpage_info[idx].write_count;
382}
383
384static void account_shadowed(struct kvm *kvm, gfn_t gfn)
385{
386 int *write_count;
387
388 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
389 *write_count += 1;
390 WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
391}
392
393static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
394{
395 int *write_count;
396
397 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
398 *write_count -= 1;
399 WARN_ON(*write_count < 0);
400}
401
402static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
403{
404 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
405 int *largepage_idx;
406
407 if (slot) {
408 largepage_idx = slot_largepage_idx(gfn, slot);
409 return *largepage_idx;
410 }
411
412 return 1;
413}
414
415static int host_largepage_backed(struct kvm *kvm, gfn_t gfn)
416{
417 struct vm_area_struct *vma;
418 unsigned long addr;
419
420 addr = gfn_to_hva(kvm, gfn);
421 if (kvm_is_error_hva(addr))
422 return 0;
423
424 vma = find_vma(current->mm, addr);
425 if (vma && is_vm_hugetlb_page(vma))
426 return 1;
427
428 return 0;
429}
430
431static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
432{
433 struct kvm_memory_slot *slot;
434
435 if (has_wrprotected_page(vcpu->kvm, large_gfn))
436 return 0;
437
438 if (!host_largepage_backed(vcpu->kvm, large_gfn))
439 return 0;
440
441 slot = gfn_to_memslot(vcpu->kvm, large_gfn);
442 if (slot && slot->dirty_bitmap)
443 return 0;
444
445 return 1;
446}
447
448/*
352 * Take gfn and return the reverse mapping to it. 449 * Take gfn and return the reverse mapping to it.
353 * Note: gfn must be unaliased before this function gets called 450 * Note: gfn must be unaliased before this function gets called
354 */ 451 */
355 452
356static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn) 453static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
357{ 454{
358 struct kvm_memory_slot *slot; 455 struct kvm_memory_slot *slot;
456 unsigned long idx;
359 457
360 slot = gfn_to_memslot(kvm, gfn); 458 slot = gfn_to_memslot(kvm, gfn);
361 return &slot->rmap[gfn - slot->base_gfn]; 459 if (!lpage)
460 return &slot->rmap[gfn - slot->base_gfn];
461
462 idx = (gfn / KVM_PAGES_PER_HPAGE) -
463 (slot->base_gfn / KVM_PAGES_PER_HPAGE);
464
465 return &slot->lpage_info[idx].rmap_pde;
362} 466}
363 467
364/* 468/*
@@ -370,7 +474,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
370 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc 474 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
371 * containing more mappings. 475 * containing more mappings.
372 */ 476 */
373static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) 477static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
374{ 478{
375 struct kvm_mmu_page *sp; 479 struct kvm_mmu_page *sp;
376 struct kvm_rmap_desc *desc; 480 struct kvm_rmap_desc *desc;
@@ -382,7 +486,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
382 gfn = unalias_gfn(vcpu->kvm, gfn); 486 gfn = unalias_gfn(vcpu->kvm, gfn);
383 sp = page_header(__pa(spte)); 487 sp = page_header(__pa(spte));
384 sp->gfns[spte - sp->spt] = gfn; 488 sp->gfns[spte - sp->spt] = gfn;
385 rmapp = gfn_to_rmap(vcpu->kvm, gfn); 489 rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
386 if (!*rmapp) { 490 if (!*rmapp) {
387 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); 491 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
388 *rmapp = (unsigned long)spte; 492 *rmapp = (unsigned long)spte;
@@ -435,20 +539,21 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
435 struct kvm_rmap_desc *desc; 539 struct kvm_rmap_desc *desc;
436 struct kvm_rmap_desc *prev_desc; 540 struct kvm_rmap_desc *prev_desc;
437 struct kvm_mmu_page *sp; 541 struct kvm_mmu_page *sp;
438 struct page *page; 542 pfn_t pfn;
439 unsigned long *rmapp; 543 unsigned long *rmapp;
440 int i; 544 int i;
441 545
442 if (!is_rmap_pte(*spte)) 546 if (!is_rmap_pte(*spte))
443 return; 547 return;
444 sp = page_header(__pa(spte)); 548 sp = page_header(__pa(spte));
445 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); 549 pfn = spte_to_pfn(*spte);
446 mark_page_accessed(page); 550 if (*spte & PT_ACCESSED_MASK)
551 kvm_set_pfn_accessed(pfn);
447 if (is_writeble_pte(*spte)) 552 if (is_writeble_pte(*spte))
448 kvm_release_page_dirty(page); 553 kvm_release_pfn_dirty(pfn);
449 else 554 else
450 kvm_release_page_clean(page); 555 kvm_release_pfn_clean(pfn);
451 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]); 556 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
452 if (!*rmapp) { 557 if (!*rmapp) {
453 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 558 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
454 BUG(); 559 BUG();
@@ -514,7 +619,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
514 int write_protected = 0; 619 int write_protected = 0;
515 620
516 gfn = unalias_gfn(kvm, gfn); 621 gfn = unalias_gfn(kvm, gfn);
517 rmapp = gfn_to_rmap(kvm, gfn); 622 rmapp = gfn_to_rmap(kvm, gfn, 0);
518 623
519 spte = rmap_next(kvm, rmapp, NULL); 624 spte = rmap_next(kvm, rmapp, NULL);
520 while (spte) { 625 while (spte) {
@@ -527,8 +632,35 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
527 } 632 }
528 spte = rmap_next(kvm, rmapp, spte); 633 spte = rmap_next(kvm, rmapp, spte);
529 } 634 }
635 if (write_protected) {
636 pfn_t pfn;
637
638 spte = rmap_next(kvm, rmapp, NULL);
639 pfn = spte_to_pfn(*spte);
640 kvm_set_pfn_dirty(pfn);
641 }
642
643 /* check for huge page mappings */
644 rmapp = gfn_to_rmap(kvm, gfn, 1);
645 spte = rmap_next(kvm, rmapp, NULL);
646 while (spte) {
647 BUG_ON(!spte);
648 BUG_ON(!(*spte & PT_PRESENT_MASK));
649 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
650 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
651 if (is_writeble_pte(*spte)) {
652 rmap_remove(kvm, spte);
653 --kvm->stat.lpages;
654 set_shadow_pte(spte, shadow_trap_nonpresent_pte);
655 write_protected = 1;
656 }
657 spte = rmap_next(kvm, rmapp, spte);
658 }
659
530 if (write_protected) 660 if (write_protected)
531 kvm_flush_remote_tlbs(kvm); 661 kvm_flush_remote_tlbs(kvm);
662
663 account_shadowed(kvm, gfn);
532} 664}
533 665
534#ifdef MMU_DEBUG 666#ifdef MMU_DEBUG
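
To make the write_count bookkeeping concrete: account_shadowed(), called at the end of rmap_write_protect() above, bumps a per-huge-page counter so that has_wrprotected_page() can later refuse a large mapping that would cover a write-protected guest page table. The standalone sketch below is not part of the patch; the gfn values are made up, and it assumes 2 MB huge pages over 4 KB base pages, i.e. KVM_PAGES_PER_HPAGE == 512.

#include <stdio.h>

#define PAGES_PER_HPAGE 512UL	/* assumed value of KVM_PAGES_PER_HPAGE */

int main(void)
{
	unsigned long base_gfn = 0x100000;	/* hypothetical memslot start */
	unsigned long gfn      = 0x100a37;	/* guest frame inside that slot */

	/* same arithmetic as slot_largepage_idx(): all 512 small frames of a
	 * huge page share one lpage_info[].write_count entry */
	unsigned long idx = (gfn / PAGES_PER_HPAGE) - (base_gfn / PAGES_PER_HPAGE);

	printf("gfn 0x%lx uses huge-page index %lu\n", gfn, idx);
	return 0;
}

Here idx comes out as 5, so write-protecting any of the 512 frames in that 2 MB region increments the same counter, and is_largepage_backed() will then decline to map the region with a single large spte.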
@@ -538,8 +670,8 @@ static int is_empty_shadow_page(u64 *spt)
538 u64 *end; 670 u64 *end;
539 671
540 for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) 672 for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
541 if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) { 673 if (*pos != shadow_trap_nonpresent_pte) {
542 printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, 674 printk(KERN_ERR "%s: %p %llx\n", __func__,
543 pos, *pos); 675 pos, *pos);
544 return 0; 676 return 0;
545 } 677 }
@@ -559,7 +691,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
559 691
560static unsigned kvm_page_table_hashfn(gfn_t gfn) 692static unsigned kvm_page_table_hashfn(gfn_t gfn)
561{ 693{
562 return gfn; 694 return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1);
563} 695}
564 696
565static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, 697static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
@@ -662,13 +794,14 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
662 struct kvm_mmu_page *sp; 794 struct kvm_mmu_page *sp;
663 struct hlist_node *node; 795 struct hlist_node *node;
664 796
665 pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); 797 pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
666 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; 798 index = kvm_page_table_hashfn(gfn);
667 bucket = &kvm->arch.mmu_page_hash[index]; 799 bucket = &kvm->arch.mmu_page_hash[index];
668 hlist_for_each_entry(sp, node, bucket, hash_link) 800 hlist_for_each_entry(sp, node, bucket, hash_link)
669 if (sp->gfn == gfn && !sp->role.metaphysical) { 801 if (sp->gfn == gfn && !sp->role.metaphysical
802 && !sp->role.invalid) {
670 pgprintk("%s: found role %x\n", 803 pgprintk("%s: found role %x\n",
671 __FUNCTION__, sp->role.word); 804 __func__, sp->role.word);
672 return sp; 805 return sp;
673 } 806 }
674 return NULL; 807 return NULL;
@@ -699,27 +832,27 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
699 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; 832 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
700 role.quadrant = quadrant; 833 role.quadrant = quadrant;
701 } 834 }
702 pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__, 835 pgprintk("%s: looking gfn %lx role %x\n", __func__,
703 gfn, role.word); 836 gfn, role.word);
704 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; 837 index = kvm_page_table_hashfn(gfn);
705 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 838 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
706 hlist_for_each_entry(sp, node, bucket, hash_link) 839 hlist_for_each_entry(sp, node, bucket, hash_link)
707 if (sp->gfn == gfn && sp->role.word == role.word) { 840 if (sp->gfn == gfn && sp->role.word == role.word) {
708 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 841 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
709 pgprintk("%s: found\n", __FUNCTION__); 842 pgprintk("%s: found\n", __func__);
710 return sp; 843 return sp;
711 } 844 }
712 ++vcpu->kvm->stat.mmu_cache_miss; 845 ++vcpu->kvm->stat.mmu_cache_miss;
713 sp = kvm_mmu_alloc_page(vcpu, parent_pte); 846 sp = kvm_mmu_alloc_page(vcpu, parent_pte);
714 if (!sp) 847 if (!sp)
715 return sp; 848 return sp;
716 pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word); 849 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
717 sp->gfn = gfn; 850 sp->gfn = gfn;
718 sp->role = role; 851 sp->role = role;
719 hlist_add_head(&sp->hash_link, bucket); 852 hlist_add_head(&sp->hash_link, bucket);
720 vcpu->arch.mmu.prefetch_page(vcpu, sp);
721 if (!metaphysical) 853 if (!metaphysical)
722 rmap_write_protect(vcpu->kvm, gfn); 854 rmap_write_protect(vcpu->kvm, gfn);
855 vcpu->arch.mmu.prefetch_page(vcpu, sp);
723 return sp; 856 return sp;
724} 857}
725 858
@@ -745,11 +878,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
745 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { 878 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
746 ent = pt[i]; 879 ent = pt[i];
747 880
881 if (is_shadow_present_pte(ent)) {
882 if (!is_large_pte(ent)) {
883 ent &= PT64_BASE_ADDR_MASK;
884 mmu_page_remove_parent_pte(page_header(ent),
885 &pt[i]);
886 } else {
887 --kvm->stat.lpages;
888 rmap_remove(kvm, &pt[i]);
889 }
890 }
748 pt[i] = shadow_trap_nonpresent_pte; 891 pt[i] = shadow_trap_nonpresent_pte;
749 if (!is_shadow_present_pte(ent))
750 continue;
751 ent &= PT64_BASE_ADDR_MASK;
752 mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
753 } 892 }
754 kvm_flush_remote_tlbs(kvm); 893 kvm_flush_remote_tlbs(kvm);
755} 894}
@@ -789,10 +928,15 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
789 } 928 }
790 kvm_mmu_page_unlink_children(kvm, sp); 929 kvm_mmu_page_unlink_children(kvm, sp);
791 if (!sp->root_count) { 930 if (!sp->root_count) {
931 if (!sp->role.metaphysical)
932 unaccount_shadowed(kvm, sp->gfn);
792 hlist_del(&sp->hash_link); 933 hlist_del(&sp->hash_link);
793 kvm_mmu_free_page(kvm, sp); 934 kvm_mmu_free_page(kvm, sp);
794 } else 935 } else {
795 list_move(&sp->link, &kvm->arch.active_mmu_pages); 936 list_move(&sp->link, &kvm->arch.active_mmu_pages);
937 sp->role.invalid = 1;
938 kvm_reload_remote_mmus(kvm);
939 }
796 kvm_mmu_reset_last_pte_updated(kvm); 940 kvm_mmu_reset_last_pte_updated(kvm);
797} 941}
798 942
@@ -838,13 +982,13 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
838 struct hlist_node *node, *n; 982 struct hlist_node *node, *n;
839 int r; 983 int r;
840 984
841 pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); 985 pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
842 r = 0; 986 r = 0;
843 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; 987 index = kvm_page_table_hashfn(gfn);
844 bucket = &kvm->arch.mmu_page_hash[index]; 988 bucket = &kvm->arch.mmu_page_hash[index];
845 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) 989 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
846 if (sp->gfn == gfn && !sp->role.metaphysical) { 990 if (sp->gfn == gfn && !sp->role.metaphysical) {
847 pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn, 991 pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
848 sp->role.word); 992 sp->role.word);
849 kvm_mmu_zap_page(kvm, sp); 993 kvm_mmu_zap_page(kvm, sp);
850 r = 1; 994 r = 1;
@@ -857,7 +1001,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
857 struct kvm_mmu_page *sp; 1001 struct kvm_mmu_page *sp;
858 1002
859 while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { 1003 while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
860 pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word); 1004 pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
861 kvm_mmu_zap_page(kvm, sp); 1005 kvm_mmu_zap_page(kvm, sp);
862 } 1006 }
863} 1007}
@@ -889,26 +1033,39 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
889static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1033static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
890 unsigned pt_access, unsigned pte_access, 1034 unsigned pt_access, unsigned pte_access,
891 int user_fault, int write_fault, int dirty, 1035 int user_fault, int write_fault, int dirty,
892 int *ptwrite, gfn_t gfn, struct page *page) 1036 int *ptwrite, int largepage, gfn_t gfn,
1037 pfn_t pfn, bool speculative)
893{ 1038{
894 u64 spte; 1039 u64 spte;
895 int was_rmapped = 0; 1040 int was_rmapped = 0;
896 int was_writeble = is_writeble_pte(*shadow_pte); 1041 int was_writeble = is_writeble_pte(*shadow_pte);
897 hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
898 1042
899 pgprintk("%s: spte %llx access %x write_fault %d" 1043 pgprintk("%s: spte %llx access %x write_fault %d"
900 " user_fault %d gfn %lx\n", 1044 " user_fault %d gfn %lx\n",
901 __FUNCTION__, *shadow_pte, pt_access, 1045 __func__, *shadow_pte, pt_access,
902 write_fault, user_fault, gfn); 1046 write_fault, user_fault, gfn);
903 1047
904 if (is_rmap_pte(*shadow_pte)) { 1048 if (is_rmap_pte(*shadow_pte)) {
905 if (host_pfn != page_to_pfn(page)) { 1049 /*
1050 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
1051 * the parent of the now unreachable PTE.
1052 */
1053 if (largepage && !is_large_pte(*shadow_pte)) {
1054 struct kvm_mmu_page *child;
1055 u64 pte = *shadow_pte;
1056
1057 child = page_header(pte & PT64_BASE_ADDR_MASK);
1058 mmu_page_remove_parent_pte(child, shadow_pte);
1059 } else if (pfn != spte_to_pfn(*shadow_pte)) {
906 pgprintk("hfn old %lx new %lx\n", 1060 pgprintk("hfn old %lx new %lx\n",
907 host_pfn, page_to_pfn(page)); 1061 spte_to_pfn(*shadow_pte), pfn);
908 rmap_remove(vcpu->kvm, shadow_pte); 1062 rmap_remove(vcpu->kvm, shadow_pte);
1063 } else {
1064 if (largepage)
1065 was_rmapped = is_large_pte(*shadow_pte);
1066 else
1067 was_rmapped = 1;
909 } 1068 }
910 else
911 was_rmapped = 1;
912 } 1069 }
913 1070
914 /* 1071 /*
@@ -917,6 +1074,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
917 * demand paging). 1074 * demand paging).
918 */ 1075 */
919 spte = PT_PRESENT_MASK | PT_DIRTY_MASK; 1076 spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
1077 if (!speculative)
1078 pte_access |= PT_ACCESSED_MASK;
920 if (!dirty) 1079 if (!dirty)
921 pte_access &= ~ACC_WRITE_MASK; 1080 pte_access &= ~ACC_WRITE_MASK;
922 if (!(pte_access & ACC_EXEC_MASK)) 1081 if (!(pte_access & ACC_EXEC_MASK))
@@ -925,15 +1084,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
925 spte |= PT_PRESENT_MASK; 1084 spte |= PT_PRESENT_MASK;
926 if (pte_access & ACC_USER_MASK) 1085 if (pte_access & ACC_USER_MASK)
927 spte |= PT_USER_MASK; 1086 spte |= PT_USER_MASK;
1087 if (largepage)
1088 spte |= PT_PAGE_SIZE_MASK;
928 1089
929 if (is_error_page(page)) { 1090 spte |= (u64)pfn << PAGE_SHIFT;
930 set_shadow_pte(shadow_pte,
931 shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
932 kvm_release_page_clean(page);
933 return;
934 }
935
936 spte |= page_to_phys(page);
937 1091
938 if ((pte_access & ACC_WRITE_MASK) 1092 if ((pte_access & ACC_WRITE_MASK)
939 || (write_fault && !is_write_protection(vcpu) && !user_fault)) { 1093 || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
@@ -946,9 +1100,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
946 } 1100 }
947 1101
948 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); 1102 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
949 if (shadow) { 1103 if (shadow ||
1104 (largepage && has_wrprotected_page(vcpu->kvm, gfn))) {
950 pgprintk("%s: found shadow page for %lx, marking ro\n", 1105 pgprintk("%s: found shadow page for %lx, marking ro\n",
951 __FUNCTION__, gfn); 1106 __func__, gfn);
952 pte_access &= ~ACC_WRITE_MASK; 1107 pte_access &= ~ACC_WRITE_MASK;
953 if (is_writeble_pte(spte)) { 1108 if (is_writeble_pte(spte)) {
954 spte &= ~PT_WRITABLE_MASK; 1109 spte &= ~PT_WRITABLE_MASK;
@@ -964,18 +1119,25 @@ unshadowed:
964 if (pte_access & ACC_WRITE_MASK) 1119 if (pte_access & ACC_WRITE_MASK)
965 mark_page_dirty(vcpu->kvm, gfn); 1120 mark_page_dirty(vcpu->kvm, gfn);
966 1121
967 pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte); 1122 pgprintk("%s: setting spte %llx\n", __func__, spte);
1123 pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n",
1124 (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB",
1125 (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte);
968 set_shadow_pte(shadow_pte, spte); 1126 set_shadow_pte(shadow_pte, spte);
1127 if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK)
1128 && (spte & PT_PRESENT_MASK))
1129 ++vcpu->kvm->stat.lpages;
1130
969 page_header_update_slot(vcpu->kvm, shadow_pte, gfn); 1131 page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
970 if (!was_rmapped) { 1132 if (!was_rmapped) {
971 rmap_add(vcpu, shadow_pte, gfn); 1133 rmap_add(vcpu, shadow_pte, gfn, largepage);
972 if (!is_rmap_pte(*shadow_pte)) 1134 if (!is_rmap_pte(*shadow_pte))
973 kvm_release_page_clean(page); 1135 kvm_release_pfn_clean(pfn);
974 } else { 1136 } else {
975 if (was_writeble) 1137 if (was_writeble)
976 kvm_release_page_dirty(page); 1138 kvm_release_pfn_dirty(pfn);
977 else 1139 else
978 kvm_release_page_clean(page); 1140 kvm_release_pfn_clean(pfn);
979 } 1141 }
980 if (!ptwrite || !*ptwrite) 1142 if (!ptwrite || !*ptwrite)
981 vcpu->arch.last_pte_updated = shadow_pte; 1143 vcpu->arch.last_pte_updated = shadow_pte;
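A minimal user-space model of the spte assembly done in mmu_set_spte() above; the bit positions below are invented stand-ins for PT_PRESENT_MASK, PT_DIRTY_MASK, PT_ACCESSED_MASK, PT_WRITABLE_MASK and PT_PAGE_SIZE_MASK (the real values live in the x86 paging headers), and the sketch only shows how the pfn plus the largepage/speculative flags combine into one 64-bit entry:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT     12
#define SPTE_PRESENT   (1ULL << 0)   /* stand-in for PT_PRESENT_MASK */
#define SPTE_WRITABLE  (1ULL << 1)   /* stand-in for PT_WRITABLE_MASK */
#define SPTE_ACCESSED  (1ULL << 5)   /* stand-in for PT_ACCESSED_MASK */
#define SPTE_DIRTY     (1ULL << 6)   /* stand-in for PT_DIRTY_MASK */
#define SPTE_LARGE     (1ULL << 7)   /* stand-in for PT_PAGE_SIZE_MASK */

static uint64_t build_spte(uint64_t pfn, int writable, int largepage,
                           int speculative)
{
        uint64_t spte = SPTE_PRESENT | SPTE_DIRTY;

        if (!speculative)              /* only real faults mark the entry accessed */
                spte |= SPTE_ACCESSED;
        if (writable)
                spte |= SPTE_WRITABLE;
        if (largepage)                 /* 2MB mapping: set the page-size bit */
                spte |= SPTE_LARGE;
        return spte | (pfn << PAGE_SHIFT);
}

int main(void)
{
        printf("4kB spte: %#llx\n",
               (unsigned long long)build_spte(0x1234, 1, 0, 0));
        printf("2MB spte: %#llx\n",
               (unsigned long long)build_spte(0x1200, 1, 1, 1));
        return 0;
}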
@@ -985,10 +1147,10 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
985{ 1147{
986} 1148}
987 1149
988static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, 1150static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
989 gfn_t gfn, struct page *page) 1151 int largepage, gfn_t gfn, pfn_t pfn,
1152 int level)
990{ 1153{
991 int level = PT32E_ROOT_LEVEL;
992 hpa_t table_addr = vcpu->arch.mmu.root_hpa; 1154 hpa_t table_addr = vcpu->arch.mmu.root_hpa;
993 int pt_write = 0; 1155 int pt_write = 0;
994 1156
@@ -1001,8 +1163,14 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
1001 1163
1002 if (level == 1) { 1164 if (level == 1) {
1003 mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, 1165 mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
1004 0, write, 1, &pt_write, gfn, page); 1166 0, write, 1, &pt_write, 0, gfn, pfn, false);
1005 return pt_write || is_io_pte(table[index]); 1167 return pt_write;
1168 }
1169
1170 if (largepage && level == 2) {
1171 mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
1172 0, write, 1, &pt_write, 1, gfn, pfn, false);
1173 return pt_write;
1006 } 1174 }
1007 1175
1008 if (table[index] == shadow_trap_nonpresent_pte) { 1176 if (table[index] == shadow_trap_nonpresent_pte) {
@@ -1016,7 +1184,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
1016 1, ACC_ALL, &table[index]); 1184 1, ACC_ALL, &table[index]);
1017 if (!new_table) { 1185 if (!new_table) {
1018 pgprintk("nonpaging_map: ENOMEM\n"); 1186 pgprintk("nonpaging_map: ENOMEM\n");
1019 kvm_release_page_clean(page); 1187 kvm_release_pfn_clean(pfn);
1020 return -ENOMEM; 1188 return -ENOMEM;
1021 } 1189 }
1022 1190
@@ -1030,21 +1198,30 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
1030static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) 1198static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1031{ 1199{
1032 int r; 1200 int r;
1033 1201 int largepage = 0;
1034 struct page *page; 1202 pfn_t pfn;
1035
1036 down_read(&vcpu->kvm->slots_lock);
1037 1203
1038 down_read(&current->mm->mmap_sem); 1204 down_read(&current->mm->mmap_sem);
1039 page = gfn_to_page(vcpu->kvm, gfn); 1205 if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
1206 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1207 largepage = 1;
1208 }
1209
1210 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1040 up_read(&current->mm->mmap_sem); 1211 up_read(&current->mm->mmap_sem);
1041 1212
1213 /* mmio */
1214 if (is_error_pfn(pfn)) {
1215 kvm_release_pfn_clean(pfn);
1216 return 1;
1217 }
1218
1042 spin_lock(&vcpu->kvm->mmu_lock); 1219 spin_lock(&vcpu->kvm->mmu_lock);
1043 kvm_mmu_free_some_pages(vcpu); 1220 kvm_mmu_free_some_pages(vcpu);
1044 r = __nonpaging_map(vcpu, v, write, gfn, page); 1221 r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
1222 PT32E_ROOT_LEVEL);
1045 spin_unlock(&vcpu->kvm->mmu_lock); 1223 spin_unlock(&vcpu->kvm->mmu_lock);
1046 1224
1047 up_read(&vcpu->kvm->slots_lock);
1048 1225
1049 return r; 1226 return r;
1050} 1227}
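The rounding gfn & ~(KVM_PAGES_PER_HPAGE-1) used in nonpaging_map() above just drops the low bits of the guest frame number so every page in the 2 MB region shares one mapping. A short stand-alone check of that arithmetic, assuming 4 kB base pages and 2 MB huge pages so KVM_PAGES_PER_HPAGE works out to 512:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT          12
#define HPAGE_SHIFT         21                                    /* 2 MB huge page */
#define KVM_PAGES_PER_HPAGE (1ULL << (HPAGE_SHIFT - PAGE_SHIFT))  /* 512 */

int main(void)
{
        uint64_t gfn = 0x12345;                     /* arbitrary guest frame */
        uint64_t large_gfn = gfn & ~(KVM_PAGES_PER_HPAGE - 1);

        /* 0x12345 rounds down to 0x12200, the first frame of its 2 MB region */
        printf("gfn %#llx -> large_gfn %#llx (%llu pages into the region)\n",
               (unsigned long long)gfn, (unsigned long long)large_gfn,
               (unsigned long long)(gfn - large_gfn));
        return 0;
}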
@@ -1073,6 +1250,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1073 1250
1074 sp = page_header(root); 1251 sp = page_header(root);
1075 --sp->root_count; 1252 --sp->root_count;
1253 if (!sp->root_count && sp->role.invalid)
1254 kvm_mmu_zap_page(vcpu->kvm, sp);
1076 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 1255 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1077 spin_unlock(&vcpu->kvm->mmu_lock); 1256 spin_unlock(&vcpu->kvm->mmu_lock);
1078 return; 1257 return;
@@ -1085,6 +1264,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1085 root &= PT64_BASE_ADDR_MASK; 1264 root &= PT64_BASE_ADDR_MASK;
1086 sp = page_header(root); 1265 sp = page_header(root);
1087 --sp->root_count; 1266 --sp->root_count;
1267 if (!sp->root_count && sp->role.invalid)
1268 kvm_mmu_zap_page(vcpu->kvm, sp);
1088 } 1269 }
1089 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; 1270 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
1090 } 1271 }
@@ -1097,6 +1278,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1097 int i; 1278 int i;
1098 gfn_t root_gfn; 1279 gfn_t root_gfn;
1099 struct kvm_mmu_page *sp; 1280 struct kvm_mmu_page *sp;
1281 int metaphysical = 0;
1100 1282
1101 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; 1283 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
1102 1284
@@ -1105,14 +1287,20 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1105 hpa_t root = vcpu->arch.mmu.root_hpa; 1287 hpa_t root = vcpu->arch.mmu.root_hpa;
1106 1288
1107 ASSERT(!VALID_PAGE(root)); 1289 ASSERT(!VALID_PAGE(root));
1290 if (tdp_enabled)
1291 metaphysical = 1;
1108 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 1292 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
1109 PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); 1293 PT64_ROOT_LEVEL, metaphysical,
1294 ACC_ALL, NULL);
1110 root = __pa(sp->spt); 1295 root = __pa(sp->spt);
1111 ++sp->root_count; 1296 ++sp->root_count;
1112 vcpu->arch.mmu.root_hpa = root; 1297 vcpu->arch.mmu.root_hpa = root;
1113 return; 1298 return;
1114 } 1299 }
1115#endif 1300#endif
1301 metaphysical = !is_paging(vcpu);
1302 if (tdp_enabled)
1303 metaphysical = 1;
1116 for (i = 0; i < 4; ++i) { 1304 for (i = 0; i < 4; ++i) {
1117 hpa_t root = vcpu->arch.mmu.pae_root[i]; 1305 hpa_t root = vcpu->arch.mmu.pae_root[i];
1118 1306
@@ -1126,7 +1314,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1126 } else if (vcpu->arch.mmu.root_level == 0) 1314 } else if (vcpu->arch.mmu.root_level == 0)
1127 root_gfn = 0; 1315 root_gfn = 0;
1128 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 1316 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
1129 PT32_ROOT_LEVEL, !is_paging(vcpu), 1317 PT32_ROOT_LEVEL, metaphysical,
1130 ACC_ALL, NULL); 1318 ACC_ALL, NULL);
1131 root = __pa(sp->spt); 1319 root = __pa(sp->spt);
1132 ++sp->root_count; 1320 ++sp->root_count;
@@ -1146,7 +1334,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
1146 gfn_t gfn; 1334 gfn_t gfn;
1147 int r; 1335 int r;
1148 1336
1149 pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code); 1337 pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
1150 r = mmu_topup_memory_caches(vcpu); 1338 r = mmu_topup_memory_caches(vcpu);
1151 if (r) 1339 if (r)
1152 return r; 1340 return r;
@@ -1160,6 +1348,41 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
1160 error_code & PFERR_WRITE_MASK, gfn); 1348 error_code & PFERR_WRITE_MASK, gfn);
1161} 1349}
1162 1350
1351static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
1352 u32 error_code)
1353{
1354 pfn_t pfn;
1355 int r;
1356 int largepage = 0;
1357 gfn_t gfn = gpa >> PAGE_SHIFT;
1358
1359 ASSERT(vcpu);
1360 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
1361
1362 r = mmu_topup_memory_caches(vcpu);
1363 if (r)
1364 return r;
1365
1366 down_read(&current->mm->mmap_sem);
1367 if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
1368 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1369 largepage = 1;
1370 }
1371 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1372 up_read(&current->mm->mmap_sem);
1373 if (is_error_pfn(pfn)) {
1374 kvm_release_pfn_clean(pfn);
1375 return 1;
1376 }
1377 spin_lock(&vcpu->kvm->mmu_lock);
1378 kvm_mmu_free_some_pages(vcpu);
1379 r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
1380 largepage, gfn, pfn, TDP_ROOT_LEVEL);
1381 spin_unlock(&vcpu->kvm->mmu_lock);
1382
1383 return r;
1384}
1385
1163static void nonpaging_free(struct kvm_vcpu *vcpu) 1386static void nonpaging_free(struct kvm_vcpu *vcpu)
1164{ 1387{
1165 mmu_free_roots(vcpu); 1388 mmu_free_roots(vcpu);
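A rough user-space model of the __direct_map() walk that tdp_page_fault() ends up in: descend one 9-bit index per level from the root, stop one level early for a 2 MB mapping, and allocate intermediate tables on demand. The table layout and index math below are the usual x86-64 values, not code copied from the patch:

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#define ENTRIES      512                 /* 9 index bits per level */
#define PAGE_SHIFT   12

struct table { uint64_t slot[ENTRIES]; struct table *child[ENTRIES]; };

/* Map gfn at the given leaf level (1 = 4kB, 2 = 2MB), starting from the
 * root at root_level and creating intermediate tables as we go. */
static void direct_map(struct table *root, int root_level,
                       uint64_t gfn, uint64_t pfn, int leaf_level)
{
        struct table *t = root;
        int level;

        for (level = root_level; level > leaf_level; level--) {
                int index = (gfn >> ((level - 1) * 9)) & (ENTRIES - 1);

                if (!t->child[index])
                        t->child[index] = calloc(1, sizeof(*t));
                t = t->child[index];
        }
        t->slot[(gfn >> ((leaf_level - 1) * 9)) & (ENTRIES - 1)] =
                (pfn << PAGE_SHIFT) | 1;        /* "present" leaf entry */
}

int main(void)
{
        struct table root = { 0 };

        direct_map(&root, 4, 0x12345, 0xabcd, 1);   /* 4kB mapping */
        direct_map(&root, 4, 0x12200, 0xa000, 2);   /* 2MB mapping, one level up */
        printf("mapped two gfns\n");
        return 0;
}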
@@ -1188,7 +1411,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
1188 1411
1189static void paging_new_cr3(struct kvm_vcpu *vcpu) 1412static void paging_new_cr3(struct kvm_vcpu *vcpu)
1190{ 1413{
1191 pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3); 1414 pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3);
1192 mmu_free_roots(vcpu); 1415 mmu_free_roots(vcpu);
1193} 1416}
1194 1417
@@ -1253,7 +1476,35 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu)
1253 return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); 1476 return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
1254} 1477}
1255 1478
1256static int init_kvm_mmu(struct kvm_vcpu *vcpu) 1479static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
1480{
1481 struct kvm_mmu *context = &vcpu->arch.mmu;
1482
1483 context->new_cr3 = nonpaging_new_cr3;
1484 context->page_fault = tdp_page_fault;
1485 context->free = nonpaging_free;
1486 context->prefetch_page = nonpaging_prefetch_page;
1487 context->shadow_root_level = TDP_ROOT_LEVEL;
1488 context->root_hpa = INVALID_PAGE;
1489
1490 if (!is_paging(vcpu)) {
1491 context->gva_to_gpa = nonpaging_gva_to_gpa;
1492 context->root_level = 0;
1493 } else if (is_long_mode(vcpu)) {
1494 context->gva_to_gpa = paging64_gva_to_gpa;
1495 context->root_level = PT64_ROOT_LEVEL;
1496 } else if (is_pae(vcpu)) {
1497 context->gva_to_gpa = paging64_gva_to_gpa;
1498 context->root_level = PT32E_ROOT_LEVEL;
1499 } else {
1500 context->gva_to_gpa = paging32_gva_to_gpa;
1501 context->root_level = PT32_ROOT_LEVEL;
1502 }
1503
1504 return 0;
1505}
1506
1507static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
1257{ 1508{
1258 ASSERT(vcpu); 1509 ASSERT(vcpu);
1259 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 1510 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1268,6 +1519,16 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
1268 return paging32_init_context(vcpu); 1519 return paging32_init_context(vcpu);
1269} 1520}
1270 1521
1522static int init_kvm_mmu(struct kvm_vcpu *vcpu)
1523{
1524 vcpu->arch.update_pte.pfn = bad_pfn;
1525
1526 if (tdp_enabled)
1527 return init_kvm_tdp_mmu(vcpu);
1528 else
1529 return init_kvm_softmmu(vcpu);
1530}
1531
1271static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) 1532static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
1272{ 1533{
1273 ASSERT(vcpu); 1534 ASSERT(vcpu);
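The init_kvm_mmu() split above is a one-time dispatch over a table of callbacks, picked by whether two-dimensional paging is available. A compact stand-alone analogue of that pattern; the struct and function names here are illustrative, not the kernel's:

#include <stdio.h>
#include <stdbool.h>

struct mmu_ctx {
        int (*page_fault)(unsigned long addr, unsigned error_code);
};

static int tdp_fault(unsigned long addr, unsigned ec)
{ printf("tdp fault  @%#lx ec=%u\n", addr, ec); return 0; }

static int soft_fault(unsigned long addr, unsigned ec)
{ printf("soft fault @%#lx ec=%u\n", addr, ec); return 0; }

static bool tdp_enabled;   /* set when the CPU offers nested/extended paging */

static void init_mmu(struct mmu_ctx *ctx)
{
        /* choose the fault handler once at init time, like init_kvm_mmu() */
        ctx->page_fault = tdp_enabled ? tdp_fault : soft_fault;
}

int main(void)
{
        struct mmu_ctx ctx;

        tdp_enabled = true;
        init_mmu(&ctx);
        ctx.page_fault(0x1000, 2);
        return 0;
}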
@@ -1316,7 +1577,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
1316 1577
1317 pte = *spte; 1578 pte = *spte;
1318 if (is_shadow_present_pte(pte)) { 1579 if (is_shadow_present_pte(pte)) {
1319 if (sp->role.level == PT_PAGE_TABLE_LEVEL) 1580 if (sp->role.level == PT_PAGE_TABLE_LEVEL ||
1581 is_large_pte(pte))
1320 rmap_remove(vcpu->kvm, spte); 1582 rmap_remove(vcpu->kvm, spte);
1321 else { 1583 else {
1322 child = page_header(pte & PT64_BASE_ADDR_MASK); 1584 child = page_header(pte & PT64_BASE_ADDR_MASK);
@@ -1324,24 +1586,26 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
1324 } 1586 }
1325 } 1587 }
1326 set_shadow_pte(spte, shadow_trap_nonpresent_pte); 1588 set_shadow_pte(spte, shadow_trap_nonpresent_pte);
1589 if (is_large_pte(pte))
1590 --vcpu->kvm->stat.lpages;
1327} 1591}
1328 1592
1329static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, 1593static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
1330 struct kvm_mmu_page *sp, 1594 struct kvm_mmu_page *sp,
1331 u64 *spte, 1595 u64 *spte,
1332 const void *new, int bytes, 1596 const void *new)
1333 int offset_in_pte)
1334{ 1597{
1335 if (sp->role.level != PT_PAGE_TABLE_LEVEL) { 1598 if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
1599 && !vcpu->arch.update_pte.largepage) {
1336 ++vcpu->kvm->stat.mmu_pde_zapped; 1600 ++vcpu->kvm->stat.mmu_pde_zapped;
1337 return; 1601 return;
1338 } 1602 }
1339 1603
1340 ++vcpu->kvm->stat.mmu_pte_updated; 1604 ++vcpu->kvm->stat.mmu_pte_updated;
1341 if (sp->role.glevels == PT32_ROOT_LEVEL) 1605 if (sp->role.glevels == PT32_ROOT_LEVEL)
1342 paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte); 1606 paging32_update_pte(vcpu, sp, spte, new);
1343 else 1607 else
1344 paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte); 1608 paging64_update_pte(vcpu, sp, spte, new);
1345} 1609}
1346 1610
1347static bool need_remote_flush(u64 old, u64 new) 1611static bool need_remote_flush(u64 old, u64 new)
@@ -1378,7 +1642,9 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1378 gfn_t gfn; 1642 gfn_t gfn;
1379 int r; 1643 int r;
1380 u64 gpte = 0; 1644 u64 gpte = 0;
1381 struct page *page; 1645 pfn_t pfn;
1646
1647 vcpu->arch.update_pte.largepage = 0;
1382 1648
1383 if (bytes != 4 && bytes != 8) 1649 if (bytes != 4 && bytes != 8)
1384 return; 1650 return;
@@ -1408,11 +1674,19 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1408 gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; 1674 gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
1409 1675
1410 down_read(&current->mm->mmap_sem); 1676 down_read(&current->mm->mmap_sem);
1411 page = gfn_to_page(vcpu->kvm, gfn); 1677 if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
1678 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1679 vcpu->arch.update_pte.largepage = 1;
1680 }
1681 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1412 up_read(&current->mm->mmap_sem); 1682 up_read(&current->mm->mmap_sem);
1413 1683
1684 if (is_error_pfn(pfn)) {
1685 kvm_release_pfn_clean(pfn);
1686 return;
1687 }
1414 vcpu->arch.update_pte.gfn = gfn; 1688 vcpu->arch.update_pte.gfn = gfn;
1415 vcpu->arch.update_pte.page = page; 1689 vcpu->arch.update_pte.pfn = pfn;
1416} 1690}
1417 1691
1418void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 1692void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1423,7 +1697,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1423 struct hlist_node *node, *n; 1697 struct hlist_node *node, *n;
1424 struct hlist_head *bucket; 1698 struct hlist_head *bucket;
1425 unsigned index; 1699 unsigned index;
1426 u64 entry; 1700 u64 entry, gentry;
1427 u64 *spte; 1701 u64 *spte;
1428 unsigned offset = offset_in_page(gpa); 1702 unsigned offset = offset_in_page(gpa);
1429 unsigned pte_size; 1703 unsigned pte_size;
@@ -1433,8 +1707,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1433 int level; 1707 int level;
1434 int flooded = 0; 1708 int flooded = 0;
1435 int npte; 1709 int npte;
1710 int r;
1436 1711
1437 pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); 1712 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
1438 mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); 1713 mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
1439 spin_lock(&vcpu->kvm->mmu_lock); 1714 spin_lock(&vcpu->kvm->mmu_lock);
1440 kvm_mmu_free_some_pages(vcpu); 1715 kvm_mmu_free_some_pages(vcpu);
@@ -1450,7 +1725,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1450 vcpu->arch.last_pt_write_count = 1; 1725 vcpu->arch.last_pt_write_count = 1;
1451 vcpu->arch.last_pte_updated = NULL; 1726 vcpu->arch.last_pte_updated = NULL;
1452 } 1727 }
1453 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; 1728 index = kvm_page_table_hashfn(gfn);
1454 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1729 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
1455 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { 1730 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
1456 if (sp->gfn != gfn || sp->role.metaphysical) 1731 if (sp->gfn != gfn || sp->role.metaphysical)
@@ -1496,20 +1771,29 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1496 continue; 1771 continue;
1497 } 1772 }
1498 spte = &sp->spt[page_offset / sizeof(*spte)]; 1773 spte = &sp->spt[page_offset / sizeof(*spte)];
1774 if ((gpa & (pte_size - 1)) || (bytes < pte_size)) {
1775 gentry = 0;
1776 r = kvm_read_guest_atomic(vcpu->kvm,
1777 gpa & ~(u64)(pte_size - 1),
1778 &gentry, pte_size);
1779 new = (const void *)&gentry;
1780 if (r < 0)
1781 new = NULL;
1782 }
1499 while (npte--) { 1783 while (npte--) {
1500 entry = *spte; 1784 entry = *spte;
1501 mmu_pte_write_zap_pte(vcpu, sp, spte); 1785 mmu_pte_write_zap_pte(vcpu, sp, spte);
1502 mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes, 1786 if (new)
1503 page_offset & (pte_size - 1)); 1787 mmu_pte_write_new_pte(vcpu, sp, spte, new);
1504 mmu_pte_write_flush_tlb(vcpu, entry, *spte); 1788 mmu_pte_write_flush_tlb(vcpu, entry, *spte);
1505 ++spte; 1789 ++spte;
1506 } 1790 }
1507 } 1791 }
1508 kvm_mmu_audit(vcpu, "post pte write"); 1792 kvm_mmu_audit(vcpu, "post pte write");
1509 spin_unlock(&vcpu->kvm->mmu_lock); 1793 spin_unlock(&vcpu->kvm->mmu_lock);
1510 if (vcpu->arch.update_pte.page) { 1794 if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
1511 kvm_release_page_clean(vcpu->arch.update_pte.page); 1795 kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
1512 vcpu->arch.update_pte.page = NULL; 1796 vcpu->arch.update_pte.pfn = bad_pfn;
1513 } 1797 }
1514} 1798}
1515 1799
@@ -1518,9 +1802,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
1518 gpa_t gpa; 1802 gpa_t gpa;
1519 int r; 1803 int r;
1520 1804
1521 down_read(&vcpu->kvm->slots_lock);
1522 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); 1805 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
1523 up_read(&vcpu->kvm->slots_lock);
1524 1806
1525 spin_lock(&vcpu->kvm->mmu_lock); 1807 spin_lock(&vcpu->kvm->mmu_lock);
1526 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); 1808 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -1577,6 +1859,12 @@ out:
1577} 1859}
1578EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); 1860EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
1579 1861
1862void kvm_enable_tdp(void)
1863{
1864 tdp_enabled = true;
1865}
1866EXPORT_SYMBOL_GPL(kvm_enable_tdp);
1867
1580static void free_mmu_pages(struct kvm_vcpu *vcpu) 1868static void free_mmu_pages(struct kvm_vcpu *vcpu)
1581{ 1869{
1582 struct kvm_mmu_page *sp; 1870 struct kvm_mmu_page *sp;
@@ -1677,7 +1965,53 @@ void kvm_mmu_zap_all(struct kvm *kvm)
1677 kvm_flush_remote_tlbs(kvm); 1965 kvm_flush_remote_tlbs(kvm);
1678} 1966}
1679 1967
1680void kvm_mmu_module_exit(void) 1968void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
1969{
1970 struct kvm_mmu_page *page;
1971
1972 page = container_of(kvm->arch.active_mmu_pages.prev,
1973 struct kvm_mmu_page, link);
1974 kvm_mmu_zap_page(kvm, page);
1975}
1976
1977static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
1978{
1979 struct kvm *kvm;
1980 struct kvm *kvm_freed = NULL;
1981 int cache_count = 0;
1982
1983 spin_lock(&kvm_lock);
1984
1985 list_for_each_entry(kvm, &vm_list, vm_list) {
1986 int npages;
1987
1988 spin_lock(&kvm->mmu_lock);
1989 npages = kvm->arch.n_alloc_mmu_pages -
1990 kvm->arch.n_free_mmu_pages;
1991 cache_count += npages;
1992 if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
1993 kvm_mmu_remove_one_alloc_mmu_page(kvm);
1994 cache_count--;
1995 kvm_freed = kvm;
1996 }
1997 nr_to_scan--;
1998
1999 spin_unlock(&kvm->mmu_lock);
2000 }
2001 if (kvm_freed)
2002 list_move_tail(&kvm_freed->vm_list, &vm_list);
2003
2004 spin_unlock(&kvm_lock);
2005
2006 return cache_count;
2007}
2008
2009static struct shrinker mmu_shrinker = {
2010 .shrink = mmu_shrink,
2011 .seeks = DEFAULT_SEEKS * 10,
2012};
2013
2014void mmu_destroy_caches(void)
1681{ 2015{
1682 if (pte_chain_cache) 2016 if (pte_chain_cache)
1683 kmem_cache_destroy(pte_chain_cache); 2017 kmem_cache_destroy(pte_chain_cache);
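The mmu_shrink() callback above reports the total number of cached shadow pages across all VMs and, when asked to scan, drops one page from the first VM that has any; the real code also rotates that VM to the list tail so successive shrinks hit different guests. A simplified model of the accounting, with the VM list flattened to an array for brevity:

#include <stdio.h>

#define NR_VMS 3

struct vm { const char *name; int cached_pages; };

static int shrink(struct vm *vms, int nr, int nr_to_scan)
{
        int i, total = 0, freed_from = -1;

        for (i = 0; i < nr; i++) {
                total += vms[i].cached_pages;
                if (freed_from < 0 && nr_to_scan > 0 && vms[i].cached_pages > 0) {
                        vms[i].cached_pages--;   /* zap one shadow page */
                        freed_from = i;
                        total--;
                }
        }
        if (freed_from >= 0)
                printf("freed one page from %s\n", vms[freed_from].name);
        return total;                            /* cache size reported back */
}

int main(void)
{
        struct vm vms[NR_VMS] = { {"vm0", 0}, {"vm1", 4}, {"vm2", 2} };

        printf("remaining cached pages: %d\n", shrink(vms, NR_VMS, 1));
        return 0;
}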
@@ -1687,6 +2021,12 @@ void kvm_mmu_module_exit(void)
1687 kmem_cache_destroy(mmu_page_header_cache); 2021 kmem_cache_destroy(mmu_page_header_cache);
1688} 2022}
1689 2023
2024void kvm_mmu_module_exit(void)
2025{
2026 mmu_destroy_caches();
2027 unregister_shrinker(&mmu_shrinker);
2028}
2029
1690int kvm_mmu_module_init(void) 2030int kvm_mmu_module_init(void)
1691{ 2031{
1692 pte_chain_cache = kmem_cache_create("kvm_pte_chain", 2032 pte_chain_cache = kmem_cache_create("kvm_pte_chain",
@@ -1706,10 +2046,12 @@ int kvm_mmu_module_init(void)
1706 if (!mmu_page_header_cache) 2046 if (!mmu_page_header_cache)
1707 goto nomem; 2047 goto nomem;
1708 2048
2049 register_shrinker(&mmu_shrinker);
2050
1709 return 0; 2051 return 0;
1710 2052
1711nomem: 2053nomem:
1712 kvm_mmu_module_exit(); 2054 mmu_destroy_caches();
1713 return -ENOMEM; 2055 return -ENOMEM;
1714} 2056}
1715 2057
@@ -1732,6 +2074,127 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
1732 return nr_mmu_pages; 2074 return nr_mmu_pages;
1733} 2075}
1734 2076
2077static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
2078 unsigned len)
2079{
2080 if (len > buffer->len)
2081 return NULL;
2082 return buffer->ptr;
2083}
2084
2085static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
2086 unsigned len)
2087{
2088 void *ret;
2089
2090 ret = pv_mmu_peek_buffer(buffer, len);
2091 if (!ret)
2092 return ret;
2093 buffer->ptr += len;
2094 buffer->len -= len;
2095 buffer->processed += len;
2096 return ret;
2097}
2098
2099static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
2100 gpa_t addr, gpa_t value)
2101{
2102 int bytes = 8;
2103 int r;
2104
2105 if (!is_long_mode(vcpu) && !is_pae(vcpu))
2106 bytes = 4;
2107
2108 r = mmu_topup_memory_caches(vcpu);
2109 if (r)
2110 return r;
2111
2112 if (!emulator_write_phys(vcpu, addr, &value, bytes))
2113 return -EFAULT;
2114
2115 return 1;
2116}
2117
2118static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
2119{
2120 kvm_x86_ops->tlb_flush(vcpu);
2121 return 1;
2122}
2123
2124static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
2125{
2126 spin_lock(&vcpu->kvm->mmu_lock);
2127 mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
2128 spin_unlock(&vcpu->kvm->mmu_lock);
2129 return 1;
2130}
2131
2132static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
2133 struct kvm_pv_mmu_op_buffer *buffer)
2134{
2135 struct kvm_mmu_op_header *header;
2136
2137 header = pv_mmu_peek_buffer(buffer, sizeof *header);
2138 if (!header)
2139 return 0;
2140 switch (header->op) {
2141 case KVM_MMU_OP_WRITE_PTE: {
2142 struct kvm_mmu_op_write_pte *wpte;
2143
2144 wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
2145 if (!wpte)
2146 return 0;
2147 return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
2148 wpte->pte_val);
2149 }
2150 case KVM_MMU_OP_FLUSH_TLB: {
2151 struct kvm_mmu_op_flush_tlb *ftlb;
2152
2153 ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
2154 if (!ftlb)
2155 return 0;
2156 return kvm_pv_mmu_flush_tlb(vcpu);
2157 }
2158 case KVM_MMU_OP_RELEASE_PT: {
2159 struct kvm_mmu_op_release_pt *rpt;
2160
2161 rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
2162 if (!rpt)
2163 return 0;
2164 return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
2165 }
2166 default: return 0;
2167 }
2168}
2169
2170int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
2171 gpa_t addr, unsigned long *ret)
2172{
2173 int r;
2174 struct kvm_pv_mmu_op_buffer buffer;
2175
2176 buffer.ptr = buffer.buf;
2177 buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
2178 buffer.processed = 0;
2179
2180 r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
2181 if (r)
2182 goto out;
2183
2184 while (buffer.len) {
2185 r = kvm_pv_mmu_op_one(vcpu, &buffer);
2186 if (r < 0)
2187 goto out;
2188 if (r == 0)
2189 break;
2190 }
2191
2192 r = 1;
2193out:
2194 *ret = buffer.processed;
2195 return r;
2196}
2197
1735#ifdef AUDIT 2198#ifdef AUDIT
1736 2199
1737static const char *audit_msg; 2200static const char *audit_msg;
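The pv_mmu_peek_buffer()/pv_mmu_read_buffer() pair above is a cursor over a byte buffer: peek only checks that a record fits, read additionally advances ptr/len and accounts the bytes in processed, which is what kvm_pv_mmu_op() hands back to the caller. A tiny stand-alone version of the same cursor; the record layout in main() is invented for the example:

#include <stdio.h>
#include <string.h>

struct op_buffer {
        const unsigned char *ptr;
        size_t len;
        size_t processed;
};

static const void *peek(struct op_buffer *b, size_t n)
{
        return n > b->len ? NULL : b->ptr;       /* record must fit entirely */
}

static const void *read_rec(struct op_buffer *b, size_t n)
{
        const void *p = peek(b, n);

        if (p) {                                 /* consume it */
                b->ptr += n;
                b->len -= n;
                b->processed += n;
        }
        return p;
}

int main(void)
{
        unsigned char raw[] = { 1, 0xaa, 0xbb, 2, 0xcc };   /* two fake records */
        struct op_buffer b = { raw, sizeof(raw), 0 };
        const unsigned char *hdr;

        while ((hdr = peek(&b, 1))) {
                size_t rec_len = (*hdr == 1) ? 3 : 2;        /* op 1 carries 2 payload bytes */
                if (!read_rec(&b, rec_len))
                        break;                               /* truncated record */
                printf("op %d, %zu bytes\n", *hdr, rec_len);
        }
        printf("processed %zu of %zu bytes\n", b.processed, sizeof(raw));
        return 0;
}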
@@ -1768,8 +2231,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
1768 audit_mappings_page(vcpu, ent, va, level - 1); 2231 audit_mappings_page(vcpu, ent, va, level - 1);
1769 } else { 2232 } else {
1770 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); 2233 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
1771 struct page *page = gpa_to_page(vcpu, gpa); 2234 hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
1772 hpa_t hpa = page_to_phys(page);
1773 2235
1774 if (is_shadow_present_pte(ent) 2236 if (is_shadow_present_pte(ent)
1775 && (ent & PT64_BASE_ADDR_MASK) != hpa) 2237 && (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -1782,7 +2244,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
1782 && !is_error_hpa(hpa)) 2244 && !is_error_hpa(hpa))
1783 printk(KERN_ERR "audit: (%s) notrap shadow," 2245 printk(KERN_ERR "audit: (%s) notrap shadow,"
1784 " valid guest gva %lx\n", audit_msg, va); 2246 " valid guest gva %lx\n", audit_msg, va);
1785 kvm_release_page_clean(page); 2247 kvm_release_pfn_clean(pfn);
1786 2248
1787 } 2249 }
1788 } 2250 }
@@ -1867,7 +2329,7 @@ static void audit_rmap(struct kvm_vcpu *vcpu)
1867 2329
1868 if (n_rmap != n_actual) 2330 if (n_rmap != n_actual)
1869 printk(KERN_ERR "%s: (%s) rmap %d actual %d\n", 2331 printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
1870 __FUNCTION__, audit_msg, n_rmap, n_actual); 2332 __func__, audit_msg, n_rmap, n_actual);
1871} 2333}
1872 2334
1873static void audit_write_protection(struct kvm_vcpu *vcpu) 2335static void audit_write_protection(struct kvm_vcpu *vcpu)
@@ -1887,7 +2349,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
1887 if (*rmapp) 2349 if (*rmapp)
1888 printk(KERN_ERR "%s: (%s) shadow page has writable" 2350 printk(KERN_ERR "%s: (%s) shadow page has writable"
1889 " mappings: gfn %lx role %x\n", 2351 " mappings: gfn %lx role %x\n",
1890 __FUNCTION__, audit_msg, sp->gfn, 2352 __func__, audit_msg, sp->gfn,
1891 sp->role.word); 2353 sp->role.word);
1892 } 2354 }
1893} 2355}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 1fce19ec7a23..e64e9f56a65e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -3,6 +3,12 @@
3 3
4#include <linux/kvm_host.h> 4#include <linux/kvm_host.h>
5 5
6#ifdef CONFIG_X86_64
7#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
8#else
9#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
10#endif
11
6static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 12static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
7{ 13{
8 if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) 14 if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ecc0856268c4..156fe10288ae 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -130,7 +130,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
130 unsigned index, pt_access, pte_access; 130 unsigned index, pt_access, pte_access;
131 gpa_t pte_gpa; 131 gpa_t pte_gpa;
132 132
133 pgprintk("%s: addr %lx\n", __FUNCTION__, addr); 133 pgprintk("%s: addr %lx\n", __func__, addr);
134walk: 134walk:
135 walker->level = vcpu->arch.mmu.root_level; 135 walker->level = vcpu->arch.mmu.root_level;
136 pte = vcpu->arch.cr3; 136 pte = vcpu->arch.cr3;
@@ -155,7 +155,7 @@ walk:
155 pte_gpa += index * sizeof(pt_element_t); 155 pte_gpa += index * sizeof(pt_element_t);
156 walker->table_gfn[walker->level - 1] = table_gfn; 156 walker->table_gfn[walker->level - 1] = table_gfn;
157 walker->pte_gpa[walker->level - 1] = pte_gpa; 157 walker->pte_gpa[walker->level - 1] = pte_gpa;
158 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, 158 pgprintk("%s: table_gfn[%d] %lx\n", __func__,
159 walker->level - 1, table_gfn); 159 walker->level - 1, table_gfn);
160 160
161 kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); 161 kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
@@ -222,7 +222,7 @@ walk:
222 walker->pt_access = pt_access; 222 walker->pt_access = pt_access;
223 walker->pte_access = pte_access; 223 walker->pte_access = pte_access;
224 pgprintk("%s: pte %llx pte_access %x pt_access %x\n", 224 pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
225 __FUNCTION__, (u64)pte, pt_access, pte_access); 225 __func__, (u64)pte, pt_access, pte_access);
226 return 1; 226 return 1;
227 227
228not_present: 228not_present:
@@ -243,31 +243,30 @@ err:
243} 243}
244 244
245static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, 245static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
246 u64 *spte, const void *pte, int bytes, 246 u64 *spte, const void *pte)
247 int offset_in_pte)
248{ 247{
249 pt_element_t gpte; 248 pt_element_t gpte;
250 unsigned pte_access; 249 unsigned pte_access;
251 struct page *npage; 250 pfn_t pfn;
251 int largepage = vcpu->arch.update_pte.largepage;
252 252
253 gpte = *(const pt_element_t *)pte; 253 gpte = *(const pt_element_t *)pte;
254 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { 254 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
255 if (!offset_in_pte && !is_present_pte(gpte)) 255 if (!is_present_pte(gpte))
256 set_shadow_pte(spte, shadow_notrap_nonpresent_pte); 256 set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
257 return; 257 return;
258 } 258 }
259 if (bytes < sizeof(pt_element_t)) 259 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
260 return;
261 pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
262 pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); 260 pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
263 if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) 261 if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
264 return; 262 return;
265 npage = vcpu->arch.update_pte.page; 263 pfn = vcpu->arch.update_pte.pfn;
266 if (!npage) 264 if (is_error_pfn(pfn))
267 return; 265 return;
268 get_page(npage); 266 kvm_get_pfn(pfn);
269 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 267 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
270 gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage); 268 gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
269 pfn, true);
271} 270}
272 271
273/* 272/*
@@ -275,8 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
275 */ 274 */
276static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 275static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
277 struct guest_walker *walker, 276 struct guest_walker *walker,
278 int user_fault, int write_fault, int *ptwrite, 277 int user_fault, int write_fault, int largepage,
279 struct page *page) 278 int *ptwrite, pfn_t pfn)
280{ 279{
281 hpa_t shadow_addr; 280 hpa_t shadow_addr;
282 int level; 281 int level;
@@ -304,11 +303,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
304 shadow_ent = ((u64 *)__va(shadow_addr)) + index; 303 shadow_ent = ((u64 *)__va(shadow_addr)) + index;
305 if (level == PT_PAGE_TABLE_LEVEL) 304 if (level == PT_PAGE_TABLE_LEVEL)
306 break; 305 break;
307 if (is_shadow_present_pte(*shadow_ent)) { 306
307 if (largepage && level == PT_DIRECTORY_LEVEL)
308 break;
309
310 if (is_shadow_present_pte(*shadow_ent)
311 && !is_large_pte(*shadow_ent)) {
308 shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; 312 shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
309 continue; 313 continue;
310 } 314 }
311 315
316 if (is_large_pte(*shadow_ent))
317 rmap_remove(vcpu->kvm, shadow_ent);
318
312 if (level - 1 == PT_PAGE_TABLE_LEVEL 319 if (level - 1 == PT_PAGE_TABLE_LEVEL
313 && walker->level == PT_DIRECTORY_LEVEL) { 320 && walker->level == PT_DIRECTORY_LEVEL) {
314 metaphysical = 1; 321 metaphysical = 1;
@@ -329,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
329 walker->pte_gpa[level - 2], 336 walker->pte_gpa[level - 2],
330 &curr_pte, sizeof(curr_pte)); 337 &curr_pte, sizeof(curr_pte));
331 if (r || curr_pte != walker->ptes[level - 2]) { 338 if (r || curr_pte != walker->ptes[level - 2]) {
332 kvm_release_page_clean(page); 339 kvm_release_pfn_clean(pfn);
333 return NULL; 340 return NULL;
334 } 341 }
335 } 342 }
@@ -342,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
342 mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, 349 mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
343 user_fault, write_fault, 350 user_fault, write_fault,
344 walker->ptes[walker->level-1] & PT_DIRTY_MASK, 351 walker->ptes[walker->level-1] & PT_DIRTY_MASK,
345 ptwrite, walker->gfn, page); 352 ptwrite, largepage, walker->gfn, pfn, false);
346 353
347 return shadow_ent; 354 return shadow_ent;
348} 355}
@@ -371,16 +378,16 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
371 u64 *shadow_pte; 378 u64 *shadow_pte;
372 int write_pt = 0; 379 int write_pt = 0;
373 int r; 380 int r;
374 struct page *page; 381 pfn_t pfn;
382 int largepage = 0;
375 383
376 pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code); 384 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
377 kvm_mmu_audit(vcpu, "pre page fault"); 385 kvm_mmu_audit(vcpu, "pre page fault");
378 386
379 r = mmu_topup_memory_caches(vcpu); 387 r = mmu_topup_memory_caches(vcpu);
380 if (r) 388 if (r)
381 return r; 389 return r;
382 390
383 down_read(&vcpu->kvm->slots_lock);
384 /* 391 /*
385 * Look up the shadow pte for the faulting address. 392 * Look up the shadow pte for the faulting address.
386 */ 393 */
@@ -391,40 +398,45 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
391 * The page is not mapped by the guest. Let the guest handle it. 398 * The page is not mapped by the guest. Let the guest handle it.
392 */ 399 */
393 if (!r) { 400 if (!r) {
394 pgprintk("%s: guest page fault\n", __FUNCTION__); 401 pgprintk("%s: guest page fault\n", __func__);
395 inject_page_fault(vcpu, addr, walker.error_code); 402 inject_page_fault(vcpu, addr, walker.error_code);
396 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ 403 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
397 up_read(&vcpu->kvm->slots_lock);
398 return 0; 404 return 0;
399 } 405 }
400 406
401 down_read(&current->mm->mmap_sem); 407 down_read(&current->mm->mmap_sem);
402 page = gfn_to_page(vcpu->kvm, walker.gfn); 408 if (walker.level == PT_DIRECTORY_LEVEL) {
409 gfn_t large_gfn;
410 large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
411 if (is_largepage_backed(vcpu, large_gfn)) {
412 walker.gfn = large_gfn;
413 largepage = 1;
414 }
415 }
416 pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
403 up_read(&current->mm->mmap_sem); 417 up_read(&current->mm->mmap_sem);
404 418
419 /* mmio */
420 if (is_error_pfn(pfn)) {
421 pgprintk("gfn %x is mmio\n", walker.gfn);
422 kvm_release_pfn_clean(pfn);
423 return 1;
424 }
425
405 spin_lock(&vcpu->kvm->mmu_lock); 426 spin_lock(&vcpu->kvm->mmu_lock);
406 kvm_mmu_free_some_pages(vcpu); 427 kvm_mmu_free_some_pages(vcpu);
407 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 428 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
408 &write_pt, page); 429 largepage, &write_pt, pfn);
409 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, 430
431 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
410 shadow_pte, *shadow_pte, write_pt); 432 shadow_pte, *shadow_pte, write_pt);
411 433
412 if (!write_pt) 434 if (!write_pt)
413 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ 435 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
414 436
415 /*
416 * mmio: emulate if accessible, otherwise its a guest fault.
417 */
418 if (shadow_pte && is_io_pte(*shadow_pte)) {
419 spin_unlock(&vcpu->kvm->mmu_lock);
420 up_read(&vcpu->kvm->slots_lock);
421 return 1;
422 }
423
424 ++vcpu->stat.pf_fixed; 437 ++vcpu->stat.pf_fixed;
425 kvm_mmu_audit(vcpu, "post page fault (fixed)"); 438 kvm_mmu_audit(vcpu, "post page fault (fixed)");
426 spin_unlock(&vcpu->kvm->mmu_lock); 439 spin_unlock(&vcpu->kvm->mmu_lock);
427 up_read(&vcpu->kvm->slots_lock);
428 440
429 return write_pt; 441 return write_pt;
430} 442}
diff --git a/arch/x86/kvm/segment_descriptor.h b/arch/x86/kvm/segment_descriptor.h
deleted file mode 100644
index 56fc4c873389..000000000000
--- a/arch/x86/kvm/segment_descriptor.h
+++ /dev/null
@@ -1,29 +0,0 @@
1#ifndef __SEGMENT_DESCRIPTOR_H
2#define __SEGMENT_DESCRIPTOR_H
3
4struct segment_descriptor {
5 u16 limit_low;
6 u16 base_low;
7 u8 base_mid;
8 u8 type : 4;
9 u8 system : 1;
10 u8 dpl : 2;
11 u8 present : 1;
12 u8 limit_high : 4;
13 u8 avl : 1;
14 u8 long_mode : 1;
15 u8 default_op : 1;
16 u8 granularity : 1;
17 u8 base_high;
18} __attribute__((packed));
19
20#ifdef CONFIG_X86_64
21/* LDT or TSS descriptor in the GDT. 16 bytes. */
22struct segment_descriptor_64 {
23 struct segment_descriptor s;
24 u32 base_higher;
25 u32 pad_zero;
26};
27
28#endif
29#endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1a582f1090e8..89e0be2c10d0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -47,6 +47,18 @@ MODULE_LICENSE("GPL");
47#define SVM_FEATURE_LBRV (1 << 1) 47#define SVM_FEATURE_LBRV (1 << 1)
48#define SVM_DEATURE_SVML (1 << 2) 48#define SVM_DEATURE_SVML (1 << 2)
49 49
50#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
51
52/* enable NPT for AMD64 and X86 with PAE */
53#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
54static bool npt_enabled = true;
55#else
56static bool npt_enabled = false;
57#endif
58static int npt = 1;
59
60module_param(npt, int, S_IRUGO);
61
50static void kvm_reput_irq(struct vcpu_svm *svm); 62static void kvm_reput_irq(struct vcpu_svm *svm);
51 63
52static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) 64static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
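The npt/npt_enabled pair introduced above gives a compile-time default (nested paging on where the paging mode allows it) that the module parameter can only switch off at load time. The same gate in plain C, assuming the 64-bit case; detecting the 32-bit PAE configuration is left out of the sketch:

#include <stdio.h>
#include <stdbool.h>

#if defined(__x86_64__)
static bool npt_enabled = true;     /* default on when hardware assist is usable */
#else
static bool npt_enabled = false;
#endif
static int npt = 1;                 /* stand-in for the module parameter */

int main(void)
{
        if (npt_enabled && !npt) {  /* administrator asked for it off */
                printf("Nested Paging disabled\n");
                npt_enabled = false;
        }
        if (npt_enabled)
                printf("Nested Paging enabled\n");
        return 0;
}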
@@ -54,8 +66,7 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
54 return container_of(vcpu, struct vcpu_svm, vcpu); 66 return container_of(vcpu, struct vcpu_svm, vcpu);
55} 67}
56 68
57unsigned long iopm_base; 69static unsigned long iopm_base;
58unsigned long msrpm_base;
59 70
60struct kvm_ldttss_desc { 71struct kvm_ldttss_desc {
61 u16 limit0; 72 u16 limit0;
@@ -182,7 +193,7 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
182 193
183static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) 194static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
184{ 195{
185 if (!(efer & EFER_LMA)) 196 if (!npt_enabled && !(efer & EFER_LMA))
186 efer &= ~EFER_LME; 197 efer &= ~EFER_LME;
187 198
188 to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; 199 to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
@@ -219,12 +230,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
219 struct vcpu_svm *svm = to_svm(vcpu); 230 struct vcpu_svm *svm = to_svm(vcpu);
220 231
221 if (!svm->next_rip) { 232 if (!svm->next_rip) {
222 printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); 233 printk(KERN_DEBUG "%s: NOP\n", __func__);
223 return; 234 return;
224 } 235 }
225 if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) 236 if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
226 printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", 237 printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
227 __FUNCTION__, 238 __func__,
228 svm->vmcb->save.rip, 239 svm->vmcb->save.rip,
229 svm->next_rip); 240 svm->next_rip);
230 241
@@ -279,11 +290,7 @@ static void svm_hardware_enable(void *garbage)
279 290
280 struct svm_cpu_data *svm_data; 291 struct svm_cpu_data *svm_data;
281 uint64_t efer; 292 uint64_t efer;
282#ifdef CONFIG_X86_64
283 struct desc_ptr gdt_descr;
284#else
285 struct desc_ptr gdt_descr; 293 struct desc_ptr gdt_descr;
286#endif
287 struct desc_struct *gdt; 294 struct desc_struct *gdt;
288 int me = raw_smp_processor_id(); 295 int me = raw_smp_processor_id();
289 296
@@ -302,7 +309,6 @@ static void svm_hardware_enable(void *garbage)
302 svm_data->asid_generation = 1; 309 svm_data->asid_generation = 1;
303 svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; 310 svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
304 svm_data->next_asid = svm_data->max_asid + 1; 311 svm_data->next_asid = svm_data->max_asid + 1;
305 svm_features = cpuid_edx(SVM_CPUID_FUNC);
306 312
307 asm volatile ("sgdt %0" : "=m"(gdt_descr)); 313 asm volatile ("sgdt %0" : "=m"(gdt_descr));
308 gdt = (struct desc_struct *)gdt_descr.address; 314 gdt = (struct desc_struct *)gdt_descr.address;
@@ -361,12 +367,51 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
361 BUG(); 367 BUG();
362} 368}
363 369
370static void svm_vcpu_init_msrpm(u32 *msrpm)
371{
372 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
373
374#ifdef CONFIG_X86_64
375 set_msr_interception(msrpm, MSR_GS_BASE, 1, 1);
376 set_msr_interception(msrpm, MSR_FS_BASE, 1, 1);
377 set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1);
378 set_msr_interception(msrpm, MSR_LSTAR, 1, 1);
379 set_msr_interception(msrpm, MSR_CSTAR, 1, 1);
380 set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1);
381#endif
382 set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
383 set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
384 set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
385 set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
386}
387
388static void svm_enable_lbrv(struct vcpu_svm *svm)
389{
390 u32 *msrpm = svm->msrpm;
391
392 svm->vmcb->control.lbr_ctl = 1;
393 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
394 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
395 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
396 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
397}
398
399static void svm_disable_lbrv(struct vcpu_svm *svm)
400{
401 u32 *msrpm = svm->msrpm;
402
403 svm->vmcb->control.lbr_ctl = 0;
404 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
405 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
406 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
407 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
408}
409
364static __init int svm_hardware_setup(void) 410static __init int svm_hardware_setup(void)
365{ 411{
366 int cpu; 412 int cpu;
367 struct page *iopm_pages; 413 struct page *iopm_pages;
368 struct page *msrpm_pages; 414 void *iopm_va;
369 void *iopm_va, *msrpm_va;
370 int r; 415 int r;
371 416
372 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); 417 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
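svm_vcpu_init_msrpm() above starts from an all-ones map (intercept every MSR) and then clears the read/write bits for the handful of MSRs the guest may touch directly; svm_enable_lbrv()/svm_disable_lbrv() flip the same bits for the last-branch MSRs at runtime. A simplified model with two bits per MSR and a flat msr-to-bit mapping; the real SVM permission map splits the MSR space into ranges, which is glossed over here:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define NR_MSRS 1024                    /* size of the toy permission map */

static uint8_t msrpm[NR_MSRS / 4];      /* 2 bits per MSR: read + write */

/* read/write == 1 means pass the access straight through (clear the bit),
 * 0 means intercept it. */
static void set_msr_interception(unsigned msr, int read, int write)
{
        unsigned byte = msr / 4, shift = (msr % 4) * 2;

        if (read)
                msrpm[byte] &= ~(1u << shift);
        else
                msrpm[byte] |= (1u << shift);
        if (write)
                msrpm[byte] &= ~(1u << (shift + 1));
        else
                msrpm[byte] |= (1u << (shift + 1));
}

int main(void)
{
        memset(msrpm, 0xff, sizeof(msrpm));             /* intercept everything */
        set_msr_interception(0x174, 1, 1);              /* pass one MSR through */
        printf("map byte for msr 0x174: %#x\n", msrpm[0x174 / 4]);
        return 0;
}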
@@ -379,41 +424,33 @@ static __init int svm_hardware_setup(void)
379 clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */ 424 clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
380 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; 425 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
381 426
427 if (boot_cpu_has(X86_FEATURE_NX))
428 kvm_enable_efer_bits(EFER_NX);
382 429
383 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 430 for_each_online_cpu(cpu) {
431 r = svm_cpu_init(cpu);
432 if (r)
433 goto err;
434 }
384 435
385 r = -ENOMEM; 436 svm_features = cpuid_edx(SVM_CPUID_FUNC);
386 if (!msrpm_pages)
387 goto err_1;
388 437
389 msrpm_va = page_address(msrpm_pages); 438 if (!svm_has(SVM_FEATURE_NPT))
390 memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); 439 npt_enabled = false;
391 msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT;
392 440
393#ifdef CONFIG_X86_64 441 if (npt_enabled && !npt) {
394 set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1); 442 printk(KERN_INFO "kvm: Nested Paging disabled\n");
395 set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1); 443 npt_enabled = false;
396 set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1); 444 }
397 set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1);
398 set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1);
399 set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1);
400#endif
401 set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1);
402 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1);
403 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1);
404 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1);
405 445
406 for_each_online_cpu(cpu) { 446 if (npt_enabled) {
407 r = svm_cpu_init(cpu); 447 printk(KERN_INFO "kvm: Nested Paging enabled\n");
408 if (r) 448 kvm_enable_tdp();
409 goto err_2;
410 } 449 }
450
411 return 0; 451 return 0;
412 452
413err_2: 453err:
414 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
415 msrpm_base = 0;
416err_1:
417 __free_pages(iopm_pages, IOPM_ALLOC_ORDER); 454 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
418 iopm_base = 0; 455 iopm_base = 0;
419 return r; 456 return r;
@@ -421,9 +458,8 @@ err_1:
421 458
422static __exit void svm_hardware_unsetup(void) 459static __exit void svm_hardware_unsetup(void)
423{ 460{
424 __free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER);
425 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); 461 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
426 iopm_base = msrpm_base = 0; 462 iopm_base = 0;
427} 463}
428 464
429static void init_seg(struct vmcb_seg *seg) 465static void init_seg(struct vmcb_seg *seg)
@@ -443,15 +479,14 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
443 seg->base = 0; 479 seg->base = 0;
444} 480}
445 481
446static void init_vmcb(struct vmcb *vmcb) 482static void init_vmcb(struct vcpu_svm *svm)
447{ 483{
448 struct vmcb_control_area *control = &vmcb->control; 484 struct vmcb_control_area *control = &svm->vmcb->control;
449 struct vmcb_save_area *save = &vmcb->save; 485 struct vmcb_save_area *save = &svm->vmcb->save;
450 486
451 control->intercept_cr_read = INTERCEPT_CR0_MASK | 487 control->intercept_cr_read = INTERCEPT_CR0_MASK |
452 INTERCEPT_CR3_MASK | 488 INTERCEPT_CR3_MASK |
453 INTERCEPT_CR4_MASK | 489 INTERCEPT_CR4_MASK;
454 INTERCEPT_CR8_MASK;
455 490
456 control->intercept_cr_write = INTERCEPT_CR0_MASK | 491 control->intercept_cr_write = INTERCEPT_CR0_MASK |
457 INTERCEPT_CR3_MASK | 492 INTERCEPT_CR3_MASK |
@@ -471,23 +506,13 @@ static void init_vmcb(struct vmcb *vmcb)
471 INTERCEPT_DR7_MASK; 506 INTERCEPT_DR7_MASK;
472 507
473 control->intercept_exceptions = (1 << PF_VECTOR) | 508 control->intercept_exceptions = (1 << PF_VECTOR) |
474 (1 << UD_VECTOR); 509 (1 << UD_VECTOR) |
510 (1 << MC_VECTOR);
475 511
476 512
477 control->intercept = (1ULL << INTERCEPT_INTR) | 513 control->intercept = (1ULL << INTERCEPT_INTR) |
478 (1ULL << INTERCEPT_NMI) | 514 (1ULL << INTERCEPT_NMI) |
479 (1ULL << INTERCEPT_SMI) | 515 (1ULL << INTERCEPT_SMI) |
480 /*
481 * selective cr0 intercept bug?
482 * 0: 0f 22 d8 mov %eax,%cr3
483 * 3: 0f 20 c0 mov %cr0,%eax
484 * 6: 0d 00 00 00 80 or $0x80000000,%eax
485 * b: 0f 22 c0 mov %eax,%cr0
486 * set cr3 ->interception
487 * get cr0 ->interception
488 * set cr0 -> no interception
489 */
490 /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */
491 (1ULL << INTERCEPT_CPUID) | 516 (1ULL << INTERCEPT_CPUID) |
492 (1ULL << INTERCEPT_INVD) | 517 (1ULL << INTERCEPT_INVD) |
493 (1ULL << INTERCEPT_HLT) | 518 (1ULL << INTERCEPT_HLT) |
@@ -508,7 +533,7 @@ static void init_vmcb(struct vmcb *vmcb)
508 (1ULL << INTERCEPT_MWAIT); 533 (1ULL << INTERCEPT_MWAIT);
509 534
510 control->iopm_base_pa = iopm_base; 535 control->iopm_base_pa = iopm_base;
511 control->msrpm_base_pa = msrpm_base; 536 control->msrpm_base_pa = __pa(svm->msrpm);
512 control->tsc_offset = 0; 537 control->tsc_offset = 0;
513 control->int_ctl = V_INTR_MASKING_MASK; 538 control->int_ctl = V_INTR_MASKING_MASK;
514 539
@@ -550,13 +575,30 @@ static void init_vmcb(struct vmcb *vmcb)
550 save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; 575 save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
551 save->cr4 = X86_CR4_PAE; 576 save->cr4 = X86_CR4_PAE;
552 /* rdx = ?? */ 577 /* rdx = ?? */
578
579 if (npt_enabled) {
580 /* Setup VMCB for Nested Paging */
581 control->nested_ctl = 1;
582 control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH);
583 control->intercept_exceptions &= ~(1 << PF_VECTOR);
584 control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
585 INTERCEPT_CR3_MASK);
586 control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
587 INTERCEPT_CR3_MASK);
588 save->g_pat = 0x0007040600070406ULL;
589 /* enable caching because the QEMU Bios doesn't enable it */
590 save->cr0 = X86_CR0_ET;
591 save->cr3 = 0;
592 save->cr4 = 0;
593 }
594 force_new_asid(&svm->vcpu);
553} 595}
554 596
555static int svm_vcpu_reset(struct kvm_vcpu *vcpu) 597static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
556{ 598{
557 struct vcpu_svm *svm = to_svm(vcpu); 599 struct vcpu_svm *svm = to_svm(vcpu);
558 600
559 init_vmcb(svm->vmcb); 601 init_vmcb(svm);
560 602
561 if (vcpu->vcpu_id != 0) { 603 if (vcpu->vcpu_id != 0) {
562 svm->vmcb->save.rip = 0; 604 svm->vmcb->save.rip = 0;
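With nested paging the guest owns CR0/CR3 and its own page faults, so the init_vmcb() hunk above clears those intercept bits again after the common setup. The same pattern in miniature; the mask values below are invented, only the clear-after-set shape mirrors the patch:

#include <stdio.h>
#include <stdint.h>

#define INTERCEPT_CR0   (1u << 0)     /* illustrative bit positions only */
#define INTERCEPT_CR3   (1u << 3)
#define INTERCEPT_PF    (1u << 14)

int main(void)
{
        uint32_t cr_read = INTERCEPT_CR0 | INTERCEPT_CR3;
        uint32_t exc     = INTERCEPT_PF;
        int npt_enabled  = 1;

        if (npt_enabled) {
                /* hardware walks the guest page tables: no CR3 / #PF exits */
                cr_read &= ~(INTERCEPT_CR0 | INTERCEPT_CR3);
                exc     &= ~INTERCEPT_PF;
        }
        printf("cr_read=%#x exceptions=%#x\n", cr_read, exc);
        return 0;
}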
@@ -571,6 +613,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
571{ 613{
572 struct vcpu_svm *svm; 614 struct vcpu_svm *svm;
573 struct page *page; 615 struct page *page;
616 struct page *msrpm_pages;
574 int err; 617 int err;
575 618
576 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 619 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -589,12 +632,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
589 goto uninit; 632 goto uninit;
590 } 633 }
591 634
635 err = -ENOMEM;
636 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
637 if (!msrpm_pages)
638 goto uninit;
639 svm->msrpm = page_address(msrpm_pages);
640 svm_vcpu_init_msrpm(svm->msrpm);
641
592 svm->vmcb = page_address(page); 642 svm->vmcb = page_address(page);
593 clear_page(svm->vmcb); 643 clear_page(svm->vmcb);
594 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 644 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
595 svm->asid_generation = 0; 645 svm->asid_generation = 0;
596 memset(svm->db_regs, 0, sizeof(svm->db_regs)); 646 memset(svm->db_regs, 0, sizeof(svm->db_regs));
597 init_vmcb(svm->vmcb); 647 init_vmcb(svm);
598 648
599 fx_init(&svm->vcpu); 649 fx_init(&svm->vcpu);
600 svm->vcpu.fpu_active = 1; 650 svm->vcpu.fpu_active = 1;
@@ -617,6 +667,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
617 struct vcpu_svm *svm = to_svm(vcpu); 667 struct vcpu_svm *svm = to_svm(vcpu);
618 668
619 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); 669 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
670 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
620 kvm_vcpu_uninit(vcpu); 671 kvm_vcpu_uninit(vcpu);
621 kmem_cache_free(kvm_vcpu_cache, svm); 672 kmem_cache_free(kvm_vcpu_cache, svm);
622} 673}
@@ -731,6 +782,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
731 var->unusable = !var->present; 782 var->unusable = !var->present;
732} 783}
733 784
785static int svm_get_cpl(struct kvm_vcpu *vcpu)
786{
787 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
788
789 return save->cpl;
790}
791
734static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 792static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
735{ 793{
736 struct vcpu_svm *svm = to_svm(vcpu); 794 struct vcpu_svm *svm = to_svm(vcpu);
@@ -784,6 +842,9 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
784 } 842 }
785 } 843 }
786#endif 844#endif
845 if (npt_enabled)
846 goto set;
847
787 if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { 848 if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
788 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 849 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
789 vcpu->fpu_active = 1; 850 vcpu->fpu_active = 1;
@@ -791,18 +852,29 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
791 852
792 vcpu->arch.cr0 = cr0; 853 vcpu->arch.cr0 = cr0;
793 cr0 |= X86_CR0_PG | X86_CR0_WP; 854 cr0 |= X86_CR0_PG | X86_CR0_WP;
794 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
795 if (!vcpu->fpu_active) { 855 if (!vcpu->fpu_active) {
796 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); 856 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
797 cr0 |= X86_CR0_TS; 857 cr0 |= X86_CR0_TS;
798 } 858 }
859set:
860 /*
861 * re-enable caching here because the QEMU bios
862 * does not do it - this results in some delay at
863 * reboot
864 */
865 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
799 svm->vmcb->save.cr0 = cr0; 866 svm->vmcb->save.cr0 = cr0;
800} 867}
801 868
802static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 869static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
803{ 870{
804 vcpu->arch.cr4 = cr4; 871 unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
805 to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; 872
873 vcpu->arch.cr4 = cr4;
874 if (!npt_enabled)
875 cr4 |= X86_CR4_PAE;
876 cr4 |= host_cr4_mce;
877 to_svm(vcpu)->vmcb->save.cr4 = cr4;
806} 878}
807 879
808static void svm_set_segment(struct kvm_vcpu *vcpu, 880static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -833,13 +905,6 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
833 905
834} 906}
835 907
836/* FIXME:
837
838 svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK;
839 svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK);
840
841*/
842
843static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 908static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
844{ 909{
845 return -EOPNOTSUPP; 910 return -EOPNOTSUPP;
@@ -920,7 +985,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
920 } 985 }
921 default: 986 default:
922 printk(KERN_DEBUG "%s: unexpected dr %u\n", 987 printk(KERN_DEBUG "%s: unexpected dr %u\n",
923 __FUNCTION__, dr); 988 __func__, dr);
924 *exception = UD_VECTOR; 989 *exception = UD_VECTOR;
925 return; 990 return;
926 } 991 }
@@ -962,6 +1027,19 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
962 return 1; 1027 return 1;
963} 1028}
964 1029
1030static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1031{
1032 /*
1033 * On an #MC intercept the MCE handler is not called automatically in
1034 * the host. So do it by hand here.
1035 */
1036 asm volatile (
1037 "int $0x12\n");
1038 /* not sure if we ever come back to this point */
1039
1040 return 1;
1041}
1042
965static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1043static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
966{ 1044{
967 /* 1045 /*
@@ -969,7 +1047,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
969 * so reinitialize it. 1047 * so reinitialize it.
970 */ 1048 */
971 clear_page(svm->vmcb); 1049 clear_page(svm->vmcb);
972 init_vmcb(svm->vmcb); 1050 init_vmcb(svm);
973 1051
974 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 1052 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
975 return 0; 1053 return 0;
@@ -1033,9 +1111,18 @@ static int invalid_op_interception(struct vcpu_svm *svm,
1033static int task_switch_interception(struct vcpu_svm *svm, 1111static int task_switch_interception(struct vcpu_svm *svm,
1034 struct kvm_run *kvm_run) 1112 struct kvm_run *kvm_run)
1035{ 1113{
1036 pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__); 1114 u16 tss_selector;
1037 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 1115
1038 return 0; 1116 tss_selector = (u16)svm->vmcb->control.exit_info_1;
1117 if (svm->vmcb->control.exit_info_2 &
1118 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
1119 return kvm_task_switch(&svm->vcpu, tss_selector,
1120 TASK_SWITCH_IRET);
1121 if (svm->vmcb->control.exit_info_2 &
1122 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
1123 return kvm_task_switch(&svm->vcpu, tss_selector,
1124 TASK_SWITCH_JMP);
1125 return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
1039} 1126}
1040 1127
1041static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1128static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
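The reworked task_switch_interception() above keys the switch reason off flag bits in exit_info_2: an IRET flag, a far-JMP flag, otherwise a CALL/interrupt-style switch. The same decode in isolation; the bit positions are placeholders for the SVM_EXITINFOSHIFT_TS_REASON_* constants, not values taken from the patch:

#include <stdio.h>
#include <stdint.h>

#define TS_REASON_IRET_BIT 36          /* placeholder bit positions */
#define TS_REASON_JMP_BIT  38

static const char *ts_reason(uint64_t exit_info_2)
{
        if (exit_info_2 & (1ULL << TS_REASON_IRET_BIT))
                return "IRET";
        if (exit_info_2 & (1ULL << TS_REASON_JMP_BIT))
                return "JMP";
        return "CALL";                 /* default, as in the handler above */
}

int main(void)
{
        printf("%s\n", ts_reason(1ULL << TS_REASON_IRET_BIT));
        printf("%s\n", ts_reason(0));
        return 0;
}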
@@ -1049,7 +1136,7 @@ static int emulate_on_interception(struct vcpu_svm *svm,
1049 struct kvm_run *kvm_run) 1136 struct kvm_run *kvm_run)
1050{ 1137{
1051 if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) 1138 if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
1052 pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__); 1139 pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
1053 return 1; 1140 return 1;
1054} 1141}
1055 1142
@@ -1179,8 +1266,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
1179 svm->vmcb->save.sysenter_esp = data; 1266 svm->vmcb->save.sysenter_esp = data;
1180 break; 1267 break;
1181 case MSR_IA32_DEBUGCTLMSR: 1268 case MSR_IA32_DEBUGCTLMSR:
1182 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", 1269 if (!svm_has(SVM_FEATURE_LBRV)) {
1183 __FUNCTION__, data); 1270 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
1271 __func__, data);
1272 break;
1273 }
1274 if (data & DEBUGCTL_RESERVED_BITS)
1275 return 1;
1276
1277 svm->vmcb->save.dbgctl = data;
1278 if (data & (1ULL<<0))
1279 svm_enable_lbrv(svm);
1280 else
1281 svm_disable_lbrv(svm);
1184 break; 1282 break;
1185 case MSR_K7_EVNTSEL0: 1283 case MSR_K7_EVNTSEL0:
1186 case MSR_K7_EVNTSEL1: 1284 case MSR_K7_EVNTSEL1:
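[Sketch] A rough sketch of the new MSR_IA32_DEBUGCTL write path: writes with reserved bits set are rejected (the mask below is only a placeholder guess; the real DEBUGCTL_RESERVED_BITS define lives elsewhere in svm.c), otherwise the value is mirrored into the VMCB and bit 0 (LBR) decides whether LBR virtualization is switched on.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DBGCTL_LBR            (1ULL << 0)
#define DBGCTL_RESERVED_GUESS 0xffffffffffffffc0ULL	/* assumption for the sketch */

static bool wrmsr_debugctl(uint64_t data, uint64_t *vmcb_dbgctl, bool *lbrv_on)
{
	if (data & DBGCTL_RESERVED_GUESS)
		return false;			/* caller injects #GP */
	*vmcb_dbgctl = data;
	*lbrv_on = data & DBGCTL_LBR;		/* toggle LBR virtualization */
	return true;
}

int main(void)
{
	uint64_t dbgctl;
	bool lbrv;

	printf("%d\n", wrmsr_debugctl(DBGCTL_LBR, &dbgctl, &lbrv)); /* 1: accepted, LBRV on */
	return 0;
}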
@@ -1265,6 +1363,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1265 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 1363 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
1266 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 1364 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
1267 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 1365 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
1366 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
1268 [SVM_EXIT_INTR] = nop_on_interception, 1367 [SVM_EXIT_INTR] = nop_on_interception,
1269 [SVM_EXIT_NMI] = nop_on_interception, 1368 [SVM_EXIT_NMI] = nop_on_interception,
1270 [SVM_EXIT_SMI] = nop_on_interception, 1369 [SVM_EXIT_SMI] = nop_on_interception,
@@ -1290,14 +1389,34 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1290 [SVM_EXIT_WBINVD] = emulate_on_interception, 1389 [SVM_EXIT_WBINVD] = emulate_on_interception,
1291 [SVM_EXIT_MONITOR] = invalid_op_interception, 1390 [SVM_EXIT_MONITOR] = invalid_op_interception,
1292 [SVM_EXIT_MWAIT] = invalid_op_interception, 1391 [SVM_EXIT_MWAIT] = invalid_op_interception,
1392 [SVM_EXIT_NPF] = pf_interception,
1293}; 1393};
1294 1394
1295
1296static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1395static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1297{ 1396{
1298 struct vcpu_svm *svm = to_svm(vcpu); 1397 struct vcpu_svm *svm = to_svm(vcpu);
1299 u32 exit_code = svm->vmcb->control.exit_code; 1398 u32 exit_code = svm->vmcb->control.exit_code;
1300 1399
1400 if (npt_enabled) {
1401 int mmu_reload = 0;
1402 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
1403 svm_set_cr0(vcpu, svm->vmcb->save.cr0);
1404 mmu_reload = 1;
1405 }
1406 vcpu->arch.cr0 = svm->vmcb->save.cr0;
1407 vcpu->arch.cr3 = svm->vmcb->save.cr3;
1408 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
1409 if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
1410 kvm_inject_gp(vcpu, 0);
1411 return 1;
1412 }
1413 }
1414 if (mmu_reload) {
1415 kvm_mmu_reset_context(vcpu);
1416 kvm_mmu_load(vcpu);
1417 }
1418 }
1419
1301 kvm_reput_irq(svm); 1420 kvm_reput_irq(svm);
1302 1421
1303 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 1422 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
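[Sketch] A compile-only sketch of the nested-paging bookkeeping above, with plain parameters standing in for the vcpu/VMCB state: under NPT the guest changes CR0/CR3 without exiting, so each #VMEXIT refreshes the cached copies, re-reads the PDPTRs for 32-bit PAE guests, and rebuilds the MMU context whenever CR0.PG flipped across the exit.

#define X86_CR0_PG (1UL << 31)

struct vmexit_sync {
	unsigned long cr0, cr3;
	int need_mmu_reload;	/* set when CR0.PG changed across the exit */
	int need_pdptr_reload;	/* set for PAE paging without long mode */
};

struct vmexit_sync npt_sync(unsigned long cached_cr0, unsigned long vmcb_cr0,
			    unsigned long vmcb_cr3, int pae, int long_mode)
{
	struct vmexit_sync s = { vmcb_cr0, vmcb_cr3, 0, 0 };

	if ((cached_cr0 ^ vmcb_cr0) & X86_CR0_PG)
		s.need_mmu_reload = 1;
	if ((vmcb_cr0 & X86_CR0_PG) && pae && !long_mode)
		s.need_pdptr_reload = 1;
	return s;
}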
@@ -1308,10 +1427,11 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1308 } 1427 }
1309 1428
1310 if (is_external_interrupt(svm->vmcb->control.exit_int_info) && 1429 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
1311 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) 1430 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
1431 exit_code != SVM_EXIT_NPF)
1312 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " 1432 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
1313 "exit_code 0x%x\n", 1433 "exit_code 0x%x\n",
1314 __FUNCTION__, svm->vmcb->control.exit_int_info, 1434 __func__, svm->vmcb->control.exit_int_info,
1315 exit_code); 1435 exit_code);
1316 1436
1317 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 1437 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
@@ -1364,6 +1484,27 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
1364 svm_inject_irq(svm, irq); 1484 svm_inject_irq(svm, irq);
1365} 1485}
1366 1486
1487static void update_cr8_intercept(struct kvm_vcpu *vcpu)
1488{
1489 struct vcpu_svm *svm = to_svm(vcpu);
1490 struct vmcb *vmcb = svm->vmcb;
1491 int max_irr, tpr;
1492
1493 if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
1494 return;
1495
1496 vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
1497
1498 max_irr = kvm_lapic_find_highest_irr(vcpu);
1499 if (max_irr == -1)
1500 return;
1501
1502 tpr = kvm_lapic_get_cr8(vcpu) << 4;
1503
1504 if (tpr >= (max_irr & 0xf0))
1505 vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
1506}
1507
1367static void svm_intr_assist(struct kvm_vcpu *vcpu) 1508static void svm_intr_assist(struct kvm_vcpu *vcpu)
1368{ 1509{
1369 struct vcpu_svm *svm = to_svm(vcpu); 1510 struct vcpu_svm *svm = to_svm(vcpu);
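[Sketch] A small sketch of the CR8-intercept heuristic above: writes to CR8 only need to trap when the current task priority already blocks the highest pending vector, because only then can lowering the TPR unblock an interrupt the hypervisor has to inject.

#include <stdio.h>

static int need_cr8_intercept(int cr8 /* 0..15 */, int max_irr /* vector or -1 */)
{
	if (max_irr < 0)
		return 0;			/* nothing pending */
	return (cr8 << 4) >= (max_irr & 0xf0);	/* compare priority classes */
}

int main(void)
{
	/* TPR class 3 masks pending vector 0x29 (class 2) -> keep intercepting */
	printf("%d\n", need_cr8_intercept(0x3, 0x29)); /* prints 1 */
	return 0;
}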
@@ -1376,14 +1517,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1376 SVM_EVTINJ_VEC_MASK; 1517 SVM_EVTINJ_VEC_MASK;
1377 vmcb->control.exit_int_info = 0; 1518 vmcb->control.exit_int_info = 0;
1378 svm_inject_irq(svm, intr_vector); 1519 svm_inject_irq(svm, intr_vector);
1379 return; 1520 goto out;
1380 } 1521 }
1381 1522
1382 if (vmcb->control.int_ctl & V_IRQ_MASK) 1523 if (vmcb->control.int_ctl & V_IRQ_MASK)
1383 return; 1524 goto out;
1384 1525
1385 if (!kvm_cpu_has_interrupt(vcpu)) 1526 if (!kvm_cpu_has_interrupt(vcpu))
1386 return; 1527 goto out;
1387 1528
1388 if (!(vmcb->save.rflags & X86_EFLAGS_IF) || 1529 if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
1389 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || 1530 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
@@ -1391,12 +1532,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1391 /* unable to deliver irq, set pending irq */ 1532 /* unable to deliver irq, set pending irq */
1392 vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); 1533 vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
1393 svm_inject_irq(svm, 0x0); 1534 svm_inject_irq(svm, 0x0);
1394 return; 1535 goto out;
1395 } 1536 }
1396 /* Okay, we can deliver the interrupt: grab it and update PIC state. */ 1537 /* Okay, we can deliver the interrupt: grab it and update PIC state. */
1397 intr_vector = kvm_cpu_get_interrupt(vcpu); 1538 intr_vector = kvm_cpu_get_interrupt(vcpu);
1398 svm_inject_irq(svm, intr_vector); 1539 svm_inject_irq(svm, intr_vector);
1399 kvm_timer_intr_post(vcpu, intr_vector); 1540 kvm_timer_intr_post(vcpu, intr_vector);
1541out:
1542 update_cr8_intercept(vcpu);
1400} 1543}
1401 1544
1402static void kvm_reput_irq(struct vcpu_svm *svm) 1545static void kvm_reput_irq(struct vcpu_svm *svm)
@@ -1482,6 +1625,29 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
1482{ 1625{
1483} 1626}
1484 1627
1628static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
1629{
1630 struct vcpu_svm *svm = to_svm(vcpu);
1631
1632 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
1633 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
1634 kvm_lapic_set_tpr(vcpu, cr8);
1635 }
1636}
1637
1638static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
1639{
1640 struct vcpu_svm *svm = to_svm(vcpu);
1641 u64 cr8;
1642
1643 if (!irqchip_in_kernel(vcpu->kvm))
1644 return;
1645
1646 cr8 = kvm_get_cr8(vcpu);
1647 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
1648 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
1649}
1650
1485static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1651static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1486{ 1652{
1487 struct vcpu_svm *svm = to_svm(vcpu); 1653 struct vcpu_svm *svm = to_svm(vcpu);
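[Sketch] For reference, the two sync helpers above just shuttle a 4-bit value: architecturally CR8 holds the upper nibble of the local-APIC TPR, which is also what the VMCB V_TPR field carries. A trivial sketch of the conversion:

/* cr8_from_tpr(0x80) == 0x8; tpr_from_cr8(0x8) == 0x80 */
static unsigned long cr8_from_tpr(unsigned int apic_tpr)
{
	return (apic_tpr >> 4) & 0xf;
}

static unsigned int tpr_from_cr8(unsigned long cr8)
{
	return (unsigned int)(cr8 & 0xf) << 4;
}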
@@ -1491,6 +1657,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1491 1657
1492 pre_svm_run(svm); 1658 pre_svm_run(svm);
1493 1659
1660 sync_lapic_to_cr8(vcpu);
1661
1494 save_host_msrs(vcpu); 1662 save_host_msrs(vcpu);
1495 fs_selector = read_fs(); 1663 fs_selector = read_fs();
1496 gs_selector = read_gs(); 1664 gs_selector = read_gs();
@@ -1499,6 +1667,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1499 svm->host_dr6 = read_dr6(); 1667 svm->host_dr6 = read_dr6();
1500 svm->host_dr7 = read_dr7(); 1668 svm->host_dr7 = read_dr7();
1501 svm->vmcb->save.cr2 = vcpu->arch.cr2; 1669 svm->vmcb->save.cr2 = vcpu->arch.cr2;
1670 /* required for live migration with NPT */
1671 if (npt_enabled)
1672 svm->vmcb->save.cr3 = vcpu->arch.cr3;
1502 1673
1503 if (svm->vmcb->save.dr7 & 0xff) { 1674 if (svm->vmcb->save.dr7 & 0xff) {
1504 write_dr7(0); 1675 write_dr7(0);
@@ -1635,6 +1806,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1635 1806
1636 stgi(); 1807 stgi();
1637 1808
1809 sync_cr8_to_lapic(vcpu);
1810
1638 svm->next_rip = 0; 1811 svm->next_rip = 0;
1639} 1812}
1640 1813
@@ -1642,6 +1815,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
1642{ 1815{
1643 struct vcpu_svm *svm = to_svm(vcpu); 1816 struct vcpu_svm *svm = to_svm(vcpu);
1644 1817
1818 if (npt_enabled) {
1819 svm->vmcb->control.nested_cr3 = root;
1820 force_new_asid(vcpu);
1821 return;
1822 }
1823
1645 svm->vmcb->save.cr3 = root; 1824 svm->vmcb->save.cr3 = root;
1646 force_new_asid(vcpu); 1825 force_new_asid(vcpu);
1647 1826
@@ -1709,6 +1888,7 @@ static struct kvm_x86_ops svm_x86_ops = {
1709 .get_segment_base = svm_get_segment_base, 1888 .get_segment_base = svm_get_segment_base,
1710 .get_segment = svm_get_segment, 1889 .get_segment = svm_get_segment,
1711 .set_segment = svm_set_segment, 1890 .set_segment = svm_set_segment,
1891 .get_cpl = svm_get_cpl,
1712 .get_cs_db_l_bits = kvm_get_cs_db_l_bits, 1892 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
1713 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, 1893 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
1714 .set_cr0 = svm_set_cr0, 1894 .set_cr0 = svm_set_cr0,
diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h
index 5fd50491b555..1b8afa78e869 100644
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/kvm/svm.h
@@ -238,6 +238,9 @@ struct __attribute__ ((__packed__)) vmcb {
238#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID 238#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
239#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR 239#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
240 240
241#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
242#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
243
241#define SVM_EXIT_READ_CR0 0x000 244#define SVM_EXIT_READ_CR0 0x000
242#define SVM_EXIT_READ_CR3 0x003 245#define SVM_EXIT_READ_CR3 0x003
243#define SVM_EXIT_READ_CR4 0x004 246#define SVM_EXIT_READ_CR4 0x004
diff --git a/arch/x86/kvm/tss.h b/arch/x86/kvm/tss.h
new file mode 100644
index 000000000000..622aa10f692f
--- /dev/null
+++ b/arch/x86/kvm/tss.h
@@ -0,0 +1,59 @@
1#ifndef __TSS_SEGMENT_H
2#define __TSS_SEGMENT_H
3
4struct tss_segment_32 {
5 u32 prev_task_link;
6 u32 esp0;
7 u32 ss0;
8 u32 esp1;
9 u32 ss1;
10 u32 esp2;
11 u32 ss2;
12 u32 cr3;
13 u32 eip;
14 u32 eflags;
15 u32 eax;
16 u32 ecx;
17 u32 edx;
18 u32 ebx;
19 u32 esp;
20 u32 ebp;
21 u32 esi;
22 u32 edi;
23 u32 es;
24 u32 cs;
25 u32 ss;
26 u32 ds;
27 u32 fs;
28 u32 gs;
29 u32 ldt_selector;
30 u16 t;
31 u16 io_map;
32};
33
34struct tss_segment_16 {
35 u16 prev_task_link;
36 u16 sp0;
37 u16 ss0;
38 u16 sp1;
39 u16 ss1;
40 u16 sp2;
41 u16 ss2;
42 u16 ip;
43 u16 flag;
44 u16 ax;
45 u16 cx;
46 u16 dx;
47 u16 bx;
48 u16 sp;
49 u16 bp;
50 u16 si;
51 u16 di;
52 u16 es;
53 u16 cs;
54 u16 ss;
55 u16 ds;
56 u16 ldt;
57};
58
59#endif
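[Sketch] A quick sanity check on the new TSS layouts (a user-space sketch with simplified field arrays, assuming the header's structs stay packed): the architectural 32-bit TSS is 104 bytes and the 16-bit TSS is 44 bytes, which is exactly what the field lists above add up to.

#include <stdint.h>

struct tss32_sketch { uint32_t f[25]; uint16_t t, io_map; } __attribute__((packed));
struct tss16_sketch { uint16_t f[22]; } __attribute__((packed));

_Static_assert(sizeof(struct tss32_sketch) == 104, "32-bit TSS is 104 bytes");
_Static_assert(sizeof(struct tss16_sketch) == 44,  "16-bit TSS is 44 bytes");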
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8e1462880d1f..8e5d6645b90d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -17,7 +17,6 @@
17 17
18#include "irq.h" 18#include "irq.h"
19#include "vmx.h" 19#include "vmx.h"
20#include "segment_descriptor.h"
21#include "mmu.h" 20#include "mmu.h"
22 21
23#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
@@ -37,6 +36,12 @@ MODULE_LICENSE("GPL");
37static int bypass_guest_pf = 1; 36static int bypass_guest_pf = 1;
38module_param(bypass_guest_pf, bool, 0); 37module_param(bypass_guest_pf, bool, 0);
39 38
39static int enable_vpid = 1;
40module_param(enable_vpid, bool, 0);
41
42static int flexpriority_enabled = 1;
43module_param(flexpriority_enabled, bool, 0);
44
40struct vmcs { 45struct vmcs {
41 u32 revision_id; 46 u32 revision_id;
42 u32 abort; 47 u32 abort;
@@ -71,6 +76,7 @@ struct vcpu_vmx {
71 unsigned rip; 76 unsigned rip;
72 } irq; 77 } irq;
73 } rmode; 78 } rmode;
79 int vpid;
74}; 80};
75 81
76static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 82static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -85,6 +91,10 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
85 91
86static struct page *vmx_io_bitmap_a; 92static struct page *vmx_io_bitmap_a;
87static struct page *vmx_io_bitmap_b; 93static struct page *vmx_io_bitmap_b;
94static struct page *vmx_msr_bitmap;
95
96static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
97static DEFINE_SPINLOCK(vmx_vpid_lock);
88 98
89static struct vmcs_config { 99static struct vmcs_config {
90 int size; 100 int size;
@@ -176,6 +186,11 @@ static inline int is_external_interrupt(u32 intr_info)
176 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 186 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
177} 187}
178 188
189static inline int cpu_has_vmx_msr_bitmap(void)
190{
191 return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS);
192}
193
179static inline int cpu_has_vmx_tpr_shadow(void) 194static inline int cpu_has_vmx_tpr_shadow(void)
180{ 195{
181 return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); 196 return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW);
@@ -194,8 +209,9 @@ static inline int cpu_has_secondary_exec_ctrls(void)
194 209
195static inline bool cpu_has_vmx_virtualize_apic_accesses(void) 210static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
196{ 211{
197 return (vmcs_config.cpu_based_2nd_exec_ctrl & 212 return flexpriority_enabled
198 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); 213 && (vmcs_config.cpu_based_2nd_exec_ctrl &
214 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
199} 215}
200 216
201static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 217static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
@@ -204,6 +220,12 @@ static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
204 (irqchip_in_kernel(kvm))); 220 (irqchip_in_kernel(kvm)));
205} 221}
206 222
223static inline int cpu_has_vmx_vpid(void)
224{
225 return (vmcs_config.cpu_based_2nd_exec_ctrl &
226 SECONDARY_EXEC_ENABLE_VPID);
227}
228
207static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) 229static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
208{ 230{
209 int i; 231 int i;
@@ -214,6 +236,20 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
214 return -1; 236 return -1;
215} 237}
216 238
239static inline void __invvpid(int ext, u16 vpid, gva_t gva)
240{
241 struct {
242 u64 vpid : 16;
243 u64 rsvd : 48;
244 u64 gva;
245 } operand = { vpid, 0, gva };
246
247 asm volatile (ASM_VMX_INVVPID
248 /* CF==1 or ZF==1 --> rc = -1 */
249 "; ja 1f ; ud2 ; 1:"
250 : : "a"(&operand), "c"(ext) : "cc", "memory");
251}
252
217static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) 253static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
218{ 254{
219 int i; 255 int i;
@@ -257,6 +293,14 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
257 vmx->launched = 0; 293 vmx->launched = 0;
258} 294}
259 295
296static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
297{
298 if (vmx->vpid == 0)
299 return;
300
301 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
302}
303
260static unsigned long vmcs_readl(unsigned long field) 304static unsigned long vmcs_readl(unsigned long field)
261{ 305{
262 unsigned long value; 306 unsigned long value;
@@ -353,7 +397,7 @@ static void reload_tss(void)
353 * VT restores TR but not its size. Useless. 397 * VT restores TR but not its size. Useless.
354 */ 398 */
355 struct descriptor_table gdt; 399 struct descriptor_table gdt;
356 struct segment_descriptor *descs; 400 struct desc_struct *descs;
357 401
358 get_gdt(&gdt); 402 get_gdt(&gdt);
359 descs = (void *)gdt.base; 403 descs = (void *)gdt.base;
@@ -485,11 +529,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
485{ 529{
486 struct vcpu_vmx *vmx = to_vmx(vcpu); 530 struct vcpu_vmx *vmx = to_vmx(vcpu);
487 u64 phys_addr = __pa(vmx->vmcs); 531 u64 phys_addr = __pa(vmx->vmcs);
488 u64 tsc_this, delta; 532 u64 tsc_this, delta, new_offset;
489 533
490 if (vcpu->cpu != cpu) { 534 if (vcpu->cpu != cpu) {
491 vcpu_clear(vmx); 535 vcpu_clear(vmx);
492 kvm_migrate_apic_timer(vcpu); 536 kvm_migrate_apic_timer(vcpu);
537 vpid_sync_vcpu_all(vmx);
493 } 538 }
494 539
495 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { 540 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
@@ -524,8 +569,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
524 * Make sure the time stamp counter is monotonous. 569 * Make sure the time stamp counter is monotonous.
525 */ 570 */
526 rdtscll(tsc_this); 571 rdtscll(tsc_this);
527 delta = vcpu->arch.host_tsc - tsc_this; 572 if (tsc_this < vcpu->arch.host_tsc) {
528 vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta); 573 delta = vcpu->arch.host_tsc - tsc_this;
574 new_offset = vmcs_read64(TSC_OFFSET) + delta;
575 vmcs_write64(TSC_OFFSET, new_offset);
576 }
529 } 577 }
530} 578}
531 579
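[Sketch] A worked sketch of the monotonic-TSC rule introduced above: when the vCPU migrates to a CPU whose TSC is behind, the guest TSC offset grows by the difference so guest time never runs backwards; a faster TSC needs no adjustment, since a forward jump is harmless.

#include <stdint.h>
#include <stdio.h>

static uint64_t adjust_tsc_offset(uint64_t old_host_tsc, uint64_t new_host_tsc,
				  uint64_t tsc_offset)
{
	if (new_host_tsc < old_host_tsc)
		tsc_offset += old_host_tsc - new_host_tsc;
	return tsc_offset;
}

int main(void)
{
	/* old CPU read 1,000,000; new CPU reads 400,000 -> offset grows by 600,000 */
	printf("%llu\n", (unsigned long long)adjust_tsc_offset(1000000, 400000, 0));
	return 0;
}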
@@ -596,7 +644,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
596{ 644{
597 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 645 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
598 nr | INTR_TYPE_EXCEPTION 646 nr | INTR_TYPE_EXCEPTION
599 | (has_error_code ? INTR_INFO_DELIEVER_CODE_MASK : 0) 647 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
600 | INTR_INFO_VALID_MASK); 648 | INTR_INFO_VALID_MASK);
601 if (has_error_code) 649 if (has_error_code)
602 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); 650 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
@@ -959,6 +1007,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
959 CPU_BASED_MOV_DR_EXITING | 1007 CPU_BASED_MOV_DR_EXITING |
960 CPU_BASED_USE_TSC_OFFSETING; 1008 CPU_BASED_USE_TSC_OFFSETING;
961 opt = CPU_BASED_TPR_SHADOW | 1009 opt = CPU_BASED_TPR_SHADOW |
1010 CPU_BASED_USE_MSR_BITMAPS |
962 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 1011 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
963 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, 1012 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
964 &_cpu_based_exec_control) < 0) 1013 &_cpu_based_exec_control) < 0)
@@ -971,7 +1020,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
971 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 1020 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
972 min = 0; 1021 min = 0;
973 opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 1022 opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
974 SECONDARY_EXEC_WBINVD_EXITING; 1023 SECONDARY_EXEC_WBINVD_EXITING |
1024 SECONDARY_EXEC_ENABLE_VPID;
975 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, 1025 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
976 &_cpu_based_2nd_exec_control) < 0) 1026 &_cpu_based_2nd_exec_control) < 0)
977 return -EIO; 1027 return -EIO;
@@ -1080,6 +1130,10 @@ static __init int hardware_setup(void)
1080{ 1130{
1081 if (setup_vmcs_config(&vmcs_config) < 0) 1131 if (setup_vmcs_config(&vmcs_config) < 0)
1082 return -EIO; 1132 return -EIO;
1133
1134 if (boot_cpu_has(X86_FEATURE_NX))
1135 kvm_enable_efer_bits(EFER_NX);
1136
1083 return alloc_kvm_area(); 1137 return alloc_kvm_area();
1084} 1138}
1085 1139
@@ -1214,7 +1268,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
1214 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); 1268 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
1215 if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { 1269 if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
1216 printk(KERN_DEBUG "%s: tss fixup for long mode. \n", 1270 printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
1217 __FUNCTION__); 1271 __func__);
1218 vmcs_write32(GUEST_TR_AR_BYTES, 1272 vmcs_write32(GUEST_TR_AR_BYTES,
1219 (guest_tr_ar & ~AR_TYPE_MASK) 1273 (guest_tr_ar & ~AR_TYPE_MASK)
1220 | AR_TYPE_BUSY_64_TSS); 1274 | AR_TYPE_BUSY_64_TSS);
@@ -1239,6 +1293,11 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
1239 1293
1240#endif 1294#endif
1241 1295
1296static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1297{
1298 vpid_sync_vcpu_all(to_vmx(vcpu));
1299}
1300
1242static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1301static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1243{ 1302{
1244 vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; 1303 vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK;
@@ -1275,6 +1334,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1275 1334
1276static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 1335static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1277{ 1336{
1337 vmx_flush_tlb(vcpu);
1278 vmcs_writel(GUEST_CR3, cr3); 1338 vmcs_writel(GUEST_CR3, cr3);
1279 if (vcpu->arch.cr0 & X86_CR0_PE) 1339 if (vcpu->arch.cr0 & X86_CR0_PE)
1280 vmx_fpu_deactivate(vcpu); 1340 vmx_fpu_deactivate(vcpu);
@@ -1288,14 +1348,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1288 vcpu->arch.cr4 = cr4; 1348 vcpu->arch.cr4 = cr4;
1289} 1349}
1290 1350
1291#ifdef CONFIG_X86_64
1292
1293static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) 1351static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1294{ 1352{
1295 struct vcpu_vmx *vmx = to_vmx(vcpu); 1353 struct vcpu_vmx *vmx = to_vmx(vcpu);
1296 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); 1354 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
1297 1355
1298 vcpu->arch.shadow_efer = efer; 1356 vcpu->arch.shadow_efer = efer;
1357 if (!msr)
1358 return;
1299 if (efer & EFER_LMA) { 1359 if (efer & EFER_LMA) {
1300 vmcs_write32(VM_ENTRY_CONTROLS, 1360 vmcs_write32(VM_ENTRY_CONTROLS,
1301 vmcs_read32(VM_ENTRY_CONTROLS) | 1361 vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1312,8 +1372,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1312 setup_msrs(vmx); 1372 setup_msrs(vmx);
1313} 1373}
1314 1374
1315#endif
1316
1317static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) 1375static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1318{ 1376{
1319 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 1377 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1344,6 +1402,20 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
1344 var->unusable = (ar >> 16) & 1; 1402 var->unusable = (ar >> 16) & 1;
1345} 1403}
1346 1404
1405static int vmx_get_cpl(struct kvm_vcpu *vcpu)
1406{
1407 struct kvm_segment kvm_seg;
1408
1409 if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
1410 return 0;
1411
1412 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
1413 return 3;
1414
1415 vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS);
1416 return kvm_seg.selector & 3;
1417}
1418
1347static u32 vmx_segment_access_rights(struct kvm_segment *var) 1419static u32 vmx_segment_access_rights(struct kvm_segment *var)
1348{ 1420{
1349 u32 ar; 1421 u32 ar;
@@ -1433,7 +1505,6 @@ static int init_rmode_tss(struct kvm *kvm)
1433 int ret = 0; 1505 int ret = 0;
1434 int r; 1506 int r;
1435 1507
1436 down_read(&kvm->slots_lock);
1437 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); 1508 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
1438 if (r < 0) 1509 if (r < 0)
1439 goto out; 1510 goto out;
@@ -1456,7 +1527,6 @@ static int init_rmode_tss(struct kvm *kvm)
1456 1527
1457 ret = 1; 1528 ret = 1;
1458out: 1529out:
1459 up_read(&kvm->slots_lock);
1460 return ret; 1530 return ret;
1461} 1531}
1462 1532
@@ -1494,6 +1564,46 @@ out:
1494 return r; 1564 return r;
1495} 1565}
1496 1566
1567static void allocate_vpid(struct vcpu_vmx *vmx)
1568{
1569 int vpid;
1570
1571 vmx->vpid = 0;
1572 if (!enable_vpid || !cpu_has_vmx_vpid())
1573 return;
1574 spin_lock(&vmx_vpid_lock);
1575 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
1576 if (vpid < VMX_NR_VPIDS) {
1577 vmx->vpid = vpid;
1578 __set_bit(vpid, vmx_vpid_bitmap);
1579 }
1580 spin_unlock(&vmx_vpid_lock);
1581}
1582
1583void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
1584{
1585 void *va;
1586
1587 if (!cpu_has_vmx_msr_bitmap())
1588 return;
1589
1590 /*
1591 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
1592 * have the write-low and read-high bitmap offsets the wrong way round.
1593 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
1594 */
1595 va = kmap(msr_bitmap);
1596 if (msr <= 0x1fff) {
1597 __clear_bit(msr, va + 0x000); /* read-low */
1598 __clear_bit(msr, va + 0x800); /* write-low */
1599 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
1600 msr &= 0x1fff;
1601 __clear_bit(msr, va + 0x400); /* read-high */
1602 __clear_bit(msr, va + 0xc00); /* write-high */
1603 }
1604 kunmap(msr_bitmap);
1605}
1606
1497/* 1607/*
1498 * Sets up the vmcs for emulated real mode. 1608 * Sets up the vmcs for emulated real mode.
1499 */ 1609 */
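[Sketch] A user-space sketch of the MSR-bitmap geometry that vmx_disable_intercept_for_msr() above relies on: the 4 KiB page is split into four 1 KiB regions (read-low at 0x000, read-high at 0x400, write-low at 0x800, write-high at 0xc00), one bit per MSR for the two controllable ranges; MSR_FS_BASE (0xc0000100) is used as the example.

#include <stdint.h>
#include <stdio.h>

static long msr_bitmap_bit(uint32_t msr, int write)
{
	long base;

	if (msr <= 0x1fff)
		base = (write ? 0x800 : 0x000) * 8;	/* low range */
	else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		base = (write ? 0xc00 : 0x400) * 8;	/* high range */
	else
		return -1;				/* not controllable */
	return base + (msr & 0x1fff);
}

int main(void)
{
	/* read bit for MSR_FS_BASE sits in the read-high region: 0x400*8 + 0x100 */
	printf("%ld\n", msr_bitmap_bit(0xc0000100, 0));	/* prints 8448 */
	return 0;
}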
@@ -1511,6 +1621,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1511 vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); 1621 vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
1512 vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); 1622 vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
1513 1623
1624 if (cpu_has_vmx_msr_bitmap())
1625 vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
1626
1514 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ 1627 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
1515 1628
1516 /* Control */ 1629 /* Control */
@@ -1532,6 +1645,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1532 if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) 1645 if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
1533 exec_control &= 1646 exec_control &=
1534 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 1647 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
1648 if (vmx->vpid == 0)
1649 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
1535 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 1650 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
1536 } 1651 }
1537 1652
@@ -1613,6 +1728,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1613 u64 msr; 1728 u64 msr;
1614 int ret; 1729 int ret;
1615 1730
1731 down_read(&vcpu->kvm->slots_lock);
1616 if (!init_rmode_tss(vmx->vcpu.kvm)) { 1732 if (!init_rmode_tss(vmx->vcpu.kvm)) {
1617 ret = -ENOMEM; 1733 ret = -ENOMEM;
1618 goto out; 1734 goto out;
@@ -1621,7 +1737,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1621 vmx->vcpu.arch.rmode.active = 0; 1737 vmx->vcpu.arch.rmode.active = 0;
1622 1738
1623 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 1739 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
1624 set_cr8(&vmx->vcpu, 0); 1740 kvm_set_cr8(&vmx->vcpu, 0);
1625 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 1741 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1626 if (vmx->vcpu.vcpu_id == 0) 1742 if (vmx->vcpu.vcpu_id == 0)
1627 msr |= MSR_IA32_APICBASE_BSP; 1743 msr |= MSR_IA32_APICBASE_BSP;
@@ -1704,18 +1820,22 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1704 vmcs_write64(APIC_ACCESS_ADDR, 1820 vmcs_write64(APIC_ACCESS_ADDR,
1705 page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); 1821 page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
1706 1822
1823 if (vmx->vpid != 0)
1824 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
1825
1707 vmx->vcpu.arch.cr0 = 0x60000010; 1826 vmx->vcpu.arch.cr0 = 0x60000010;
1708 vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ 1827 vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
1709 vmx_set_cr4(&vmx->vcpu, 0); 1828 vmx_set_cr4(&vmx->vcpu, 0);
1710#ifdef CONFIG_X86_64
1711 vmx_set_efer(&vmx->vcpu, 0); 1829 vmx_set_efer(&vmx->vcpu, 0);
1712#endif
1713 vmx_fpu_activate(&vmx->vcpu); 1830 vmx_fpu_activate(&vmx->vcpu);
1714 update_exception_bitmap(&vmx->vcpu); 1831 update_exception_bitmap(&vmx->vcpu);
1715 1832
1716 return 0; 1833 vpid_sync_vcpu_all(vmx);
1834
1835 ret = 0;
1717 1836
1718out: 1837out:
1838 up_read(&vcpu->kvm->slots_lock);
1719 return ret; 1839 return ret;
1720} 1840}
1721 1841
@@ -1723,6 +1843,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
1723{ 1843{
1724 struct vcpu_vmx *vmx = to_vmx(vcpu); 1844 struct vcpu_vmx *vmx = to_vmx(vcpu);
1725 1845
1846 KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
1847
1726 if (vcpu->arch.rmode.active) { 1848 if (vcpu->arch.rmode.active) {
1727 vmx->rmode.irq.pending = true; 1849 vmx->rmode.irq.pending = true;
1728 vmx->rmode.irq.vector = irq; 1850 vmx->rmode.irq.vector = irq;
@@ -1844,7 +1966,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1844 if ((vect_info & VECTORING_INFO_VALID_MASK) && 1966 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
1845 !is_page_fault(intr_info)) 1967 !is_page_fault(intr_info))
1846 printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " 1968 printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
1847 "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); 1969 "intr info 0x%x\n", __func__, vect_info, intr_info);
1848 1970
1849 if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { 1971 if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
1850 int irq = vect_info & VECTORING_INFO_VECTOR_MASK; 1972 int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
@@ -1869,10 +1991,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1869 1991
1870 error_code = 0; 1992 error_code = 0;
1871 rip = vmcs_readl(GUEST_RIP); 1993 rip = vmcs_readl(GUEST_RIP);
1872 if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) 1994 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
1873 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 1995 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
1874 if (is_page_fault(intr_info)) { 1996 if (is_page_fault(intr_info)) {
1875 cr2 = vmcs_readl(EXIT_QUALIFICATION); 1997 cr2 = vmcs_readl(EXIT_QUALIFICATION);
1998 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
1999 (u32)((u64)cr2 >> 32), handler);
1876 return kvm_mmu_page_fault(vcpu, cr2, error_code); 2000 return kvm_mmu_page_fault(vcpu, cr2, error_code);
1877 } 2001 }
1878 2002
@@ -1901,6 +2025,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
1901 struct kvm_run *kvm_run) 2025 struct kvm_run *kvm_run)
1902{ 2026{
1903 ++vcpu->stat.irq_exits; 2027 ++vcpu->stat.irq_exits;
2028 KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler);
1904 return 1; 2029 return 1;
1905} 2030}
1906 2031
@@ -1958,25 +2083,27 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1958 reg = (exit_qualification >> 8) & 15; 2083 reg = (exit_qualification >> 8) & 15;
1959 switch ((exit_qualification >> 4) & 3) { 2084 switch ((exit_qualification >> 4) & 3) {
1960 case 0: /* mov to cr */ 2085 case 0: /* mov to cr */
2086 KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg],
2087 (u32)((u64)vcpu->arch.regs[reg] >> 32), handler);
1961 switch (cr) { 2088 switch (cr) {
1962 case 0: 2089 case 0:
1963 vcpu_load_rsp_rip(vcpu); 2090 vcpu_load_rsp_rip(vcpu);
1964 set_cr0(vcpu, vcpu->arch.regs[reg]); 2091 kvm_set_cr0(vcpu, vcpu->arch.regs[reg]);
1965 skip_emulated_instruction(vcpu); 2092 skip_emulated_instruction(vcpu);
1966 return 1; 2093 return 1;
1967 case 3: 2094 case 3:
1968 vcpu_load_rsp_rip(vcpu); 2095 vcpu_load_rsp_rip(vcpu);
1969 set_cr3(vcpu, vcpu->arch.regs[reg]); 2096 kvm_set_cr3(vcpu, vcpu->arch.regs[reg]);
1970 skip_emulated_instruction(vcpu); 2097 skip_emulated_instruction(vcpu);
1971 return 1; 2098 return 1;
1972 case 4: 2099 case 4:
1973 vcpu_load_rsp_rip(vcpu); 2100 vcpu_load_rsp_rip(vcpu);
1974 set_cr4(vcpu, vcpu->arch.regs[reg]); 2101 kvm_set_cr4(vcpu, vcpu->arch.regs[reg]);
1975 skip_emulated_instruction(vcpu); 2102 skip_emulated_instruction(vcpu);
1976 return 1; 2103 return 1;
1977 case 8: 2104 case 8:
1978 vcpu_load_rsp_rip(vcpu); 2105 vcpu_load_rsp_rip(vcpu);
1979 set_cr8(vcpu, vcpu->arch.regs[reg]); 2106 kvm_set_cr8(vcpu, vcpu->arch.regs[reg]);
1980 skip_emulated_instruction(vcpu); 2107 skip_emulated_instruction(vcpu);
1981 if (irqchip_in_kernel(vcpu->kvm)) 2108 if (irqchip_in_kernel(vcpu->kvm))
1982 return 1; 2109 return 1;
@@ -1990,6 +2117,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1990 vcpu->arch.cr0 &= ~X86_CR0_TS; 2117 vcpu->arch.cr0 &= ~X86_CR0_TS;
1991 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); 2118 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
1992 vmx_fpu_activate(vcpu); 2119 vmx_fpu_activate(vcpu);
2120 KVMTRACE_0D(CLTS, vcpu, handler);
1993 skip_emulated_instruction(vcpu); 2121 skip_emulated_instruction(vcpu);
1994 return 1; 2122 return 1;
1995 case 1: /*mov from cr*/ 2123 case 1: /*mov from cr*/
@@ -1998,18 +2126,24 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1998 vcpu_load_rsp_rip(vcpu); 2126 vcpu_load_rsp_rip(vcpu);
1999 vcpu->arch.regs[reg] = vcpu->arch.cr3; 2127 vcpu->arch.regs[reg] = vcpu->arch.cr3;
2000 vcpu_put_rsp_rip(vcpu); 2128 vcpu_put_rsp_rip(vcpu);
2129 KVMTRACE_3D(CR_READ, vcpu, (u32)cr,
2130 (u32)vcpu->arch.regs[reg],
2131 (u32)((u64)vcpu->arch.regs[reg] >> 32),
2132 handler);
2001 skip_emulated_instruction(vcpu); 2133 skip_emulated_instruction(vcpu);
2002 return 1; 2134 return 1;
2003 case 8: 2135 case 8:
2004 vcpu_load_rsp_rip(vcpu); 2136 vcpu_load_rsp_rip(vcpu);
2005 vcpu->arch.regs[reg] = get_cr8(vcpu); 2137 vcpu->arch.regs[reg] = kvm_get_cr8(vcpu);
2006 vcpu_put_rsp_rip(vcpu); 2138 vcpu_put_rsp_rip(vcpu);
2139 KVMTRACE_2D(CR_READ, vcpu, (u32)cr,
2140 (u32)vcpu->arch.regs[reg], handler);
2007 skip_emulated_instruction(vcpu); 2141 skip_emulated_instruction(vcpu);
2008 return 1; 2142 return 1;
2009 } 2143 }
2010 break; 2144 break;
2011 case 3: /* lmsw */ 2145 case 3: /* lmsw */
2012 lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); 2146 kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
2013 2147
2014 skip_emulated_instruction(vcpu); 2148 skip_emulated_instruction(vcpu);
2015 return 1; 2149 return 1;
@@ -2049,6 +2183,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2049 val = 0; 2183 val = 0;
2050 } 2184 }
2051 vcpu->arch.regs[reg] = val; 2185 vcpu->arch.regs[reg] = val;
2186 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
2052 } else { 2187 } else {
2053 /* mov to dr */ 2188 /* mov to dr */
2054 } 2189 }
@@ -2073,6 +2208,9 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2073 return 1; 2208 return 1;
2074 } 2209 }
2075 2210
2211 KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32),
2212 handler);
2213
2076 /* FIXME: handling of bits 32:63 of rax, rdx */ 2214 /* FIXME: handling of bits 32:63 of rax, rdx */
2077 vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; 2215 vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
2078 vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u; 2216 vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
@@ -2086,6 +2224,9 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2086 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) 2224 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
2087 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); 2225 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
2088 2226
2227 KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32),
2228 handler);
2229
2089 if (vmx_set_msr(vcpu, ecx, data) != 0) { 2230 if (vmx_set_msr(vcpu, ecx, data) != 0) {
2090 kvm_inject_gp(vcpu, 0); 2231 kvm_inject_gp(vcpu, 0);
2091 return 1; 2232 return 1;
@@ -2110,6 +2251,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2110 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 2251 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2111 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; 2252 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2112 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2253 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2254
2255 KVMTRACE_0D(PEND_INTR, vcpu, handler);
2256
2113 /* 2257 /*
2114 * If the user space waits to inject interrupts, exit as soon as 2258 * If the user space waits to inject interrupts, exit as soon as
2115 * possible 2259 * possible
@@ -2152,6 +2296,8 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2152 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 2296 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2153 offset = exit_qualification & 0xffful; 2297 offset = exit_qualification & 0xffful;
2154 2298
2299 KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler);
2300
2155 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 2301 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2156 2302
2157 if (er != EMULATE_DONE) { 2303 if (er != EMULATE_DONE) {
@@ -2163,6 +2309,20 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2163 return 1; 2309 return 1;
2164} 2310}
2165 2311
2312static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2313{
2314 unsigned long exit_qualification;
2315 u16 tss_selector;
2316 int reason;
2317
2318 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2319
2320 reason = (u32)exit_qualification >> 30;
2321 tss_selector = exit_qualification;
2322
2323 return kvm_task_switch(vcpu, tss_selector, reason);
2324}
2325
2166/* 2326/*
2167 * The exit handlers return 1 if the exit was handled fully and guest execution 2327 * The exit handlers return 1 if the exit was handled fully and guest execution
2168 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 2328 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
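[Sketch] A hedged sketch of the exit-qualification decode in handle_task_switch() above: the new TSS selector is in bits 15:0 and the switch source in bits 31:30 (per the SDM: 0 = CALL, 1 = IRET, 2 = JMP, 3 = task gate), which is presumably why the reason value can be handed straight to kvm_task_switch().

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t exit_qualification = (2ULL << 30) | 0x50;	/* JMP to selector 0x50 */
	unsigned int reason = (uint32_t)exit_qualification >> 30;
	uint16_t sel = (uint16_t)exit_qualification;

	printf("reason=%u selector=0x%x\n", reason, sel);	/* reason=2 selector=0x50 */
	return 0;
}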
@@ -2185,6 +2345,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
2185 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 2345 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
2186 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 2346 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
2187 [EXIT_REASON_WBINVD] = handle_wbinvd, 2347 [EXIT_REASON_WBINVD] = handle_wbinvd,
2348 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
2188}; 2349};
2189 2350
2190static const int kvm_vmx_max_exit_handlers = 2351static const int kvm_vmx_max_exit_handlers =
@@ -2200,6 +2361,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2200 struct vcpu_vmx *vmx = to_vmx(vcpu); 2361 struct vcpu_vmx *vmx = to_vmx(vcpu);
2201 u32 vectoring_info = vmx->idt_vectoring_info; 2362 u32 vectoring_info = vmx->idt_vectoring_info;
2202 2363
2364 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
2365 (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
2366
2203 if (unlikely(vmx->fail)) { 2367 if (unlikely(vmx->fail)) {
2204 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2368 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2205 kvm_run->fail_entry.hardware_entry_failure_reason 2369 kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2210,7 +2374,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2210 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 2374 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
2211 exit_reason != EXIT_REASON_EXCEPTION_NMI) 2375 exit_reason != EXIT_REASON_EXCEPTION_NMI)
2212 printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 2376 printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
2213 "exit reason is 0x%x\n", __FUNCTION__, exit_reason); 2377 "exit reason is 0x%x\n", __func__, exit_reason);
2214 if (exit_reason < kvm_vmx_max_exit_handlers 2378 if (exit_reason < kvm_vmx_max_exit_handlers
2215 && kvm_vmx_exit_handlers[exit_reason]) 2379 && kvm_vmx_exit_handlers[exit_reason])
2216 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); 2380 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -2221,10 +2385,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2221 return 0; 2385 return 0;
2222} 2386}
2223 2387
2224static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
2225{
2226}
2227
2228static void update_tpr_threshold(struct kvm_vcpu *vcpu) 2388static void update_tpr_threshold(struct kvm_vcpu *vcpu)
2229{ 2389{
2230 int max_irr, tpr; 2390 int max_irr, tpr;
@@ -2285,11 +2445,13 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
2285 return; 2445 return;
2286 } 2446 }
2287 2447
2448 KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
2449
2288 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); 2450 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
2289 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 2451 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2290 vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); 2452 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
2291 2453
2292 if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK)) 2454 if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
2293 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 2455 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
2294 vmcs_read32(IDT_VECTORING_ERROR_CODE)); 2456 vmcs_read32(IDT_VECTORING_ERROR_CODE));
2295 if (unlikely(has_ext_irq)) 2457 if (unlikely(has_ext_irq))
@@ -2470,8 +2632,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2470 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 2632 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
2471 2633
2472 /* We need to handle NMIs before interrupts are enabled */ 2634 /* We need to handle NMIs before interrupts are enabled */
2473 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ 2635 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
2636 KVMTRACE_0D(NMI, vcpu, handler);
2474 asm("int $2"); 2637 asm("int $2");
2638 }
2475} 2639}
2476 2640
2477static void vmx_free_vmcs(struct kvm_vcpu *vcpu) 2641static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
@@ -2489,6 +2653,10 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
2489{ 2653{
2490 struct vcpu_vmx *vmx = to_vmx(vcpu); 2654 struct vcpu_vmx *vmx = to_vmx(vcpu);
2491 2655
2656 spin_lock(&vmx_vpid_lock);
2657 if (vmx->vpid != 0)
2658 __clear_bit(vmx->vpid, vmx_vpid_bitmap);
2659 spin_unlock(&vmx_vpid_lock);
2492 vmx_free_vmcs(vcpu); 2660 vmx_free_vmcs(vcpu);
2493 kfree(vmx->host_msrs); 2661 kfree(vmx->host_msrs);
2494 kfree(vmx->guest_msrs); 2662 kfree(vmx->guest_msrs);
@@ -2505,6 +2673,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2505 if (!vmx) 2673 if (!vmx)
2506 return ERR_PTR(-ENOMEM); 2674 return ERR_PTR(-ENOMEM);
2507 2675
2676 allocate_vpid(vmx);
2677
2508 err = kvm_vcpu_init(&vmx->vcpu, kvm, id); 2678 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
2509 if (err) 2679 if (err)
2510 goto free_vcpu; 2680 goto free_vcpu;
@@ -2591,14 +2761,13 @@ static struct kvm_x86_ops vmx_x86_ops = {
2591 .get_segment_base = vmx_get_segment_base, 2761 .get_segment_base = vmx_get_segment_base,
2592 .get_segment = vmx_get_segment, 2762 .get_segment = vmx_get_segment,
2593 .set_segment = vmx_set_segment, 2763 .set_segment = vmx_set_segment,
2764 .get_cpl = vmx_get_cpl,
2594 .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 2765 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
2595 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, 2766 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
2596 .set_cr0 = vmx_set_cr0, 2767 .set_cr0 = vmx_set_cr0,
2597 .set_cr3 = vmx_set_cr3, 2768 .set_cr3 = vmx_set_cr3,
2598 .set_cr4 = vmx_set_cr4, 2769 .set_cr4 = vmx_set_cr4,
2599#ifdef CONFIG_X86_64
2600 .set_efer = vmx_set_efer, 2770 .set_efer = vmx_set_efer,
2601#endif
2602 .get_idt = vmx_get_idt, 2771 .get_idt = vmx_get_idt,
2603 .set_idt = vmx_set_idt, 2772 .set_idt = vmx_set_idt,
2604 .get_gdt = vmx_get_gdt, 2773 .get_gdt = vmx_get_gdt,
@@ -2626,7 +2795,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
2626 2795
2627static int __init vmx_init(void) 2796static int __init vmx_init(void)
2628{ 2797{
2629 void *iova; 2798 void *va;
2630 int r; 2799 int r;
2631 2800
2632 vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 2801 vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
@@ -2639,28 +2808,48 @@ static int __init vmx_init(void)
2639 goto out; 2808 goto out;
2640 } 2809 }
2641 2810
2811 vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
2812 if (!vmx_msr_bitmap) {
2813 r = -ENOMEM;
2814 goto out1;
2815 }
2816
2642 /* 2817 /*
2643 * Allow direct access to the PC debug port (it is often used for I/O 2818 * Allow direct access to the PC debug port (it is often used for I/O
2644 * delays, but the vmexits simply slow things down). 2819 * delays, but the vmexits simply slow things down).
2645 */ 2820 */
2646 iova = kmap(vmx_io_bitmap_a); 2821 va = kmap(vmx_io_bitmap_a);
2647 memset(iova, 0xff, PAGE_SIZE); 2822 memset(va, 0xff, PAGE_SIZE);
2648 clear_bit(0x80, iova); 2823 clear_bit(0x80, va);
2649 kunmap(vmx_io_bitmap_a); 2824 kunmap(vmx_io_bitmap_a);
2650 2825
2651 iova = kmap(vmx_io_bitmap_b); 2826 va = kmap(vmx_io_bitmap_b);
2652 memset(iova, 0xff, PAGE_SIZE); 2827 memset(va, 0xff, PAGE_SIZE);
2653 kunmap(vmx_io_bitmap_b); 2828 kunmap(vmx_io_bitmap_b);
2654 2829
2830 va = kmap(vmx_msr_bitmap);
2831 memset(va, 0xff, PAGE_SIZE);
2832 kunmap(vmx_msr_bitmap);
2833
2834 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
2835
2655 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); 2836 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
2656 if (r) 2837 if (r)
2657 goto out1; 2838 goto out2;
2839
2840 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE);
2841 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE);
2842 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS);
2843 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
2844 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
2658 2845
2659 if (bypass_guest_pf) 2846 if (bypass_guest_pf)
2660 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); 2847 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
2661 2848
2662 return 0; 2849 return 0;
2663 2850
2851out2:
2852 __free_page(vmx_msr_bitmap);
2664out1: 2853out1:
2665 __free_page(vmx_io_bitmap_b); 2854 __free_page(vmx_io_bitmap_b);
2666out: 2855out:
@@ -2670,6 +2859,7 @@ out:
2670 2859
2671static void __exit vmx_exit(void) 2860static void __exit vmx_exit(void)
2672{ 2861{
2862 __free_page(vmx_msr_bitmap);
2673 __free_page(vmx_io_bitmap_b); 2863 __free_page(vmx_io_bitmap_b);
2674 __free_page(vmx_io_bitmap_a); 2864 __free_page(vmx_io_bitmap_a);
2675 2865
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index d52ae8d7303d..5dff4606b988 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -49,6 +49,7 @@
49 * Definitions of Secondary Processor-Based VM-Execution Controls. 49 * Definitions of Secondary Processor-Based VM-Execution Controls.
50 */ 50 */
51#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 51#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
52#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
52#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 53#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
53 54
54 55
@@ -65,6 +66,7 @@
65 66
66/* VMCS Encodings */ 67/* VMCS Encodings */
67enum vmcs_field { 68enum vmcs_field {
69 VIRTUAL_PROCESSOR_ID = 0x00000000,
68 GUEST_ES_SELECTOR = 0x00000800, 70 GUEST_ES_SELECTOR = 0x00000800,
69 GUEST_CS_SELECTOR = 0x00000802, 71 GUEST_CS_SELECTOR = 0x00000802,
70 GUEST_SS_SELECTOR = 0x00000804, 72 GUEST_SS_SELECTOR = 0x00000804,
@@ -231,12 +233,12 @@ enum vmcs_field {
231 */ 233 */
232#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ 234#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */
233#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ 235#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */
234#define INTR_INFO_DELIEVER_CODE_MASK 0x800 /* 11 */ 236#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */
235#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ 237#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
236 238
237#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK 239#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK
238#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK 240#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK
239#define VECTORING_INFO_DELIEVER_CODE_MASK INTR_INFO_DELIEVER_CODE_MASK 241#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK
240#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK 242#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
241 243
242#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ 244#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
@@ -321,4 +323,8 @@ enum vmcs_field {
321 323
322#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 324#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
323 325
326#define VMX_NR_VPIDS (1 << 16)
327#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
328#define VMX_VPID_EXTENT_ALL_CONTEXT 2
329
324#endif 330#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b01552bd1f1..0ce556372a4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -15,10 +15,12 @@
15 */ 15 */
16 16
17#include <linux/kvm_host.h> 17#include <linux/kvm_host.h>
18#include "segment_descriptor.h"
19#include "irq.h" 18#include "irq.h"
20#include "mmu.h" 19#include "mmu.h"
20#include "i8254.h"
21#include "tss.h"
21 22
23#include <linux/clocksource.h>
22#include <linux/kvm.h> 24#include <linux/kvm.h>
23#include <linux/fs.h> 25#include <linux/fs.h>
24#include <linux/vmalloc.h> 26#include <linux/vmalloc.h>
@@ -28,6 +30,7 @@
28 30
29#include <asm/uaccess.h> 31#include <asm/uaccess.h>
30#include <asm/msr.h> 32#include <asm/msr.h>
33#include <asm/desc.h>
31 34
32#define MAX_IO_MSRS 256 35#define MAX_IO_MSRS 256
33#define CR0_RESERVED_BITS \ 36#define CR0_RESERVED_BITS \
@@ -41,7 +44,15 @@
41 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 44 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
42 45
43#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 46#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
44#define EFER_RESERVED_BITS 0xfffffffffffff2fe 47/* EFER defaults:
48 * - enable syscall per default because its emulated by KVM
49 * - enable LME and LMA per default on 64 bit KVM
50 */
51#ifdef CONFIG_X86_64
52static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
53#else
54static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
55#endif
45 56
46#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 57#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
47#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 58#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
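[Sketch] A quick check of the new EFER masks: inverting them shows the 64-bit default allows SCE (bit 0), LME (bit 8) and LMA (bit 10), while 32-bit allows SCE only; NX (bit 11) is opened up at runtime by kvm_enable_efer_bits(EFER_NX) in the VMX hardware_setup hunk earlier in this patch.

#include <stdint.h>
#include <stdio.h>

#define EFER_SCE (1ULL << 0)
#define EFER_LME (1ULL << 8)
#define EFER_LMA (1ULL << 10)

int main(void)
{
	uint64_t writable64 = ~0xfffffffffffffafeULL;	/* 0x501 */
	uint64_t writable32 = ~0xfffffffffffffffeULL;	/* 0x001 */

	printf("%d %d\n", writable64 == (EFER_SCE | EFER_LME | EFER_LMA),
			  writable32 == EFER_SCE);	/* prints "1 1" */
	return 0;
}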
@@ -63,6 +74,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
63 { "irq_window", VCPU_STAT(irq_window_exits) }, 74 { "irq_window", VCPU_STAT(irq_window_exits) },
64 { "halt_exits", VCPU_STAT(halt_exits) }, 75 { "halt_exits", VCPU_STAT(halt_exits) },
65 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 76 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 { "hypercalls", VCPU_STAT(hypercalls) },
66 { "request_irq", VCPU_STAT(request_irq_exits) }, 78 { "request_irq", VCPU_STAT(request_irq_exits) },
67 { "irq_exits", VCPU_STAT(irq_exits) }, 79 { "irq_exits", VCPU_STAT(irq_exits) },
68 { "host_state_reload", VCPU_STAT(host_state_reload) }, 80 { "host_state_reload", VCPU_STAT(host_state_reload) },
@@ -78,6 +90,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
78 { "mmu_recycled", VM_STAT(mmu_recycled) }, 90 { "mmu_recycled", VM_STAT(mmu_recycled) },
79 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 91 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
80 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 92 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
93 { "largepages", VM_STAT(lpages) },
81 { NULL } 94 { NULL }
82}; 95};
83 96
@@ -85,7 +98,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
85unsigned long segment_base(u16 selector) 98unsigned long segment_base(u16 selector)
86{ 99{
87 struct descriptor_table gdt; 100 struct descriptor_table gdt;
88 struct segment_descriptor *d; 101 struct desc_struct *d;
89 unsigned long table_base; 102 unsigned long table_base;
90 unsigned long v; 103 unsigned long v;
91 104
@@ -101,13 +114,12 @@ unsigned long segment_base(u16 selector)
101 asm("sldt %0" : "=g"(ldt_selector)); 114 asm("sldt %0" : "=g"(ldt_selector));
102 table_base = segment_base(ldt_selector); 115 table_base = segment_base(ldt_selector);
103 } 116 }
104 d = (struct segment_descriptor *)(table_base + (selector & ~7)); 117 d = (struct desc_struct *)(table_base + (selector & ~7));
105 v = d->base_low | ((unsigned long)d->base_mid << 16) | 118 v = d->base0 | ((unsigned long)d->base1 << 16) |
106 ((unsigned long)d->base_high << 24); 119 ((unsigned long)d->base2 << 24);
107#ifdef CONFIG_X86_64 120#ifdef CONFIG_X86_64
108 if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) 121 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
109 v |= ((unsigned long) \ 122 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
110 ((struct segment_descriptor_64 *)d)->base_higher) << 32;
111#endif 123#endif
112 return v; 124 return v;
113} 125}
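[Sketch] A sketch of how the rewritten segment_base() assembles a base address from struct desc_struct: base0 carries bits 15:0, base1 bits 23:16, base2 bits 31:24, and 64-bit system descriptors (LDT/TSS) add bits 63:32 from the extra base3 word.

#include <stdint.h>
#include <stdio.h>

static uint64_t desc_base(uint16_t base0, uint8_t base1, uint8_t base2,
			  uint32_t base3, int is_64bit_system_desc)
{
	uint64_t v = base0 | ((uint64_t)base1 << 16) | ((uint64_t)base2 << 24);

	if (is_64bit_system_desc)
		v |= (uint64_t)base3 << 32;
	return v;
}

int main(void)
{
	printf("0x%llx\n", (unsigned long long)desc_base(0x3000, 0x20, 0x01, 0, 0));
	/* -> 0x1203000 */
	return 0;
}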
@@ -145,11 +157,16 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
145 u32 error_code) 157 u32 error_code)
146{ 158{
147 ++vcpu->stat.pf_guest; 159 ++vcpu->stat.pf_guest;
148 if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) { 160 if (vcpu->arch.exception.pending) {
149 printk(KERN_DEBUG "kvm: inject_page_fault:" 161 if (vcpu->arch.exception.nr == PF_VECTOR) {
150 " double fault 0x%lx\n", addr); 162 printk(KERN_DEBUG "kvm: inject_page_fault:"
151 vcpu->arch.exception.nr = DF_VECTOR; 163 " double fault 0x%lx\n", addr);
152 vcpu->arch.exception.error_code = 0; 164 vcpu->arch.exception.nr = DF_VECTOR;
165 vcpu->arch.exception.error_code = 0;
166 } else if (vcpu->arch.exception.nr == DF_VECTOR) {
167 /* triple fault -> shutdown */
168 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
169 }
153 return; 170 return;
154 } 171 }
155 vcpu->arch.cr2 = addr; 172 vcpu->arch.cr2 = addr;
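[Sketch] A toy model of the fault-escalation ladder implemented above: a page fault raised while a #PF is still pending is promoted to a double fault, and a page fault on top of a pending #DF becomes a triple fault, i.e. a guest shutdown request.

#include <stdio.h>

enum pending { NONE, PF, DF, TRIPLE };

static enum pending inject_pf(enum pending cur)
{
	switch (cur) {
	case NONE:	return PF;	/* deliver the page fault normally */
	case PF:	return DF;	/* #PF during #PF delivery -> double fault */
	case DF:	return TRIPLE;	/* #PF during #DF delivery -> shutdown */
	default:	return cur;
	}
}

int main(void)
{
	printf("%d\n", inject_pf(inject_pf(inject_pf(NONE))));	/* -> 3 (TRIPLE) */
	return 0;
}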
@@ -184,7 +201,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
184 int ret; 201 int ret;
185 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; 202 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
186 203
187 down_read(&vcpu->kvm->slots_lock);
188 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, 204 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
189 offset * sizeof(u64), sizeof(pdpte)); 205 offset * sizeof(u64), sizeof(pdpte));
190 if (ret < 0) { 206 if (ret < 0) {
@@ -201,10 +217,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
201 217
202 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); 218 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
203out: 219out:
204 up_read(&vcpu->kvm->slots_lock);
205 220
206 return ret; 221 return ret;
207} 222}
223EXPORT_SYMBOL_GPL(load_pdptrs);
208 224
209static bool pdptrs_changed(struct kvm_vcpu *vcpu) 225static bool pdptrs_changed(struct kvm_vcpu *vcpu)
210{ 226{
@@ -215,18 +231,16 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
215 if (is_long_mode(vcpu) || !is_pae(vcpu)) 231 if (is_long_mode(vcpu) || !is_pae(vcpu))
216 return false; 232 return false;
217 233
218 down_read(&vcpu->kvm->slots_lock);
219 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); 234 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
220 if (r < 0) 235 if (r < 0)
221 goto out; 236 goto out;
222 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; 237 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
223out: 238out:
224 up_read(&vcpu->kvm->slots_lock);
225 239
226 return changed; 240 return changed;
227} 241}
228 242
229void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 243void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
230{ 244{
231 if (cr0 & CR0_RESERVED_BITS) { 245 if (cr0 & CR0_RESERVED_BITS) {
232 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", 246 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
@@ -284,15 +298,18 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
284 kvm_mmu_reset_context(vcpu); 298 kvm_mmu_reset_context(vcpu);
285 return; 299 return;
286} 300}
287EXPORT_SYMBOL_GPL(set_cr0); 301EXPORT_SYMBOL_GPL(kvm_set_cr0);
288 302
289void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 303void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
290{ 304{
291 set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); 305 kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
306 KVMTRACE_1D(LMSW, vcpu,
307 (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
308 handler);
292} 309}
293EXPORT_SYMBOL_GPL(lmsw); 310EXPORT_SYMBOL_GPL(kvm_lmsw);
294 311
295void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 312void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
296{ 313{
297 if (cr4 & CR4_RESERVED_BITS) { 314 if (cr4 & CR4_RESERVED_BITS) {
298 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); 315 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
@@ -323,9 +340,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
323 vcpu->arch.cr4 = cr4; 340 vcpu->arch.cr4 = cr4;
324 kvm_mmu_reset_context(vcpu); 341 kvm_mmu_reset_context(vcpu);
325} 342}
326EXPORT_SYMBOL_GPL(set_cr4); 343EXPORT_SYMBOL_GPL(kvm_set_cr4);
327 344
328void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 345void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
329{ 346{
330 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { 347 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
331 kvm_mmu_flush_tlb(vcpu); 348 kvm_mmu_flush_tlb(vcpu);
@@ -359,7 +376,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
359 */ 376 */
360 } 377 }
361 378
362 down_read(&vcpu->kvm->slots_lock);
363 /* 379 /*
364 * Does the new cr3 value map to physical memory? (Note, we 380 * Does the new cr3 value map to physical memory? (Note, we
365 * catch an invalid cr3 even in real-mode, because it would 381 * catch an invalid cr3 even in real-mode, because it would
@@ -375,11 +391,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
375 vcpu->arch.cr3 = cr3; 391 vcpu->arch.cr3 = cr3;
376 vcpu->arch.mmu.new_cr3(vcpu); 392 vcpu->arch.mmu.new_cr3(vcpu);
377 } 393 }
378 up_read(&vcpu->kvm->slots_lock);
379} 394}
380EXPORT_SYMBOL_GPL(set_cr3); 395EXPORT_SYMBOL_GPL(kvm_set_cr3);
381 396
382void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 397void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
383{ 398{
384 if (cr8 & CR8_RESERVED_BITS) { 399 if (cr8 & CR8_RESERVED_BITS) {
385 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); 400 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
@@ -391,16 +406,16 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
391 else 406 else
392 vcpu->arch.cr8 = cr8; 407 vcpu->arch.cr8 = cr8;
393} 408}
394EXPORT_SYMBOL_GPL(set_cr8); 409EXPORT_SYMBOL_GPL(kvm_set_cr8);
395 410
396unsigned long get_cr8(struct kvm_vcpu *vcpu) 411unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
397{ 412{
398 if (irqchip_in_kernel(vcpu->kvm)) 413 if (irqchip_in_kernel(vcpu->kvm))
399 return kvm_lapic_get_cr8(vcpu); 414 return kvm_lapic_get_cr8(vcpu);
400 else 415 else
401 return vcpu->arch.cr8; 416 return vcpu->arch.cr8;
402} 417}
403EXPORT_SYMBOL_GPL(get_cr8); 418EXPORT_SYMBOL_GPL(kvm_get_cr8);
404 419
405/* 420/*
406 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 421 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -415,7 +430,8 @@ static u32 msrs_to_save[] = {
415#ifdef CONFIG_X86_64 430#ifdef CONFIG_X86_64
416 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 431 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
417#endif 432#endif
418 MSR_IA32_TIME_STAMP_COUNTER, 433 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
434 MSR_IA32_PERF_STATUS,
419}; 435};
420 436
421static unsigned num_msrs_to_save; 437static unsigned num_msrs_to_save;
@@ -424,11 +440,9 @@ static u32 emulated_msrs[] = {
424 MSR_IA32_MISC_ENABLE, 440 MSR_IA32_MISC_ENABLE,
425}; 441};
426 442
427#ifdef CONFIG_X86_64
428
429static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 443static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
430{ 444{
431 if (efer & EFER_RESERVED_BITS) { 445 if (efer & efer_reserved_bits) {
432 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", 446 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
433 efer); 447 efer);
434 kvm_inject_gp(vcpu, 0); 448 kvm_inject_gp(vcpu, 0);
@@ -450,7 +464,12 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
450 vcpu->arch.shadow_efer = efer; 464 vcpu->arch.shadow_efer = efer;
451} 465}
452 466
453#endif 467void kvm_enable_efer_bits(u64 mask)
468{
469 efer_reserved_bits &= ~mask;
470}
471EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
472
454 473
455/* 474/*
 456 * Writes msr value into the appropriate "register". 475
@@ -470,26 +489,86 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
470 return kvm_set_msr(vcpu, index, *data); 489 return kvm_set_msr(vcpu, index, *data);
471} 490}
472 491
492static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
493{
494 static int version;
495 struct kvm_wall_clock wc;
496 struct timespec wc_ts;
497
498 if (!wall_clock)
499 return;
500
501 version++;
502
503 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
504
505 wc_ts = current_kernel_time();
506 wc.wc_sec = wc_ts.tv_sec;
507 wc.wc_nsec = wc_ts.tv_nsec;
508 wc.wc_version = version;
509
510 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
511
512 version++;
513 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
514}
515
516static void kvm_write_guest_time(struct kvm_vcpu *v)
517{
518 struct timespec ts;
519 unsigned long flags;
520 struct kvm_vcpu_arch *vcpu = &v->arch;
521 void *shared_kaddr;
522
523 if ((!vcpu->time_page))
524 return;
525
526 /* Keep irq disabled to prevent changes to the clock */
527 local_irq_save(flags);
528 kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
529 &vcpu->hv_clock.tsc_timestamp);
530 ktime_get_ts(&ts);
531 local_irq_restore(flags);
532
533 /* With all the info we got, fill in the values */
534
535 vcpu->hv_clock.system_time = ts.tv_nsec +
536 (NSEC_PER_SEC * (u64)ts.tv_sec);
537 /*
538 * The interface expects us to write an even number signaling that the
539 * update is finished. Since the guest won't see the intermediate
540 * state, we just write "2" at the end
541 */
542 vcpu->hv_clock.version = 2;
543
544 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
545
546 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
547 sizeof(vcpu->hv_clock));
548
549 kunmap_atomic(shared_kaddr, KM_USER0);
550
551 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
552}
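
kvm_write_guest_time() publishes the per-vCPU time structure under a seqlock-like version field: the value is left odd while an update is in flight and even (here simply 2) once the update is complete. A hedged sketch of the matching guest-side reader, with a struct layout assumed to mirror the hv_clock fields used above rather than the exact ABI header:

	#include <stdint.h>

	struct pvclock_time_sketch {
		uint32_t version;
		uint32_t pad0;
		uint64_t tsc_timestamp;
		uint64_t system_time;
		uint32_t tsc_to_system_mul;
		int8_t   tsc_shift;
	};

	/* Retry while the version is odd or changes across the read. */
	static uint64_t read_system_time(volatile struct pvclock_time_sketch *ti)
	{
		uint32_t v1, v2;
		uint64_t time;

		do {
			v1 = ti->version;
			__sync_synchronize();		/* rmb() analogue */
			time = ti->system_time;
			__sync_synchronize();
			v2 = ti->version;
		} while ((v1 & 1) || v1 != v2);

		return time;
	}
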
553
473 554
474int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 555int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
475{ 556{
476 switch (msr) { 557 switch (msr) {
477#ifdef CONFIG_X86_64
478 case MSR_EFER: 558 case MSR_EFER:
479 set_efer(vcpu, data); 559 set_efer(vcpu, data);
480 break; 560 break;
481#endif
482 case MSR_IA32_MC0_STATUS: 561 case MSR_IA32_MC0_STATUS:
483 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", 562 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
484 __FUNCTION__, data); 563 __func__, data);
485 break; 564 break;
486 case MSR_IA32_MCG_STATUS: 565 case MSR_IA32_MCG_STATUS:
487 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", 566 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
488 __FUNCTION__, data); 567 __func__, data);
489 break; 568 break;
490 case MSR_IA32_MCG_CTL: 569 case MSR_IA32_MCG_CTL:
491 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", 570 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
492 __FUNCTION__, data); 571 __func__, data);
493 break; 572 break;
494 case MSR_IA32_UCODE_REV: 573 case MSR_IA32_UCODE_REV:
495 case MSR_IA32_UCODE_WRITE: 574 case MSR_IA32_UCODE_WRITE:
@@ -501,6 +580,42 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
501 case MSR_IA32_MISC_ENABLE: 580 case MSR_IA32_MISC_ENABLE:
502 vcpu->arch.ia32_misc_enable_msr = data; 581 vcpu->arch.ia32_misc_enable_msr = data;
503 break; 582 break;
583 case MSR_KVM_WALL_CLOCK:
584 vcpu->kvm->arch.wall_clock = data;
585 kvm_write_wall_clock(vcpu->kvm, data);
586 break;
587 case MSR_KVM_SYSTEM_TIME: {
588 if (vcpu->arch.time_page) {
589 kvm_release_page_dirty(vcpu->arch.time_page);
590 vcpu->arch.time_page = NULL;
591 }
592
593 vcpu->arch.time = data;
594
595 /* we verify if the enable bit is set... */
596 if (!(data & 1))
597 break;
598
599 /* ...but clean it before doing the actual write */
600 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
601
602 vcpu->arch.hv_clock.tsc_to_system_mul =
603 clocksource_khz2mult(tsc_khz, 22);
604 vcpu->arch.hv_clock.tsc_shift = 22;
605
606 down_read(&current->mm->mmap_sem);
607 vcpu->arch.time_page =
608 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
609 up_read(&current->mm->mmap_sem);
610
611 if (is_error_page(vcpu->arch.time_page)) {
612 kvm_release_page_clean(vcpu->arch.time_page);
613 vcpu->arch.time_page = NULL;
614 }
615
616 kvm_write_guest_time(vcpu);
617 break;
618 }
504 default: 619 default:
505 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); 620 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
506 return 1; 621 return 1;
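
For MSR_KVM_SYSTEM_TIME the guest writes a guest-physical address with the enable flag in bit 0; the handler above strips the flag, keeps the in-page offset in time_offset and maps the containing page with gfn_to_page(). A small userspace sketch of the same arithmetic, assuming 4 KiB pages:

	#include <stdint.h>
	#include <stdio.h>

	#define SKETCH_PAGE_SHIFT 12
	#define SKETCH_PAGE_SIZE  ((uint64_t)1 << SKETCH_PAGE_SHIFT)
	#define SKETCH_PAGE_MASK  (~(SKETCH_PAGE_SIZE - 1))

	int main(void)
	{
		uint64_t msr = 0x12345678ULL | 1;		/* gpa of the time struct, enable bit set */
		uint64_t gfn = msr >> SKETCH_PAGE_SHIFT;	/* page handed to gfn_to_page() */
		uint64_t off = msr & ~(SKETCH_PAGE_MASK | 1);	/* time_offset within that page */

		printf("enabled=%llu gfn=0x%llx offset=0x%llx\n",
		       (unsigned long long)(msr & 1),
		       (unsigned long long)gfn,
		       (unsigned long long)off);
		return 0;
	}
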
@@ -540,7 +655,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
540 case MSR_IA32_MC0_MISC+12: 655 case MSR_IA32_MC0_MISC+12:
541 case MSR_IA32_MC0_MISC+16: 656 case MSR_IA32_MC0_MISC+16:
542 case MSR_IA32_UCODE_REV: 657 case MSR_IA32_UCODE_REV:
543 case MSR_IA32_PERF_STATUS:
544 case MSR_IA32_EBL_CR_POWERON: 658 case MSR_IA32_EBL_CR_POWERON:
545 /* MTRR registers */ 659 /* MTRR registers */
546 case 0xfe: 660 case 0xfe:
@@ -556,11 +670,21 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
556 case MSR_IA32_MISC_ENABLE: 670 case MSR_IA32_MISC_ENABLE:
557 data = vcpu->arch.ia32_misc_enable_msr; 671 data = vcpu->arch.ia32_misc_enable_msr;
558 break; 672 break;
559#ifdef CONFIG_X86_64 673 case MSR_IA32_PERF_STATUS:
674 /* TSC increment by tick */
675 data = 1000ULL;
676 /* CPU multiplier */
677 data |= (((uint64_t)4ULL) << 40);
678 break;
560 case MSR_EFER: 679 case MSR_EFER:
561 data = vcpu->arch.shadow_efer; 680 data = vcpu->arch.shadow_efer;
562 break; 681 break;
563#endif 682 case MSR_KVM_WALL_CLOCK:
683 data = vcpu->kvm->arch.wall_clock;
684 break;
685 case MSR_KVM_SYSTEM_TIME:
686 data = vcpu->arch.time;
687 break;
564 default: 688 default:
565 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 689 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
566 return 1; 690 return 1;
@@ -584,9 +708,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
584 708
585 vcpu_load(vcpu); 709 vcpu_load(vcpu);
586 710
711 down_read(&vcpu->kvm->slots_lock);
587 for (i = 0; i < msrs->nmsrs; ++i) 712 for (i = 0; i < msrs->nmsrs; ++i)
588 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 713 if (do_msr(vcpu, entries[i].index, &entries[i].data))
589 break; 714 break;
715 up_read(&vcpu->kvm->slots_lock);
590 716
591 vcpu_put(vcpu); 717 vcpu_put(vcpu);
592 718
@@ -688,11 +814,24 @@ int kvm_dev_ioctl_check_extension(long ext)
688 case KVM_CAP_USER_MEMORY: 814 case KVM_CAP_USER_MEMORY:
689 case KVM_CAP_SET_TSS_ADDR: 815 case KVM_CAP_SET_TSS_ADDR:
690 case KVM_CAP_EXT_CPUID: 816 case KVM_CAP_EXT_CPUID:
817 case KVM_CAP_CLOCKSOURCE:
818 case KVM_CAP_PIT:
819 case KVM_CAP_NOP_IO_DELAY:
820 case KVM_CAP_MP_STATE:
691 r = 1; 821 r = 1;
692 break; 822 break;
693 case KVM_CAP_VAPIC: 823 case KVM_CAP_VAPIC:
694 r = !kvm_x86_ops->cpu_has_accelerated_tpr(); 824 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
695 break; 825 break;
826 case KVM_CAP_NR_VCPUS:
827 r = KVM_MAX_VCPUS;
828 break;
829 case KVM_CAP_NR_MEMSLOTS:
830 r = KVM_MEMORY_SLOTS;
831 break;
832 case KVM_CAP_PV_MMU:
833 r = !tdp_enabled;
834 break;
696 default: 835 default:
697 r = 0; 836 r = 0;
698 break; 837 break;
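
The capabilities advertised above are what userspace probes with the KVM_CHECK_EXTENSION ioctl on the /dev/kvm fd. A sketch of such a probe, assuming a linux/kvm.h new enough to define these KVM_CAP_* constants:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);

		if (kvm < 0) {
			perror("open /dev/kvm");
			return 1;
		}
		/* Non-zero means the capability is present (or reports a limit). */
		printf("in-kernel PIT: %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PIT));
		printf("max vcpus:     %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS));
		printf("clocksource:   %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_CLOCKSOURCE));
		return 0;
	}
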
@@ -763,6 +902,7 @@ out:
763void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 902void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
764{ 903{
765 kvm_x86_ops->vcpu_load(vcpu, cpu); 904 kvm_x86_ops->vcpu_load(vcpu, cpu);
905 kvm_write_guest_time(vcpu);
766} 906}
767 907
768void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 908void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -958,32 +1098,32 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
958 } 1098 }
959 /* function 4 and 0xb have additional index. */ 1099 /* function 4 and 0xb have additional index. */
960 case 4: { 1100 case 4: {
961 int index, cache_type; 1101 int i, cache_type;
962 1102
963 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1103 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
964 /* read more entries until cache_type is zero */ 1104 /* read more entries until cache_type is zero */
965 for (index = 1; *nent < maxnent; ++index) { 1105 for (i = 1; *nent < maxnent; ++i) {
966 cache_type = entry[index - 1].eax & 0x1f; 1106 cache_type = entry[i - 1].eax & 0x1f;
967 if (!cache_type) 1107 if (!cache_type)
968 break; 1108 break;
969 do_cpuid_1_ent(&entry[index], function, index); 1109 do_cpuid_1_ent(&entry[i], function, i);
970 entry[index].flags |= 1110 entry[i].flags |=
971 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1111 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
972 ++*nent; 1112 ++*nent;
973 } 1113 }
974 break; 1114 break;
975 } 1115 }
976 case 0xb: { 1116 case 0xb: {
977 int index, level_type; 1117 int i, level_type;
978 1118
979 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1119 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
980 /* read more entries until level_type is zero */ 1120 /* read more entries until level_type is zero */
981 for (index = 1; *nent < maxnent; ++index) { 1121 for (i = 1; *nent < maxnent; ++i) {
982 level_type = entry[index - 1].ecx & 0xff; 1122 level_type = entry[i - 1].ecx & 0xff;
983 if (!level_type) 1123 if (!level_type)
984 break; 1124 break;
985 do_cpuid_1_ent(&entry[index], function, index); 1125 do_cpuid_1_ent(&entry[i], function, i);
986 entry[index].flags |= 1126 entry[i].flags |=
987 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1127 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
988 ++*nent; 1128 ++*nent;
989 } 1129 }
@@ -1365,6 +1505,23 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
1365 return r; 1505 return r;
1366} 1506}
1367 1507
1508static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1509{
1510 int r = 0;
1511
1512 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
1513 return r;
1514}
1515
1516static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1517{
1518 int r = 0;
1519
1520 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
1521 kvm_pit_load_count(kvm, 0, ps->channels[0].count);
1522 return r;
1523}
1524
1368/* 1525/*
1369 * Get (and clear) the dirty memory log for a memory slot. 1526 * Get (and clear) the dirty memory log for a memory slot.
1370 */ 1527 */
@@ -1457,6 +1614,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
1457 } else 1614 } else
1458 goto out; 1615 goto out;
1459 break; 1616 break;
1617 case KVM_CREATE_PIT:
1618 r = -ENOMEM;
1619 kvm->arch.vpit = kvm_create_pit(kvm);
1620 if (kvm->arch.vpit)
1621 r = 0;
1622 break;
1460 case KVM_IRQ_LINE: { 1623 case KVM_IRQ_LINE: {
1461 struct kvm_irq_level irq_event; 1624 struct kvm_irq_level irq_event;
1462 1625
@@ -1512,6 +1675,37 @@ long kvm_arch_vm_ioctl(struct file *filp,
1512 r = 0; 1675 r = 0;
1513 break; 1676 break;
1514 } 1677 }
1678 case KVM_GET_PIT: {
1679 struct kvm_pit_state ps;
1680 r = -EFAULT;
1681 if (copy_from_user(&ps, argp, sizeof ps))
1682 goto out;
1683 r = -ENXIO;
1684 if (!kvm->arch.vpit)
1685 goto out;
1686 r = kvm_vm_ioctl_get_pit(kvm, &ps);
1687 if (r)
1688 goto out;
1689 r = -EFAULT;
1690 if (copy_to_user(argp, &ps, sizeof ps))
1691 goto out;
1692 r = 0;
1693 break;
1694 }
1695 case KVM_SET_PIT: {
1696 struct kvm_pit_state ps;
1697 r = -EFAULT;
1698 if (copy_from_user(&ps, argp, sizeof ps))
1699 goto out;
1700 r = -ENXIO;
1701 if (!kvm->arch.vpit)
1702 goto out;
1703 r = kvm_vm_ioctl_set_pit(kvm, &ps);
1704 if (r)
1705 goto out;
1706 r = 0;
1707 break;
1708 }
1515 default: 1709 default:
1516 ; 1710 ;
1517 } 1711 }
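
The new KVM_CREATE_PIT / KVM_GET_PIT / KVM_SET_PIT plumbing lets a VMM snapshot and restore the in-kernel PIT, e.g. across save/restore or migration. A hedged userspace sketch (error handling trimmed, ioctl names as defined by this patch series' linux/kvm.h):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* vm_fd is a VM file descriptor obtained via KVM_CREATE_VM. */
	static int pit_save_restore(int vm_fd)
	{
		struct kvm_pit_state ps;

		if (ioctl(vm_fd, KVM_CREATE_PIT, 0) < 0)
			return -1;
		if (ioctl(vm_fd, KVM_GET_PIT, &ps) < 0)		/* copies vpit->pit_state out */
			return -1;
		/* ... save or inspect ps.channels[] here ... */
		return ioctl(vm_fd, KVM_SET_PIT, &ps);		/* reloads channel 0 count */
	}
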
@@ -1570,7 +1764,6 @@ int emulator_read_std(unsigned long addr,
1570 void *data = val; 1764 void *data = val;
1571 int r = X86EMUL_CONTINUE; 1765 int r = X86EMUL_CONTINUE;
1572 1766
1573 down_read(&vcpu->kvm->slots_lock);
1574 while (bytes) { 1767 while (bytes) {
1575 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1768 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1576 unsigned offset = addr & (PAGE_SIZE-1); 1769 unsigned offset = addr & (PAGE_SIZE-1);
@@ -1592,7 +1785,6 @@ int emulator_read_std(unsigned long addr,
1592 addr += tocopy; 1785 addr += tocopy;
1593 } 1786 }
1594out: 1787out:
1595 up_read(&vcpu->kvm->slots_lock);
1596 return r; 1788 return r;
1597} 1789}
1598EXPORT_SYMBOL_GPL(emulator_read_std); 1790EXPORT_SYMBOL_GPL(emulator_read_std);
@@ -1611,9 +1803,7 @@ static int emulator_read_emulated(unsigned long addr,
1611 return X86EMUL_CONTINUE; 1803 return X86EMUL_CONTINUE;
1612 } 1804 }
1613 1805
1614 down_read(&vcpu->kvm->slots_lock);
1615 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1806 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1616 up_read(&vcpu->kvm->slots_lock);
1617 1807
1618 /* For APIC access vmexit */ 1808 /* For APIC access vmexit */
1619 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 1809 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1646,19 +1836,15 @@ mmio:
1646 return X86EMUL_UNHANDLEABLE; 1836 return X86EMUL_UNHANDLEABLE;
1647} 1837}
1648 1838
1649static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, 1839int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1650 const void *val, int bytes) 1840 const void *val, int bytes)
1651{ 1841{
1652 int ret; 1842 int ret;
1653 1843
1654 down_read(&vcpu->kvm->slots_lock);
1655 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 1844 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
1656 if (ret < 0) { 1845 if (ret < 0)
1657 up_read(&vcpu->kvm->slots_lock);
1658 return 0; 1846 return 0;
1659 }
1660 kvm_mmu_pte_write(vcpu, gpa, val, bytes); 1847 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
1661 up_read(&vcpu->kvm->slots_lock);
1662 return 1; 1848 return 1;
1663} 1849}
1664 1850
@@ -1670,9 +1856,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
1670 struct kvm_io_device *mmio_dev; 1856 struct kvm_io_device *mmio_dev;
1671 gpa_t gpa; 1857 gpa_t gpa;
1672 1858
1673 down_read(&vcpu->kvm->slots_lock);
1674 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1859 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1675 up_read(&vcpu->kvm->slots_lock);
1676 1860
1677 if (gpa == UNMAPPED_GVA) { 1861 if (gpa == UNMAPPED_GVA) {
1678 kvm_inject_page_fault(vcpu, addr, 2); 1862 kvm_inject_page_fault(vcpu, addr, 2);
@@ -1749,7 +1933,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1749 char *kaddr; 1933 char *kaddr;
1750 u64 val; 1934 u64 val;
1751 1935
1752 down_read(&vcpu->kvm->slots_lock);
1753 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1936 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1754 1937
1755 if (gpa == UNMAPPED_GVA || 1938 if (gpa == UNMAPPED_GVA ||
@@ -1769,9 +1952,8 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1769 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); 1952 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
1770 kunmap_atomic(kaddr, KM_USER0); 1953 kunmap_atomic(kaddr, KM_USER0);
1771 kvm_release_page_dirty(page); 1954 kvm_release_page_dirty(page);
1772 emul_write:
1773 up_read(&vcpu->kvm->slots_lock);
1774 } 1955 }
1956emul_write:
1775#endif 1957#endif
1776 1958
1777 return emulator_write_emulated(addr, new, bytes, vcpu); 1959 return emulator_write_emulated(addr, new, bytes, vcpu);
@@ -1802,7 +1984,7 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
1802 *dest = kvm_x86_ops->get_dr(vcpu, dr); 1984 *dest = kvm_x86_ops->get_dr(vcpu, dr);
1803 return X86EMUL_CONTINUE; 1985 return X86EMUL_CONTINUE;
1804 default: 1986 default:
1805 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr); 1987 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
1806 return X86EMUL_UNHANDLEABLE; 1988 return X86EMUL_UNHANDLEABLE;
1807 } 1989 }
1808} 1990}
@@ -1840,7 +2022,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
1840} 2022}
1841EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 2023EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
1842 2024
1843struct x86_emulate_ops emulate_ops = { 2025static struct x86_emulate_ops emulate_ops = {
1844 .read_std = emulator_read_std, 2026 .read_std = emulator_read_std,
1845 .read_emulated = emulator_read_emulated, 2027 .read_emulated = emulator_read_emulated,
1846 .write_emulated = emulator_write_emulated, 2028 .write_emulated = emulator_write_emulated,
@@ -2091,6 +2273,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2091 vcpu->arch.pio.guest_page_offset = 0; 2273 vcpu->arch.pio.guest_page_offset = 0;
2092 vcpu->arch.pio.rep = 0; 2274 vcpu->arch.pio.rep = 0;
2093 2275
2276 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
2277 KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
2278 handler);
2279 else
2280 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
2281 handler);
2282
2094 kvm_x86_ops->cache_regs(vcpu); 2283 kvm_x86_ops->cache_regs(vcpu);
2095 memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); 2284 memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
2096 kvm_x86_ops->decache_regs(vcpu); 2285 kvm_x86_ops->decache_regs(vcpu);
@@ -2129,6 +2318,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2129 vcpu->arch.pio.guest_page_offset = offset_in_page(address); 2318 vcpu->arch.pio.guest_page_offset = offset_in_page(address);
2130 vcpu->arch.pio.rep = rep; 2319 vcpu->arch.pio.rep = rep;
2131 2320
2321 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
2322 KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
2323 handler);
2324 else
2325 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
2326 handler);
2327
2132 if (!count) { 2328 if (!count) {
2133 kvm_x86_ops->skip_emulated_instruction(vcpu); 2329 kvm_x86_ops->skip_emulated_instruction(vcpu);
2134 return 1; 2330 return 1;
@@ -2163,10 +2359,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2163 kvm_x86_ops->skip_emulated_instruction(vcpu); 2359 kvm_x86_ops->skip_emulated_instruction(vcpu);
2164 2360
2165 for (i = 0; i < nr_pages; ++i) { 2361 for (i = 0; i < nr_pages; ++i) {
2166 down_read(&vcpu->kvm->slots_lock);
2167 page = gva_to_page(vcpu, address + i * PAGE_SIZE); 2362 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
2168 vcpu->arch.pio.guest_pages[i] = page; 2363 vcpu->arch.pio.guest_pages[i] = page;
2169 up_read(&vcpu->kvm->slots_lock);
2170 if (!page) { 2364 if (!page) {
2171 kvm_inject_gp(vcpu, 0); 2365 kvm_inject_gp(vcpu, 0);
2172 free_pio_guest_pages(vcpu); 2366 free_pio_guest_pages(vcpu);
@@ -2238,10 +2432,13 @@ void kvm_arch_exit(void)
2238int kvm_emulate_halt(struct kvm_vcpu *vcpu) 2432int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2239{ 2433{
2240 ++vcpu->stat.halt_exits; 2434 ++vcpu->stat.halt_exits;
2435 KVMTRACE_0D(HLT, vcpu, handler);
2241 if (irqchip_in_kernel(vcpu->kvm)) { 2436 if (irqchip_in_kernel(vcpu->kvm)) {
2242 vcpu->arch.mp_state = VCPU_MP_STATE_HALTED; 2437 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
2438 up_read(&vcpu->kvm->slots_lock);
2243 kvm_vcpu_block(vcpu); 2439 kvm_vcpu_block(vcpu);
2244 if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE) 2440 down_read(&vcpu->kvm->slots_lock);
2441 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
2245 return -EINTR; 2442 return -EINTR;
2246 return 1; 2443 return 1;
2247 } else { 2444 } else {
@@ -2251,9 +2448,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2251} 2448}
2252EXPORT_SYMBOL_GPL(kvm_emulate_halt); 2449EXPORT_SYMBOL_GPL(kvm_emulate_halt);
2253 2450
2451static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
2452 unsigned long a1)
2453{
2454 if (is_long_mode(vcpu))
2455 return a0;
2456 else
2457 return a0 | ((gpa_t)a1 << 32);
2458}
2459
2254int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 2460int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2255{ 2461{
2256 unsigned long nr, a0, a1, a2, a3, ret; 2462 unsigned long nr, a0, a1, a2, a3, ret;
2463 int r = 1;
2257 2464
2258 kvm_x86_ops->cache_regs(vcpu); 2465 kvm_x86_ops->cache_regs(vcpu);
2259 2466
@@ -2263,6 +2470,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2263 a2 = vcpu->arch.regs[VCPU_REGS_RDX]; 2470 a2 = vcpu->arch.regs[VCPU_REGS_RDX];
2264 a3 = vcpu->arch.regs[VCPU_REGS_RSI]; 2471 a3 = vcpu->arch.regs[VCPU_REGS_RSI];
2265 2472
2473 KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
2474
2266 if (!is_long_mode(vcpu)) { 2475 if (!is_long_mode(vcpu)) {
2267 nr &= 0xFFFFFFFF; 2476 nr &= 0xFFFFFFFF;
2268 a0 &= 0xFFFFFFFF; 2477 a0 &= 0xFFFFFFFF;
@@ -2275,13 +2484,17 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2275 case KVM_HC_VAPIC_POLL_IRQ: 2484 case KVM_HC_VAPIC_POLL_IRQ:
2276 ret = 0; 2485 ret = 0;
2277 break; 2486 break;
2487 case KVM_HC_MMU_OP:
2488 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
2489 break;
2278 default: 2490 default:
2279 ret = -KVM_ENOSYS; 2491 ret = -KVM_ENOSYS;
2280 break; 2492 break;
2281 } 2493 }
2282 vcpu->arch.regs[VCPU_REGS_RAX] = ret; 2494 vcpu->arch.regs[VCPU_REGS_RAX] = ret;
2283 kvm_x86_ops->decache_regs(vcpu); 2495 kvm_x86_ops->decache_regs(vcpu);
2284 return 0; 2496 ++vcpu->stat.hypercalls;
2497 return r;
2285} 2498}
2286EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); 2499EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
2287 2500
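
hc_gpa() above lets a 32-bit guest hand a 64-bit guest-physical address to KVM_HC_MMU_OP in two registers. A standalone sketch of that reassembly (the explicit 32-bit casts stand in for the masking the caller already performs for non-long-mode guests):

	#include <assert.h>
	#include <stdint.h>

	typedef uint64_t gpa_t;

	/* In long mode one register carries the whole address; otherwise the
	 * low and high halves arrive in two registers. */
	static gpa_t hc_gpa_sketch(int long_mode, unsigned long a0, unsigned long a1)
	{
		if (long_mode)
			return a0;
		return (uint32_t)a0 | ((gpa_t)(uint32_t)a1 << 32);
	}

	int main(void)
	{
		/* A 32-bit guest splits gpa 0x123456000 into 0x23456000 / 0x1. */
		assert(hc_gpa_sketch(0, 0x23456000UL, 0x1UL) == 0x123456000ULL);
		return 0;
	}
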
@@ -2329,7 +2542,7 @@ void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
2329void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, 2542void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
2330 unsigned long *rflags) 2543 unsigned long *rflags)
2331{ 2544{
2332 lmsw(vcpu, msw); 2545 kvm_lmsw(vcpu, msw);
2333 *rflags = kvm_x86_ops->get_rflags(vcpu); 2546 *rflags = kvm_x86_ops->get_rflags(vcpu);
2334} 2547}
2335 2548
@@ -2346,9 +2559,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
2346 case 4: 2559 case 4:
2347 return vcpu->arch.cr4; 2560 return vcpu->arch.cr4;
2348 case 8: 2561 case 8:
2349 return get_cr8(vcpu); 2562 return kvm_get_cr8(vcpu);
2350 default: 2563 default:
2351 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); 2564 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
2352 return 0; 2565 return 0;
2353 } 2566 }
2354} 2567}
@@ -2358,23 +2571,23 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
2358{ 2571{
2359 switch (cr) { 2572 switch (cr) {
2360 case 0: 2573 case 0:
2361 set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 2574 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
2362 *rflags = kvm_x86_ops->get_rflags(vcpu); 2575 *rflags = kvm_x86_ops->get_rflags(vcpu);
2363 break; 2576 break;
2364 case 2: 2577 case 2:
2365 vcpu->arch.cr2 = val; 2578 vcpu->arch.cr2 = val;
2366 break; 2579 break;
2367 case 3: 2580 case 3:
2368 set_cr3(vcpu, val); 2581 kvm_set_cr3(vcpu, val);
2369 break; 2582 break;
2370 case 4: 2583 case 4:
2371 set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); 2584 kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
2372 break; 2585 break;
2373 case 8: 2586 case 8:
2374 set_cr8(vcpu, val & 0xfUL); 2587 kvm_set_cr8(vcpu, val & 0xfUL);
2375 break; 2588 break;
2376 default: 2589 default:
2377 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); 2590 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
2378 } 2591 }
2379} 2592}
2380 2593
@@ -2447,6 +2660,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
2447 } 2660 }
2448 kvm_x86_ops->decache_regs(vcpu); 2661 kvm_x86_ops->decache_regs(vcpu);
2449 kvm_x86_ops->skip_emulated_instruction(vcpu); 2662 kvm_x86_ops->skip_emulated_instruction(vcpu);
2663 KVMTRACE_5D(CPUID, vcpu, function,
2664 (u32)vcpu->arch.regs[VCPU_REGS_RAX],
2665 (u32)vcpu->arch.regs[VCPU_REGS_RBX],
2666 (u32)vcpu->arch.regs[VCPU_REGS_RCX],
2667 (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler);
2450} 2668}
2451EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 2669EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
2452 2670
@@ -2469,7 +2687,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
2469 struct kvm_run *kvm_run) 2687 struct kvm_run *kvm_run)
2470{ 2688{
2471 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; 2689 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
2472 kvm_run->cr8 = get_cr8(vcpu); 2690 kvm_run->cr8 = kvm_get_cr8(vcpu);
2473 kvm_run->apic_base = kvm_get_apic_base(vcpu); 2691 kvm_run->apic_base = kvm_get_apic_base(vcpu);
2474 if (irqchip_in_kernel(vcpu->kvm)) 2692 if (irqchip_in_kernel(vcpu->kvm))
2475 kvm_run->ready_for_interrupt_injection = 1; 2693 kvm_run->ready_for_interrupt_injection = 1;
@@ -2509,16 +2727,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2509{ 2727{
2510 int r; 2728 int r;
2511 2729
2512 if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { 2730 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
2513 pr_debug("vcpu %d received sipi with vector # %x\n", 2731 pr_debug("vcpu %d received sipi with vector # %x\n",
2514 vcpu->vcpu_id, vcpu->arch.sipi_vector); 2732 vcpu->vcpu_id, vcpu->arch.sipi_vector);
2515 kvm_lapic_reset(vcpu); 2733 kvm_lapic_reset(vcpu);
2516 r = kvm_x86_ops->vcpu_reset(vcpu); 2734 r = kvm_x86_ops->vcpu_reset(vcpu);
2517 if (r) 2735 if (r)
2518 return r; 2736 return r;
2519 vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 2737 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2520 } 2738 }
2521 2739
2740 down_read(&vcpu->kvm->slots_lock);
2522 vapic_enter(vcpu); 2741 vapic_enter(vcpu);
2523 2742
2524preempted: 2743preempted:
@@ -2526,6 +2745,10 @@ preempted:
2526 kvm_x86_ops->guest_debug_pre(vcpu); 2745 kvm_x86_ops->guest_debug_pre(vcpu);
2527 2746
2528again: 2747again:
2748 if (vcpu->requests)
2749 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
2750 kvm_mmu_unload(vcpu);
2751
2529 r = kvm_mmu_reload(vcpu); 2752 r = kvm_mmu_reload(vcpu);
2530 if (unlikely(r)) 2753 if (unlikely(r))
2531 goto out; 2754 goto out;
@@ -2539,6 +2762,11 @@ again:
2539 r = 0; 2762 r = 0;
2540 goto out; 2763 goto out;
2541 } 2764 }
2765 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
2766 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2767 r = 0;
2768 goto out;
2769 }
2542 } 2770 }
2543 2771
2544 kvm_inject_pending_timer_irqs(vcpu); 2772 kvm_inject_pending_timer_irqs(vcpu);
@@ -2557,6 +2785,14 @@ again:
2557 goto out; 2785 goto out;
2558 } 2786 }
2559 2787
2788 if (vcpu->requests)
2789 if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
2790 local_irq_enable();
2791 preempt_enable();
2792 r = 1;
2793 goto out;
2794 }
2795
2560 if (signal_pending(current)) { 2796 if (signal_pending(current)) {
2561 local_irq_enable(); 2797 local_irq_enable();
2562 preempt_enable(); 2798 preempt_enable();
@@ -2566,6 +2802,13 @@ again:
2566 goto out; 2802 goto out;
2567 } 2803 }
2568 2804
2805 vcpu->guest_mode = 1;
2806 /*
2807 * Make sure that guest_mode assignment won't happen after
2808 * testing the pending IRQ vector bitmap.
2809 */
2810 smp_wmb();
2811
2569 if (vcpu->arch.exception.pending) 2812 if (vcpu->arch.exception.pending)
2570 __queue_exception(vcpu); 2813 __queue_exception(vcpu);
2571 else if (irqchip_in_kernel(vcpu->kvm)) 2814 else if (irqchip_in_kernel(vcpu->kvm))
@@ -2575,13 +2818,15 @@ again:
2575 2818
2576 kvm_lapic_sync_to_vapic(vcpu); 2819 kvm_lapic_sync_to_vapic(vcpu);
2577 2820
2578 vcpu->guest_mode = 1; 2821 up_read(&vcpu->kvm->slots_lock);
2822
2579 kvm_guest_enter(); 2823 kvm_guest_enter();
2580 2824
2581 if (vcpu->requests) 2825 if (vcpu->requests)
2582 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 2826 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
2583 kvm_x86_ops->tlb_flush(vcpu); 2827 kvm_x86_ops->tlb_flush(vcpu);
2584 2828
2829 KVMTRACE_0D(VMENTRY, vcpu, entryexit);
2585 kvm_x86_ops->run(vcpu, kvm_run); 2830 kvm_x86_ops->run(vcpu, kvm_run);
2586 2831
2587 vcpu->guest_mode = 0; 2832 vcpu->guest_mode = 0;
@@ -2601,6 +2846,8 @@ again:
2601 2846
2602 preempt_enable(); 2847 preempt_enable();
2603 2848
2849 down_read(&vcpu->kvm->slots_lock);
2850
2604 /* 2851 /*
2605 * Profile KVM exit RIPs: 2852 * Profile KVM exit RIPs:
2606 */ 2853 */
@@ -2628,14 +2875,18 @@ again:
2628 } 2875 }
2629 2876
2630out: 2877out:
2878 up_read(&vcpu->kvm->slots_lock);
2631 if (r > 0) { 2879 if (r > 0) {
2632 kvm_resched(vcpu); 2880 kvm_resched(vcpu);
2881 down_read(&vcpu->kvm->slots_lock);
2633 goto preempted; 2882 goto preempted;
2634 } 2883 }
2635 2884
2636 post_kvm_run_save(vcpu, kvm_run); 2885 post_kvm_run_save(vcpu, kvm_run);
2637 2886
2887 down_read(&vcpu->kvm->slots_lock);
2638 vapic_exit(vcpu); 2888 vapic_exit(vcpu);
2889 up_read(&vcpu->kvm->slots_lock);
2639 2890
2640 return r; 2891 return r;
2641} 2892}
@@ -2647,7 +2898,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2647 2898
2648 vcpu_load(vcpu); 2899 vcpu_load(vcpu);
2649 2900
2650 if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) { 2901 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
2651 kvm_vcpu_block(vcpu); 2902 kvm_vcpu_block(vcpu);
2652 vcpu_put(vcpu); 2903 vcpu_put(vcpu);
2653 return -EAGAIN; 2904 return -EAGAIN;
@@ -2658,7 +2909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2658 2909
2659 /* re-sync apic's tpr */ 2910 /* re-sync apic's tpr */
2660 if (!irqchip_in_kernel(vcpu->kvm)) 2911 if (!irqchip_in_kernel(vcpu->kvm))
2661 set_cr8(vcpu, kvm_run->cr8); 2912 kvm_set_cr8(vcpu, kvm_run->cr8);
2662 2913
2663 if (vcpu->arch.pio.cur_count) { 2914 if (vcpu->arch.pio.cur_count) {
2664 r = complete_pio(vcpu); 2915 r = complete_pio(vcpu);
@@ -2670,9 +2921,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2670 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 2921 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
2671 vcpu->mmio_read_completed = 1; 2922 vcpu->mmio_read_completed = 1;
2672 vcpu->mmio_needed = 0; 2923 vcpu->mmio_needed = 0;
2924
2925 down_read(&vcpu->kvm->slots_lock);
2673 r = emulate_instruction(vcpu, kvm_run, 2926 r = emulate_instruction(vcpu, kvm_run,
2674 vcpu->arch.mmio_fault_cr2, 0, 2927 vcpu->arch.mmio_fault_cr2, 0,
2675 EMULTYPE_NO_DECODE); 2928 EMULTYPE_NO_DECODE);
2929 up_read(&vcpu->kvm->slots_lock);
2676 if (r == EMULATE_DO_MMIO) { 2930 if (r == EMULATE_DO_MMIO) {
2677 /* 2931 /*
2678 * Read-modify-write. Back to userspace. 2932 * Read-modify-write. Back to userspace.
@@ -2773,7 +3027,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2773static void get_segment(struct kvm_vcpu *vcpu, 3027static void get_segment(struct kvm_vcpu *vcpu,
2774 struct kvm_segment *var, int seg) 3028 struct kvm_segment *var, int seg)
2775{ 3029{
2776 return kvm_x86_ops->get_segment(vcpu, var, seg); 3030 kvm_x86_ops->get_segment(vcpu, var, seg);
2777} 3031}
2778 3032
2779void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 3033void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -2816,7 +3070,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2816 sregs->cr2 = vcpu->arch.cr2; 3070 sregs->cr2 = vcpu->arch.cr2;
2817 sregs->cr3 = vcpu->arch.cr3; 3071 sregs->cr3 = vcpu->arch.cr3;
2818 sregs->cr4 = vcpu->arch.cr4; 3072 sregs->cr4 = vcpu->arch.cr4;
2819 sregs->cr8 = get_cr8(vcpu); 3073 sregs->cr8 = kvm_get_cr8(vcpu);
2820 sregs->efer = vcpu->arch.shadow_efer; 3074 sregs->efer = vcpu->arch.shadow_efer;
2821 sregs->apic_base = kvm_get_apic_base(vcpu); 3075 sregs->apic_base = kvm_get_apic_base(vcpu);
2822 3076
@@ -2836,12 +3090,438 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2836 return 0; 3090 return 0;
2837} 3091}
2838 3092
3093int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3094 struct kvm_mp_state *mp_state)
3095{
3096 vcpu_load(vcpu);
3097 mp_state->mp_state = vcpu->arch.mp_state;
3098 vcpu_put(vcpu);
3099 return 0;
3100}
3101
3102int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3103 struct kvm_mp_state *mp_state)
3104{
3105 vcpu_load(vcpu);
3106 vcpu->arch.mp_state = mp_state->mp_state;
3107 vcpu_put(vcpu);
3108 return 0;
3109}
3110
2839static void set_segment(struct kvm_vcpu *vcpu, 3111static void set_segment(struct kvm_vcpu *vcpu,
2840 struct kvm_segment *var, int seg) 3112 struct kvm_segment *var, int seg)
2841{ 3113{
2842 return kvm_x86_ops->set_segment(vcpu, var, seg); 3114 kvm_x86_ops->set_segment(vcpu, var, seg);
3115}
3116
3117static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3118 struct kvm_segment *kvm_desct)
3119{
3120 kvm_desct->base = seg_desc->base0;
3121 kvm_desct->base |= seg_desc->base1 << 16;
3122 kvm_desct->base |= seg_desc->base2 << 24;
3123 kvm_desct->limit = seg_desc->limit0;
3124 kvm_desct->limit |= seg_desc->limit << 16;
3125 kvm_desct->selector = selector;
3126 kvm_desct->type = seg_desc->type;
3127 kvm_desct->present = seg_desc->p;
3128 kvm_desct->dpl = seg_desc->dpl;
3129 kvm_desct->db = seg_desc->d;
3130 kvm_desct->s = seg_desc->s;
3131 kvm_desct->l = seg_desc->l;
3132 kvm_desct->g = seg_desc->g;
3133 kvm_desct->avl = seg_desc->avl;
3134 if (!selector)
3135 kvm_desct->unusable = 1;
3136 else
3137 kvm_desct->unusable = 0;
3138 kvm_desct->padding = 0;
3139}
3140
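
seg_desct_to_kvm_desct() reassembles the base and limit of a legacy x86 descriptor from the fields the hardware format scatters across the 8-byte entry. A minimal sketch of the same bit-slicing, with a field layout assumed to match the desc_struct used above:

	#include <stdint.h>

	struct desc_bits {			/* field widths per the layout assumed here */
		uint64_t limit0 : 16;
		uint64_t base0  : 16;
		uint64_t base1  : 8;
		uint64_t type   : 4;
		uint64_t s      : 1;
		uint64_t dpl    : 2;
		uint64_t p      : 1;
		uint64_t limit  : 4;
		uint64_t avl    : 1;
		uint64_t l      : 1;
		uint64_t d      : 1;
		uint64_t g      : 1;
		uint64_t base2  : 8;
	};

	static uint32_t desc_base(const struct desc_bits *d)
	{
		return d->base0 | (d->base1 << 16) | (d->base2 << 24);
	}

	static uint32_t desc_limit(const struct desc_bits *d)
	{
		return d->limit0 | (d->limit << 16);
	}
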
3141static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
3142 u16 selector,
3143 struct descriptor_table *dtable)
3144{
3145 if (selector & 1 << 2) {
3146 struct kvm_segment kvm_seg;
3147
3148 get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
3149
3150 if (kvm_seg.unusable)
3151 dtable->limit = 0;
3152 else
3153 dtable->limit = kvm_seg.limit;
3154 dtable->base = kvm_seg.base;
3155 }
3156 else
3157 kvm_x86_ops->get_gdt(vcpu, dtable);
3158}
3159
3160/* allowed just for 8-byte segment descriptors */
3161static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3162 struct desc_struct *seg_desc)
3163{
3164 struct descriptor_table dtable;
3165 u16 index = selector >> 3;
3166
3167 get_segment_descritptor_dtable(vcpu, selector, &dtable);
3168
3169 if (dtable.limit < index * 8 + 7) {
3170 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
3171 return 1;
3172 }
3173 return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
3174}
3175
3176/* allowed just for 8-byte segment descriptors */
3177static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3178 struct desc_struct *seg_desc)
3179{
3180 struct descriptor_table dtable;
3181 u16 index = selector >> 3;
3182
3183 get_segment_descritptor_dtable(vcpu, selector, &dtable);
3184
3185 if (dtable.limit < index * 8 + 7)
3186 return 1;
3187 return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
3188}
3189
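
Both descriptor loaders above decode the selector the same way: bits 3 and up index an 8-byte slot, bit 2 picks the LDT over the GDT, and the low two bits carry the RPL. A standalone sketch of that decoding:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint16_t selector = 0x002b;		/* example value */
		unsigned index = selector >> 3;		/* descriptor slot */
		unsigned ti    = (selector >> 2) & 1;	/* 0 = GDT, 1 = LDT */
		unsigned rpl   = selector & 3;		/* requested privilege level */

		printf("index=%u table=%s rpl=%u byte offset=%u\n",
		       index, ti ? "LDT" : "GDT", rpl, index * 8);
		return 0;
	}
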
3190static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
3191 struct desc_struct *seg_desc)
3192{
3193 u32 base_addr;
3194
3195 base_addr = seg_desc->base0;
3196 base_addr |= (seg_desc->base1 << 16);
3197 base_addr |= (seg_desc->base2 << 24);
3198
3199 return base_addr;
3200}
3201
3202static int load_tss_segment32(struct kvm_vcpu *vcpu,
3203 struct desc_struct *seg_desc,
3204 struct tss_segment_32 *tss)
3205{
3206 u32 base_addr;
3207
3208 base_addr = get_tss_base_addr(vcpu, seg_desc);
3209
3210 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3211 sizeof(struct tss_segment_32));
3212}
3213
3214static int save_tss_segment32(struct kvm_vcpu *vcpu,
3215 struct desc_struct *seg_desc,
3216 struct tss_segment_32 *tss)
3217{
3218 u32 base_addr;
3219
3220 base_addr = get_tss_base_addr(vcpu, seg_desc);
3221
3222 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3223 sizeof(struct tss_segment_32));
3224}
3225
3226static int load_tss_segment16(struct kvm_vcpu *vcpu,
3227 struct desc_struct *seg_desc,
3228 struct tss_segment_16 *tss)
3229{
3230 u32 base_addr;
3231
3232 base_addr = get_tss_base_addr(vcpu, seg_desc);
3233
3234 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3235 sizeof(struct tss_segment_16));
3236}
3237
3238static int save_tss_segment16(struct kvm_vcpu *vcpu,
3239 struct desc_struct *seg_desc,
3240 struct tss_segment_16 *tss)
3241{
3242 u32 base_addr;
3243
3244 base_addr = get_tss_base_addr(vcpu, seg_desc);
3245
3246 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3247 sizeof(struct tss_segment_16));
3248}
3249
3250static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
3251{
3252 struct kvm_segment kvm_seg;
3253
3254 get_segment(vcpu, &kvm_seg, seg);
3255 return kvm_seg.selector;
3256}
3257
3258static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
3259 u16 selector,
3260 struct kvm_segment *kvm_seg)
3261{
3262 struct desc_struct seg_desc;
3263
3264 if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
3265 return 1;
3266 seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
3267 return 0;
3268}
3269
3270static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3271 int type_bits, int seg)
3272{
3273 struct kvm_segment kvm_seg;
3274
3275 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
3276 return 1;
3277 kvm_seg.type |= type_bits;
3278
3279 if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
3280 seg != VCPU_SREG_LDTR)
3281 if (!kvm_seg.s)
3282 kvm_seg.unusable = 1;
3283
3284 set_segment(vcpu, &kvm_seg, seg);
3285 return 0;
3286}
3287
3288static void save_state_to_tss32(struct kvm_vcpu *vcpu,
3289 struct tss_segment_32 *tss)
3290{
3291 tss->cr3 = vcpu->arch.cr3;
3292 tss->eip = vcpu->arch.rip;
3293 tss->eflags = kvm_x86_ops->get_rflags(vcpu);
3294 tss->eax = vcpu->arch.regs[VCPU_REGS_RAX];
3295 tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX];
3296 tss->edx = vcpu->arch.regs[VCPU_REGS_RDX];
3297 tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX];
3298 tss->esp = vcpu->arch.regs[VCPU_REGS_RSP];
3299 tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP];
3300 tss->esi = vcpu->arch.regs[VCPU_REGS_RSI];
3301 tss->edi = vcpu->arch.regs[VCPU_REGS_RDI];
3302
3303 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3304 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
3305 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
3306 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
3307 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
3308 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
3309 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
3310 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
3311}
3312
3313static int load_state_from_tss32(struct kvm_vcpu *vcpu,
3314 struct tss_segment_32 *tss)
3315{
3316 kvm_set_cr3(vcpu, tss->cr3);
3317
3318 vcpu->arch.rip = tss->eip;
3319 kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
3320
3321 vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax;
3322 vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx;
3323 vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx;
3324 vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx;
3325 vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp;
3326 vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp;
3327 vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi;
3328 vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi;
3329
3330 if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
3331 return 1;
3332
3333 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3334 return 1;
3335
3336 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3337 return 1;
3338
3339 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3340 return 1;
3341
3342 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3343 return 1;
3344
3345 if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
3346 return 1;
3347
3348 if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
3349 return 1;
3350 return 0;
3351}
3352
3353static void save_state_to_tss16(struct kvm_vcpu *vcpu,
3354 struct tss_segment_16 *tss)
3355{
3356 tss->ip = vcpu->arch.rip;
3357 tss->flag = kvm_x86_ops->get_rflags(vcpu);
3358 tss->ax = vcpu->arch.regs[VCPU_REGS_RAX];
3359 tss->cx = vcpu->arch.regs[VCPU_REGS_RCX];
3360 tss->dx = vcpu->arch.regs[VCPU_REGS_RDX];
3361 tss->bx = vcpu->arch.regs[VCPU_REGS_RBX];
3362 tss->sp = vcpu->arch.regs[VCPU_REGS_RSP];
3363 tss->bp = vcpu->arch.regs[VCPU_REGS_RBP];
3364 tss->si = vcpu->arch.regs[VCPU_REGS_RSI];
3365 tss->di = vcpu->arch.regs[VCPU_REGS_RDI];
3366
3367 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3368 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
3369 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
3370 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
3371 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
3372 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
3373}
3374
3375static int load_state_from_tss16(struct kvm_vcpu *vcpu,
3376 struct tss_segment_16 *tss)
3377{
3378 vcpu->arch.rip = tss->ip;
3379 kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
3380 vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax;
3381 vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx;
3382 vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx;
3383 vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx;
3384 vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp;
3385 vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp;
3386 vcpu->arch.regs[VCPU_REGS_RSI] = tss->si;
3387 vcpu->arch.regs[VCPU_REGS_RDI] = tss->di;
3388
3389 if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
3390 return 1;
3391
3392 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3393 return 1;
3394
3395 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3396 return 1;
3397
3398 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3399 return 1;
3400
3401 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3402 return 1;
3403 return 0;
3404}
3405
3406int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
3407 struct desc_struct *cseg_desc,
3408 struct desc_struct *nseg_desc)
3409{
3410 struct tss_segment_16 tss_segment_16;
3411 int ret = 0;
3412
3413 if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16))
3414 goto out;
3415
3416 save_state_to_tss16(vcpu, &tss_segment_16);
3417 save_tss_segment16(vcpu, cseg_desc, &tss_segment_16);
3418
3419 if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16))
3420 goto out;
3421 if (load_state_from_tss16(vcpu, &tss_segment_16))
3422 goto out;
3423
3424 ret = 1;
3425out:
3426 return ret;
3427}
3428
3429int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
3430 struct desc_struct *cseg_desc,
3431 struct desc_struct *nseg_desc)
3432{
3433 struct tss_segment_32 tss_segment_32;
3434 int ret = 0;
3435
3436 if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32))
3437 goto out;
3438
3439 save_state_to_tss32(vcpu, &tss_segment_32);
3440 save_tss_segment32(vcpu, cseg_desc, &tss_segment_32);
3441
3442 if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32))
3443 goto out;
3444 if (load_state_from_tss32(vcpu, &tss_segment_32))
3445 goto out;
3446
3447 ret = 1;
3448out:
3449 return ret;
2843} 3450}
2844 3451
3452int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3453{
3454 struct kvm_segment tr_seg;
3455 struct desc_struct cseg_desc;
3456 struct desc_struct nseg_desc;
3457 int ret = 0;
3458
3459 get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
3460
3461 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
3462 goto out;
3463
3464 if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc))
3465 goto out;
3466
3467
3468 if (reason != TASK_SWITCH_IRET) {
3469 int cpl;
3470
3471 cpl = kvm_x86_ops->get_cpl(vcpu);
3472 if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
3473 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
3474 return 1;
3475 }
3476 }
3477
3478 if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
3479 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
3480 return 1;
3481 }
3482
3483 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
 3484 cseg_desc.type &= ~(1 << 8); /* clear the B flag */
3485 save_guest_segment_descriptor(vcpu, tr_seg.selector,
3486 &cseg_desc);
3487 }
3488
3489 if (reason == TASK_SWITCH_IRET) {
3490 u32 eflags = kvm_x86_ops->get_rflags(vcpu);
3491 kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
3492 }
3493
3494 kvm_x86_ops->skip_emulated_instruction(vcpu);
3495 kvm_x86_ops->cache_regs(vcpu);
3496
3497 if (nseg_desc.type & 8)
3498 ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc,
3499 &nseg_desc);
3500 else
3501 ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc,
3502 &nseg_desc);
3503
3504 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
3505 u32 eflags = kvm_x86_ops->get_rflags(vcpu);
3506 kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
3507 }
3508
3509 if (reason != TASK_SWITCH_IRET) {
3510 nseg_desc.type |= (1 << 8);
3511 save_guest_segment_descriptor(vcpu, tss_selector,
3512 &nseg_desc);
3513 }
3514
3515 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
3516 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
3517 tr_seg.type = 11;
3518 set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
3519out:
3520 kvm_x86_ops->decache_regs(vcpu);
3521 return ret;
3522}
3523EXPORT_SYMBOL_GPL(kvm_task_switch);
3524
2845int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 3525int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2846 struct kvm_sregs *sregs) 3526 struct kvm_sregs *sregs)
2847{ 3527{
@@ -2862,12 +3542,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2862 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; 3542 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
2863 vcpu->arch.cr3 = sregs->cr3; 3543 vcpu->arch.cr3 = sregs->cr3;
2864 3544
2865 set_cr8(vcpu, sregs->cr8); 3545 kvm_set_cr8(vcpu, sregs->cr8);
2866 3546
2867 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; 3547 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
2868#ifdef CONFIG_X86_64
2869 kvm_x86_ops->set_efer(vcpu, sregs->efer); 3548 kvm_x86_ops->set_efer(vcpu, sregs->efer);
2870#endif
2871 kvm_set_apic_base(vcpu, sregs->apic_base); 3549 kvm_set_apic_base(vcpu, sregs->apic_base);
2872 3550
2873 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 3551 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
@@ -3141,9 +3819,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
3141 3819
3142 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 3820 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
3143 if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0) 3821 if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
3144 vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 3822 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3145 else 3823 else
3146 vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED; 3824 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
3147 3825
3148 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 3826 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
3149 if (!page) { 3827 if (!page) {
@@ -3175,7 +3853,9 @@ fail:
3175void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 3853void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
3176{ 3854{
3177 kvm_free_lapic(vcpu); 3855 kvm_free_lapic(vcpu);
3856 down_read(&vcpu->kvm->slots_lock);
3178 kvm_mmu_destroy(vcpu); 3857 kvm_mmu_destroy(vcpu);
3858 up_read(&vcpu->kvm->slots_lock);
3179 free_page((unsigned long)vcpu->arch.pio_data); 3859 free_page((unsigned long)vcpu->arch.pio_data);
3180} 3860}
3181 3861
@@ -3219,10 +3899,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
3219 3899
3220void kvm_arch_destroy_vm(struct kvm *kvm) 3900void kvm_arch_destroy_vm(struct kvm *kvm)
3221{ 3901{
3902 kvm_free_pit(kvm);
3222 kfree(kvm->arch.vpic); 3903 kfree(kvm->arch.vpic);
3223 kfree(kvm->arch.vioapic); 3904 kfree(kvm->arch.vioapic);
3224 kvm_free_vcpus(kvm); 3905 kvm_free_vcpus(kvm);
3225 kvm_free_physmem(kvm); 3906 kvm_free_physmem(kvm);
3907 if (kvm->arch.apic_access_page)
3908 put_page(kvm->arch.apic_access_page);
3226 kfree(kvm); 3909 kfree(kvm);
3227} 3910}
3228 3911
@@ -3278,8 +3961,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
3278 3961
3279int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3962int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3280{ 3963{
3281 return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE 3964 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
3282 || vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED; 3965 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
3283} 3966}
3284 3967
3285static void vcpu_kick_intr(void *info) 3968static void vcpu_kick_intr(void *info)
@@ -3293,11 +3976,17 @@ static void vcpu_kick_intr(void *info)
3293void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 3976void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
3294{ 3977{
3295 int ipi_pcpu = vcpu->cpu; 3978 int ipi_pcpu = vcpu->cpu;
3979 int cpu = get_cpu();
3296 3980
3297 if (waitqueue_active(&vcpu->wq)) { 3981 if (waitqueue_active(&vcpu->wq)) {
3298 wake_up_interruptible(&vcpu->wq); 3982 wake_up_interruptible(&vcpu->wq);
3299 ++vcpu->stat.halt_wakeup; 3983 ++vcpu->stat.halt_wakeup;
3300 } 3984 }
3301 if (vcpu->guest_mode) 3985 /*
 3986 * We may be called synchronously with irqs disabled in guest mode,
 3987 * so there is no need to call smp_call_function_single() in that case.
3988 */
3989 if (vcpu->guest_mode && vcpu->cpu != cpu)
3302 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); 3990 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
3991 put_cpu();
3303} 3992}
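
The guest_mode store now happens before the pending-IRQ test (separated by smp_wmb()), and kvm_vcpu_kick() only sends the IPI when it runs on a different CPU while the vCPU is in guest mode. The ordering requirement reduces to the classic two-flag pattern; a hedged userspace sketch using C11 atomics in place of the kernel barriers:

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_bool guest_mode;		/* set by the vCPU thread before entry */
	static atomic_bool request_pending;	/* set by the kicking thread */

	/* vCPU side: publish guest_mode, then look for work (mirrors the hunk above). */
	static bool vcpu_may_enter_guest(void)
	{
		atomic_store(&guest_mode, true);		/* smp_wmb() analogue */
		if (atomic_load(&request_pending)) {
			atomic_store(&guest_mode, false);	/* bail out and service the request */
			return false;
		}
		return true;					/* safe to enter; a later kick raises an IPI */
	}

	/* Kicker side: post the request first, then decide whether an IPI is needed. */
	static bool kick_needs_ipi(void)
	{
		atomic_store(&request_pending, true);
		return atomic_load(&guest_mode);	/* only then is smp_call_function_single() required */
	}
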
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 79586003397a..2ca08386f993 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -65,6 +65,14 @@
65#define MemAbs (1<<9) /* Memory operand is absolute displacement */ 65#define MemAbs (1<<9) /* Memory operand is absolute displacement */
66#define String (1<<10) /* String instruction (rep capable) */ 66#define String (1<<10) /* String instruction (rep capable) */
67#define Stack (1<<11) /* Stack instruction (push/pop) */ 67#define Stack (1<<11) /* Stack instruction (push/pop) */
68#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
69#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
70#define GroupMask 0xff /* Group number stored in bits 0:7 */
71
72enum {
73 Group1_80, Group1_81, Group1_82, Group1_83,
74 Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
75};
68 76
69static u16 opcode_table[256] = { 77static u16 opcode_table[256] = {
70 /* 0x00 - 0x07 */ 78 /* 0x00 - 0x07 */
@@ -123,14 +131,14 @@ static u16 opcode_table[256] = {
123 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 131 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
124 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 132 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
125 /* 0x80 - 0x87 */ 133 /* 0x80 - 0x87 */
126 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 134 Group | Group1_80, Group | Group1_81,
127 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 135 Group | Group1_82, Group | Group1_83,
128 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 136 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
129 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 137 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
130 /* 0x88 - 0x8F */ 138 /* 0x88 - 0x8F */
131 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, 139 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
132 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 140 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
133 0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov | Stack, 141 0, ModRM | DstReg, 0, Group | Group1A,
134 /* 0x90 - 0x9F */ 142 /* 0x90 - 0x9F */
135 0, 0, 0, 0, 0, 0, 0, 0, 143 0, 0, 0, 0, 0, 0, 0, 0,
136 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, 144 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
@@ -164,16 +172,15 @@ static u16 opcode_table[256] = {
164 0, 0, 0, 0, 172 0, 0, 0, 0,
165 /* 0xF0 - 0xF7 */ 173 /* 0xF0 - 0xF7 */
166 0, 0, 0, 0, 174 0, 0, 0, 0,
167 ImplicitOps, ImplicitOps, 175 ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
168 ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
169 /* 0xF8 - 0xFF */ 176 /* 0xF8 - 0xFF */
170 ImplicitOps, 0, ImplicitOps, ImplicitOps, 177 ImplicitOps, 0, ImplicitOps, ImplicitOps,
171 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM 178 0, 0, Group | Group4, Group | Group5,
172}; 179};
173 180
174static u16 twobyte_table[256] = { 181static u16 twobyte_table[256] = {
175 /* 0x00 - 0x0F */ 182 /* 0x00 - 0x0F */
176 0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0, 183 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
177 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 184 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
178 /* 0x10 - 0x1F */ 185 /* 0x10 - 0x1F */
179 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 186 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
@@ -229,6 +236,56 @@ static u16 twobyte_table[256] = {
229 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 236 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
230}; 237};
231 238
239static u16 group_table[] = {
240 [Group1_80*8] =
241 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
242 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
243 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
244 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
245 [Group1_81*8] =
246 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
247 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
248 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
249 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
250 [Group1_82*8] =
251 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
252 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
253 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
254 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
255 [Group1_83*8] =
256 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
257 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
258 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
259 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
260 [Group1A*8] =
261 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
262 [Group3_Byte*8] =
263 ByteOp | SrcImm | DstMem | ModRM, 0,
264 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
265 0, 0, 0, 0,
266 [Group3*8] =
267 DstMem | SrcImm | ModRM | SrcImm, 0,
268 DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
269 0, 0, 0, 0,
270 [Group4*8] =
271 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
272 0, 0, 0, 0, 0, 0,
273 [Group5*8] =
274 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0,
275 SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0,
276 [Group7*8] =
277 0, 0, ModRM | SrcMem, ModRM | SrcMem,
278 SrcNone | ModRM | DstMem | Mov, 0,
279 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
280};
281
282static u16 group2_table[] = {
283 [Group7*8] =
284 SrcNone | ModRM, 0, 0, 0,
285 SrcNone | ModRM | DstMem | Mov, 0,
286 SrcMem16 | ModRM | Mov, 0,
287};
288
232/* EFLAGS bit definitions. */ 289/* EFLAGS bit definitions. */
233#define EFLG_OF (1<<11) 290#define EFLG_OF (1<<11)
234#define EFLG_DF (1<<10) 291#define EFLG_DF (1<<10)
@@ -317,7 +374,7 @@ static u16 twobyte_table[256] = {
317 374
318#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ 375#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
319 do { \ 376 do { \
320 unsigned long _tmp; \ 377 unsigned long __tmp; \
321 switch ((_dst).bytes) { \ 378 switch ((_dst).bytes) { \
322 case 1: \ 379 case 1: \
323 __asm__ __volatile__ ( \ 380 __asm__ __volatile__ ( \
@@ -325,7 +382,7 @@ static u16 twobyte_table[256] = {
325 _op"b %"_bx"3,%1; " \ 382 _op"b %"_bx"3,%1; " \
326 _POST_EFLAGS("0", "4", "2") \ 383 _POST_EFLAGS("0", "4", "2") \
327 : "=m" (_eflags), "=m" ((_dst).val), \ 384 : "=m" (_eflags), "=m" ((_dst).val), \
328 "=&r" (_tmp) \ 385 "=&r" (__tmp) \
329 : _by ((_src).val), "i" (EFLAGS_MASK)); \ 386 : _by ((_src).val), "i" (EFLAGS_MASK)); \
330 break; \ 387 break; \
331 default: \ 388 default: \
@@ -426,29 +483,40 @@ static u16 twobyte_table[256] = {
426 (_type)_x; \ 483 (_type)_x; \
427}) 484})
428 485
486static inline unsigned long ad_mask(struct decode_cache *c)
487{
488 return (1UL << (c->ad_bytes << 3)) - 1;
489}
490
429/* Access/update address held in a register, based on addressing mode. */ 491/* Access/update address held in a register, based on addressing mode. */
430#define address_mask(reg) \ 492static inline unsigned long
431 ((c->ad_bytes == sizeof(unsigned long)) ? \ 493address_mask(struct decode_cache *c, unsigned long reg)
432 (reg) : ((reg) & ((1UL << (c->ad_bytes << 3)) - 1))) 494{
433#define register_address(base, reg) \ 495 if (c->ad_bytes == sizeof(unsigned long))
434 ((base) + address_mask(reg)) 496 return reg;
435#define register_address_increment(reg, inc) \ 497 else
436 do { \ 498 return reg & ad_mask(c);
437 /* signed type ensures sign extension to long */ \ 499}
438 int _inc = (inc); \
439 if (c->ad_bytes == sizeof(unsigned long)) \
440 (reg) += _inc; \
441 else \
442 (reg) = ((reg) & \
443 ~((1UL << (c->ad_bytes << 3)) - 1)) | \
444 (((reg) + _inc) & \
445 ((1UL << (c->ad_bytes << 3)) - 1)); \
446 } while (0)
447 500
448#define JMP_REL(rel) \ 501static inline unsigned long
449 do { \ 502register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
450 register_address_increment(c->eip, rel); \ 503{
451 } while (0) 504 return base + address_mask(c, reg);
505}
506
507static inline void
508register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
509{
510 if (c->ad_bytes == sizeof(unsigned long))
511 *reg += inc;
512 else
513 *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
514}
515
516static inline void jmp_rel(struct decode_cache *c, int rel)
517{
518 register_address_increment(c, &c->eip, rel);
519}
452 520
453static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, 521static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
454 struct x86_emulate_ops *ops, 522 struct x86_emulate_ops *ops,
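The hunk above replaces the old address_mask/register_address/JMP_REL macros with the inline helpers ad_mask(), address_mask(), register_address(), register_address_increment() and jmp_rel(). A minimal userspace sketch of what the new helpers compute, with struct decode_cache reduced to the one field they need and a made-up 16-bit example value (this is an illustration, not the kernel code itself):

#include <stdio.h>

struct decode_cache {
	int ad_bytes;	/* address size of the decoded instruction: 2, 4 or 8 */
};

/* mask covering the low ad_bytes*8 bits, e.g. 0xffff for 16-bit addressing */
static unsigned long ad_mask(struct decode_cache *c)
{
	return (1UL << (c->ad_bytes << 3)) - 1;
}

/* add inc to *reg, wrapping only within the current address size */
static void register_address_increment(struct decode_cache *c,
					unsigned long *reg, int inc)
{
	if (c->ad_bytes == sizeof(unsigned long))
		*reg += inc;
	else
		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
}

int main(void)
{
	struct decode_cache c = { .ad_bytes = 2 };	/* 16-bit addressing */
	unsigned long si = 0xffff;

	register_address_increment(&c, &si, 1);
	printf("%#lx\n", si);	/* prints 0: the value wraps at 16 bits */
	return 0;
}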
@@ -763,7 +831,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
763 struct decode_cache *c = &ctxt->decode; 831 struct decode_cache *c = &ctxt->decode;
764 int rc = 0; 832 int rc = 0;
765 int mode = ctxt->mode; 833 int mode = ctxt->mode;
766 int def_op_bytes, def_ad_bytes; 834 int def_op_bytes, def_ad_bytes, group;
767 835
768 /* Shadow copy of register state. Committed on successful emulation. */ 836 /* Shadow copy of register state. Committed on successful emulation. */
769 837
@@ -864,12 +932,24 @@ done_prefixes:
864 c->b = insn_fetch(u8, 1, c->eip); 932 c->b = insn_fetch(u8, 1, c->eip);
865 c->d = twobyte_table[c->b]; 933 c->d = twobyte_table[c->b];
866 } 934 }
935 }
867 936
868 /* Unrecognised? */ 937 if (c->d & Group) {
869 if (c->d == 0) { 938 group = c->d & GroupMask;
870 DPRINTF("Cannot emulate %02x\n", c->b); 939 c->modrm = insn_fetch(u8, 1, c->eip);
871 return -1; 940 --c->eip;
872 } 941
942 group = (group << 3) + ((c->modrm >> 3) & 7);
943 if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
944 c->d = group2_table[group];
945 else
946 c->d = group_table[group];
947 }
948
949 /* Unrecognised? */
950 if (c->d == 0) {
951 DPRINTF("Cannot emulate %02x\n", c->b);
952 return -1;
873 } 953 }
874 954
875 if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) 955 if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
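The decode path above folds the opcode's group number and the ModRM reg field into a single table index: each group owns eight consecutive slots, so the entry is group*8 + reg, and group2_table supplies the alternate decoding when a GroupDual opcode is used with mod == 3. A standalone sketch of that lookup, with trimmed stand-in tables and a hard-coded ModRM byte instead of insn_fetch() (the table contents here are placeholders, not the real decode flags):

#include <stdio.h>

#define Group     (1 << 14)
#define GroupDual (1 << 15)
#define GroupMask 0xff

enum { Group1_80, Group1_81, Group1_82, Group1_83,
       Group1A, Group3_Byte, Group3, Group4, Group5, Group7, };

/* stand-in tables: only the slot used below is filled in */
static unsigned short group_table[Group7 * 8 + 8];
static unsigned short group2_table[Group7 * 8 + 8];

static unsigned short decode_flags(unsigned short d, unsigned char modrm)
{
	if (d & Group) {
		int group = d & GroupMask;

		/* bits 5:3 of ModRM select the entry within the group */
		group = (group << 3) + ((modrm >> 3) & 7);
		if ((d & GroupDual) && (modrm >> 6) == 3)
			return group2_table[group];
		return group_table[group];
	}
	return d;
}

int main(void)
{
	group_table[Group7 * 8 + 2] = 0x1234;	/* pretend flags for 0f 01 /2 */
	/* ModRM 0x16: mod = 0, reg = 2 -> regular group table entry is used */
	printf("%#x\n", decode_flags(Group | GroupDual | Group7, 0x16));
	return 0;
}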
@@ -924,6 +1004,7 @@ done_prefixes:
924 */ 1004 */
925 if ((c->d & ModRM) && c->modrm_mod == 3) { 1005 if ((c->d & ModRM) && c->modrm_mod == 3) {
926 c->src.type = OP_REG; 1006 c->src.type = OP_REG;
1007 c->src.val = c->modrm_val;
927 break; 1008 break;
928 } 1009 }
929 c->src.type = OP_MEM; 1010 c->src.type = OP_MEM;
@@ -967,6 +1048,7 @@ done_prefixes:
967 case DstMem: 1048 case DstMem:
968 if ((c->d & ModRM) && c->modrm_mod == 3) { 1049 if ((c->d & ModRM) && c->modrm_mod == 3) {
969 c->dst.type = OP_REG; 1050 c->dst.type = OP_REG;
1051 c->dst.val = c->dst.orig_val = c->modrm_val;
970 break; 1052 break;
971 } 1053 }
972 c->dst.type = OP_MEM; 1054 c->dst.type = OP_MEM;
@@ -984,8 +1066,8 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
984 c->dst.type = OP_MEM; 1066 c->dst.type = OP_MEM;
985 c->dst.bytes = c->op_bytes; 1067 c->dst.bytes = c->op_bytes;
986 c->dst.val = c->src.val; 1068 c->dst.val = c->src.val;
987 register_address_increment(c->regs[VCPU_REGS_RSP], -c->op_bytes); 1069 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
988 c->dst.ptr = (void *) register_address(ctxt->ss_base, 1070 c->dst.ptr = (void *) register_address(c, ctxt->ss_base,
989 c->regs[VCPU_REGS_RSP]); 1071 c->regs[VCPU_REGS_RSP]);
990} 1072}
991 1073
@@ -995,13 +1077,13 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
995 struct decode_cache *c = &ctxt->decode; 1077 struct decode_cache *c = &ctxt->decode;
996 int rc; 1078 int rc;
997 1079
998 rc = ops->read_std(register_address(ctxt->ss_base, 1080 rc = ops->read_std(register_address(c, ctxt->ss_base,
999 c->regs[VCPU_REGS_RSP]), 1081 c->regs[VCPU_REGS_RSP]),
1000 &c->dst.val, c->dst.bytes, ctxt->vcpu); 1082 &c->dst.val, c->dst.bytes, ctxt->vcpu);
1001 if (rc != 0) 1083 if (rc != 0)
1002 return rc; 1084 return rc;
1003 1085
1004 register_address_increment(c->regs[VCPU_REGS_RSP], c->dst.bytes); 1086 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
1005 1087
1006 return 0; 1088 return 0;
1007} 1089}
@@ -1043,26 +1125,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1043 1125
1044 switch (c->modrm_reg) { 1126 switch (c->modrm_reg) {
1045 case 0 ... 1: /* test */ 1127 case 0 ... 1: /* test */
1046 /*
1047 * Special case in Grp3: test has an immediate
1048 * source operand.
1049 */
1050 c->src.type = OP_IMM;
1051 c->src.ptr = (unsigned long *)c->eip;
1052 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1053 if (c->src.bytes == 8)
1054 c->src.bytes = 4;
1055 switch (c->src.bytes) {
1056 case 1:
1057 c->src.val = insn_fetch(s8, 1, c->eip);
1058 break;
1059 case 2:
1060 c->src.val = insn_fetch(s16, 2, c->eip);
1061 break;
1062 case 4:
1063 c->src.val = insn_fetch(s32, 4, c->eip);
1064 break;
1065 }
1066 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); 1128 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
1067 break; 1129 break;
1068 case 2: /* not */ 1130 case 2: /* not */
@@ -1076,7 +1138,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1076 rc = X86EMUL_UNHANDLEABLE; 1138 rc = X86EMUL_UNHANDLEABLE;
1077 break; 1139 break;
1078 } 1140 }
1079done:
1080 return rc; 1141 return rc;
1081} 1142}
1082 1143
@@ -1084,7 +1145,6 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1084 struct x86_emulate_ops *ops) 1145 struct x86_emulate_ops *ops)
1085{ 1146{
1086 struct decode_cache *c = &ctxt->decode; 1147 struct decode_cache *c = &ctxt->decode;
1087 int rc;
1088 1148
1089 switch (c->modrm_reg) { 1149 switch (c->modrm_reg) {
1090 case 0: /* inc */ 1150 case 0: /* inc */
@@ -1094,36 +1154,11 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1094 emulate_1op("dec", c->dst, ctxt->eflags); 1154 emulate_1op("dec", c->dst, ctxt->eflags);
1095 break; 1155 break;
1096 case 4: /* jmp abs */ 1156 case 4: /* jmp abs */
1097 if (c->b == 0xff) 1157 c->eip = c->src.val;
1098 c->eip = c->dst.val;
1099 else {
1100 DPRINTF("Cannot emulate %02x\n", c->b);
1101 return X86EMUL_UNHANDLEABLE;
1102 }
1103 break; 1158 break;
1104 case 6: /* push */ 1159 case 6: /* push */
1105 1160 emulate_push(ctxt);
1106 /* 64-bit mode: PUSH always pushes a 64-bit operand. */
1107
1108 if (ctxt->mode == X86EMUL_MODE_PROT64) {
1109 c->dst.bytes = 8;
1110 rc = ops->read_std((unsigned long)c->dst.ptr,
1111 &c->dst.val, 8, ctxt->vcpu);
1112 if (rc != 0)
1113 return rc;
1114 }
1115 register_address_increment(c->regs[VCPU_REGS_RSP],
1116 -c->dst.bytes);
1117 rc = ops->write_emulated(register_address(ctxt->ss_base,
1118 c->regs[VCPU_REGS_RSP]), &c->dst.val,
1119 c->dst.bytes, ctxt->vcpu);
1120 if (rc != 0)
1121 return rc;
1122 c->dst.type = OP_NONE;
1123 break; 1161 break;
1124 default:
1125 DPRINTF("Cannot emulate %02x\n", c->b);
1126 return X86EMUL_UNHANDLEABLE;
1127 } 1162 }
1128 return 0; 1163 return 0;
1129} 1164}
@@ -1361,19 +1396,19 @@ special_insn:
1361 c->dst.type = OP_MEM; 1396 c->dst.type = OP_MEM;
1362 c->dst.bytes = c->op_bytes; 1397 c->dst.bytes = c->op_bytes;
1363 c->dst.val = c->src.val; 1398 c->dst.val = c->src.val;
1364 register_address_increment(c->regs[VCPU_REGS_RSP], 1399 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1365 -c->op_bytes); 1400 -c->op_bytes);
1366 c->dst.ptr = (void *) register_address( 1401 c->dst.ptr = (void *) register_address(
1367 ctxt->ss_base, c->regs[VCPU_REGS_RSP]); 1402 c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
1368 break; 1403 break;
1369 case 0x58 ... 0x5f: /* pop reg */ 1404 case 0x58 ... 0x5f: /* pop reg */
1370 pop_instruction: 1405 pop_instruction:
1371 if ((rc = ops->read_std(register_address(ctxt->ss_base, 1406 if ((rc = ops->read_std(register_address(c, ctxt->ss_base,
1372 c->regs[VCPU_REGS_RSP]), c->dst.ptr, 1407 c->regs[VCPU_REGS_RSP]), c->dst.ptr,
1373 c->op_bytes, ctxt->vcpu)) != 0) 1408 c->op_bytes, ctxt->vcpu)) != 0)
1374 goto done; 1409 goto done;
1375 1410
1376 register_address_increment(c->regs[VCPU_REGS_RSP], 1411 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1377 c->op_bytes); 1412 c->op_bytes);
1378 c->dst.type = OP_NONE; /* Disable writeback. */ 1413 c->dst.type = OP_NONE; /* Disable writeback. */
1379 break; 1414 break;
@@ -1393,9 +1428,9 @@ special_insn:
1393 1, 1428 1,
1394 (c->d & ByteOp) ? 1 : c->op_bytes, 1429 (c->d & ByteOp) ? 1 : c->op_bytes,
1395 c->rep_prefix ? 1430 c->rep_prefix ?
1396 address_mask(c->regs[VCPU_REGS_RCX]) : 1, 1431 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
1397 (ctxt->eflags & EFLG_DF), 1432 (ctxt->eflags & EFLG_DF),
1398 register_address(ctxt->es_base, 1433 register_address(c, ctxt->es_base,
1399 c->regs[VCPU_REGS_RDI]), 1434 c->regs[VCPU_REGS_RDI]),
1400 c->rep_prefix, 1435 c->rep_prefix,
1401 c->regs[VCPU_REGS_RDX]) == 0) { 1436 c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1409,9 +1444,9 @@ special_insn:
1409 0, 1444 0,
1410 (c->d & ByteOp) ? 1 : c->op_bytes, 1445 (c->d & ByteOp) ? 1 : c->op_bytes,
1411 c->rep_prefix ? 1446 c->rep_prefix ?
1412 address_mask(c->regs[VCPU_REGS_RCX]) : 1, 1447 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
1413 (ctxt->eflags & EFLG_DF), 1448 (ctxt->eflags & EFLG_DF),
1414 register_address(c->override_base ? 1449 register_address(c, c->override_base ?
1415 *c->override_base : 1450 *c->override_base :
1416 ctxt->ds_base, 1451 ctxt->ds_base,
1417 c->regs[VCPU_REGS_RSI]), 1452 c->regs[VCPU_REGS_RSI]),
@@ -1425,7 +1460,7 @@ special_insn:
1425 int rel = insn_fetch(s8, 1, c->eip); 1460 int rel = insn_fetch(s8, 1, c->eip);
1426 1461
1427 if (test_cc(c->b, ctxt->eflags)) 1462 if (test_cc(c->b, ctxt->eflags))
1428 JMP_REL(rel); 1463 jmp_rel(c, rel);
1429 break; 1464 break;
1430 } 1465 }
1431 case 0x80 ... 0x83: /* Grp1 */ 1466 case 0x80 ... 0x83: /* Grp1 */
@@ -1477,7 +1512,7 @@ special_insn:
1477 case 0x88 ... 0x8b: /* mov */ 1512 case 0x88 ... 0x8b: /* mov */
1478 goto mov; 1513 goto mov;
1479 case 0x8d: /* lea r16/r32, m */ 1514 case 0x8d: /* lea r16/r32, m */
1480 c->dst.val = c->modrm_val; 1515 c->dst.val = c->modrm_ea;
1481 break; 1516 break;
1482 case 0x8f: /* pop (sole member of Grp1a) */ 1517 case 0x8f: /* pop (sole member of Grp1a) */
1483 rc = emulate_grp1a(ctxt, ops); 1518 rc = emulate_grp1a(ctxt, ops);
@@ -1501,27 +1536,27 @@ special_insn:
1501 case 0xa4 ... 0xa5: /* movs */ 1536 case 0xa4 ... 0xa5: /* movs */
1502 c->dst.type = OP_MEM; 1537 c->dst.type = OP_MEM;
1503 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1538 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1504 c->dst.ptr = (unsigned long *)register_address( 1539 c->dst.ptr = (unsigned long *)register_address(c,
1505 ctxt->es_base, 1540 ctxt->es_base,
1506 c->regs[VCPU_REGS_RDI]); 1541 c->regs[VCPU_REGS_RDI]);
1507 if ((rc = ops->read_emulated(register_address( 1542 if ((rc = ops->read_emulated(register_address(c,
1508 c->override_base ? *c->override_base : 1543 c->override_base ? *c->override_base :
1509 ctxt->ds_base, 1544 ctxt->ds_base,
1510 c->regs[VCPU_REGS_RSI]), 1545 c->regs[VCPU_REGS_RSI]),
1511 &c->dst.val, 1546 &c->dst.val,
1512 c->dst.bytes, ctxt->vcpu)) != 0) 1547 c->dst.bytes, ctxt->vcpu)) != 0)
1513 goto done; 1548 goto done;
1514 register_address_increment(c->regs[VCPU_REGS_RSI], 1549 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
1515 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1550 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1516 : c->dst.bytes); 1551 : c->dst.bytes);
1517 register_address_increment(c->regs[VCPU_REGS_RDI], 1552 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
1518 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1553 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1519 : c->dst.bytes); 1554 : c->dst.bytes);
1520 break; 1555 break;
1521 case 0xa6 ... 0xa7: /* cmps */ 1556 case 0xa6 ... 0xa7: /* cmps */
1522 c->src.type = OP_NONE; /* Disable writeback. */ 1557 c->src.type = OP_NONE; /* Disable writeback. */
1523 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1558 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1524 c->src.ptr = (unsigned long *)register_address( 1559 c->src.ptr = (unsigned long *)register_address(c,
1525 c->override_base ? *c->override_base : 1560 c->override_base ? *c->override_base :
1526 ctxt->ds_base, 1561 ctxt->ds_base,
1527 c->regs[VCPU_REGS_RSI]); 1562 c->regs[VCPU_REGS_RSI]);
@@ -1533,7 +1568,7 @@ special_insn:
1533 1568
1534 c->dst.type = OP_NONE; /* Disable writeback. */ 1569 c->dst.type = OP_NONE; /* Disable writeback. */
1535 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1570 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1536 c->dst.ptr = (unsigned long *)register_address( 1571 c->dst.ptr = (unsigned long *)register_address(c,
1537 ctxt->es_base, 1572 ctxt->es_base,
1538 c->regs[VCPU_REGS_RDI]); 1573 c->regs[VCPU_REGS_RDI]);
1539 if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, 1574 if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
@@ -1546,10 +1581,10 @@ special_insn:
1546 1581
1547 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); 1582 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
1548 1583
1549 register_address_increment(c->regs[VCPU_REGS_RSI], 1584 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
1550 (ctxt->eflags & EFLG_DF) ? -c->src.bytes 1585 (ctxt->eflags & EFLG_DF) ? -c->src.bytes
1551 : c->src.bytes); 1586 : c->src.bytes);
1552 register_address_increment(c->regs[VCPU_REGS_RDI], 1587 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
1553 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1588 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1554 : c->dst.bytes); 1589 : c->dst.bytes);
1555 1590
@@ -1557,11 +1592,11 @@ special_insn:
1557 case 0xaa ... 0xab: /* stos */ 1592 case 0xaa ... 0xab: /* stos */
1558 c->dst.type = OP_MEM; 1593 c->dst.type = OP_MEM;
1559 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1594 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1560 c->dst.ptr = (unsigned long *)register_address( 1595 c->dst.ptr = (unsigned long *)register_address(c,
1561 ctxt->es_base, 1596 ctxt->es_base,
1562 c->regs[VCPU_REGS_RDI]); 1597 c->regs[VCPU_REGS_RDI]);
1563 c->dst.val = c->regs[VCPU_REGS_RAX]; 1598 c->dst.val = c->regs[VCPU_REGS_RAX];
1564 register_address_increment(c->regs[VCPU_REGS_RDI], 1599 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
1565 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1600 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1566 : c->dst.bytes); 1601 : c->dst.bytes);
1567 break; 1602 break;
@@ -1569,7 +1604,7 @@ special_insn:
1569 c->dst.type = OP_REG; 1604 c->dst.type = OP_REG;
1570 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1605 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1571 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 1606 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
1572 if ((rc = ops->read_emulated(register_address( 1607 if ((rc = ops->read_emulated(register_address(c,
1573 c->override_base ? *c->override_base : 1608 c->override_base ? *c->override_base :
1574 ctxt->ds_base, 1609 ctxt->ds_base,
1575 c->regs[VCPU_REGS_RSI]), 1610 c->regs[VCPU_REGS_RSI]),
@@ -1577,7 +1612,7 @@ special_insn:
1577 c->dst.bytes, 1612 c->dst.bytes,
1578 ctxt->vcpu)) != 0) 1613 ctxt->vcpu)) != 0)
1579 goto done; 1614 goto done;
1580 register_address_increment(c->regs[VCPU_REGS_RSI], 1615 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
1581 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1616 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1582 : c->dst.bytes); 1617 : c->dst.bytes);
1583 break; 1618 break;
@@ -1616,14 +1651,14 @@ special_insn:
1616 goto cannot_emulate; 1651 goto cannot_emulate;
1617 } 1652 }
1618 c->src.val = (unsigned long) c->eip; 1653 c->src.val = (unsigned long) c->eip;
1619 JMP_REL(rel); 1654 jmp_rel(c, rel);
1620 c->op_bytes = c->ad_bytes; 1655 c->op_bytes = c->ad_bytes;
1621 emulate_push(ctxt); 1656 emulate_push(ctxt);
1622 break; 1657 break;
1623 } 1658 }
1624 case 0xe9: /* jmp rel */ 1659 case 0xe9: /* jmp rel */
1625 case 0xeb: /* jmp rel short */ 1660 case 0xeb: /* jmp rel short */
1626 JMP_REL(c->src.val); 1661 jmp_rel(c, c->src.val);
1627 c->dst.type = OP_NONE; /* Disable writeback. */ 1662 c->dst.type = OP_NONE; /* Disable writeback. */
1628 break; 1663 break;
1629 case 0xf4: /* hlt */ 1664 case 0xf4: /* hlt */
@@ -1690,6 +1725,8 @@ twobyte_insn:
1690 goto done; 1725 goto done;
1691 1726
1692 kvm_emulate_hypercall(ctxt->vcpu); 1727 kvm_emulate_hypercall(ctxt->vcpu);
1728 /* Disable writeback. */
1729 c->dst.type = OP_NONE;
1693 break; 1730 break;
1694 case 2: /* lgdt */ 1731 case 2: /* lgdt */
1695 rc = read_descriptor(ctxt, ops, c->src.ptr, 1732 rc = read_descriptor(ctxt, ops, c->src.ptr,
@@ -1697,6 +1734,8 @@ twobyte_insn:
1697 if (rc) 1734 if (rc)
1698 goto done; 1735 goto done;
1699 realmode_lgdt(ctxt->vcpu, size, address); 1736 realmode_lgdt(ctxt->vcpu, size, address);
1737 /* Disable writeback. */
1738 c->dst.type = OP_NONE;
1700 break; 1739 break;
1701 case 3: /* lidt/vmmcall */ 1740 case 3: /* lidt/vmmcall */
1702 if (c->modrm_mod == 3 && c->modrm_rm == 1) { 1741 if (c->modrm_mod == 3 && c->modrm_rm == 1) {
@@ -1712,27 +1751,25 @@ twobyte_insn:
1712 goto done; 1751 goto done;
1713 realmode_lidt(ctxt->vcpu, size, address); 1752 realmode_lidt(ctxt->vcpu, size, address);
1714 } 1753 }
1754 /* Disable writeback. */
1755 c->dst.type = OP_NONE;
1715 break; 1756 break;
1716 case 4: /* smsw */ 1757 case 4: /* smsw */
1717 if (c->modrm_mod != 3) 1758 c->dst.bytes = 2;
1718 goto cannot_emulate; 1759 c->dst.val = realmode_get_cr(ctxt->vcpu, 0);
1719 *(u16 *)&c->regs[c->modrm_rm]
1720 = realmode_get_cr(ctxt->vcpu, 0);
1721 break; 1760 break;
1722 case 6: /* lmsw */ 1761 case 6: /* lmsw */
1723 if (c->modrm_mod != 3) 1762 realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
1724 goto cannot_emulate; 1763 &ctxt->eflags);
1725 realmode_lmsw(ctxt->vcpu, (u16)c->modrm_val,
1726 &ctxt->eflags);
1727 break; 1764 break;
1728 case 7: /* invlpg*/ 1765 case 7: /* invlpg*/
1729 emulate_invlpg(ctxt->vcpu, memop); 1766 emulate_invlpg(ctxt->vcpu, memop);
1767 /* Disable writeback. */
1768 c->dst.type = OP_NONE;
1730 break; 1769 break;
1731 default: 1770 default:
1732 goto cannot_emulate; 1771 goto cannot_emulate;
1733 } 1772 }
1734 /* Disable writeback. */
1735 c->dst.type = OP_NONE;
1736 break; 1773 break;
1737 case 0x06: 1774 case 0x06:
1738 emulate_clts(ctxt->vcpu); 1775 emulate_clts(ctxt->vcpu);
@@ -1823,7 +1860,7 @@ twobyte_insn:
1823 goto cannot_emulate; 1860 goto cannot_emulate;
1824 } 1861 }
1825 if (test_cc(c->b, ctxt->eflags)) 1862 if (test_cc(c->b, ctxt->eflags))
1826 JMP_REL(rel); 1863 jmp_rel(c, rel);
1827 c->dst.type = OP_NONE; 1864 c->dst.type = OP_NONE;
1828 break; 1865 break;
1829 } 1866 }
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 25df1c1989fe..76f60f52a885 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -11,7 +11,7 @@ lib-y += memcpy_$(BITS).o
11ifeq ($(CONFIG_X86_32),y) 11ifeq ($(CONFIG_X86_32),y)
12 lib-y += checksum_32.o 12 lib-y += checksum_32.o
13 lib-y += strstr_32.o 13 lib-y += strstr_32.o
14 lib-y += bitops_32.o semaphore_32.o string_32.o 14 lib-y += semaphore_32.o string_32.o
15 15
16 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o 16 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
17else 17else
@@ -21,7 +21,6 @@ else
21 21
22 lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o 22 lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
23 lib-y += thunk_64.o clear_page_64.o copy_page_64.o 23 lib-y += thunk_64.o clear_page_64.o copy_page_64.o
24 lib-y += bitops_64.o
25 lib-y += memmove_64.o memset_64.o 24 lib-y += memmove_64.o memset_64.o
26 lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o 25 lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
27endif 26endif
diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c
deleted file mode 100644
index b65440459859..000000000000
--- a/arch/x86/lib/bitops_32.c
+++ /dev/null
@@ -1,70 +0,0 @@
1#include <linux/bitops.h>
2#include <linux/module.h>
3
4/**
5 * find_next_bit - find the next set bit in a memory region
6 * @addr: The address to base the search on
7 * @offset: The bitnumber to start searching at
8 * @size: The maximum size to search
9 */
10int find_next_bit(const unsigned long *addr, int size, int offset)
11{
12 const unsigned long *p = addr + (offset >> 5);
13 int set = 0, bit = offset & 31, res;
14
15 if (bit) {
16 /*
17 * Look for nonzero in the first 32 bits:
18 */
19 __asm__("bsfl %1,%0\n\t"
20 "jne 1f\n\t"
21 "movl $32, %0\n"
22 "1:"
23 : "=r" (set)
24 : "r" (*p >> bit));
25 if (set < (32 - bit))
26 return set + offset;
27 set = 32 - bit;
28 p++;
29 }
30 /*
31 * No set bit yet, search remaining full words for a bit
32 */
33 res = find_first_bit (p, size - 32 * (p - addr));
34 return (offset + set + res);
35}
36EXPORT_SYMBOL(find_next_bit);
37
38/**
39 * find_next_zero_bit - find the first zero bit in a memory region
40 * @addr: The address to base the search on
41 * @offset: The bitnumber to start searching at
42 * @size: The maximum size to search
43 */
44int find_next_zero_bit(const unsigned long *addr, int size, int offset)
45{
46 const unsigned long *p = addr + (offset >> 5);
47 int set = 0, bit = offset & 31, res;
48
49 if (bit) {
50 /*
51 * Look for zero in the first 32 bits.
52 */
53 __asm__("bsfl %1,%0\n\t"
54 "jne 1f\n\t"
55 "movl $32, %0\n"
56 "1:"
57 : "=r" (set)
58 : "r" (~(*p >> bit)));
59 if (set < (32 - bit))
60 return set + offset;
61 set = 32 - bit;
62 p++;
63 }
64 /*
65 * No zero yet, search remaining full bytes for a zero
66 */
67 res = find_first_zero_bit(p, size - 32 * (p - addr));
68 return (offset + set + res);
69}
70EXPORT_SYMBOL(find_next_zero_bit);
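With this arch-specific helper deleted (and its 64-bit counterpart below), x86 presumably falls back to the generic bit-search code in lib/. The contract is simply "return the index of the first set bit at or after offset, or size if there is none". A deliberately naive, bit-at-a-time userspace sketch of that contract, not the optimized generic implementation:

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* simplified find_next_bit(): index of the first set bit >= offset, else size */
static unsigned long find_next_bit(const unsigned long *addr,
				   unsigned long size, unsigned long offset)
{
	unsigned long word;

	while (offset < size) {
		word = addr[offset / BITS_PER_LONG] >> (offset % BITS_PER_LONG);
		if (word & 1)
			return offset;
		offset++;
	}
	return size;
}

int main(void)
{
	unsigned long map[2] = { 0, 1UL << 5 };	/* bit 69 set on a 64-bit build */

	printf("%lu\n", find_next_bit(map, 128, 10));	/* prints 69 */
	return 0;
}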
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
deleted file mode 100644
index 0e8f491e6ccc..000000000000
--- a/arch/x86/lib/bitops_64.c
+++ /dev/null
@@ -1,175 +0,0 @@
1#include <linux/bitops.h>
2
3#undef find_first_zero_bit
4#undef find_next_zero_bit
5#undef find_first_bit
6#undef find_next_bit
7
8static inline long
9__find_first_zero_bit(const unsigned long * addr, unsigned long size)
10{
11 long d0, d1, d2;
12 long res;
13
14 /*
15 * We must test the size in words, not in bits, because
16 * otherwise incoming sizes in the range -63..-1 will not run
17 * any scasq instructions, and then the flags used by the je
18 * instruction will have whatever random value was in place
19 * before. Nobody should call us like that, but
20 * find_next_zero_bit() does when offset and size are at the
21 * same word and it fails to find a zero itself.
22 */
23 size += 63;
24 size >>= 6;
25 if (!size)
26 return 0;
27 asm volatile(
28 " repe; scasq\n"
29 " je 1f\n"
30 " xorq -8(%%rdi),%%rax\n"
31 " subq $8,%%rdi\n"
32 " bsfq %%rax,%%rdx\n"
33 "1: subq %[addr],%%rdi\n"
34 " shlq $3,%%rdi\n"
35 " addq %%rdi,%%rdx"
36 :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
37 :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
38 [addr] "S" (addr) : "memory");
39 /*
40 * Any register would do for [addr] above, but GCC tends to
41 * prefer rbx over rsi, even though rsi is readily available
42 * and doesn't have to be saved.
43 */
44 return res;
45}
46
47/**
48 * find_first_zero_bit - find the first zero bit in a memory region
49 * @addr: The address to start the search at
50 * @size: The maximum size to search
51 *
52 * Returns the bit-number of the first zero bit, not the number of the byte
53 * containing a bit.
54 */
55long find_first_zero_bit(const unsigned long * addr, unsigned long size)
56{
57 return __find_first_zero_bit (addr, size);
58}
59
60/**
61 * find_next_zero_bit - find the next zero bit in a memory region
62 * @addr: The address to base the search on
63 * @offset: The bitnumber to start searching at
64 * @size: The maximum size to search
65 */
66long find_next_zero_bit (const unsigned long * addr, long size, long offset)
67{
68 const unsigned long * p = addr + (offset >> 6);
69 unsigned long set = 0;
70 unsigned long res, bit = offset&63;
71
72 if (bit) {
73 /*
74 * Look for zero in first word
75 */
76 asm("bsfq %1,%0\n\t"
77 "cmoveq %2,%0"
78 : "=r" (set)
79 : "r" (~(*p >> bit)), "r"(64L));
80 if (set < (64 - bit))
81 return set + offset;
82 set = 64 - bit;
83 p++;
84 }
85 /*
86 * No zero yet, search remaining full words for a zero
87 */
88 res = __find_first_zero_bit (p, size - 64 * (p - addr));
89
90 return (offset + set + res);
91}
92
93static inline long
94__find_first_bit(const unsigned long * addr, unsigned long size)
95{
96 long d0, d1;
97 long res;
98
99 /*
100 * We must test the size in words, not in bits, because
101 * otherwise incoming sizes in the range -63..-1 will not run
102 * any scasq instructions, and then the flags used by the jz
103 * instruction will have whatever random value was in place
104 * before. Nobody should call us like that, but
105 * find_next_bit() does when offset and size are at the same
106 * word and it fails to find a one itself.
107 */
108 size += 63;
109 size >>= 6;
110 if (!size)
111 return 0;
112 asm volatile(
113 " repe; scasq\n"
114 " jz 1f\n"
115 " subq $8,%%rdi\n"
116 " bsfq (%%rdi),%%rax\n"
117 "1: subq %[addr],%%rdi\n"
118 " shlq $3,%%rdi\n"
119 " addq %%rdi,%%rax"
120 :"=a" (res), "=&c" (d0), "=&D" (d1)
121 :"0" (0ULL), "1" (size), "2" (addr),
122 [addr] "r" (addr) : "memory");
123 return res;
124}
125
126/**
127 * find_first_bit - find the first set bit in a memory region
128 * @addr: The address to start the search at
129 * @size: The maximum size to search
130 *
131 * Returns the bit-number of the first set bit, not the number of the byte
132 * containing a bit.
133 */
134long find_first_bit(const unsigned long * addr, unsigned long size)
135{
136 return __find_first_bit(addr,size);
137}
138
139/**
140 * find_next_bit - find the first set bit in a memory region
141 * @addr: The address to base the search on
142 * @offset: The bitnumber to start searching at
143 * @size: The maximum size to search
144 */
145long find_next_bit(const unsigned long * addr, long size, long offset)
146{
147 const unsigned long * p = addr + (offset >> 6);
148 unsigned long set = 0, bit = offset & 63, res;
149
150 if (bit) {
151 /*
152 * Look for nonzero in the first 64 bits:
153 */
154 asm("bsfq %1,%0\n\t"
155 "cmoveq %2,%0\n\t"
156 : "=r" (set)
157 : "r" (*p >> bit), "r" (64L));
158 if (set < (64 - bit))
159 return set + offset;
160 set = 64 - bit;
161 p++;
162 }
163 /*
164 * No set bit yet, search remaining full words for a bit
165 */
166 res = __find_first_bit (p, size - 64 * (p - addr));
167 return (offset + set + res);
168}
169
170#include <linux/module.h>
171
172EXPORT_SYMBOL(find_next_bit);
173EXPORT_SYMBOL(find_first_bit);
174EXPORT_SYMBOL(find_first_zero_bit);
175EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c
index 2a8456a1f44f..57484e91ab90 100644
--- a/arch/x86/mach-visws/mpparse.c
+++ b/arch/x86/mach-visws/mpparse.c
@@ -11,22 +11,9 @@
11/* Have we found an MP table */ 11/* Have we found an MP table */
12int smp_found_config; 12int smp_found_config;
13 13
14/*
15 * Various Linux-internal data structures created from the
16 * MP-table.
17 */
18int apic_version [MAX_APICS];
19
20int pic_mode; 14int pic_mode;
21unsigned long mp_lapic_addr;
22
23/* Processor that is doing the boot up */
24unsigned int boot_cpu_physical_apicid = -1U;
25
26/* Bitmask of physically existing CPUs */
27physid_mask_t phys_cpu_present_map;
28 15
29unsigned int __initdata maxcpus = NR_CPUS; 16extern unsigned int __cpuinitdata maxcpus;
30 17
31/* 18/*
32 * The Visual Workstation is Intel MP compliant in the hardware 19 * The Visual Workstation is Intel MP compliant in the hardware
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 96f60c7cd124..8acbf0cdf1a5 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -113,7 +113,7 @@ static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi)
113 for_each_online_cpu(cpu) { 113 for_each_online_cpu(cpu) {
114 if (cpuset & (1 << cpu)) { 114 if (cpuset & (1 << cpu)) {
115#ifdef VOYAGER_DEBUG 115#ifdef VOYAGER_DEBUG
116 if (!cpu_isset(cpu, cpu_online_map)) 116 if (!cpu_online(cpu))
117 VDEBUG(("CPU%d sending cpi %d to CPU%d not in " 117 VDEBUG(("CPU%d sending cpi %d to CPU%d not in "
118 "cpu_online_map\n", 118 "cpu_online_map\n",
119 hard_smp_processor_id(), cpi, cpu)); 119 hard_smp_processor_id(), cpi, cpu));
@@ -206,11 +206,6 @@ static struct irq_chip vic_chip = {
206/* used to count up as CPUs are brought on line (starts at 0) */ 206/* used to count up as CPUs are brought on line (starts at 0) */
207static int cpucount = 0; 207static int cpucount = 0;
208 208
209/* steal a page from the bottom of memory for the trampoline and
210 * squirrel its address away here. This will be in kernel virtual
211 * space */
212unsigned char *trampoline_base;
213
214/* The per cpu profile stuff - used in smp_local_timer_interrupt */ 209/* The per cpu profile stuff - used in smp_local_timer_interrupt */
215static DEFINE_PER_CPU(int, prof_multiplier) = 1; 210static DEFINE_PER_CPU(int, prof_multiplier) = 1;
216static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; 211static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
@@ -427,18 +422,6 @@ void __init smp_store_cpu_info(int id)
427 identify_secondary_cpu(c); 422 identify_secondary_cpu(c);
428} 423}
429 424
430/* set up the trampoline and return the physical address of the code */
431unsigned long __init setup_trampoline(void)
432{
433 /* these two are global symbols in trampoline.S */
434 extern const __u8 trampoline_end[];
435 extern const __u8 trampoline_data[];
436
437 memcpy(trampoline_base, trampoline_data,
438 trampoline_end - trampoline_data);
439 return virt_to_phys(trampoline_base);
440}
441
442/* Routine initially called when a non-boot CPU is brought online */ 425/* Routine initially called when a non-boot CPU is brought online */
443static void __init start_secondary(void *unused) 426static void __init start_secondary(void *unused)
444{ 427{
@@ -560,8 +543,8 @@ static void __init do_boot_cpu(__u8 cpu)
560 hijack_source.idt.Offset, stack_start.sp)); 543 hijack_source.idt.Offset, stack_start.sp));
561 544
562 /* init lowmem identity mapping */ 545 /* init lowmem identity mapping */
563 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, 546 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
564 min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); 547 min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
565 flush_tlb_all(); 548 flush_tlb_all();
566 549
567 if (quad_boot) { 550 if (quad_boot) {
@@ -700,9 +683,9 @@ void __init smp_boot_cpus(void)
700 * Code added from smpboot.c */ 683 * Code added from smpboot.c */
701 { 684 {
702 unsigned long bogosum = 0; 685 unsigned long bogosum = 0;
703 for (i = 0; i < NR_CPUS; i++) 686
704 if (cpu_isset(i, cpu_online_map)) 687 for_each_online_cpu(i)
705 bogosum += cpu_data(i).loops_per_jiffy; 688 bogosum += cpu_data(i).loops_per_jiffy;
706 printk(KERN_INFO "Total of %d processors activated " 689 printk(KERN_INFO "Total of %d processors activated "
707 "(%lu.%02lu BogoMIPS).\n", 690 "(%lu.%02lu BogoMIPS).\n",
708 cpucount + 1, bogosum / (500000 / HZ), 691 cpucount + 1, bogosum / (500000 / HZ),
@@ -1855,7 +1838,7 @@ static int __cpuinit voyager_cpu_up(unsigned int cpu)
1855 return -EIO; 1838 return -EIO;
1856 /* Unleash the CPU! */ 1839 /* Unleash the CPU! */
1857 cpu_set(cpu, smp_commenced_mask); 1840 cpu_set(cpu, smp_commenced_mask);
1858 while (!cpu_isset(cpu, cpu_online_map)) 1841 while (!cpu_online(cpu))
1859 mb(); 1842 mb();
1860 return 0; 1843 return 0;
1861} 1844}
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 20941d2954e2..b7b3e4c7cfc9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o 2 pat.o pgtable.o
3 3
4obj-$(CONFIG_X86_32) += pgtable_32.o 4obj-$(CONFIG_X86_32) += pgtable_32.o
5 5
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 6791b8334bc6..2c24bea92c66 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -324,7 +324,7 @@ static const struct file_operations ptdump_fops = {
324 .release = single_release, 324 .release = single_release,
325}; 325};
326 326
327int pt_dump_init(void) 327static int pt_dump_init(void)
328{ 328{
329 struct dentry *pe; 329 struct dentry *pe;
330 330
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ec62da85fd7..4a4761892951 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
71 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 71 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
72 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 72 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
73 73
74 paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 74 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
75 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 75 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
76 pud = pud_offset(pgd, 0); 76 pud = pud_offset(pgd, 0);
77 BUG_ON(pmd_table != pmd_offset(pud, 0)); 77 BUG_ON(pmd_table != pmd_offset(pud, 0));
@@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
100 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 100 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
101 } 101 }
102 102
103 paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); 103 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
104 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 104 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
105 BUG_ON(page_table != pte_offset_kernel(pmd, 0)); 105 BUG_ON(page_table != pte_offset_kernel(pmd, 0));
106 } 106 }
@@ -227,6 +227,25 @@ static inline int page_kills_ppro(unsigned long pagenr)
227 return 0; 227 return 0;
228} 228}
229 229
230/*
231 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
232 * is valid. The argument is a physical page number.
233 *
234 *
235 * On x86, access has to be given to the first megabyte of RAM because that area
236 * contains BIOS code and data regions used by X, dosemu and similar apps.
237 * Access has to be given to non-kernel-RAM areas as well; these contain the PCI
238 * MMIO resources as well as potential BIOS/ACPI data regions.
239 */
240int devmem_is_allowed(unsigned long pagenr)
241{
242 if (pagenr <= 256)
243 return 1;
244 if (!page_is_ram(pagenr))
245 return 1;
246 return 0;
247}
248
230#ifdef CONFIG_HIGHMEM 249#ifdef CONFIG_HIGHMEM
231pte_t *kmap_pte; 250pte_t *kmap_pte;
232pgprot_t kmap_prot; 251pgprot_t kmap_prot;
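The new devmem_is_allowed() policy can be exercised outside the kernel with a stubbed page_is_ram(); the RAM boundary in the stub below is invented purely for the demo, and 256 pages of 4 KiB is where the "first megabyte" in the comment comes from:

#include <stdio.h>

/* stand-in for the kernel's page_is_ram(); treat pfns below 0x8000 as RAM */
static int page_is_ram(unsigned long pagenr)
{
	return pagenr < 0x8000;
}

/* same policy as the patch: first MiB and all non-RAM pages are allowed */
static int devmem_is_allowed(unsigned long pagenr)
{
	if (pagenr <= 256)		/* 256 pages * 4 KiB = first 1 MiB */
		return 1;
	if (!page_is_ram(pagenr))
		return 1;
	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       devmem_is_allowed(0x000a0),	/* VGA hole, below 1 MiB: 1 */
	       devmem_is_allowed(0x01000),	/* RAM above 1 MiB: 0 */
	       devmem_is_allowed(0x10000));	/* beyond stub RAM, e.g. MMIO: 1 */
	return 0;
}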
@@ -365,7 +384,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
365 384
366 pte_clear(NULL, va, pte); 385 pte_clear(NULL, va, pte);
367 } 386 }
368 paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT); 387 paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
369} 388}
370 389
371void __init native_pagetable_setup_done(pgd_t *base) 390void __init native_pagetable_setup_done(pgd_t *base)
@@ -457,7 +476,7 @@ void zap_low_mappings(void)
457 * Note that "pgd_clear()" doesn't do it for 476 * Note that "pgd_clear()" doesn't do it for
458 * us, because pgd_clear() is a no-op on i386. 477 * us, because pgd_clear() is a no-op on i386.
459 */ 478 */
460 for (i = 0; i < USER_PTRS_PER_PGD; i++) { 479 for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
461#ifdef CONFIG_X86_PAE 480#ifdef CONFIG_X86_PAE
462 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); 481 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
463#else 482#else
@@ -547,9 +566,9 @@ void __init paging_init(void)
547 566
548/* 567/*
549 * Test if the WP bit works in supervisor mode. It isn't supported on 386's 568 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
550 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This 569 * and also on some strange 486's. All 586+'s are OK. This used to involve
551 * used to involve black magic jumps to work around some nasty CPU bugs, 570 * black magic jumps to work around some nasty CPU bugs, but fortunately the
552 * but fortunately the switch to using exceptions got rid of all that. 571 * switch to using exceptions got rid of all that.
553 */ 572 */
554static void __init test_wp_bit(void) 573static void __init test_wp_bit(void)
555{ 574{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ff7906a9a4d..5fbb8652cf59 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -135,7 +135,7 @@ static __init void *spp_getpage(void)
135 return ptr; 135 return ptr;
136} 136}
137 137
138static __init void 138static void
139set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) 139set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
140{ 140{
141 pgd_t *pgd; 141 pgd_t *pgd;
@@ -173,7 +173,7 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
173 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); 173 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
174 174
175 pte = pte_offset_kernel(pmd, vaddr); 175 pte = pte_offset_kernel(pmd, vaddr);
176 if (!pte_none(*pte) && 176 if (!pte_none(*pte) && pte_val(new_pte) &&
177 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) 177 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
178 pte_ERROR(*pte); 178 pte_ERROR(*pte);
179 set_pte(pte, new_pte); 179 set_pte(pte, new_pte);
@@ -214,8 +214,7 @@ void __init cleanup_highmap(void)
214} 214}
215 215
216/* NOTE: this is meant to be run only at boot */ 216/* NOTE: this is meant to be run only at boot */
217void __init 217void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
218__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
219{ 218{
220 unsigned long address = __fix_to_virt(idx); 219 unsigned long address = __fix_to_virt(idx);
221 220
@@ -664,6 +663,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
664 663
665#endif /* CONFIG_MEMORY_HOTPLUG */ 664#endif /* CONFIG_MEMORY_HOTPLUG */
666 665
666/*
667 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
668 * is valid. The argument is a physical page number.
669 *
670 *
671 * On x86, access has to be given to the first megabyte of RAM because that area
672 * contains BIOS code and data regions used by X, dosemu and similar apps.
673 * Access has to be given to non-kernel-RAM areas as well; these contain the PCI
674 * MMIO resources as well as potential BIOS/ACPI data regions.
675 */
676int devmem_is_allowed(unsigned long pagenr)
677{
678 if (pagenr <= 256)
679 return 1;
680 if (!page_is_ram(pagenr))
681 return 1;
682 return 0;
683}
684
685
667static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, 686static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
668 kcore_modules, kcore_vsyscall; 687 kcore_modules, kcore_vsyscall;
669 688
@@ -791,7 +810,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
791void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 810void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
792{ 811{
793#ifdef CONFIG_NUMA 812#ifdef CONFIG_NUMA
794 int nid = phys_to_nid(phys); 813 int nid, next_nid;
795#endif 814#endif
796 unsigned long pfn = phys >> PAGE_SHIFT; 815 unsigned long pfn = phys >> PAGE_SHIFT;
797 816
@@ -810,10 +829,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
810 829
811 /* Should check here against the e820 map to avoid double free */ 830 /* Should check here against the e820 map to avoid double free */
812#ifdef CONFIG_NUMA 831#ifdef CONFIG_NUMA
813 reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); 832 nid = phys_to_nid(phys);
833 next_nid = phys_to_nid(phys + len - 1);
834 if (nid == next_nid)
835 reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
836 else
837 reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
814#else 838#else
815 reserve_bootmem(phys, len, BOOTMEM_DEFAULT); 839 reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
816#endif 840#endif
841
817 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { 842 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
818 dma_reserve += len / PAGE_SIZE; 843 dma_reserve += len / PAGE_SIZE;
819 set_dma_reserve(dma_reserve); 844 set_dma_reserve(dma_reserve);
@@ -907,6 +932,10 @@ const char *arch_vma_name(struct vm_area_struct *vma)
907/* 932/*
908 * Initialise the sparsemem vmemmap using huge-pages at the PMD level. 933 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
909 */ 934 */
935static long __meminitdata addr_start, addr_end;
936static void __meminitdata *p_start, *p_end;
937static int __meminitdata node_start;
938
910int __meminit 939int __meminit
911vmemmap_populate(struct page *start_page, unsigned long size, int node) 940vmemmap_populate(struct page *start_page, unsigned long size, int node)
912{ 941{
@@ -941,12 +970,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
941 PAGE_KERNEL_LARGE); 970 PAGE_KERNEL_LARGE);
942 set_pmd(pmd, __pmd(pte_val(entry))); 971 set_pmd(pmd, __pmd(pte_val(entry)));
943 972
944 printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", 973 /* check to see if we have contiguous blocks */
945 addr, addr + PMD_SIZE - 1, p, node); 974 if (p_end != p || node_start != node) {
975 if (p_start)
976 printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
977 addr_start, addr_end-1, p_start, p_end-1, node_start);
978 addr_start = addr;
979 node_start = node;
980 p_start = p;
981 }
982 addr_end = addr + PMD_SIZE;
983 p_end = p + PMD_SIZE;
946 } else { 984 } else {
947 vmemmap_verify((pte_t *)pmd, node, addr, next); 985 vmemmap_verify((pte_t *)pmd, node, addr, next);
948 } 986 }
949 } 987 }
950 return 0; 988 return 0;
951} 989}
990
991void __meminit vmemmap_populate_print_last(void)
992{
993 if (p_start) {
994 printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
995 addr_start, addr_end-1, p_start, p_end-1, node_start);
996 p_start = NULL;
997 p_end = NULL;
998 node_start = 0;
999 }
1000}
952#endif 1001#endif
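The change above batches contiguous 2 MB PMD mappings into a single debug line instead of printing one line per mapping, flushing the accumulated range when the next mapping is not adjacent. A userspace sketch of the same coalescing pattern (hypothetical addresses; the kernel version also keys on the backing pointer and the node):

#include <stdio.h>

#define PMD_SIZE (2UL << 20)

static unsigned long addr_start, addr_end;
static int have_range;

static void note_mapping(unsigned long addr)
{
	/* flush the accumulated range when the next mapping is not adjacent */
	if (have_range && addr != addr_end) {
		printf("[%#lx-%#lx] PMD\n", addr_start, addr_end - 1);
		have_range = 0;
	}
	if (!have_range) {
		addr_start = addr;
		have_range = 1;
	}
	addr_end = addr + PMD_SIZE;
}

static void flush_last(void)
{
	if (have_range)
		printf("[%#lx-%#lx] PMD\n", addr_start, addr_end - 1);
}

int main(void)
{
	note_mapping(0x200000);
	note_mapping(0x400000);	/* contiguous: merged into the first range */
	note_mapping(0xa00000);	/* gap: the previous range is flushed first */
	flush_last();
	return 0;
}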
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3a4baf95e24d..d176b23110cc 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -336,6 +336,35 @@ void iounmap(volatile void __iomem *addr)
336} 336}
337EXPORT_SYMBOL(iounmap); 337EXPORT_SYMBOL(iounmap);
338 338
339/*
340 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
341 * access
342 */
343void *xlate_dev_mem_ptr(unsigned long phys)
344{
345 void *addr;
346 unsigned long start = phys & PAGE_MASK;
347
348 /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
349 if (page_is_ram(start >> PAGE_SHIFT))
350 return __va(phys);
351
352 addr = (void *)ioremap(start, PAGE_SIZE);
353 if (addr)
354 addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
355
356 return addr;
357}
358
359void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
360{
361 if (page_is_ram(phys >> PAGE_SHIFT))
362 return;
363
364 iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
365 return;
366}
367
339#ifdef CONFIG_X86_32 368#ifdef CONFIG_X86_32
340 369
341int __initdata early_ioremap_debug; 370int __initdata early_ioremap_debug;
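For the non-RAM case, xlate_dev_mem_ptr() maps only the page that contains the requested address and then re-applies the sub-page offset, while unxlate_dev_mem_ptr() iounmaps the page-aligned pointer again. The pointer arithmetic, with a hypothetical physical address and a pretend ioremap() return value, works out as follows (pure arithmetic illustration, no kernel API used):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long phys = 0xfee003f0UL;		/* hypothetical MMIO address */
	unsigned long start = phys & PAGE_MASK;		/* page to map: 0xfee00000 */
	unsigned long mapped = 0x7f0000000000UL;	/* pretend ioremap() returned this */
	unsigned long virt = mapped | (phys & ~PAGE_MASK);

	/* access goes through 0x7f00000003f0; iounmap() later gets virt & PAGE_MASK */
	printf("map %#lx, access at %#lx\n", start, virt);
	return 0;
}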
@@ -407,7 +436,7 @@ void __init early_ioremap_clear(void)
407 436
408 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); 437 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
409 pmd_clear(pmd); 438 pmd_clear(pmd);
410 paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); 439 paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
411 __flush_tlb_all(); 440 __flush_tlb_all();
412} 441}
413 442
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9a6892200b27..c5066d519e5d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
196 unsigned long bootmap_start, nodedata_phys; 196 unsigned long bootmap_start, nodedata_phys;
197 void *bootmap; 197 void *bootmap;
198 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); 198 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
199 int nid;
199 200
200 start = round_up(start, ZONE_ALIGN); 201 start = round_up(start, ZONE_ALIGN);
201 202
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
218 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 219 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
219 NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; 220 NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
220 221
221 /* Find a place for the bootmem map */ 222 /*
223 * Find a place for the bootmem map
224 * nodedata_phys could be on another node because of alloc_bootmem,
225 * so make sure bootmap_start is not too small; otherwise
226 * early_node_mem will get that range with find_e820_area instead
227 * of alloc_bootmem, which could clash with the reserved range
228 */
222 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 229 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
223 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); 230 nid = phys_to_nid(nodedata_phys);
231 if (nid == nodeid)
232 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
233 else
234 bootmap_start = round_up(start, PAGE_SIZE);
224 /* 235 /*
225 * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like 236 * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
226 * to use that to align to PAGE_SIZE 237 * to use that to align to PAGE_SIZE
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
245 256
246 free_bootmem_with_active_regions(nodeid, end); 257 free_bootmem_with_active_regions(nodeid, end);
247 258
248 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, 259 /*
249 BOOTMEM_DEFAULT); 260 * convert early reserve to bootmem reserve earlier
250 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, 261 * otherwise early_node_mem could use early reserved mem
251 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); 262 * on previous node
263 */
264 early_res_to_bootmem(start, end);
265
266 /*
267 * in some cases early_node_mem could use alloc_bootmem
268 * to get a range on another node, so don't reserve that again
269 */
270 if (nid != nodeid)
271 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
272 else
273 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
274 pgdat_size, BOOTMEM_DEFAULT);
275 nid = phys_to_nid(bootmap_start);
276 if (nid != nodeid)
277 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
278 else
279 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
280 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
281
252#ifdef CONFIG_ACPI_NUMA 282#ifdef CONFIG_ACPI_NUMA
253 srat_reserve_add_area(nodeid); 283 srat_reserve_add_area(nodeid);
254#endif 284#endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f7823a172868..bd5e05c654dc 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
483 goto out_unlock; 483 goto out_unlock;
484 484
485 pbase = (pte_t *)page_address(base); 485 pbase = (pte_t *)page_address(base);
486#ifdef CONFIG_X86_32 486 paravirt_alloc_pte(&init_mm, page_to_pfn(base));
487 paravirt_alloc_pt(&init_mm, page_to_pfn(base));
488#endif
489 ref_prot = pte_pgprot(pte_clrhuge(*kpte)); 487 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
490 488
491#ifdef CONFIG_X86_64 489#ifdef CONFIG_X86_64
@@ -993,7 +991,7 @@ static const struct file_operations dpa_fops = {
993 .release = single_release, 991 .release = single_release,
994}; 992};
995 993
996int __init debug_pagealloc_proc_init(void) 994static int __init debug_pagealloc_proc_init(void)
997{ 995{
998 struct dentry *de; 996 struct dentry *de;
999 997
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 72c0f6097402..e7ca7fc48d12 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,16 +11,19 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/gfp.h> 12#include <linux/gfp.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/bootmem.h>
14 15
15#include <asm/msr.h> 16#include <asm/msr.h>
16#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
17#include <asm/processor.h> 18#include <asm/processor.h>
19#include <asm/page.h>
18#include <asm/pgtable.h> 20#include <asm/pgtable.h>
19#include <asm/pat.h> 21#include <asm/pat.h>
20#include <asm/e820.h> 22#include <asm/e820.h>
21#include <asm/cacheflush.h> 23#include <asm/cacheflush.h>
22#include <asm/fcntl.h> 24#include <asm/fcntl.h>
23#include <asm/mtrr.h> 25#include <asm/mtrr.h>
26#include <asm/io.h>
24 27
25int pat_wc_enabled = 1; 28int pat_wc_enabled = 1;
26 29
@@ -190,6 +193,21 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
190 return 0; 193 return 0;
191} 194}
192 195
196/*
197 * req_type typically has one of the following values:
198 * - _PAGE_CACHE_WB
199 * - _PAGE_CACHE_WC
200 * - _PAGE_CACHE_UC_MINUS
201 * - _PAGE_CACHE_UC
202 *
203 * req_type will have the special case value '-1' when the requester wants to inherit
204 * the memory type from the MTRR (if WB) or from an existing PAT entry, defaulting to UC_MINUS.
205 *
206 * If ret_type is NULL, the function returns an error if it cannot reserve the
207 * region with req_type. If ret_type is non-NULL, the function returns the
208 * available type in ret_type when there is no error. On any error
209 * it returns a negative value.
210 */
193int reserve_memtype(u64 start, u64 end, unsigned long req_type, 211int reserve_memtype(u64 start, u64 end, unsigned long req_type,
194 unsigned long *ret_type) 212 unsigned long *ret_type)
195{ 213{
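A small sketch of the "-1 means inherit" behaviour the comment above describes, with mtrr_type_lookup() stubbed out and simplified stand-in values for the _PAGE_CACHE_* constants (not the kernel's actual encodings):

#include <stdio.h>

#define _PAGE_CACHE_WB		0	/* placeholder values for the demo */
#define _PAGE_CACHE_UC_MINUS	2

#define MTRR_TYPE_WRBACK	6

/* stand-in for mtrr_type_lookup(); pretend the whole range is write-back */
static unsigned char mtrr_type_lookup(unsigned long long start,
				      unsigned long long end)
{
	return MTRR_TYPE_WRBACK;
}

/* the '-1 means inherit' special case: WB if the MTRR says WB, else UC_MINUS */
static unsigned long resolve_req_type(long req_type,
				      unsigned long long start,
				      unsigned long long end)
{
	if (req_type != -1)
		return (unsigned long)req_type;
	if (mtrr_type_lookup(start, end) == MTRR_TYPE_WRBACK)
		return _PAGE_CACHE_WB;
	return _PAGE_CACHE_UC_MINUS;
}

int main(void)
{
	/* hypothetical range; prints 0 (_PAGE_CACHE_WB) with the stub above */
	printf("%lu\n", resolve_req_type(-1, 0xd0000000ULL, 0xd0001000ULL));
	return 0;
}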
@@ -200,9 +218,14 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
200 218
201 /* Only track when pat_wc_enabled */ 219 /* Only track when pat_wc_enabled */
202 if (!pat_wc_enabled) { 220 if (!pat_wc_enabled) {
203 if (ret_type) 221 /* This is identical to page table setting without PAT */
204 *ret_type = req_type; 222 if (ret_type) {
205 223 if (req_type == -1) {
224 *ret_type = _PAGE_CACHE_WB;
225 } else {
226 *ret_type = req_type;
227 }
228 }
206 return 0; 229 return 0;
207 } 230 }
208 231
@@ -214,8 +237,29 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
214 return 0; 237 return 0;
215 } 238 }
216 239
217 req_type &= _PAGE_CACHE_MASK; 240 if (req_type == -1) {
218 err = pat_x_mtrr_type(start, end, req_type, &actual_type); 241 /*
242 * Special case where caller wants to inherit from mtrr or
243 * existing pat mapping, defaulting to UC_MINUS in case of
244 * no match.
245 */
246 u8 mtrr_type = mtrr_type_lookup(start, end);
247 if (mtrr_type == 0xFE) { /* MTRR match error */
248 err = -1;
249 }
250
251 if (mtrr_type == MTRR_TYPE_WRBACK) {
252 req_type = _PAGE_CACHE_WB;
253 actual_type = _PAGE_CACHE_WB;
254 } else {
255 req_type = _PAGE_CACHE_UC_MINUS;
256 actual_type = _PAGE_CACHE_UC_MINUS;
257 }
258 } else {
259 req_type &= _PAGE_CACHE_MASK;
260 err = pat_x_mtrr_type(start, end, req_type, &actual_type);
261 }
262
219 if (err) { 263 if (err) {
220 if (ret_type) 264 if (ret_type)
221 *ret_type = actual_type; 265 *ret_type = actual_type;
@@ -241,7 +285,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
241 struct memtype *saved_ptr; 285 struct memtype *saved_ptr;
242 286
243 if (parse->start >= end) { 287 if (parse->start >= end) {
244 printk("New Entry\n"); 288 pr_debug("New Entry\n");
245 list_add(&new_entry->nd, parse->nd.prev); 289 list_add(&new_entry->nd, parse->nd.prev);
246 new_entry = NULL; 290 new_entry = NULL;
247 break; 291 break;
@@ -291,7 +335,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
291 break; 335 break;
292 } 336 }
293 337
294 printk("Overlap at 0x%Lx-0x%Lx\n", 338 pr_debug("Overlap at 0x%Lx-0x%Lx\n",
295 saved_ptr->start, saved_ptr->end); 339 saved_ptr->start, saved_ptr->end);
296 /* No conflict. Go ahead and add this new entry */ 340 /* No conflict. Go ahead and add this new entry */
297 list_add(&new_entry->nd, saved_ptr->nd.prev); 341 list_add(&new_entry->nd, saved_ptr->nd.prev);
@@ -343,7 +387,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
343 break; 387 break;
344 } 388 }
345 389
346 printk("Overlap at 0x%Lx-0x%Lx\n", 390 printk(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
347 saved_ptr->start, saved_ptr->end); 391 saved_ptr->start, saved_ptr->end);
348 /* No conflict. Go ahead and add this new entry */ 392 /* No conflict. Go ahead and add this new entry */
349 list_add(&new_entry->nd, &saved_ptr->nd); 393 list_add(&new_entry->nd, &saved_ptr->nd);
@@ -353,7 +397,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
353 } 397 }
354 398
355 if (err) { 399 if (err) {
356 printk( 400 printk(KERN_INFO
357 "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", 401 "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
358 start, end, cattr_name(new_entry->type), 402 start, end, cattr_name(new_entry->type),
359 cattr_name(req_type)); 403 cattr_name(req_type));
@@ -365,16 +409,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
365 if (new_entry) { 409 if (new_entry) {
366 /* No conflict. Not yet added to the list. Add to the tail */ 410 /* No conflict. Not yet added to the list. Add to the tail */
367 list_add_tail(&new_entry->nd, &memtype_list); 411 list_add_tail(&new_entry->nd, &memtype_list);
368 printk("New Entry\n"); 412 pr_debug("New Entry\n");
369 } 413 }
370 414
371 if (ret_type) { 415 if (ret_type) {
372 printk( 416 pr_debug(
373 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", 417 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
374 start, end, cattr_name(actual_type), 418 start, end, cattr_name(actual_type),
375 cattr_name(req_type), cattr_name(*ret_type)); 419 cattr_name(req_type), cattr_name(*ret_type));
376 } else { 420 } else {
377 printk( 421 pr_debug(
378 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", 422 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
379 start, end, cattr_name(actual_type), 423 start, end, cattr_name(actual_type),
380 cattr_name(req_type)); 424 cattr_name(req_type));
@@ -411,11 +455,145 @@ int free_memtype(u64 start, u64 end)
411 spin_unlock(&memtype_lock); 455 spin_unlock(&memtype_lock);
412 456
413 if (err) { 457 if (err) {
414 printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n", 458 printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
415 current->comm, current->pid, start, end); 459 current->comm, current->pid, start, end);
416 } 460 }
417 461
418 printk( "free_memtype request 0x%Lx-0x%Lx\n", start, end); 462 pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
419 return err; 463 return err;
420} 464}
421 465
466
467/*
468 * /dev/mem mmap interface. The memtype used for mapping varies:
469 * - Use UC for mappings with O_SYNC flag
 470 * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
 471 * inherit the memtype from the existing mapping.
 472 * - Else use UC_MINUS memtype (for backward compatibility with existing
 473 * X drivers).
474 */
475pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
476 unsigned long size, pgprot_t vma_prot)
477{
478 return vma_prot;
479}
480
481#ifdef CONFIG_NONPROMISC_DEVMEM
 482/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM */
483static inline int range_is_allowed(unsigned long pfn, unsigned long size)
484{
485 return 1;
486}
487#else
488static inline int range_is_allowed(unsigned long pfn, unsigned long size)
489{
490 u64 from = ((u64)pfn) << PAGE_SHIFT;
491 u64 to = from + size;
492 u64 cursor = from;
493
494 while (cursor < to) {
495 if (!devmem_is_allowed(pfn)) {
496 printk(KERN_INFO
497 "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
498 current->comm, from, to);
499 return 0;
500 }
501 cursor += PAGE_SIZE;
502 pfn++;
503 }
504 return 1;
505}
506#endif /* CONFIG_NONPROMISC_DEVMEM */
507
508int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
509 unsigned long size, pgprot_t *vma_prot)
510{
511 u64 offset = ((u64) pfn) << PAGE_SHIFT;
512 unsigned long flags = _PAGE_CACHE_UC_MINUS;
513 unsigned long ret_flags;
514 int retval;
515
516 if (!range_is_allowed(pfn, size))
517 return 0;
518
519 if (file->f_flags & O_SYNC) {
520 flags = _PAGE_CACHE_UC;
521 }
522
523#ifdef CONFIG_X86_32
524 /*
525 * On the PPro and successors, the MTRRs are used to set
526 * memory types for physical addresses outside main memory,
527 * so blindly setting UC or PWT on those pages is wrong.
528 * For Pentiums and earlier, the surround logic should disable
529 * caching for the high addresses through the KEN pin, but
530 * we maintain the tradition of paranoia in this code.
531 */
532 if (!pat_wc_enabled &&
533 ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
534 test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
535 test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
536 test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
537 (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
538 flags = _PAGE_CACHE_UC;
539 }
540#endif
541
542 /*
543 * With O_SYNC, we can only take UC mapping. Fail if we cannot.
544 * Without O_SYNC, we want to get
545 * - WB for WB-able memory and no other conflicting mappings
546 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
 547 * - Inherit from conflicting mappings otherwise
548 */
549 if (flags != _PAGE_CACHE_UC_MINUS) {
550 retval = reserve_memtype(offset, offset + size, flags, NULL);
551 } else {
552 retval = reserve_memtype(offset, offset + size, -1, &ret_flags);
553 }
554
555 if (retval < 0)
556 return 0;
557
558 flags = ret_flags;
559
560 if (pfn <= max_pfn_mapped &&
561 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
562 free_memtype(offset, offset + size);
563 printk(KERN_INFO
564 "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
565 current->comm, current->pid,
566 cattr_name(flags),
567 offset, offset + size);
568 return 0;
569 }
570
571 *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
572 flags);
573 return 1;
574}
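A compressed sketch of how a /dev/mem-style mmap path would consult this hook; phys_mem_access_prot_allowed() is the function above, while the surrounding vma handling and the remap_pfn_range() call are illustrative assumptions:

/* Hypothetical mmap helper: refuse the mapping unless the hook accepts it
 * and has adjusted the cache bits in vm_page_prot for us. */
static int example_mmap_devmem(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
					  &vma->vm_page_prot))
		return -EINVAL;

	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
			       size, vma->vm_page_prot);
}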
575
576void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
577{
578 u64 addr = (u64)pfn << PAGE_SHIFT;
579 unsigned long flags;
580 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
581
582 reserve_memtype(addr, addr + size, want_flags, &flags);
583 if (flags != want_flags) {
584 printk(KERN_INFO
585 "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
586 current->comm, current->pid,
587 cattr_name(want_flags),
588 addr, addr + size,
589 cattr_name(flags));
590 }
591}
592
593void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
594{
595 u64 addr = (u64)pfn << PAGE_SHIFT;
596
597 free_memtype(addr, addr + size);
598}
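And the matching track/untrack pair as a hypothetical caller would use it; map_devmem() and unmap_devmem() are the functions above, the pfn/size/vma_prot values are illustrative:

/* Hypothetical: track a user mapping of [pfn, pfn + size), then drop it. */
static void example_track_devmem(unsigned long pfn, unsigned long size,
				 pgprot_t vma_prot)
{
	map_devmem(pfn, size, vma_prot);
	/* ... mapping in use; a type mismatch is only logged, not rejected ... */
	unmap_devmem(pfn, size, vma_prot);
}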
599
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
new file mode 100644
index 000000000000..50159764f694
--- /dev/null
+++ b/arch/x86/mm/pgtable.c
@@ -0,0 +1,276 @@
1#include <linux/mm.h>
2#include <asm/pgalloc.h>
3#include <asm/pgtable.h>
4#include <asm/tlb.h>
5
6pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
7{
8 return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
9}
10
11pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
12{
13 struct page *pte;
14
15#ifdef CONFIG_HIGHPTE
16 pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
17#else
18 pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
19#endif
20 if (pte)
21 pgtable_page_ctor(pte);
22 return pte;
23}
24
25void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
26{
27 pgtable_page_dtor(pte);
28 paravirt_release_pte(page_to_pfn(pte));
29 tlb_remove_page(tlb, pte);
30}
31
32#if PAGETABLE_LEVELS > 2
33void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
34{
35 paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
36 tlb_remove_page(tlb, virt_to_page(pmd));
37}
38
39#if PAGETABLE_LEVELS > 3
40void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
41{
42 paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
43 tlb_remove_page(tlb, virt_to_page(pud));
44}
45#endif /* PAGETABLE_LEVELS > 3 */
46#endif /* PAGETABLE_LEVELS > 2 */
47
48static inline void pgd_list_add(pgd_t *pgd)
49{
50 struct page *page = virt_to_page(pgd);
51
52 list_add(&page->lru, &pgd_list);
53}
54
55static inline void pgd_list_del(pgd_t *pgd)
56{
57 struct page *page = virt_to_page(pgd);
58
59 list_del(&page->lru);
60}
61
62#define UNSHARED_PTRS_PER_PGD \
63 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
64
65static void pgd_ctor(void *p)
66{
67 pgd_t *pgd = p;
68 unsigned long flags;
69
70 /* Clear usermode parts of PGD */
71 memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
72
73 spin_lock_irqsave(&pgd_lock, flags);
74
75 /* If the pgd points to a shared pagetable level (either the
76 ptes in non-PAE, or shared PMD in PAE), then just copy the
77 references from swapper_pg_dir. */
78 if (PAGETABLE_LEVELS == 2 ||
79 (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
80 PAGETABLE_LEVELS == 4) {
81 clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
82 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
83 KERNEL_PGD_PTRS);
84 paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
85 __pa(swapper_pg_dir) >> PAGE_SHIFT,
86 KERNEL_PGD_BOUNDARY,
87 KERNEL_PGD_PTRS);
88 }
89
90 /* list required to sync kernel mapping updates */
91 if (!SHARED_KERNEL_PMD)
92 pgd_list_add(pgd);
93
94 spin_unlock_irqrestore(&pgd_lock, flags);
95}
96
97static void pgd_dtor(void *pgd)
98{
99 unsigned long flags; /* can be called from interrupt context */
100
101 if (SHARED_KERNEL_PMD)
102 return;
103
104 spin_lock_irqsave(&pgd_lock, flags);
105 pgd_list_del(pgd);
106 spin_unlock_irqrestore(&pgd_lock, flags);
107}
108
109/*
110 * List of all pgd's needed for non-PAE so it can invalidate entries
111 * in both cached and uncached pgd's; not needed for PAE since the
112 * kernel pmd is shared. If PAE were not to share the pmd a similar
113 * tactic would be needed. This is essentially codepath-based locking
114 * against pageattr.c; it is the unique case in which a valid change
115 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
116 * vmalloc faults work because attached pagetables are never freed.
117 * -- wli
118 */
119
120#ifdef CONFIG_X86_PAE
121/*
122 * Mop up any pmd pages which may still be attached to the pgd.
123 * Normally they will be freed by munmap/exit_mmap, but any pmd we
124 * preallocate which never got a corresponding vma will need to be
125 * freed manually.
126 */
127static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
128{
129 int i;
130
131 for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
132 pgd_t pgd = pgdp[i];
133
134 if (pgd_val(pgd) != 0) {
135 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
136
137 pgdp[i] = native_make_pgd(0);
138
139 paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
140 pmd_free(mm, pmd);
141 }
142 }
143}
144
145/*
146 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
147 * updating the top-level pagetable entries to guarantee the
148 * processor notices the update. Since this is expensive, and
149 * all 4 top-level entries are used almost immediately in a
150 * new process's life, we just pre-populate them here.
151 *
152 * Also, if we're in a paravirt environment where the kernel pmd is
 153 * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
154 * and initialize the kernel pmds here.
155 */
156static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
157{
158 pud_t *pud;
159 unsigned long addr;
160 int i;
161
162 pud = pud_offset(pgd, 0);
163 for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
164 i++, pud++, addr += PUD_SIZE) {
165 pmd_t *pmd = pmd_alloc_one(mm, addr);
166
167 if (!pmd) {
168 pgd_mop_up_pmds(mm, pgd);
169 return 0;
170 }
171
172 if (i >= KERNEL_PGD_BOUNDARY)
173 memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
174 sizeof(pmd_t) * PTRS_PER_PMD);
175
176 pud_populate(mm, pud, pmd);
177 }
178
179 return 1;
180}
181
182void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
183{
184 paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
185
186 /* Note: almost everything apart from _PAGE_PRESENT is
187 reserved at the pmd (PDPT) level. */
188 set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
189
190 /*
191 * According to Intel App note "TLBs, Paging-Structure Caches,
192 * and Their Invalidation", April 2007, document 317080-001,
193 * section 8.1: in PAE mode we explicitly have to flush the
194 * TLB via cr3 if the top-level pgd is changed...
195 */
196 if (mm == current->active_mm)
197 write_cr3(read_cr3());
198}
199#else /* !CONFIG_X86_PAE */
200/* No need to prepopulate any pagetable entries in non-PAE modes. */
201static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
202{
203 return 1;
204}
205
206static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
207{
208}
209#endif /* CONFIG_X86_PAE */
210
211pgd_t *pgd_alloc(struct mm_struct *mm)
212{
213 pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
214
215 /* so that alloc_pmd can use it */
216 mm->pgd = pgd;
217 if (pgd)
218 pgd_ctor(pgd);
219
220 if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
221 pgd_dtor(pgd);
222 free_page((unsigned long)pgd);
223 pgd = NULL;
224 }
225
226 return pgd;
227}
228
229void pgd_free(struct mm_struct *mm, pgd_t *pgd)
230{
231 pgd_mop_up_pmds(mm, pgd);
232 pgd_dtor(pgd);
233 free_page((unsigned long)pgd);
234}
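For orientation, a sketch of the allocation lifecycle these helpers support; pgd_alloc() and pgd_free() are the functions above, the mm handling around them is an illustrative assumption:

/* Hypothetical: give a fresh mm its page directory and tear it down again.
 * pgd_alloc() also pre-populates the PAE pmds via pgd_prepopulate_pmd(). */
static int example_mm_pgd(struct mm_struct *mm)
{
	mm->pgd = pgd_alloc(mm);
	if (!mm->pgd)
		return -ENOMEM;

	/* ... address space in use ... */

	pgd_free(mm, mm->pgd);
	return 0;
}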
235
236int ptep_set_access_flags(struct vm_area_struct *vma,
237 unsigned long address, pte_t *ptep,
238 pte_t entry, int dirty)
239{
240 int changed = !pte_same(*ptep, entry);
241
242 if (changed && dirty) {
243 *ptep = entry;
244 pte_update_defer(vma->vm_mm, address, ptep);
245 flush_tlb_page(vma, address);
246 }
247
248 return changed;
249}
250
251int ptep_test_and_clear_young(struct vm_area_struct *vma,
252 unsigned long addr, pte_t *ptep)
253{
254 int ret = 0;
255
256 if (pte_young(*ptep))
257 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
258 &ptep->pte);
259
260 if (ret)
261 pte_update(vma->vm_mm, addr, ptep);
262
263 return ret;
264}
265
266int ptep_clear_flush_young(struct vm_area_struct *vma,
267 unsigned long address, pte_t *ptep)
268{
269 int young;
270
271 young = ptep_test_and_clear_young(vma, address, ptep);
272 if (young)
273 flush_tlb_page(vma, address);
274
275 return young;
276}
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6fb9e7c6893f..9ee007be9142 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -173,210 +173,6 @@ void reserve_top_address(unsigned long reserve)
173 __VMALLOC_RESERVE += reserve; 173 __VMALLOC_RESERVE += reserve;
174} 174}
175 175
176pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
177{
178 return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
179}
180
181pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
182{
183 struct page *pte;
184
185#ifdef CONFIG_HIGHPTE
186 pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
187#else
188 pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
189#endif
190 if (pte)
191 pgtable_page_ctor(pte);
192 return pte;
193}
194
195/*
196 * List of all pgd's needed for non-PAE so it can invalidate entries
197 * in both cached and uncached pgd's; not needed for PAE since the
198 * kernel pmd is shared. If PAE were not to share the pmd a similar
199 * tactic would be needed. This is essentially codepath-based locking
200 * against pageattr.c; it is the unique case in which a valid change
201 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
202 * vmalloc faults work because attached pagetables are never freed.
203 * -- wli
204 */
205static inline void pgd_list_add(pgd_t *pgd)
206{
207 struct page *page = virt_to_page(pgd);
208
209 list_add(&page->lru, &pgd_list);
210}
211
212static inline void pgd_list_del(pgd_t *pgd)
213{
214 struct page *page = virt_to_page(pgd);
215
216 list_del(&page->lru);
217}
218
219#define UNSHARED_PTRS_PER_PGD \
220 (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
221
222static void pgd_ctor(void *p)
223{
224 pgd_t *pgd = p;
225 unsigned long flags;
226
227 /* Clear usermode parts of PGD */
228 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
229
230 spin_lock_irqsave(&pgd_lock, flags);
231
232 /* If the pgd points to a shared pagetable level (either the
233 ptes in non-PAE, or shared PMD in PAE), then just copy the
234 references from swapper_pg_dir. */
235 if (PAGETABLE_LEVELS == 2 ||
236 (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
237 clone_pgd_range(pgd + USER_PTRS_PER_PGD,
238 swapper_pg_dir + USER_PTRS_PER_PGD,
239 KERNEL_PGD_PTRS);
240 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
241 __pa(swapper_pg_dir) >> PAGE_SHIFT,
242 USER_PTRS_PER_PGD,
243 KERNEL_PGD_PTRS);
244 }
245
246 /* list required to sync kernel mapping updates */
247 if (!SHARED_KERNEL_PMD)
248 pgd_list_add(pgd);
249
250 spin_unlock_irqrestore(&pgd_lock, flags);
251}
252
253static void pgd_dtor(void *pgd)
254{
255 unsigned long flags; /* can be called from interrupt context */
256
257 if (SHARED_KERNEL_PMD)
258 return;
259
260 spin_lock_irqsave(&pgd_lock, flags);
261 pgd_list_del(pgd);
262 spin_unlock_irqrestore(&pgd_lock, flags);
263}
264
265#ifdef CONFIG_X86_PAE
266/*
267 * Mop up any pmd pages which may still be attached to the pgd.
268 * Normally they will be freed by munmap/exit_mmap, but any pmd we
269 * preallocate which never got a corresponding vma will need to be
270 * freed manually.
271 */
272static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
273{
274 int i;
275
276 for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
277 pgd_t pgd = pgdp[i];
278
279 if (pgd_val(pgd) != 0) {
280 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
281
282 pgdp[i] = native_make_pgd(0);
283
284 paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
285 pmd_free(mm, pmd);
286 }
287 }
288}
289
290/*
291 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
292 * updating the top-level pagetable entries to guarantee the
293 * processor notices the update. Since this is expensive, and
294 * all 4 top-level entries are used almost immediately in a
295 * new process's life, we just pre-populate them here.
296 *
297 * Also, if we're in a paravirt environment where the kernel pmd is
298 * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
299 * and initialize the kernel pmds here.
300 */
301static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
302{
303 pud_t *pud;
304 unsigned long addr;
305 int i;
306
307 pud = pud_offset(pgd, 0);
308 for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
309 i++, pud++, addr += PUD_SIZE) {
310 pmd_t *pmd = pmd_alloc_one(mm, addr);
311
312 if (!pmd) {
313 pgd_mop_up_pmds(mm, pgd);
314 return 0;
315 }
316
317 if (i >= USER_PTRS_PER_PGD)
318 memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
319 sizeof(pmd_t) * PTRS_PER_PMD);
320
321 pud_populate(mm, pud, pmd);
322 }
323
324 return 1;
325}
326#else /* !CONFIG_X86_PAE */
327/* No need to prepopulate any pagetable entries in non-PAE modes. */
328static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
329{
330 return 1;
331}
332
333static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
334{
335}
336#endif /* CONFIG_X86_PAE */
337
338pgd_t *pgd_alloc(struct mm_struct *mm)
339{
340 pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
341
342 /* so that alloc_pd can use it */
343 mm->pgd = pgd;
344 if (pgd)
345 pgd_ctor(pgd);
346
347 if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
348 pgd_dtor(pgd);
349 free_page((unsigned long)pgd);
350 pgd = NULL;
351 }
352
353 return pgd;
354}
355
356void pgd_free(struct mm_struct *mm, pgd_t *pgd)
357{
358 pgd_mop_up_pmds(mm, pgd);
359 pgd_dtor(pgd);
360 free_page((unsigned long)pgd);
361}
362
363void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
364{
365 pgtable_page_dtor(pte);
366 paravirt_release_pt(page_to_pfn(pte));
367 tlb_remove_page(tlb, pte);
368}
369
370#ifdef CONFIG_X86_PAE
371
372void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
373{
374 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
375 tlb_remove_page(tlb, virt_to_page(pmd));
376}
377
378#endif
379
380int pmd_bad(pmd_t pmd) 176int pmd_bad(pmd_t pmd)
381{ 177{
382 WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); 178 WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index fb43d89f46f3..3890234e5b26 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -163,7 +163,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
163 pxm, apic_id, node); 163 pxm, apic_id, node);
164} 164}
165 165
166int update_end_of_memory(unsigned long end) {return -1;} 166static int update_end_of_memory(unsigned long end) {return -1;}
167static int hotadd_enough_memory(struct bootnode *nd) {return 1;} 167static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
168#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 168#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
169static inline int save_add_info(void) {return 1;} 169static inline int save_add_info(void) {return 1;}
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 4d5f2649bee4..2e641be2737e 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,7 @@ config XEN
6 bool "Xen guest support" 6 bool "Xen guest support"
7 select PARAVIRT 7 select PARAVIRT
8 depends on X86_32 8 depends on X86_32
9 depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER) 9 depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
10 help 10 help
11 This is the Linux Xen port. Enabling this will allow the 11 This is the Linux Xen port. Enabling this will allow the
12 kernel to boot in a paravirtualized environment under the 12 kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 343df246bd3e..3d8df981d5fd 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
1obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ 1obj-y := enlighten.o setup.o multicalls.o mmu.o \
2 events.o time.o manage.o xen-asm.o 2 time.o manage.o xen-asm.o grant-table.o
3 3
4obj-$(CONFIG_SMP) += smp.o 4obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0388220cf97..c8a56e457d61 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -155,7 +155,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
155 if (*ax == 1) 155 if (*ax == 1)
156 maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ 156 maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
157 (1 << X86_FEATURE_ACPI) | /* disable ACPI */ 157 (1 << X86_FEATURE_ACPI) | /* disable ACPI */
158 (1 << X86_FEATURE_SEP) | /* disable SEP */ 158 (1 << X86_FEATURE_MCE) | /* disable MCE */
159 (1 << X86_FEATURE_MCA) | /* disable MCA */
159 (1 << X86_FEATURE_ACC)); /* thermal monitoring */ 160 (1 << X86_FEATURE_ACC)); /* thermal monitoring */
160 161
161 asm(XEN_EMULATE_PREFIX "cpuid" 162 asm(XEN_EMULATE_PREFIX "cpuid"
@@ -531,26 +532,37 @@ static void xen_apic_write(unsigned long reg, u32 val)
531static void xen_flush_tlb(void) 532static void xen_flush_tlb(void)
532{ 533{
533 struct mmuext_op *op; 534 struct mmuext_op *op;
534 struct multicall_space mcs = xen_mc_entry(sizeof(*op)); 535 struct multicall_space mcs;
536
537 preempt_disable();
538
539 mcs = xen_mc_entry(sizeof(*op));
535 540
536 op = mcs.args; 541 op = mcs.args;
537 op->cmd = MMUEXT_TLB_FLUSH_LOCAL; 542 op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
538 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 543 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
539 544
540 xen_mc_issue(PARAVIRT_LAZY_MMU); 545 xen_mc_issue(PARAVIRT_LAZY_MMU);
546
547 preempt_enable();
541} 548}
542 549
543static void xen_flush_tlb_single(unsigned long addr) 550static void xen_flush_tlb_single(unsigned long addr)
544{ 551{
545 struct mmuext_op *op; 552 struct mmuext_op *op;
546 struct multicall_space mcs = xen_mc_entry(sizeof(*op)); 553 struct multicall_space mcs;
554
555 preempt_disable();
547 556
557 mcs = xen_mc_entry(sizeof(*op));
548 op = mcs.args; 558 op = mcs.args;
549 op->cmd = MMUEXT_INVLPG_LOCAL; 559 op->cmd = MMUEXT_INVLPG_LOCAL;
550 op->arg1.linear_addr = addr & PAGE_MASK; 560 op->arg1.linear_addr = addr & PAGE_MASK;
551 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 561 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
552 562
553 xen_mc_issue(PARAVIRT_LAZY_MMU); 563 xen_mc_issue(PARAVIRT_LAZY_MMU);
564
565 preempt_enable();
554} 566}
555 567
556static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, 568static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
@@ -655,15 +667,17 @@ static void xen_write_cr3(unsigned long cr3)
655 667
656/* Early in boot, while setting up the initial pagetable, assume 668/* Early in boot, while setting up the initial pagetable, assume
657 everything is pinned. */ 669 everything is pinned. */
658static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) 670static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
659{ 671{
672#ifdef CONFIG_FLATMEM
660 BUG_ON(mem_map); /* should only be used early */ 673 BUG_ON(mem_map); /* should only be used early */
674#endif
661 make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); 675 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
662} 676}
663 677
664/* Early release_pt assumes that all pts are pinned, since there's 678/* Early release_pte assumes that all pts are pinned, since there's
665 only init_mm and anything attached to that is pinned. */ 679 only init_mm and anything attached to that is pinned. */
666static void xen_release_pt_init(u32 pfn) 680static void xen_release_pte_init(u32 pfn)
667{ 681{
668 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 682 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
669} 683}
@@ -697,12 +711,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
697 } 711 }
698} 712}
699 713
700static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) 714static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
701{ 715{
702 xen_alloc_ptpage(mm, pfn, PT_PTE); 716 xen_alloc_ptpage(mm, pfn, PT_PTE);
703} 717}
704 718
705static void xen_alloc_pd(struct mm_struct *mm, u32 pfn) 719static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
706{ 720{
707 xen_alloc_ptpage(mm, pfn, PT_PMD); 721 xen_alloc_ptpage(mm, pfn, PT_PMD);
708} 722}
@@ -722,12 +736,12 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
722 } 736 }
723} 737}
724 738
725static void xen_release_pt(u32 pfn) 739static void xen_release_pte(u32 pfn)
726{ 740{
727 xen_release_ptpage(pfn, PT_PTE); 741 xen_release_ptpage(pfn, PT_PTE);
728} 742}
729 743
730static void xen_release_pd(u32 pfn) 744static void xen_release_pmd(u32 pfn)
731{ 745{
732 xen_release_ptpage(pfn, PT_PMD); 746 xen_release_ptpage(pfn, PT_PMD);
733} 747}
@@ -849,10 +863,10 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
849{ 863{
850 /* This will work as long as patching hasn't happened yet 864 /* This will work as long as patching hasn't happened yet
851 (which it hasn't) */ 865 (which it hasn't) */
852 pv_mmu_ops.alloc_pt = xen_alloc_pt; 866 pv_mmu_ops.alloc_pte = xen_alloc_pte;
853 pv_mmu_ops.alloc_pd = xen_alloc_pd; 867 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
854 pv_mmu_ops.release_pt = xen_release_pt; 868 pv_mmu_ops.release_pte = xen_release_pte;
855 pv_mmu_ops.release_pd = xen_release_pd; 869 pv_mmu_ops.release_pmd = xen_release_pmd;
856 pv_mmu_ops.set_pte = xen_set_pte; 870 pv_mmu_ops.set_pte = xen_set_pte;
857 871
858 setup_shared_info(); 872 setup_shared_info();
@@ -994,7 +1008,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
994 .read_pmc = native_read_pmc, 1008 .read_pmc = native_read_pmc,
995 1009
996 .iret = xen_iret, 1010 .iret = xen_iret,
997 .irq_enable_syscall_ret = NULL, /* never called */ 1011 .irq_enable_syscall_ret = xen_sysexit,
998 1012
999 .load_tr_desc = paravirt_nop, 1013 .load_tr_desc = paravirt_nop,
1000 .set_ldt = xen_set_ldt, 1014 .set_ldt = xen_set_ldt,
@@ -1059,11 +1073,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1059 .pte_update = paravirt_nop, 1073 .pte_update = paravirt_nop,
1060 .pte_update_defer = paravirt_nop, 1074 .pte_update_defer = paravirt_nop,
1061 1075
1062 .alloc_pt = xen_alloc_pt_init, 1076 .alloc_pte = xen_alloc_pte_init,
1063 .release_pt = xen_release_pt_init, 1077 .release_pte = xen_release_pte_init,
1064 .alloc_pd = xen_alloc_pt_init, 1078 .alloc_pmd = xen_alloc_pte_init,
1065 .alloc_pd_clone = paravirt_nop, 1079 .alloc_pmd_clone = paravirt_nop,
1066 .release_pd = xen_release_pt_init, 1080 .release_pmd = xen_release_pte_init,
1067 1081
1068#ifdef CONFIG_HIGHPTE 1082#ifdef CONFIG_HIGHPTE
1069 .kmap_atomic_pte = xen_kmap_atomic_pte, 1083 .kmap_atomic_pte = xen_kmap_atomic_pte,
diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
deleted file mode 100644
index dcf613e17581..000000000000
--- a/arch/x86/xen/events.c
+++ /dev/null
@@ -1,591 +0,0 @@
1/*
2 * Xen event channels
3 *
4 * Xen models interrupts with abstract event channels. Because each
 5 * domain gets 1024 event channels, but NR_IRQS is not that large, we
6 * must dynamically map irqs<->event channels. The event channels
7 * interface with the rest of the kernel by defining a xen interrupt
 8 * chip. When an event is received, it is mapped to an irq and sent
9 * through the normal interrupt processing path.
10 *
11 * There are four kinds of events which can be mapped to an event
12 * channel:
13 *
14 * 1. Inter-domain notifications. This includes all the virtual
15 * device events, since they're driven by front-ends in another domain
16 * (typically dom0).
17 * 2. VIRQs, typically used for timers. These are per-cpu events.
18 * 3. IPIs.
19 * 4. Hardware interrupts. Not supported at present.
20 *
21 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
22 */
23
24#include <linux/linkage.h>
25#include <linux/interrupt.h>
26#include <linux/irq.h>
27#include <linux/module.h>
28#include <linux/string.h>
29
30#include <asm/ptrace.h>
31#include <asm/irq.h>
32#include <asm/sync_bitops.h>
33#include <asm/xen/hypercall.h>
34#include <asm/xen/hypervisor.h>
35
36#include <xen/events.h>
37#include <xen/interface/xen.h>
38#include <xen/interface/event_channel.h>
39
40#include "xen-ops.h"
41
42/*
43 * This lock protects updates to the following mapping and reference-count
44 * arrays. The lock does not need to be acquired to read the mapping tables.
45 */
46static DEFINE_SPINLOCK(irq_mapping_update_lock);
47
48/* IRQ <-> VIRQ mapping. */
49static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
50
51/* IRQ <-> IPI mapping */
52static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
53
54/* Packed IRQ information: binding type, sub-type index, and event channel. */
55struct packed_irq
56{
57 unsigned short evtchn;
58 unsigned char index;
59 unsigned char type;
60};
61
62static struct packed_irq irq_info[NR_IRQS];
63
64/* Binding types. */
65enum {
66 IRQT_UNBOUND,
67 IRQT_PIRQ,
68 IRQT_VIRQ,
69 IRQT_IPI,
70 IRQT_EVTCHN
71};
72
73/* Convenient shorthand for packed representation of an unbound IRQ. */
74#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
75
76static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
77 [0 ... NR_EVENT_CHANNELS-1] = -1
78};
79static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
80static u8 cpu_evtchn[NR_EVENT_CHANNELS];
81
82/* Reference counts for bindings to IRQs. */
83static int irq_bindcount[NR_IRQS];
84
85/* Xen will never allocate port zero for any purpose. */
86#define VALID_EVTCHN(chn) ((chn) != 0)
87
88/*
89 * Force a proper event-channel callback from Xen after clearing the
90 * callback mask. We do this in a very simple manner, by making a call
91 * down into Xen. The pending flag will be checked by Xen on return.
92 */
93void force_evtchn_callback(void)
94{
95 (void)HYPERVISOR_xen_version(0, NULL);
96}
97EXPORT_SYMBOL_GPL(force_evtchn_callback);
98
99static struct irq_chip xen_dynamic_chip;
100
101/* Constructor for packed IRQ information. */
102static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
103{
104 return (struct packed_irq) { evtchn, index, type };
105}
106
107/*
108 * Accessors for packed IRQ information.
109 */
110static inline unsigned int evtchn_from_irq(int irq)
111{
112 return irq_info[irq].evtchn;
113}
114
115static inline unsigned int index_from_irq(int irq)
116{
117 return irq_info[irq].index;
118}
119
120static inline unsigned int type_from_irq(int irq)
121{
122 return irq_info[irq].type;
123}
124
125static inline unsigned long active_evtchns(unsigned int cpu,
126 struct shared_info *sh,
127 unsigned int idx)
128{
129 return (sh->evtchn_pending[idx] &
130 cpu_evtchn_mask[cpu][idx] &
131 ~sh->evtchn_mask[idx]);
132}
133
134static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
135{
136 int irq = evtchn_to_irq[chn];
137
138 BUG_ON(irq == -1);
139#ifdef CONFIG_SMP
140 irq_desc[irq].affinity = cpumask_of_cpu(cpu);
141#endif
142
143 __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
144 __set_bit(chn, cpu_evtchn_mask[cpu]);
145
146 cpu_evtchn[chn] = cpu;
147}
148
149static void init_evtchn_cpu_bindings(void)
150{
151#ifdef CONFIG_SMP
152 int i;
153 /* By default all event channels notify CPU#0. */
154 for (i = 0; i < NR_IRQS; i++)
155 irq_desc[i].affinity = cpumask_of_cpu(0);
156#endif
157
158 memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
159 memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
160}
161
162static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
163{
164 return cpu_evtchn[evtchn];
165}
166
167static inline void clear_evtchn(int port)
168{
169 struct shared_info *s = HYPERVISOR_shared_info;
170 sync_clear_bit(port, &s->evtchn_pending[0]);
171}
172
173static inline void set_evtchn(int port)
174{
175 struct shared_info *s = HYPERVISOR_shared_info;
176 sync_set_bit(port, &s->evtchn_pending[0]);
177}
178
179
180/**
181 * notify_remote_via_irq - send event to remote end of event channel via irq
182 * @irq: irq of event channel to send event to
183 *
184 * Unlike notify_remote_via_evtchn(), this is safe to use across
185 * save/restore. Notifications on a broken connection are silently
186 * dropped.
187 */
188void notify_remote_via_irq(int irq)
189{
190 int evtchn = evtchn_from_irq(irq);
191
192 if (VALID_EVTCHN(evtchn))
193 notify_remote_via_evtchn(evtchn);
194}
195EXPORT_SYMBOL_GPL(notify_remote_via_irq);
196
197static void mask_evtchn(int port)
198{
199 struct shared_info *s = HYPERVISOR_shared_info;
200 sync_set_bit(port, &s->evtchn_mask[0]);
201}
202
203static void unmask_evtchn(int port)
204{
205 struct shared_info *s = HYPERVISOR_shared_info;
206 unsigned int cpu = get_cpu();
207
208 BUG_ON(!irqs_disabled());
209
210 /* Slow path (hypercall) if this is a non-local port. */
211 if (unlikely(cpu != cpu_from_evtchn(port))) {
212 struct evtchn_unmask unmask = { .port = port };
213 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
214 } else {
215 struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
216
217 sync_clear_bit(port, &s->evtchn_mask[0]);
218
219 /*
220 * The following is basically the equivalent of
221 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
222 * the interrupt edge' if the channel is masked.
223 */
224 if (sync_test_bit(port, &s->evtchn_pending[0]) &&
225 !sync_test_and_set_bit(port / BITS_PER_LONG,
226 &vcpu_info->evtchn_pending_sel))
227 vcpu_info->evtchn_upcall_pending = 1;
228 }
229
230 put_cpu();
231}
232
233static int find_unbound_irq(void)
234{
235 int irq;
236
237 /* Only allocate from dynirq range */
238 for (irq = 0; irq < NR_IRQS; irq++)
239 if (irq_bindcount[irq] == 0)
240 break;
241
242 if (irq == NR_IRQS)
243 panic("No available IRQ to bind to: increase NR_IRQS!\n");
244
245 return irq;
246}
247
248int bind_evtchn_to_irq(unsigned int evtchn)
249{
250 int irq;
251
252 spin_lock(&irq_mapping_update_lock);
253
254 irq = evtchn_to_irq[evtchn];
255
256 if (irq == -1) {
257 irq = find_unbound_irq();
258
259 dynamic_irq_init(irq);
260 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
261 handle_level_irq, "event");
262
263 evtchn_to_irq[evtchn] = irq;
264 irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
265 }
266
267 irq_bindcount[irq]++;
268
269 spin_unlock(&irq_mapping_update_lock);
270
271 return irq;
272}
273EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
274
275static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
276{
277 struct evtchn_bind_ipi bind_ipi;
278 int evtchn, irq;
279
280 spin_lock(&irq_mapping_update_lock);
281
282 irq = per_cpu(ipi_to_irq, cpu)[ipi];
283 if (irq == -1) {
284 irq = find_unbound_irq();
285 if (irq < 0)
286 goto out;
287
288 dynamic_irq_init(irq);
289 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
290 handle_level_irq, "ipi");
291
292 bind_ipi.vcpu = cpu;
293 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
294 &bind_ipi) != 0)
295 BUG();
296 evtchn = bind_ipi.port;
297
298 evtchn_to_irq[evtchn] = irq;
299 irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
300
301 per_cpu(ipi_to_irq, cpu)[ipi] = irq;
302
303 bind_evtchn_to_cpu(evtchn, cpu);
304 }
305
306 irq_bindcount[irq]++;
307
308 out:
309 spin_unlock(&irq_mapping_update_lock);
310 return irq;
311}
312
313
314static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
315{
316 struct evtchn_bind_virq bind_virq;
317 int evtchn, irq;
318
319 spin_lock(&irq_mapping_update_lock);
320
321 irq = per_cpu(virq_to_irq, cpu)[virq];
322
323 if (irq == -1) {
324 bind_virq.virq = virq;
325 bind_virq.vcpu = cpu;
326 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
327 &bind_virq) != 0)
328 BUG();
329 evtchn = bind_virq.port;
330
331 irq = find_unbound_irq();
332
333 dynamic_irq_init(irq);
334 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
335 handle_level_irq, "virq");
336
337 evtchn_to_irq[evtchn] = irq;
338 irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
339
340 per_cpu(virq_to_irq, cpu)[virq] = irq;
341
342 bind_evtchn_to_cpu(evtchn, cpu);
343 }
344
345 irq_bindcount[irq]++;
346
347 spin_unlock(&irq_mapping_update_lock);
348
349 return irq;
350}
351
352static void unbind_from_irq(unsigned int irq)
353{
354 struct evtchn_close close;
355 int evtchn = evtchn_from_irq(irq);
356
357 spin_lock(&irq_mapping_update_lock);
358
359 if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
360 close.port = evtchn;
361 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
362 BUG();
363
364 switch (type_from_irq(irq)) {
365 case IRQT_VIRQ:
366 per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
367 [index_from_irq(irq)] = -1;
368 break;
369 default:
370 break;
371 }
372
373 /* Closed ports are implicitly re-bound to VCPU0. */
374 bind_evtchn_to_cpu(evtchn, 0);
375
376 evtchn_to_irq[evtchn] = -1;
377 irq_info[irq] = IRQ_UNBOUND;
378
379 dynamic_irq_init(irq);
380 }
381
382 spin_unlock(&irq_mapping_update_lock);
383}
384
385int bind_evtchn_to_irqhandler(unsigned int evtchn,
386 irq_handler_t handler,
387 unsigned long irqflags,
388 const char *devname, void *dev_id)
389{
390 unsigned int irq;
391 int retval;
392
393 irq = bind_evtchn_to_irq(evtchn);
394 retval = request_irq(irq, handler, irqflags, devname, dev_id);
395 if (retval != 0) {
396 unbind_from_irq(irq);
397 return retval;
398 }
399
400 return irq;
401}
402EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
403
404int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
405 irq_handler_t handler,
406 unsigned long irqflags, const char *devname, void *dev_id)
407{
408 unsigned int irq;
409 int retval;
410
411 irq = bind_virq_to_irq(virq, cpu);
412 retval = request_irq(irq, handler, irqflags, devname, dev_id);
413 if (retval != 0) {
414 unbind_from_irq(irq);
415 return retval;
416 }
417
418 return irq;
419}
420EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
421
422int bind_ipi_to_irqhandler(enum ipi_vector ipi,
423 unsigned int cpu,
424 irq_handler_t handler,
425 unsigned long irqflags,
426 const char *devname,
427 void *dev_id)
428{
429 int irq, retval;
430
431 irq = bind_ipi_to_irq(ipi, cpu);
432 if (irq < 0)
433 return irq;
434
435 retval = request_irq(irq, handler, irqflags, devname, dev_id);
436 if (retval != 0) {
437 unbind_from_irq(irq);
438 return retval;
439 }
440
441 return irq;
442}
443
444void unbind_from_irqhandler(unsigned int irq, void *dev_id)
445{
446 free_irq(irq, dev_id);
447 unbind_from_irq(irq);
448}
449EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
450
451void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
452{
453 int irq = per_cpu(ipi_to_irq, cpu)[vector];
454 BUG_ON(irq < 0);
455 notify_remote_via_irq(irq);
456}
457
458
459/*
460 * Search the CPUs pending events bitmasks. For each one found, map
461 * the event number to an irq, and feed it into do_IRQ() for
462 * handling.
463 *
464 * Xen uses a two-level bitmap to speed searching. The first level is
465 * a bitset of words which contain pending event bits. The second
466 * level is a bitset of pending events themselves.
467 */
468void xen_evtchn_do_upcall(struct pt_regs *regs)
469{
470 int cpu = get_cpu();
471 struct shared_info *s = HYPERVISOR_shared_info;
472 struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
473 unsigned long pending_words;
474
475 vcpu_info->evtchn_upcall_pending = 0;
476
477 /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
478 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
479 while (pending_words != 0) {
480 unsigned long pending_bits;
481 int word_idx = __ffs(pending_words);
482 pending_words &= ~(1UL << word_idx);
483
484 while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
485 int bit_idx = __ffs(pending_bits);
486 int port = (word_idx * BITS_PER_LONG) + bit_idx;
487 int irq = evtchn_to_irq[port];
488
489 if (irq != -1) {
490 regs->orig_ax = ~irq;
491 do_IRQ(regs);
492 }
493 }
494 }
495
496 put_cpu();
497}
498
499/* Rebind an evtchn so that it gets delivered to a specific cpu */
500static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
501{
502 struct evtchn_bind_vcpu bind_vcpu;
503 int evtchn = evtchn_from_irq(irq);
504
505 if (!VALID_EVTCHN(evtchn))
506 return;
507
508 /* Send future instances of this interrupt to other vcpu. */
509 bind_vcpu.port = evtchn;
510 bind_vcpu.vcpu = tcpu;
511
512 /*
513 * If this fails, it usually just indicates that we're dealing with a
514 * virq or IPI channel, which don't actually need to be rebound. Ignore
515 * it, but don't do the xenlinux-level rebind in that case.
516 */
517 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
518 bind_evtchn_to_cpu(evtchn, tcpu);
519}
520
521
522static void set_affinity_irq(unsigned irq, cpumask_t dest)
523{
524 unsigned tcpu = first_cpu(dest);
525 rebind_irq_to_cpu(irq, tcpu);
526}
527
528static void enable_dynirq(unsigned int irq)
529{
530 int evtchn = evtchn_from_irq(irq);
531
532 if (VALID_EVTCHN(evtchn))
533 unmask_evtchn(evtchn);
534}
535
536static void disable_dynirq(unsigned int irq)
537{
538 int evtchn = evtchn_from_irq(irq);
539
540 if (VALID_EVTCHN(evtchn))
541 mask_evtchn(evtchn);
542}
543
544static void ack_dynirq(unsigned int irq)
545{
546 int evtchn = evtchn_from_irq(irq);
547
548 move_native_irq(irq);
549
550 if (VALID_EVTCHN(evtchn))
551 clear_evtchn(evtchn);
552}
553
554static int retrigger_dynirq(unsigned int irq)
555{
556 int evtchn = evtchn_from_irq(irq);
557 int ret = 0;
558
559 if (VALID_EVTCHN(evtchn)) {
560 set_evtchn(evtchn);
561 ret = 1;
562 }
563
564 return ret;
565}
566
567static struct irq_chip xen_dynamic_chip __read_mostly = {
568 .name = "xen-dyn",
569 .mask = disable_dynirq,
570 .unmask = enable_dynirq,
571 .ack = ack_dynirq,
572 .set_affinity = set_affinity_irq,
573 .retrigger = retrigger_dynirq,
574};
575
576void __init xen_init_IRQ(void)
577{
578 int i;
579
580 init_evtchn_cpu_bindings();
581
582 /* No event channels are 'live' right now. */
583 for (i = 0; i < NR_EVENT_CHANNELS; i++)
584 mask_evtchn(i);
585
586 /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
587 for (i = 0; i < NR_IRQS; i++)
588 irq_bindcount[i] = 0;
589
590 irq_ctx_init(smp_processor_id());
591}
diff --git a/arch/x86/xen/features.c b/arch/x86/xen/features.c
deleted file mode 100644
index 0707714e40d6..000000000000
--- a/arch/x86/xen/features.c
+++ /dev/null
@@ -1,29 +0,0 @@
1/******************************************************************************
2 * features.c
3 *
4 * Xen feature flags.
5 *
6 * Copyright (c) 2006, Ian Campbell, XenSource Inc.
7 */
8#include <linux/types.h>
9#include <linux/cache.h>
10#include <linux/module.h>
11#include <asm/xen/hypervisor.h>
12#include <xen/features.h>
13
14u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
15EXPORT_SYMBOL_GPL(xen_features);
16
17void xen_setup_features(void)
18{
19 struct xen_feature_info fi;
20 int i, j;
21
22 for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
23 fi.submap_idx = i;
24 if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
25 break;
26 for (j = 0; j < 32; j++)
27 xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
28 }
29}
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
new file mode 100644
index 000000000000..49ba9b5224d1
--- /dev/null
+++ b/arch/x86/xen/grant-table.c
@@ -0,0 +1,91 @@
1/******************************************************************************
2 * grant_table.c
3 * x86 specific part
4 *
5 * Granting foreign access to our memory reservation.
6 *
7 * Copyright (c) 2005-2006, Christopher Clark
8 * Copyright (c) 2004-2005, K A Fraser
9 * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
10 * VA Linux Systems Japan. Split out x86 specific part.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
17 *
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
24 *
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
34 * IN THE SOFTWARE.
35 */
36
37#include <linux/sched.h>
38#include <linux/mm.h>
39#include <linux/vmalloc.h>
40
41#include <xen/interface/xen.h>
42#include <xen/page.h>
43#include <xen/grant_table.h>
44
45#include <asm/pgtable.h>
46
47static int map_pte_fn(pte_t *pte, struct page *pmd_page,
48 unsigned long addr, void *data)
49{
50 unsigned long **frames = (unsigned long **)data;
51
52 set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
53 (*frames)++;
54 return 0;
55}
56
57static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
58 unsigned long addr, void *data)
59{
60
61 set_pte_at(&init_mm, addr, pte, __pte(0));
62 return 0;
63}
64
65int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
66 unsigned long max_nr_gframes,
67 struct grant_entry **__shared)
68{
69 int rc;
70 struct grant_entry *shared = *__shared;
71
72 if (shared == NULL) {
73 struct vm_struct *area =
74 xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
75 BUG_ON(area == NULL);
76 shared = area->addr;
77 *__shared = shared;
78 }
79
80 rc = apply_to_page_range(&init_mm, (unsigned long)shared,
81 PAGE_SIZE * nr_gframes,
82 map_pte_fn, &frames);
83 return rc;
84}
85
86void arch_gnttab_unmap_shared(struct grant_entry *shared,
87 unsigned long nr_gframes)
88{
89 apply_to_page_range(&init_mm, (unsigned long)shared,
90 PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
91}
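A hedged sketch of how generic grant-table code would drive these two hooks; arch_gnttab_map_shared() and arch_gnttab_unmap_shared() are the functions above, the frame bookkeeping is illustrative:

/* Hypothetical: (re)map nr_gframes grant frames into one shared area.
 * example_shared starts out NULL, so the first call allocates the VM area. */
static struct grant_entry *example_shared;

static int example_map_frames(unsigned long *frames, unsigned long nr_gframes,
			      unsigned long max_nr_gframes)
{
	return arch_gnttab_map_shared(frames, nr_gframes, max_nr_gframes,
				      &example_shared);
}

static void example_unmap_frames(unsigned long nr_gframes)
{
	arch_gnttab_unmap_shared(example_shared, nr_gframes);
}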
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2a054ef2a3da..6cbcf65609ad 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -156,6 +156,10 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
156void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 156void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
157 pte_t *ptep, pte_t pteval) 157 pte_t *ptep, pte_t pteval)
158{ 158{
159 /* updates to init_mm may be done without lock */
160 if (mm == &init_mm)
161 preempt_disable();
162
159 if (mm == current->mm || mm == &init_mm) { 163 if (mm == current->mm || mm == &init_mm) {
160 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { 164 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
161 struct multicall_space mcs; 165 struct multicall_space mcs;
@@ -163,14 +167,61 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
163 167
164 MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); 168 MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
165 xen_mc_issue(PARAVIRT_LAZY_MMU); 169 xen_mc_issue(PARAVIRT_LAZY_MMU);
166 return; 170 goto out;
167 } else 171 } else
168 if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) 172 if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
169 return; 173 goto out;
170 } 174 }
171 xen_set_pte(ptep, pteval); 175 xen_set_pte(ptep, pteval);
176
177out:
178 if (mm == &init_mm)
179 preempt_enable();
180}
181
182pteval_t xen_pte_val(pte_t pte)
183{
184 pteval_t ret = pte.pte;
185
186 if (ret & _PAGE_PRESENT)
187 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
188
189 return ret;
190}
191
192pgdval_t xen_pgd_val(pgd_t pgd)
193{
194 pgdval_t ret = pgd.pgd;
195 if (ret & _PAGE_PRESENT)
196 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
197 return ret;
198}
199
200pte_t xen_make_pte(pteval_t pte)
201{
202 if (pte & _PAGE_PRESENT) {
203 pte = phys_to_machine(XPADDR(pte)).maddr;
204 pte &= ~(_PAGE_PCD | _PAGE_PWT);
205 }
206
207 return (pte_t){ .pte = pte };
172} 208}
173 209
210pgd_t xen_make_pgd(pgdval_t pgd)
211{
212 if (pgd & _PAGE_PRESENT)
213 pgd = phys_to_machine(XPADDR(pgd)).maddr;
214
215 return (pgd_t){ pgd };
216}
217
218pmdval_t xen_pmd_val(pmd_t pmd)
219{
220 pmdval_t ret = native_pmd_val(pmd);
221 if (ret & _PAGE_PRESENT)
222 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
223 return ret;
224}
174#ifdef CONFIG_X86_PAE 225#ifdef CONFIG_X86_PAE
175void xen_set_pud(pud_t *ptr, pud_t val) 226void xen_set_pud(pud_t *ptr, pud_t val)
176{ 227{
@@ -214,100 +265,18 @@ void xen_pmd_clear(pmd_t *pmdp)
214 xen_set_pmd(pmdp, __pmd(0)); 265 xen_set_pmd(pmdp, __pmd(0));
215} 266}
216 267
217unsigned long long xen_pte_val(pte_t pte) 268pmd_t xen_make_pmd(pmdval_t pmd)
218{ 269{
219 unsigned long long ret = 0; 270 if (pmd & _PAGE_PRESENT)
220
221 if (pte.pte_low) {
222 ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
223 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
224 }
225
226 return ret;
227}
228
229unsigned long long xen_pmd_val(pmd_t pmd)
230{
231 unsigned long long ret = pmd.pmd;
232 if (ret)
233 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
234 return ret;
235}
236
237unsigned long long xen_pgd_val(pgd_t pgd)
238{
239 unsigned long long ret = pgd.pgd;
240 if (ret)
241 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
242 return ret;
243}
244
245pte_t xen_make_pte(unsigned long long pte)
246{
247 if (pte & _PAGE_PRESENT) {
248 pte = phys_to_machine(XPADDR(pte)).maddr;
249 pte &= ~(_PAGE_PCD | _PAGE_PWT);
250 }
251
252 return (pte_t){ .pte = pte };
253}
254
255pmd_t xen_make_pmd(unsigned long long pmd)
256{
257 if (pmd & 1)
258 pmd = phys_to_machine(XPADDR(pmd)).maddr; 271 pmd = phys_to_machine(XPADDR(pmd)).maddr;
259 272
260 return (pmd_t){ pmd }; 273 return native_make_pmd(pmd);
261}
262
263pgd_t xen_make_pgd(unsigned long long pgd)
264{
265 if (pgd & _PAGE_PRESENT)
266 pgd = phys_to_machine(XPADDR(pgd)).maddr;
267
268 return (pgd_t){ pgd };
269} 274}
270#else /* !PAE */ 275#else /* !PAE */
271void xen_set_pte(pte_t *ptep, pte_t pte) 276void xen_set_pte(pte_t *ptep, pte_t pte)
272{ 277{
273 *ptep = pte; 278 *ptep = pte;
274} 279}
275
276unsigned long xen_pte_val(pte_t pte)
277{
278 unsigned long ret = pte.pte_low;
279
280 if (ret & _PAGE_PRESENT)
281 ret = machine_to_phys(XMADDR(ret)).paddr;
282
283 return ret;
284}
285
286unsigned long xen_pgd_val(pgd_t pgd)
287{
288 unsigned long ret = pgd.pgd;
289 if (ret)
290 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
291 return ret;
292}
293
294pte_t xen_make_pte(unsigned long pte)
295{
296 if (pte & _PAGE_PRESENT) {
297 pte = phys_to_machine(XPADDR(pte)).maddr;
298 pte &= ~(_PAGE_PCD | _PAGE_PWT);
299 }
300
301 return (pte_t){ pte };
302}
303
304pgd_t xen_make_pgd(unsigned long pgd)
305{
306 if (pgd & _PAGE_PRESENT)
307 pgd = phys_to_machine(XPADDR(pgd)).maddr;
308
309 return (pgd_t){ pgd };
310}
311#endif /* CONFIG_X86_PAE */ 280#endif /* CONFIG_X86_PAE */
312 281
313/* 282/*
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2341492bf7a0..82517e4a752a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
16#include <asm/xen/hypervisor.h> 16#include <asm/xen/hypervisor.h>
17#include <asm/xen/hypercall.h> 17#include <asm/xen/hypercall.h>
18 18
19#include <xen/interface/callback.h>
19#include <xen/interface/physdev.h> 20#include <xen/interface/physdev.h>
20#include <xen/features.h> 21#include <xen/features.h>
21 22
@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void)
68 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; 69 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
69} 70}
70 71
72void xen_enable_sysenter(void)
73{
74 int cpu = smp_processor_id();
75 extern void xen_sysenter_target(void);
76 /* Mask events on entry, even though they get enabled immediately */
77 static struct callback_register sysenter = {
78 .type = CALLBACKTYPE_sysenter,
79 .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
80 .flags = CALLBACKF_mask_events,
81 };
82
83 if (!boot_cpu_has(X86_FEATURE_SEP) ||
84 HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
85 clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
86 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
87 }
88}
89
71void __init xen_arch_setup(void) 90void __init xen_arch_setup(void)
72{ 91{
73 struct physdev_set_iopl set_iopl; 92 struct physdev_set_iopl set_iopl;
@@ -82,6 +101,8 @@ void __init xen_arch_setup(void)
82 HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, 101 HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
83 __KERNEL_CS, (unsigned long)xen_failsafe_callback); 102 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
84 103
104 xen_enable_sysenter();
105
85 set_iopl.iopl = 1; 106 set_iopl.iopl = 1;
86 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 107 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
87 if (rc != 0) 108 if (rc != 0)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index e340ff92f6b6..94e69000f982 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,8 +36,9 @@
 #include "mmu.h"
 
 static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, resched_irq) = -1;
+static DEFINE_PER_CPU(int, callfunc_irq) = -1;
+static DEFINE_PER_CPU(int, debug_irq) = -1;
 
 /*
  * Structure and data for smp_call_function(). This is designed to minimise
@@ -72,6 +73,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 	int cpu = smp_processor_id();
 
 	cpu_init();
+	xen_enable_sysenter();
 
 	preempt_disable();
 	per_cpu(cpu_state, cpu) = CPU_ONLINE;
@@ -88,9 +90,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 static int xen_smp_intr_init(unsigned int cpu)
 {
 	int rc;
-	const char *resched_name, *callfunc_name;
-
-	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
+	const char *resched_name, *callfunc_name, *debug_name;
 
 	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -114,6 +114,14 @@ static int xen_smp_intr_init(unsigned int cpu)
 		goto fail;
 	per_cpu(callfunc_irq, cpu) = rc;
 
+	debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+	rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
+				     IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
+				     debug_name, NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(debug_irq, cpu) = rc;
+
 	return 0;
 
  fail:
@@ -121,6 +129,8 @@ static int xen_smp_intr_init(unsigned int cpu)
 		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
 	if (per_cpu(callfunc_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
+	if (per_cpu(debug_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
 	return rc;
 }
 
@@ -183,7 +193,7 @@ void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 
 	/* Restrict the possible_map according to max_cpus. */
 	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
-		for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
+		for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
 			continue;
 		cpu_clear(cpu, cpu_possible_map);
 	}
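
A note on the error handling above: each per-CPU IRQ slot now starts out at -1, so the shared fail: path can distinguish a binding that succeeded (and must be unwound) from one that never happened. A minimal sketch of that pattern, using only the helpers visible in the hunks; the wrapper names bind_debug_virq() and unbind_debug_virq() are hypothetical and exist purely for illustration:

	static DEFINE_PER_CPU(int, debug_irq) = -1;	/* -1 == "not bound" */

	/* Hypothetical wrapper: bind VIRQ_DEBUG for one CPU. */
	static int bind_debug_virq(unsigned int cpu)
	{
		char *name = kasprintf(GFP_KERNEL, "debug%d", cpu);
		int rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
						 IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
						 name, NULL);

		if (rc < 0)
			return rc;	/* debug_irq stays -1: nothing to unwind */
		per_cpu(debug_irq, cpu) = rc;
		return 0;
	}

	/* Hypothetical wrapper: safe to call whether or not the bind succeeded. */
	static void unbind_debug_virq(unsigned int cpu)
	{
		if (per_cpu(debug_irq, cpu) >= 0)
			unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
	}

With the sentinel supplied by the static initializers, the old per-call assignment per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; in xen_smp_intr_init() becomes redundant, which is why the hunk removes it.
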
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index fe161ed4b01e..2497a30f41de 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -108,6 +108,20 @@ ENDPATCH(xen_restore_fl_direct)
 	RELOC(xen_restore_fl_direct, 2b+1)
 
 /*
+	We can't use sysexit directly, because we're not running in ring0.
+	But we can easily fake it up using iret.  Assuming xen_sysexit
+	is jumped to with a standard stack frame, we can just strip it
+	back to a standard iret frame and use iret.
+ */
+ENTRY(xen_sysexit)
+	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
+	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
+	lea PT_EIP(%esp), %esp
+
+	jmp xen_iret
+ENDPROC(xen_sysexit)
+
+/*
 	This is run where a normal iret would be run, with the same stack setup:
 	      8: eflags
 	      4: cs
@@ -184,8 +198,12 @@ iret_restore_end:
 	   region is OK. */
 	je xen_hypervisor_callback
 
-	iret
+1:	iret
 xen_iret_end_crit:
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
 
 hyper_iret:
 	/* put this out of line since its very rarely used */
@@ -219,9 +237,7 @@ hyper_iret:
 	 ds		}  SAVE_ALL state
 	 eax		}
 	  :		:
-	 ebx		}
-	----------------
-	return addr	 <- esp
+	 ebx		}<- esp
 	----------------
 
 	In order to deliver the nested exception properly, we need to shift
@@ -236,10 +252,8 @@ hyper_iret:
 	 it's usermode state which we eventually need to restore.
  */
 ENTRY(xen_iret_crit_fixup)
-	/* offsets +4 for return address */
-
 	/*
-	   Paranoia: Make sure we're really coming from userspace.
+	   Paranoia: Make sure we're really coming from kernel space.
 	   One could imagine a case where userspace jumps into the
 	   critical range address, but just before the CPU delivers a GP,
 	   it decides to deliver an interrupt instead.  Unlikely?
@@ -248,32 +262,32 @@ ENTRY(xen_iret_crit_fixup)
 	   jump instruction itself, not the destination, but some virtual
 	   environments get this wrong.
 	 */
-	movl PT_CS+4(%esp), %ecx
+	movl PT_CS(%esp), %ecx
 	andl $SEGMENT_RPL_MASK, %ecx
 	cmpl $USER_RPL, %ecx
 	je 2f
 
-	lea PT_ORIG_EAX+4(%esp), %esi
-	lea PT_EFLAGS+4(%esp), %edi
+	lea PT_ORIG_EAX(%esp), %esi
+	lea PT_EFLAGS(%esp), %edi
 
 	/* If eip is before iret_restore_end then stack
 	   hasn't been restored yet. */
 	cmp $iret_restore_end, %eax
 	jae 1f
 
-	movl 0+4(%edi),%eax		/* copy EAX */
-	movl %eax, PT_EAX+4(%esp)
+	movl 0+4(%edi),%eax		/* copy EAX (just above top of frame) */
+	movl %eax, PT_EAX(%esp)
 
 	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */
 
 	/* set up the copy */
 1:	std
-	mov $(PT_EIP+4) / 4, %ecx	/* copy ret+saved regs up to orig_eax */
+	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax */
 	rep movsl
 	cld
 
 	lea 4(%edi),%esp		/* point esp to new frame */
-2:	ret
+2:	jmp xen_do_upcall
 
 
 /*
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 956a491ea998..f1063ae08037 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -2,6 +2,8 @@
 #define XEN_OPS_H
 
 #include <linux/init.h>
+#include <linux/irqreturn.h>
+#include <xen/xen-ops.h>
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -9,7 +11,6 @@ extern const char xen_failsafe_callback[];
 
 void xen_copy_trap_info(struct trap_info *traps);
 
-DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DECLARE_PER_CPU(unsigned long, xen_cr3);
 DECLARE_PER_CPU(unsigned long, xen_current_cr3);
 
@@ -19,6 +20,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
+void xen_enable_sysenter(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
@@ -28,6 +30,8 @@ unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
 unsigned long long xen_sched_clock(void);
 
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
+
 bool xen_vcpu_stolen(int vcpu);
 
 void xen_mark_init_mm_pinned(void);
@@ -64,4 +68,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
 void xen_iret(void);
+void xen_sysexit(void);
+
 #endif /* XEN_OPS_H */