Diffstat (limited to 'arch')
-rw-r--r-- arch/Kconfig | 13
-rw-r--r-- arch/alpha/kernel/osf_sys.c | 3
-rw-r--r-- arch/arm/include/asm/hardware/iop3xx-adma.h | 12
-rw-r--r-- arch/arm/mach-u300/include/mach/coh901318.h | 2
-rw-r--r-- arch/cris/Kconfig | 6
-rw-r--r-- arch/cris/arch-v32/drivers/cryptocop.c | 2
-rw-r--r-- arch/cris/arch-v32/mach-fs/arbiter.c | 2
-rw-r--r-- arch/cris/kernel/time.c | 68
-rw-r--r-- arch/frv/include/asm/pci.h | 37
-rw-r--r-- arch/ia64/include/asm/elf.h | 48
-rw-r--r-- arch/ia64/kernel/Makefile | 2
-rw-r--r-- arch/ia64/kernel/elfcore.c | 80
-rw-r--r-- arch/ia64/kernel/perfmon.c | 1
-rw-r--r-- arch/ia64/kvm/Kconfig | 1
-rw-r--r-- arch/ia64/kvm/kvm-ia64.c | 50
-rw-r--r-- arch/ia64/kvm/kvm_fw.c | 28
-rw-r--r-- arch/ia64/kvm/mmio.c | 4
-rw-r--r-- arch/ia64/kvm/vcpu.c | 4
-rw-r--r-- arch/ia64/mm/init.c | 2
-rw-r--r-- arch/parisc/Kconfig.debug | 14
-rw-r--r-- arch/parisc/include/asm/param.h | 23
-rw-r--r-- arch/parisc/include/asm/system.h | 2
-rw-r--r-- arch/parisc/include/asm/uaccess.h | 27
-rw-r--r-- arch/parisc/include/asm/unistd.h | 4
-rw-r--r-- arch/parisc/kernel/cache.c | 4
-rw-r--r-- arch/parisc/kernel/syscall_table.S | 2
-rw-r--r-- arch/parisc/kernel/time.c | 29
-rw-r--r-- arch/parisc/kernel/unaligned.c | 14
-rw-r--r-- arch/parisc/lib/memcpy.c | 3
-rw-r--r-- arch/powerpc/include/asm/kvm_asm.h | 6
-rw-r--r-- arch/powerpc/include/asm/kvm_book3s.h | 11
-rw-r--r-- arch/powerpc/include/asm/kvm_book3s_64_asm.h | 18
-rw-r--r-- arch/powerpc/include/asm/kvm_e500.h | 3
-rw-r--r-- arch/powerpc/include/asm/kvm_host.h | 23
-rw-r--r-- arch/powerpc/include/asm/kvm_ppc.h | 83
-rw-r--r-- arch/powerpc/include/asm/paca.h | 5
-rw-r--r-- arch/powerpc/include/asm/reg.h | 4
-rw-r--r-- arch/powerpc/kernel/asm-offsets.c | 33
-rw-r--r-- arch/powerpc/kernel/ppc_ksyms.c | 1
-rw-r--r-- arch/powerpc/kvm/44x_emulate.c | 25
-rw-r--r-- arch/powerpc/kvm/44x_tlb.c | 20
-rw-r--r-- arch/powerpc/kvm/Kconfig | 1
-rw-r--r-- arch/powerpc/kvm/book3s.c | 309
-rw-r--r-- arch/powerpc/kvm/book3s_64_emulate.c | 77
-rw-r--r-- arch/powerpc/kvm/book3s_64_exports.c | 8
-rw-r--r-- arch/powerpc/kvm/book3s_64_interrupts.S | 336
-rw-r--r-- arch/powerpc/kvm/book3s_64_mmu.c | 10
-rw-r--r-- arch/powerpc/kvm/book3s_64_rmhandlers.S | 119
-rw-r--r-- arch/powerpc/kvm/book3s_64_slb.S | 160
-rw-r--r-- arch/powerpc/kvm/booke.c | 87
-rw-r--r-- arch/powerpc/kvm/booke_emulate.c | 107
-rw-r--r-- arch/powerpc/kvm/e500.c | 6
-rw-r--r-- arch/powerpc/kvm/e500_emulate.c | 93
-rw-r--r-- arch/powerpc/kvm/e500_tlb.c | 10
-rw-r--r-- arch/powerpc/kvm/emulate.c | 118
-rw-r--r-- arch/powerpc/kvm/powerpc.c | 40
-rw-r--r-- arch/powerpc/mm/numa.c | 6
-rw-r--r-- arch/s390/hypfs/inode.c | 42
-rw-r--r-- arch/s390/kvm/kvm-s390.c | 26
-rw-r--r-- arch/s390/kvm/kvm-s390.h | 10
-rw-r--r-- arch/sh/boards/mach-migor/setup.c | 16
-rw-r--r-- arch/sh/boot/compressed/cache.c | 2
-rw-r--r-- arch/sh/include/asm/cacheflush.h | 4
-rw-r--r-- arch/sh/include/asm/dma-register.h | 51
-rw-r--r-- arch/sh/include/asm/dma-sh.h | 88
-rw-r--r-- arch/sh/include/asm/dmaengine.h | 93
-rw-r--r-- arch/sh/include/asm/io.h | 23
-rw-r--r-- arch/sh/include/asm/mmu.h | 31
-rw-r--r-- arch/sh/include/asm/siu.h | 2
-rw-r--r-- arch/sh/include/asm/topology.h | 2
-rw-r--r-- arch/sh/include/cpu-sh3/cpu/dma-register.h | 41
-rw-r--r-- arch/sh/include/cpu-sh3/cpu/dma.h | 27
-rw-r--r-- arch/sh/include/cpu-sh4/cpu/dma-register.h | 112
-rw-r--r-- arch/sh/include/cpu-sh4/cpu/dma-sh4a.h | 62
-rw-r--r-- arch/sh/include/cpu-sh4/cpu/dma.h | 36
-rw-r--r-- arch/sh/include/mach-migor/mach/migor.h | 1
-rw-r--r-- arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 190
-rw-r--r-- arch/sh/kernel/cpu/sh4a/setup-sh7724.c | 186
-rw-r--r-- arch/sh/kernel/cpu/sh4a/setup-sh7780.c | 134
-rw-r--r-- arch/sh/kernel/cpu/sh4a/setup-sh7785.c | 134
-rw-r--r-- arch/sh/kernel/hw_breakpoint.c | 30
-rw-r--r-- arch/sh/kernel/setup.c | 3
-rw-r--r-- arch/sh/kernel/time.c | 6
-rw-r--r-- arch/sh/lib/libgcc.h | 3
-rw-r--r-- arch/sh/mm/ioremap.c | 70
-rw-r--r-- arch/sh/mm/ioremap_fixed.c | 11
-rw-r--r-- arch/sh/mm/numa.c | 3
-rw-r--r-- arch/sh/mm/pmb.c | 412
-rw-r--r-- arch/sparc/configs/sparc32_defconfig | 56
-rw-r--r-- arch/sparc/configs/sparc64_defconfig | 34
-rw-r--r-- arch/sparc/include/asm/io_32.h | 4
-rw-r--r-- arch/sparc/include/asm/io_64.h | 4
-rw-r--r-- arch/sparc/include/asm/perfctr.h | 4
-rw-r--r-- arch/sparc/include/asm/system_64.h | 15
-rw-r--r-- arch/sparc/include/asm/thread_info_64.h | 25
-rw-r--r-- arch/sparc/kernel/entry.h | 1
-rw-r--r-- arch/sparc/kernel/process_64.c | 23
-rw-r--r-- arch/sparc/kernel/rtrap_64.S | 54
-rw-r--r-- arch/sparc/kernel/sys32.S | 1
-rw-r--r-- arch/sparc/kernel/sys_sparc_64.c | 104
-rw-r--r-- arch/sparc/kernel/syscalls.S | 23
-rw-r--r-- arch/sparc/kernel/systbls.h | 2
-rw-r--r-- arch/sparc/kernel/systbls_64.S | 4
-rw-r--r-- arch/sparc/kernel/traps_64.c | 9
-rw-r--r-- arch/sparc/prom/p1275.c | 12
-rw-r--r-- arch/um/.gitignore | 3
-rw-r--r-- arch/um/drivers/line.c | 4
-rw-r--r-- arch/um/drivers/mconsole_kern.c | 2
-rw-r--r-- arch/um/sys-i386/Makefile | 2
-rw-r--r-- arch/um/sys-i386/asm/elf.h | 43
-rw-r--r-- arch/um/sys-i386/elfcore.c | 83
-rw-r--r-- arch/x86/Kconfig | 16
-rw-r--r-- arch/x86/include/asm/Kbuild | 1
-rw-r--r-- arch/x86/include/asm/alternative.h | 4
-rw-r--r-- arch/x86/include/asm/apb_timer.h | 70
-rw-r--r-- arch/x86/include/asm/hw_irq.h | 7
-rw-r--r-- arch/x86/include/asm/hyperv.h | 186
-rw-r--r-- arch/x86/include/asm/i8259.h | 19
-rw-r--r-- arch/x86/include/asm/io_apic.h | 7
-rw-r--r-- arch/x86/include/asm/irq.h | 1
-rw-r--r-- arch/x86/include/asm/kprobes.h | 31
-rw-r--r-- arch/x86/include/asm/kvm_emulate.h | 17
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 60
-rw-r--r-- arch/x86/include/asm/kvm_para.h | 1
-rw-r--r-- arch/x86/include/asm/mrst.h | 19
-rw-r--r-- arch/x86/include/asm/numaq.h | 1
-rw-r--r-- arch/x86/include/asm/olpc.h | 20
-rw-r--r-- arch/x86/include/asm/pci.h | 9
-rw-r--r-- arch/x86/include/asm/pci_x86.h | 22
-rw-r--r-- arch/x86/include/asm/setup.h | 2
-rw-r--r-- arch/x86/include/asm/svm.h | 2
-rw-r--r-- arch/x86/include/asm/visws/cobalt.h | 2
-rw-r--r-- arch/x86/include/asm/vmx.h | 5
-rw-r--r-- arch/x86/include/asm/x86_init.h | 15
-rw-r--r-- arch/x86/kernel/Makefile | 1
-rw-r--r-- arch/x86/kernel/acpi/boot.c | 4
-rw-r--r-- arch/x86/kernel/alternative.c | 60
-rw-r--r-- arch/x86/kernel/apb_timer.c | 784
-rw-r--r-- arch/x86/kernel/apic/apic.c | 8
-rw-r--r-- arch/x86/kernel/apic/io_apic.c | 86
-rw-r--r-- arch/x86/kernel/apic/nmi.c | 2
-rw-r--r-- arch/x86/kernel/apic/numaq_32.c | 1
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/Kconfig | 14
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/Makefile | 1
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | 620
-rw-r--r-- arch/x86/kernel/cpu/mtrr/main.c | 1
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c | 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c | 2
-rw-r--r-- arch/x86/kernel/i8259.c | 64
-rw-r--r-- arch/x86/kernel/irqinit.c | 9
-rw-r--r-- arch/x86/kernel/kprobes.c | 609
-rw-r--r-- arch/x86/kernel/mrst.c | 216
-rw-r--r-- arch/x86/kernel/olpc.c | 10
-rw-r--r-- arch/x86/kernel/smpboot.c | 9
-rw-r--r-- arch/x86/kernel/visws_quirks.c | 21
-rw-r--r-- arch/x86/kernel/vsyscall_64.c | 3
-rw-r--r-- arch/x86/kernel/x86_init.c | 8
-rw-r--r-- arch/x86/kvm/Kconfig | 1
-rw-r--r-- arch/x86/kvm/emulate.c | 440
-rw-r--r-- arch/x86/kvm/i8254.c | 23
-rw-r--r-- arch/x86/kvm/i8254.h | 2
-rw-r--r-- arch/x86/kvm/i8259.c | 46
-rw-r--r-- arch/x86/kvm/irq.h | 3
-rw-r--r-- arch/x86/kvm/kvm_cache_regs.h | 31
-rw-r--r-- arch/x86/kvm/lapic.c | 31
-rw-r--r-- arch/x86/kvm/lapic.h | 8
-rw-r--r-- arch/x86/kvm/mmu.c | 137
-rw-r--r-- arch/x86/kvm/mmu.h | 35
-rw-r--r-- arch/x86/kvm/paging_tmpl.h | 13
-rw-r--r-- arch/x86/kvm/svm.c | 237
-rw-r--r-- arch/x86/kvm/trace.h | 59
-rw-r--r-- arch/x86/kvm/vmx.c | 396
-rw-r--r-- arch/x86/kvm/x86.c | 1098
-rw-r--r-- arch/x86/kvm/x86.h | 30
-rw-r--r-- arch/x86/pci/Makefile | 2
-rw-r--r-- arch/x86/pci/acpi.c | 7
-rw-r--r-- arch/x86/pci/common.c | 6
-rw-r--r-- arch/x86/pci/init.c | 8
-rw-r--r-- arch/x86/pci/irq.c | 16
-rw-r--r-- arch/x86/pci/legacy.c | 24
-rw-r--r-- arch/x86/pci/mrst.c | 262
-rw-r--r-- arch/x86/pci/numaq_32.c | 6
-rw-r--r-- arch/x86/pci/olpc.c | 3
-rw-r--r-- arch/x86/pci/visws.c | 6
184 files changed, 7783 insertions, 2768 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 215e46073c4..e5eb1337a53 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -41,6 +41,17 @@ config KPROBES
41 for kernel debugging, non-intrusive instrumentation and testing. 41 for kernel debugging, non-intrusive instrumentation and testing.
42 If in doubt, say "N". 42 If in doubt, say "N".
43 43
44config OPTPROBES
45 bool "Kprobes jump optimization support (EXPERIMENTAL)"
46 default y
47 depends on KPROBES
48 depends on !PREEMPT
49 depends on HAVE_OPTPROBES
50 select KALLSYMS_ALL
51 help
52 This option will allow kprobes to optimize breakpoint to
53 a jump for reducing its overhead.
54
44config HAVE_EFFICIENT_UNALIGNED_ACCESS 55config HAVE_EFFICIENT_UNALIGNED_ACCESS
45 bool 56 bool
46 help 57 help
@@ -83,6 +94,8 @@ config HAVE_KPROBES
83config HAVE_KRETPROBES 94config HAVE_KRETPROBES
84 bool 95 bool
85 96
97config HAVE_OPTPROBES
98 bool
86# 99#
87# An arch should select this if it provides all these things: 100# An arch should select this if it provides all these things:
88# 101#
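
The OPTPROBES help text above is about speeding up the existing kprobes mechanism; the probe API itself does not change. As a hedged illustration only (the probed symbol and message are made up and are not part of this commit), a minimal kprobe module looks roughly like this:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>

/* Illustrative only: probe do_fork and log each hit.  With an arch that
 * selects HAVE_OPTPROBES and CONFIG_OPTPROBES=y, the breakpoint at the
 * probed address may be replaced by a jump where the instruction layout
 * allows it; the module code itself is unchanged. */
static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "kprobe hit at %p\n", p->addr);
	return 0;
}

static struct kprobe example_kp = {
	.symbol_name	= "do_fork",
	.pre_handler	= example_pre,
};

static int __init example_init(void)
{
	return register_kprobe(&example_kp);
}

static void __exit example_exit(void)
{
	unregister_kprobe(&example_kp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
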
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 62619f25132..53c213f70fc 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -361,7 +361,7 @@ osf_procfs_mount(char *dirname, struct procfs_args __user *args, int flags)
361SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path, 361SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path,
362 int, flag, void __user *, data) 362 int, flag, void __user *, data)
363{ 363{
364 int retval = -EINVAL; 364 int retval;
365 char *name; 365 char *name;
366 366
367 name = getname(path); 367 name = getname(path);
@@ -379,6 +379,7 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path,
379 retval = osf_procfs_mount(name, data, flag); 379 retval = osf_procfs_mount(name, data, flag);
380 break; 380 break;
381 default: 381 default:
382 retval = -EINVAL;
382 printk("osf_mount(%ld, %x)\n", typenr, flag); 383 printk("osf_mount(%ld, %x)\n", typenr, flag);
383 } 384 }
384 putname(name); 385 putname(name);
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 1a8c7279a28..9b28f1243bd 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -366,8 +366,7 @@ static inline int iop_chan_xor_slot_count(size_t len, int src_cnt,
366 slot_cnt += *slots_per_op; 366 slot_cnt += *slots_per_op;
367 } 367 }
368 368
369 if (len) 369 slot_cnt += *slots_per_op;
370 slot_cnt += *slots_per_op;
371 370
372 return slot_cnt; 371 return slot_cnt;
373} 372}
@@ -389,8 +388,7 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
389 slot_cnt += *slots_per_op; 388 slot_cnt += *slots_per_op;
390 } 389 }
391 390
392 if (len) 391 slot_cnt += *slots_per_op;
393 slot_cnt += *slots_per_op;
394 392
395 return slot_cnt; 393 return slot_cnt;
396} 394}
@@ -737,10 +735,8 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
737 i += slots_per_op; 735 i += slots_per_op;
738 } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT); 736 } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
739 737
740 if (len) { 738 iter = iop_hw_desc_slot_idx(hw_desc, i);
741 iter = iop_hw_desc_slot_idx(hw_desc, i); 739 iter->byte_count = len;
742 iter->byte_count = len;
743 }
744 } 740 }
745} 741}
746 742
diff --git a/arch/arm/mach-u300/include/mach/coh901318.h b/arch/arm/mach-u300/include/mach/coh901318.h
index f4cfee9c7d2..b8155b4e5ff 100644
--- a/arch/arm/mach-u300/include/mach/coh901318.h
+++ b/arch/arm/mach-u300/include/mach/coh901318.h
@@ -53,7 +53,7 @@ struct coh901318_params {
53 * struct coh_dma_channel - dma channel base 53 * struct coh_dma_channel - dma channel base
54 * @name: ascii name of dma channel 54 * @name: ascii name of dma channel
55 * @number: channel id number 55 * @number: channel id number
56 * @desc_nbr_max: number of preallocated descriptortors 56 * @desc_nbr_max: number of preallocated descriptors
57 * @priority_high: prio of channel, 0 low otherwise high. 57 * @priority_high: prio of channel, 0 low otherwise high.
58 * @param: configuration parameters 58 * @param: configuration parameters
59 * @dev_addr: physical address of periphal connected to channel 59 * @dev_addr: physical address of periphal connected to channel
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 7adac388a77..059eac6abda 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -20,6 +20,12 @@ config RWSEM_GENERIC_SPINLOCK
20config RWSEM_XCHGADD_ALGORITHM 20config RWSEM_XCHGADD_ALGORITHM
21 bool 21 bool
22 22
23config GENERIC_TIME
24 def_bool y
25
26config ARCH_USES_GETTIMEOFFSET
27 def_bool y
28
23config GENERIC_IOMAP 29config GENERIC_IOMAP
24 bool 30 bool
25 default y 31 default y
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c
index fd529a0ec75..b70fb34939d 100644
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -628,9 +628,9 @@ static int create_output_descriptors(struct cryptocop_operation *operation, int
628 cdesc->dma_descr->buf = (char*)virt_to_phys(operation->tfrm_op.indata[*iniov_ix].iov_base + *iniov_offset); 628 cdesc->dma_descr->buf = (char*)virt_to_phys(operation->tfrm_op.indata[*iniov_ix].iov_base + *iniov_offset);
629 cdesc->dma_descr->after = cdesc->dma_descr->buf + dlength; 629 cdesc->dma_descr->after = cdesc->dma_descr->buf + dlength;
630 630
631 assert(desc_len >= dlength);
631 desc_len -= dlength; 632 desc_len -= dlength;
632 *iniov_offset += dlength; 633 *iniov_offset += dlength;
633 assert(desc_len >= 0);
634 if (*iniov_offset >= operation->tfrm_op.indata[*iniov_ix].iov_len) { 634 if (*iniov_offset >= operation->tfrm_op.indata[*iniov_ix].iov_len) {
635 *iniov_offset = 0; 635 *iniov_offset = 0;
636 ++(*iniov_ix); 636 ++(*iniov_ix);
diff --git a/arch/cris/arch-v32/mach-fs/arbiter.c b/arch/cris/arch-v32/mach-fs/arbiter.c
index 84d31bd7b69..82ef293c4c8 100644
--- a/arch/cris/arch-v32/mach-fs/arbiter.c
+++ b/arch/cris/arch-v32/mach-fs/arbiter.c
@@ -332,7 +332,7 @@ int crisv32_arbiter_unwatch(int id)
332 if (id == 0) 332 if (id == 0)
333 intr_mask.bp0 = regk_marb_no; 333 intr_mask.bp0 = regk_marb_no;
334 else if (id == 1) 334 else if (id == 1)
335 intr_mask.bp2 = regk_marb_no; 335 intr_mask.bp1 = regk_marb_no;
336 else if (id == 2) 336 else if (id == 2)
337 intr_mask.bp2 = regk_marb_no; 337 intr_mask.bp2 = regk_marb_no;
338 else if (id == 3) 338 else if (id == 3)
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c
index 074fe7dea96..a05dd31f3ef 100644
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -42,75 +42,11 @@ unsigned long loops_per_usec;
42extern unsigned long do_slow_gettimeoffset(void); 42extern unsigned long do_slow_gettimeoffset(void);
43static unsigned long (*do_gettimeoffset)(void) = do_slow_gettimeoffset; 43static unsigned long (*do_gettimeoffset)(void) = do_slow_gettimeoffset;
44 44
45/* 45u32 arch_gettimeoffset(void)
46 * This version of gettimeofday has near microsecond resolution.
47 *
48 * Note: Division is quite slow on CRIS and do_gettimeofday is called
49 * rather often. Maybe we should do some kind of approximation here
50 * (a naive approximation would be to divide by 1024).
51 */
52void do_gettimeofday(struct timeval *tv)
53{
54 unsigned long flags;
55 signed long usec, sec;
56 local_irq_save(flags);
57 usec = do_gettimeoffset();
58
59 /*
60 * If time_adjust is negative then NTP is slowing the clock
61 * so make sure not to go into next possible interval.
62 * Better to lose some accuracy than have time go backwards..
63 */
64 if (unlikely(time_adjust < 0) && usec > tickadj)
65 usec = tickadj;
66
67 sec = xtime.tv_sec;
68 usec += xtime.tv_nsec / 1000;
69 local_irq_restore(flags);
70
71 while (usec >= 1000000) {
72 usec -= 1000000;
73 sec++;
74 }
75
76 tv->tv_sec = sec;
77 tv->tv_usec = usec;
78}
79
80EXPORT_SYMBOL(do_gettimeofday);
81
82int do_settimeofday(struct timespec *tv)
83{ 46{
84 time_t wtm_sec, sec = tv->tv_sec; 47 return do_gettimeoffset() * 1000;
85 long wtm_nsec, nsec = tv->tv_nsec;
86
87 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
88 return -EINVAL;
89
90 write_seqlock_irq(&xtime_lock);
91 /*
92 * This is revolting. We need to set "xtime" correctly. However, the
93 * value in this location is the value at the most recent update of
94 * wall time. Discover what correction gettimeofday() would have
95 * made, and then undo it!
96 */
97 nsec -= do_gettimeoffset() * NSEC_PER_USEC;
98
99 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
100 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
101
102 set_normalized_timespec(&xtime, sec, nsec);
103 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
104
105 ntp_clear();
106 write_sequnlock_irq(&xtime_lock);
107 clock_was_set();
108 return 0;
109} 48}
110 49
111EXPORT_SYMBOL(do_settimeofday);
112
113
114/* 50/*
115 * BUG: This routine does not handle hour overflow properly; it just 51 * BUG: This routine does not handle hour overflow properly; it just
116 * sets the minutes. Usually you'll only notice that after reboot! 52 * sets the minutes. Usually you'll only notice that after reboot!
diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index 492b5c4dfed..8c7260a3cd4 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -68,41 +68,4 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
68#define PCIBIOS_MIN_IO 0x100 68#define PCIBIOS_MIN_IO 0x100
69#define PCIBIOS_MIN_MEM 0x00010000 69#define PCIBIOS_MIN_MEM 0x00010000
70 70
71/* Make physical memory consistent for a single
72 * streaming mode DMA translation after a transfer.
73 *
74 * If you perform a pci_map_single() but wish to interrogate the
75 * buffer using the cpu, yet do not wish to teardown the PCI dma
76 * mapping, you must call this function before doing so. At the
77 * next point you give the PCI dma address back to the card, the
78 * device again owns the buffer.
79 */
80static inline void pci_dma_sync_single(struct pci_dev *hwdev,
81 dma_addr_t dma_handle,
82 size_t size, int direction)
83{
84 BUG_ON(direction == PCI_DMA_NONE);
85
86 frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle),
87 (unsigned long)bus_to_virt(dma_handle) + size);
88}
89
90/* Make physical memory consistent for a set of streaming
91 * mode DMA translations after a transfer.
92 *
93 * The same as pci_dma_sync_single but for a scatter-gather list,
94 * same rules and usage.
95 */
96static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
97 struct scatterlist *sg,
98 int nelems, int direction)
99{
100 int i;
101 BUG_ON(direction == PCI_DMA_NONE);
102
103 for (i = 0; i < nelems; i++)
104 frv_cache_wback_inv(sg_dma_address(&sg[i]),
105 sg_dma_address(&sg[i])+sg_dma_len(&sg[i]));
106}
107
108#endif /* _ASM_FRV_PCI_H */ 71#endif /* _ASM_FRV_PCI_H */
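
The comments removed above describe the buffer-ownership rule for streaming DMA: after pci_map_single() the device owns the buffer, and the CPU must sync before looking at it and sync again before handing it back. A hedged sketch of that protocol using the generic PCI DMA helpers, which is what makes the frv-private copies redundant; the function and buffer here are illustrative, not from this commit:

#include <linux/pci.h>
#include <linux/errno.h>

/* Sketch of the ownership hand-off the removed comment describes. */
static int example_rx(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	handle = pci_map_single(pdev, buf, len, PCI_DMA_FROMDEVICE);
	if (pci_dma_mapping_error(pdev, handle))
		return -ENOMEM;

	/* ... device DMAs into buf, driver waits for completion ... */

	/* CPU wants to inspect the data: take ownership back. */
	pci_dma_sync_single_for_cpu(pdev, handle, len, PCI_DMA_FROMDEVICE);
	/* ... read buf ... */
	/* Hand the buffer back before letting the device reuse the mapping. */
	pci_dma_sync_single_for_device(pdev, handle, len, PCI_DMA_FROMDEVICE);

	pci_unmap_single(pdev, handle, len, PCI_DMA_FROMDEVICE);
	return 0;
}
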
diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h
index 4c41656ede8..b5298eb09ad 100644
--- a/arch/ia64/include/asm/elf.h
+++ b/arch/ia64/include/asm/elf.h
@@ -219,54 +219,6 @@ do { \
219 NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR); \ 219 NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR); \
220} while (0) 220} while (0)
221 221
222
223/*
224 * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
225 * extra segments containing the gate DSO contents. Dumping its
226 * contents makes post-mortem fully interpretable later without matching up
227 * the same kernel and hardware config to see what PC values meant.
228 * Dumping its extra ELF program headers includes all the other information
229 * a debugger needs to easily find how the gate DSO was being used.
230 */
231#define ELF_CORE_EXTRA_PHDRS (GATE_EHDR->e_phnum)
232#define ELF_CORE_WRITE_EXTRA_PHDRS \
233do { \
234 const struct elf_phdr *const gate_phdrs = \
235 (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); \
236 int i; \
237 Elf64_Off ofs = 0; \
238 for (i = 0; i < GATE_EHDR->e_phnum; ++i) { \
239 struct elf_phdr phdr = gate_phdrs[i]; \
240 if (phdr.p_type == PT_LOAD) { \
241 phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \
242 phdr.p_filesz = phdr.p_memsz; \
243 if (ofs == 0) { \
244 ofs = phdr.p_offset = offset; \
245 offset += phdr.p_filesz; \
246 } \
247 else \
248 phdr.p_offset = ofs; \
249 } \
250 else \
251 phdr.p_offset += ofs; \
252 phdr.p_paddr = 0; /* match other core phdrs */ \
253 DUMP_WRITE(&phdr, sizeof(phdr)); \
254 } \
255} while (0)
256#define ELF_CORE_WRITE_EXTRA_DATA \
257do { \
258 const struct elf_phdr *const gate_phdrs = \
259 (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); \
260 int i; \
261 for (i = 0; i < GATE_EHDR->e_phnum; ++i) { \
262 if (gate_phdrs[i].p_type == PT_LOAD) { \
263 DUMP_WRITE((void *) gate_phdrs[i].p_vaddr, \
264 PAGE_ALIGN(gate_phdrs[i].p_memsz)); \
265 break; \
266 } \
267 } \
268} while (0)
269
270/* 222/*
271 * format for entries in the Global Offset Table 223 * format for entries in the Global Offset Table
272 */ 224 */
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 4138282aefa..db10b1e378b 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -45,6 +45,8 @@ endif
45obj-$(CONFIG_DMAR) += pci-dma.o 45obj-$(CONFIG_DMAR) += pci-dma.o
46obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o 46obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
47 47
48obj-$(CONFIG_BINFMT_ELF) += elfcore.o
49
48# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. 50# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
49CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 51CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
50 52
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
new file mode 100644
index 00000000000..bac1639bc32
--- /dev/null
+++ b/arch/ia64/kernel/elfcore.c
@@ -0,0 +1,80 @@
1#include <linux/elf.h>
2#include <linux/coredump.h>
3#include <linux/fs.h>
4#include <linux/mm.h>
5
6#include <asm/elf.h>
7
8
9Elf64_Half elf_core_extra_phdrs(void)
10{
11 return GATE_EHDR->e_phnum;
12}
13
14int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
15 unsigned long limit)
16{
17 const struct elf_phdr *const gate_phdrs =
18 (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
19 int i;
20 Elf64_Off ofs = 0;
21
22 for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
23 struct elf_phdr phdr = gate_phdrs[i];
24
25 if (phdr.p_type == PT_LOAD) {
26 phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz);
27 phdr.p_filesz = phdr.p_memsz;
28 if (ofs == 0) {
29 ofs = phdr.p_offset = offset;
30 offset += phdr.p_filesz;
31 } else {
32 phdr.p_offset = ofs;
33 }
34 } else {
35 phdr.p_offset += ofs;
36 }
37 phdr.p_paddr = 0; /* match other core phdrs */
38 *size += sizeof(phdr);
39 if (*size > limit || !dump_write(file, &phdr, sizeof(phdr)))
40 return 0;
41 }
42 return 1;
43}
44
45int elf_core_write_extra_data(struct file *file, size_t *size,
46 unsigned long limit)
47{
48 const struct elf_phdr *const gate_phdrs =
49 (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
50 int i;
51
52 for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
53 if (gate_phdrs[i].p_type == PT_LOAD) {
54 void *addr = (void *)gate_phdrs[i].p_vaddr;
55 size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
56
57 *size += memsz;
58 if (*size > limit || !dump_write(file, addr, memsz))
59 return 0;
60 break;
61 }
62 }
63 return 1;
64}
65
66size_t elf_core_extra_data_size(void)
67{
68 const struct elf_phdr *const gate_phdrs =
69 (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
70 int i;
71 size_t size = 0;
72
73 for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
74 if (gate_phdrs[i].p_type == PT_LOAD) {
75 size += PAGE_ALIGN(gate_phdrs[i].p_memsz);
76 break;
77 }
78 }
79 return size;
80}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index b81e46b1629..703062c44fb 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2315,6 +2315,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
2315 DPRINT(("Cannot allocate vma\n")); 2315 DPRINT(("Cannot allocate vma\n"));
2316 goto error_kmem; 2316 goto error_kmem;
2317 } 2317 }
2318 INIT_LIST_HEAD(&vma->anon_vma_chain);
2318 2319
2319 /* 2320 /*
2320 * partially initialize the vma for the sampling buffer 2321 * partially initialize the vma for the sampling buffer
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 01c75797119..fa4d1e59deb 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
26 select ANON_INODES 26 select ANON_INODES
27 select HAVE_KVM_IRQCHIP 27 select HAVE_KVM_IRQCHIP
28 select KVM_APIC_ARCHITECTURE 28 select KVM_APIC_ARCHITECTURE
29 select KVM_MMIO
29 ---help--- 30 ---help---
30 Support hosting fully virtualized guest machines using hardware 31 Support hosting fully virtualized guest machines using hardware
31 virtualization extensions. You will need a fairly recent 32 virtualization extensions. You will need a fairly recent
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5fdeec5fddc..26e0e089bfe 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -241,10 +241,10 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
241 return 0; 241 return 0;
242mmio: 242mmio:
243 if (p->dir) 243 if (p->dir)
244 r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr, 244 r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
245 p->size, &p->data); 245 p->size, &p->data);
246 else 246 else
247 r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr, 247 r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
248 p->size, &p->data); 248 p->size, &p->data);
249 if (r) 249 if (r)
250 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); 250 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
@@ -636,12 +636,9 @@ static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
636static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 636static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
637{ 637{
638 union context *host_ctx, *guest_ctx; 638 union context *host_ctx, *guest_ctx;
639 int r; 639 int r, idx;
640 640
641 /* 641 idx = srcu_read_lock(&vcpu->kvm->srcu);
642 * down_read() may sleep and return with interrupts enabled
643 */
644 down_read(&vcpu->kvm->slots_lock);
645 642
646again: 643again:
647 if (signal_pending(current)) { 644 if (signal_pending(current)) {
@@ -663,7 +660,7 @@ again:
663 if (r < 0) 660 if (r < 0)
664 goto vcpu_run_fail; 661 goto vcpu_run_fail;
665 662
666 up_read(&vcpu->kvm->slots_lock); 663 srcu_read_unlock(&vcpu->kvm->srcu, idx);
667 kvm_guest_enter(); 664 kvm_guest_enter();
668 665
669 /* 666 /*
@@ -687,7 +684,7 @@ again:
687 kvm_guest_exit(); 684 kvm_guest_exit();
688 preempt_enable(); 685 preempt_enable();
689 686
690 down_read(&vcpu->kvm->slots_lock); 687 idx = srcu_read_lock(&vcpu->kvm->srcu);
691 688
692 r = kvm_handle_exit(kvm_run, vcpu); 689 r = kvm_handle_exit(kvm_run, vcpu);
693 690
@@ -697,10 +694,10 @@ again:
697 } 694 }
698 695
699out: 696out:
700 up_read(&vcpu->kvm->slots_lock); 697 srcu_read_unlock(&vcpu->kvm->srcu, idx);
701 if (r > 0) { 698 if (r > 0) {
702 kvm_resched(vcpu); 699 kvm_resched(vcpu);
703 down_read(&vcpu->kvm->slots_lock); 700 idx = srcu_read_lock(&vcpu->kvm->srcu);
704 goto again; 701 goto again;
705 } 702 }
706 703
@@ -971,7 +968,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
971 goto out; 968 goto out;
972 r = kvm_setup_default_irq_routing(kvm); 969 r = kvm_setup_default_irq_routing(kvm);
973 if (r) { 970 if (r) {
974 kfree(kvm->arch.vioapic); 971 kvm_ioapic_destroy(kvm);
975 goto out; 972 goto out;
976 } 973 }
977 break; 974 break;
@@ -1377,12 +1374,14 @@ static void free_kvm(struct kvm *kvm)
1377 1374
1378static void kvm_release_vm_pages(struct kvm *kvm) 1375static void kvm_release_vm_pages(struct kvm *kvm)
1379{ 1376{
1377 struct kvm_memslots *slots;
1380 struct kvm_memory_slot *memslot; 1378 struct kvm_memory_slot *memslot;
1381 int i, j; 1379 int i, j;
1382 unsigned long base_gfn; 1380 unsigned long base_gfn;
1383 1381
1384 for (i = 0; i < kvm->nmemslots; i++) { 1382 slots = rcu_dereference(kvm->memslots);
1385 memslot = &kvm->memslots[i]; 1383 for (i = 0; i < slots->nmemslots; i++) {
1384 memslot = &slots->memslots[i];
1386 base_gfn = memslot->base_gfn; 1385 base_gfn = memslot->base_gfn;
1387 1386
1388 for (j = 0; j < memslot->npages; j++) { 1387 for (j = 0; j < memslot->npages; j++) {
@@ -1405,6 +1404,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
1405 kfree(kvm->arch.vioapic); 1404 kfree(kvm->arch.vioapic);
1406 kvm_release_vm_pages(kvm); 1405 kvm_release_vm_pages(kvm);
1407 kvm_free_physmem(kvm); 1406 kvm_free_physmem(kvm);
1407 cleanup_srcu_struct(&kvm->srcu);
1408 free_kvm(kvm); 1408 free_kvm(kvm);
1409} 1409}
1410 1410
@@ -1576,15 +1576,15 @@ out:
1576 return r; 1576 return r;
1577} 1577}
1578 1578
1579int kvm_arch_set_memory_region(struct kvm *kvm, 1579int kvm_arch_prepare_memory_region(struct kvm *kvm,
1580 struct kvm_userspace_memory_region *mem, 1580 struct kvm_memory_slot *memslot,
1581 struct kvm_memory_slot old, 1581 struct kvm_memory_slot old,
1582 struct kvm_userspace_memory_region *mem,
1582 int user_alloc) 1583 int user_alloc)
1583{ 1584{
1584 unsigned long i; 1585 unsigned long i;
1585 unsigned long pfn; 1586 unsigned long pfn;
1586 int npages = mem->memory_size >> PAGE_SHIFT; 1587 int npages = memslot->npages;
1587 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
1588 unsigned long base_gfn = memslot->base_gfn; 1588 unsigned long base_gfn = memslot->base_gfn;
1589 1589
1590 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) 1590 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
@@ -1608,6 +1608,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
1608 return 0; 1608 return 0;
1609} 1609}
1610 1610
1611void kvm_arch_commit_memory_region(struct kvm *kvm,
1612 struct kvm_userspace_memory_region *mem,
1613 struct kvm_memory_slot old,
1614 int user_alloc)
1615{
1616 return;
1617}
1618
1611void kvm_arch_flush_shadow(struct kvm *kvm) 1619void kvm_arch_flush_shadow(struct kvm *kvm)
1612{ 1620{
1613 kvm_flush_remote_tlbs(kvm); 1621 kvm_flush_remote_tlbs(kvm);
@@ -1802,7 +1810,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1802 if (log->slot >= KVM_MEMORY_SLOTS) 1810 if (log->slot >= KVM_MEMORY_SLOTS)
1803 goto out; 1811 goto out;
1804 1812
1805 memslot = &kvm->memslots[log->slot]; 1813 memslot = &kvm->memslots->memslots[log->slot];
1806 r = -ENOENT; 1814 r = -ENOENT;
1807 if (!memslot->dirty_bitmap) 1815 if (!memslot->dirty_bitmap)
1808 goto out; 1816 goto out;
@@ -1827,6 +1835,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1827 struct kvm_memory_slot *memslot; 1835 struct kvm_memory_slot *memslot;
1828 int is_dirty = 0; 1836 int is_dirty = 0;
1829 1837
1838 mutex_lock(&kvm->slots_lock);
1830 spin_lock(&kvm->arch.dirty_log_lock); 1839 spin_lock(&kvm->arch.dirty_log_lock);
1831 1840
1832 r = kvm_ia64_sync_dirty_log(kvm, log); 1841 r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1840,12 +1849,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1840 /* If nothing is dirty, don't bother messing with page tables. */ 1849 /* If nothing is dirty, don't bother messing with page tables. */
1841 if (is_dirty) { 1850 if (is_dirty) {
1842 kvm_flush_remote_tlbs(kvm); 1851 kvm_flush_remote_tlbs(kvm);
1843 memslot = &kvm->memslots[log->slot]; 1852 memslot = &kvm->memslots->memslots[log->slot];
1844 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 1853 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
1845 memset(memslot->dirty_bitmap, 0, n); 1854 memset(memslot->dirty_bitmap, 0, n);
1846 } 1855 }
1847 r = 0; 1856 r = 0;
1848out: 1857out:
1858 mutex_unlock(&kvm->slots_lock);
1849 spin_unlock(&kvm->arch.dirty_log_lock); 1859 spin_unlock(&kvm->arch.dirty_log_lock);
1850 return r; 1860 return r;
1851} 1861}
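
The hunks above convert kvm-ia64.c from down_read(&kvm->slots_lock) to SRCU around memslot access. As a hedged sketch (a made-up helper, using the same fields the hunks touch), the read-side pattern looks like this:

#include <linux/kvm_host.h>
#include <linux/srcu.h>

/* Readers bracket memslot access with srcu_read_lock()/srcu_read_unlock()
 * and may sleep in between; writers publish a new kvm->memslots pointer and
 * then synchronize_srcu() before freeing the old one. */
static unsigned long example_count_pages(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	unsigned long pages = 0;
	int i, idx;

	idx = srcu_read_lock(&kvm->srcu);
	slots = rcu_dereference(kvm->memslots);
	for (i = 0; i < slots->nmemslots; i++)
		pages += slots->memslots[i].npages;
	srcu_read_unlock(&kvm->srcu, idx);

	return pages;
}
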
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index e4b82319881..cb548ee9fca 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -75,7 +75,7 @@ static void set_pal_result(struct kvm_vcpu *vcpu,
75 struct exit_ctl_data *p; 75 struct exit_ctl_data *p;
76 76
77 p = kvm_get_exit_data(vcpu); 77 p = kvm_get_exit_data(vcpu);
78 if (p && p->exit_reason == EXIT_REASON_PAL_CALL) { 78 if (p->exit_reason == EXIT_REASON_PAL_CALL) {
79 p->u.pal_data.ret = result; 79 p->u.pal_data.ret = result;
80 return ; 80 return ;
81 } 81 }
@@ -87,7 +87,7 @@ static void set_sal_result(struct kvm_vcpu *vcpu,
87 struct exit_ctl_data *p; 87 struct exit_ctl_data *p;
88 88
89 p = kvm_get_exit_data(vcpu); 89 p = kvm_get_exit_data(vcpu);
90 if (p && p->exit_reason == EXIT_REASON_SAL_CALL) { 90 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
91 p->u.sal_data.ret = result; 91 p->u.sal_data.ret = result;
92 return ; 92 return ;
93 } 93 }
@@ -322,7 +322,7 @@ static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
322 struct exit_ctl_data *p; 322 struct exit_ctl_data *p;
323 323
324 p = kvm_get_exit_data(vcpu); 324 p = kvm_get_exit_data(vcpu);
325 if (p && (p->exit_reason == EXIT_REASON_PAL_CALL)) 325 if (p->exit_reason == EXIT_REASON_PAL_CALL)
326 index = p->u.pal_data.gr28; 326 index = p->u.pal_data.gr28;
327 327
328 return index; 328 return index;
@@ -646,18 +646,16 @@ static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
646 646
647 p = kvm_get_exit_data(vcpu); 647 p = kvm_get_exit_data(vcpu);
648 648
649 if (p) { 649 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
650 if (p->exit_reason == EXIT_REASON_SAL_CALL) { 650 *in0 = p->u.sal_data.in0;
651 *in0 = p->u.sal_data.in0; 651 *in1 = p->u.sal_data.in1;
652 *in1 = p->u.sal_data.in1; 652 *in2 = p->u.sal_data.in2;
653 *in2 = p->u.sal_data.in2; 653 *in3 = p->u.sal_data.in3;
654 *in3 = p->u.sal_data.in3; 654 *in4 = p->u.sal_data.in4;
655 *in4 = p->u.sal_data.in4; 655 *in5 = p->u.sal_data.in5;
656 *in5 = p->u.sal_data.in5; 656 *in6 = p->u.sal_data.in6;
657 *in6 = p->u.sal_data.in6; 657 *in7 = p->u.sal_data.in7;
658 *in7 = p->u.sal_data.in7; 658 return ;
659 return ;
660 }
661 } 659 }
662 *in0 = 0; 660 *in0 = 0;
663} 661}
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 9bf55afd08d..fb8f9f59a1e 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -316,8 +316,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
316 return; 316 return;
317 } else { 317 } else {
318 inst_type = -1; 318 inst_type = -1;
319 panic_vm(vcpu, "Unsupported MMIO access instruction! \ 319 panic_vm(vcpu, "Unsupported MMIO access instruction! "
320 Bunld[0]=0x%lx, Bundle[1]=0x%lx\n", 320 "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
321 bundle.i64[0], bundle.i64[1]); 321 bundle.i64[0], bundle.i64[1]);
322 } 322 }
323 323
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index dce75b70cdd..958815c9787 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -1639,8 +1639,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
1639 * Otherwise panic 1639 * Otherwise panic
1640 */ 1640 */
1641 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) 1641 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
1642 panic_vm(vcpu, "Only support guests with vpsr.pk =0 \ 1642 panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
1643 & vpsr.is=0\n"); 1643 "& vpsr.is=0\n");
1644 1644
1645 /* 1645 /*
1646 * For those IA64_PSR bits: id/da/dd/ss/ed/ia 1646 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ca3335ea56c..ed41759efca 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -117,6 +117,7 @@ ia64_init_addr_space (void)
117 */ 117 */
118 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 118 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
119 if (vma) { 119 if (vma) {
120 INIT_LIST_HEAD(&vma->anon_vma_chain);
120 vma->vm_mm = current->mm; 121 vma->vm_mm = current->mm;
121 vma->vm_start = current->thread.rbs_bot & PAGE_MASK; 122 vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
122 vma->vm_end = vma->vm_start + PAGE_SIZE; 123 vma->vm_end = vma->vm_start + PAGE_SIZE;
@@ -135,6 +136,7 @@ ia64_init_addr_space (void)
135 if (!(current->personality & MMAP_PAGE_ZERO)) { 136 if (!(current->personality & MMAP_PAGE_ZERO)) {
136 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 137 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
137 if (vma) { 138 if (vma) {
139 INIT_LIST_HEAD(&vma->anon_vma_chain);
138 vma->vm_mm = current->mm; 140 vma->vm_mm = current->mm;
139 vma->vm_end = PAGE_SIZE; 141 vma->vm_end = PAGE_SIZE;
140 vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); 142 vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug
index bc989e522a0..7305ac8f7f5 100644
--- a/arch/parisc/Kconfig.debug
+++ b/arch/parisc/Kconfig.debug
@@ -12,4 +12,18 @@ config DEBUG_RODATA
12 portion of the kernel code won't be covered by a TLB anymore. 12 portion of the kernel code won't be covered by a TLB anymore.
13 If in doubt, say "N". 13 If in doubt, say "N".
14 14
15config DEBUG_STRICT_USER_COPY_CHECKS
16 bool "Strict copy size checks"
17 depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
18 ---help---
19 Enabling this option turns a certain set of sanity checks for user
20 copy operations into compile time failures.
21
22 The copy_from_user() etc checks are there to help test if there
23 are sufficient security checks on the length argument of
24 the copy operation, by having gcc prove that the argument is
25 within bounds.
26
27 If unsure, or if you run an older (pre 4.4) gcc, say N.
28
15endmenu 29endmenu
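
The help text says the check works by letting gcc prove that the copy length fits the destination object. A hedged sketch of the kind of caller this affects (hypothetical driver code, not from this commit):

#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/errno.h>

struct example_cfg {
	u32 flags;
	u8  name[16];
};

/* sizeof(*c) is a compile-time constant, so gcc can prove the length fits
 * the destination and copy_from_user() compiles cleanly.  If the constant
 * were larger than the object (say 2 * sizeof(*c)), the
 * __compiletime_error()/__compiletime_warning() hook this series adds to
 * arch/parisc/include/asm/uaccess.h would fire at build time instead of
 * leaving a runtime overflow. */
static long example_read_cfg(struct example_cfg *c, const void __user *arg)
{
	if (copy_from_user(c, arg, sizeof(*c)))
		return -EFAULT;
	return 0;
}
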
diff --git a/arch/parisc/include/asm/param.h b/arch/parisc/include/asm/param.h
index 32e03d87785..965d4542797 100644
--- a/arch/parisc/include/asm/param.h
+++ b/arch/parisc/include/asm/param.h
@@ -1,22 +1 @@
1#ifndef _ASMPARISC_PARAM_H #include <asm-generic/param.h>
2#define _ASMPARISC_PARAM_H
3
4#ifdef __KERNEL__
5#define HZ CONFIG_HZ
6#define USER_HZ 100 /* some user API use "ticks" */
7#define CLOCKS_PER_SEC (USER_HZ) /* like times() */
8#endif
9
10#ifndef HZ
11#define HZ 100
12#endif
13
14#define EXEC_PAGESIZE 4096
15
16#ifndef NOGROUP
17#define NOGROUP (-1)
18#endif
19
20#define MAXHOSTNAMELEN 64 /* max length of hostname */
21
22#endif
diff --git a/arch/parisc/include/asm/system.h b/arch/parisc/include/asm/system.h
index d91357bca5b..4653c77bf9d 100644
--- a/arch/parisc/include/asm/system.h
+++ b/arch/parisc/include/asm/system.h
@@ -160,7 +160,7 @@ static inline void set_eiem(unsigned long val)
160 ldcd). */ 160 ldcd). */
161 161
162#define __PA_LDCW_ALIGNMENT 4 162#define __PA_LDCW_ALIGNMENT 4
163#define __ldcw_align(a) ((volatile unsigned int *)a) 163#define __ldcw_align(a) (&(a)->slock)
164#define __LDCW "ldcw,co" 164#define __LDCW "ldcw,co"
165 165
166#endif /*!CONFIG_PA20*/ 166#endif /*!CONFIG_PA20*/
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 7cf799d70b4..ff4cf9dab8d 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -7,6 +7,7 @@
7#include <asm/page.h> 7#include <asm/page.h>
8#include <asm/system.h> 8#include <asm/system.h>
9#include <asm/cache.h> 9#include <asm/cache.h>
10#include <asm/errno.h>
10#include <asm-generic/uaccess-unaligned.h> 11#include <asm-generic/uaccess-unaligned.h>
11 12
12#define VERIFY_READ 0 13#define VERIFY_READ 0
@@ -234,13 +235,35 @@ extern long lstrnlen_user(const char __user *,long);
234 235
235unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len); 236unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len);
236#define __copy_to_user copy_to_user 237#define __copy_to_user copy_to_user
237unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len); 238unsigned long __copy_from_user(void *dst, const void __user *src, unsigned long len);
238#define __copy_from_user copy_from_user
239unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len); 239unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len);
240#define __copy_in_user copy_in_user 240#define __copy_in_user copy_in_user
241#define __copy_to_user_inatomic __copy_to_user 241#define __copy_to_user_inatomic __copy_to_user
242#define __copy_from_user_inatomic __copy_from_user 242#define __copy_from_user_inatomic __copy_from_user
243 243
244extern void copy_from_user_overflow(void)
245#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
246 __compiletime_error("copy_from_user() buffer size is not provably correct")
247#else
248 __compiletime_warning("copy_from_user() buffer size is not provably correct")
249#endif
250;
251
252static inline unsigned long __must_check copy_from_user(void *to,
253 const void __user *from,
254 unsigned long n)
255{
256 int sz = __compiletime_object_size(to);
257 int ret = -EFAULT;
258
259 if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n))
260 ret = __copy_from_user(to, from, n);
261 else
262 copy_from_user_overflow();
263
264 return ret;
265}
266
244struct pt_regs; 267struct pt_regs;
245int fixup_exception(struct pt_regs *regs); 268int fixup_exception(struct pt_regs *regs);
246 269
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index cda158318c6..1ce7d2851d9 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -811,8 +811,10 @@
811#define __NR_pwritev (__NR_Linux + 316) 811#define __NR_pwritev (__NR_Linux + 316)
812#define __NR_rt_tgsigqueueinfo (__NR_Linux + 317) 812#define __NR_rt_tgsigqueueinfo (__NR_Linux + 317)
813#define __NR_perf_event_open (__NR_Linux + 318) 813#define __NR_perf_event_open (__NR_Linux + 318)
814#define __NR_recvmmsg (__NR_Linux + 319)
815#define __NR_accept4 (__NR_Linux + 320)
814 816
815#define __NR_Linux_syscalls (__NR_perf_event_open + 1) 817#define __NR_Linux_syscalls (__NR_accept4 + 1)
816 818
817 819
818#define __IGNORE_select /* newselect */ 820#define __IGNORE_select /* newselect */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 1054baa2fc6..d054f3da3ff 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -171,14 +171,14 @@ parisc_cache_init(void)
171 cache_info.ic_conf.cc_cst, 171 cache_info.ic_conf.cc_cst,
172 cache_info.ic_conf.cc_hv); 172 cache_info.ic_conf.cc_hv);
173 173
174 printk("D-TLB conf: sh %d page %d cst %d aid %d pad1 %d \n", 174 printk("D-TLB conf: sh %d page %d cst %d aid %d pad1 %d\n",
175 cache_info.dt_conf.tc_sh, 175 cache_info.dt_conf.tc_sh,
176 cache_info.dt_conf.tc_page, 176 cache_info.dt_conf.tc_page,
177 cache_info.dt_conf.tc_cst, 177 cache_info.dt_conf.tc_cst,
178 cache_info.dt_conf.tc_aid, 178 cache_info.dt_conf.tc_aid,
179 cache_info.dt_conf.tc_pad1); 179 cache_info.dt_conf.tc_pad1);
180 180
181 printk("I-TLB conf: sh %d page %d cst %d aid %d pad1 %d \n", 181 printk("I-TLB conf: sh %d page %d cst %d aid %d pad1 %d\n",
182 cache_info.it_conf.tc_sh, 182 cache_info.it_conf.tc_sh,
183 cache_info.it_conf.tc_page, 183 cache_info.it_conf.tc_page,
184 cache_info.it_conf.tc_cst, 184 cache_info.it_conf.tc_cst,
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 01c4fcf8f48..de5f6dab48b 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -417,6 +417,8 @@
417 ENTRY_COMP(pwritev) 417 ENTRY_COMP(pwritev)
418 ENTRY_COMP(rt_tgsigqueueinfo) 418 ENTRY_COMP(rt_tgsigqueueinfo)
419 ENTRY_SAME(perf_event_open) 419 ENTRY_SAME(perf_event_open)
420 ENTRY_COMP(recvmmsg)
421 ENTRY_SAME(accept4) /* 320 */
420 422
421 /* Nothing yet */ 423 /* Nothing yet */
422 424
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index a79c6f9e7e2..05511ccb61d 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -250,9 +250,21 @@ static int __init rtc_init(void)
250} 250}
251module_init(rtc_init); 251module_init(rtc_init);
252 252
253void __init time_init(void) 253void read_persistent_clock(struct timespec *ts)
254{ 254{
255 static struct pdc_tod tod_data; 255 static struct pdc_tod tod_data;
256 if (pdc_tod_read(&tod_data) == 0) {
257 ts->tv_sec = tod_data.tod_sec;
258 ts->tv_nsec = tod_data.tod_usec * 1000;
259 } else {
260 printk(KERN_ERR "Error reading tod clock\n");
261 ts->tv_sec = 0;
262 ts->tv_nsec = 0;
263 }
264}
265
266void __init time_init(void)
267{
256 unsigned long current_cr16_khz; 268 unsigned long current_cr16_khz;
257 269
258 clocktick = (100 * PAGE0->mem_10msec) / HZ; 270 clocktick = (100 * PAGE0->mem_10msec) / HZ;
@@ -264,19 +276,4 @@ void __init time_init(void)
264 clocksource_cr16.mult = clocksource_khz2mult(current_cr16_khz, 276 clocksource_cr16.mult = clocksource_khz2mult(current_cr16_khz,
265 clocksource_cr16.shift); 277 clocksource_cr16.shift);
266 clocksource_register(&clocksource_cr16); 278 clocksource_register(&clocksource_cr16);
267
268 if (pdc_tod_read(&tod_data) == 0) {
269 unsigned long flags;
270
271 write_seqlock_irqsave(&xtime_lock, flags);
272 xtime.tv_sec = tod_data.tod_sec;
273 xtime.tv_nsec = tod_data.tod_usec * 1000;
274 set_normalized_timespec(&wall_to_monotonic,
275 -xtime.tv_sec, -xtime.tv_nsec);
276 write_sequnlock_irqrestore(&xtime_lock, flags);
277 } else {
278 printk(KERN_ERR "Error reading tod clock\n");
279 xtime.tv_sec = 0;
280 xtime.tv_nsec = 0;
281 }
282} 279}
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index e6f4b7a4b7e..92d977bb5ea 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -25,6 +25,7 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/signal.h> 27#include <linux/signal.h>
28#include <linux/ratelimit.h>
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
30/* #define DEBUG_UNALIGNED 1 */ 31/* #define DEBUG_UNALIGNED 1 */
@@ -446,8 +447,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
446 447
447void handle_unaligned(struct pt_regs *regs) 448void handle_unaligned(struct pt_regs *regs)
448{ 449{
449 static unsigned long unaligned_count = 0; 450 static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
450 static unsigned long last_time = 0;
451 unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0; 451 unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0;
452 int modify = 0; 452 int modify = 0;
453 int ret = ERR_NOTHANDLED; 453 int ret = ERR_NOTHANDLED;
@@ -460,14 +460,8 @@ void handle_unaligned(struct pt_regs *regs)
460 goto force_sigbus; 460 goto force_sigbus;
461 } 461 }
462 462
463 if (unaligned_count > 5 && 463 if (!(current->thread.flags & PARISC_UAC_NOPRINT) &&
464 time_after(jiffies, last_time + 5 * HZ)) { 464 __ratelimit(&ratelimit)) {
465 unaligned_count = 0;
466 last_time = jiffies;
467 }
468
469 if (!(current->thread.flags & PARISC_UAC_NOPRINT)
470 && ++unaligned_count < 5) {
471 char buf[256]; 465 char buf[256];
472 sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n", 466 sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n",
473 current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]); 467 current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]);
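
The hunk above replaces the hand-rolled jiffies/counter throttling with the generic ratelimit helper. A hedged sketch of that API with the same parameters (the surrounding function and message are illustrative):

#include <linux/kernel.h>
#include <linux/ratelimit.h>

/* Allow at most 5 messages every 5 * HZ jiffies, matching the arguments
 * the hunk above passes to DEFINE_RATELIMIT_STATE(). */
static DEFINE_RATELIMIT_STATE(example_rs, 5 * HZ, 5);

static void example_report(unsigned long addr)
{
	/* __ratelimit() returns nonzero while the current burst allows it. */
	if (__ratelimit(&example_rs))
		printk(KERN_WARNING "example: event at 0x%lx\n", addr);
}
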
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index abf41f4632a..1dbca5c31b3 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -475,7 +475,8 @@ unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len)
475 return pa_memcpy((void __force *)dst, src, len); 475 return pa_memcpy((void __force *)dst, src, len);
476} 476}
477 477
478unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len) 478EXPORT_SYMBOL(__copy_from_user);
479unsigned long __copy_from_user(void *dst, const void __user *src, unsigned long len)
479{ 480{
480 mtsp(get_user_space(), 1); 481 mtsp(get_user_space(), 1);
481 mtsp(get_kernel_space(), 2); 482 mtsp(get_kernel_space(), 2);
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index af2abe74f54..aadf2dd6f84 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -97,4 +97,10 @@
97#define RESUME_HOST RESUME_FLAG_HOST 97#define RESUME_HOST RESUME_FLAG_HOST
98#define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV) 98#define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV)
99 99
100#define KVM_GUEST_MODE_NONE 0
101#define KVM_GUEST_MODE_GUEST 1
102#define KVM_GUEST_MODE_SKIP 2
103
104#define KVM_INST_FETCH_FAILED -1
105
100#endif /* __POWERPC_KVM_ASM_H__ */ 106#endif /* __POWERPC_KVM_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 74b7369770d..db7db0a9696 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -22,7 +22,7 @@
22 22
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25#include <asm/kvm_ppc.h> 25#include <asm/kvm_book3s_64_asm.h>
26 26
27struct kvmppc_slb { 27struct kvmppc_slb {
28 u64 esid; 28 u64 esid;
@@ -33,7 +33,8 @@ struct kvmppc_slb {
33 bool Ks; 33 bool Ks;
34 bool Kp; 34 bool Kp;
35 bool nx; 35 bool nx;
36 bool large; 36 bool large; /* PTEs are 16MB */
37 bool tb; /* 1TB segment */
37 bool class; 38 bool class;
38}; 39};
39 40
@@ -69,6 +70,7 @@ struct kvmppc_sid_map {
69 70
70struct kvmppc_vcpu_book3s { 71struct kvmppc_vcpu_book3s {
71 struct kvm_vcpu vcpu; 72 struct kvm_vcpu vcpu;
73 struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
72 struct kvmppc_sid_map sid_map[SID_MAP_NUM]; 74 struct kvmppc_sid_map sid_map[SID_MAP_NUM];
73 struct kvmppc_slb slb[64]; 75 struct kvmppc_slb slb[64];
74 struct { 76 struct {
@@ -89,6 +91,7 @@ struct kvmppc_vcpu_book3s {
89 u64 vsid_next; 91 u64 vsid_next;
90 u64 vsid_max; 92 u64 vsid_max;
91 int context_id; 93 int context_id;
94 ulong prog_flags; /* flags to inject when giving a 700 trap */
92}; 95};
93 96
94#define CONTEXT_HOST 0 97#define CONTEXT_HOST 0
@@ -119,6 +122,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
119 122
120extern u32 kvmppc_trampoline_lowmem; 123extern u32 kvmppc_trampoline_lowmem;
121extern u32 kvmppc_trampoline_enter; 124extern u32 kvmppc_trampoline_enter;
125extern void kvmppc_rmcall(ulong srr0, ulong srr1);
126extern void kvmppc_load_up_fpu(void);
127extern void kvmppc_load_up_altivec(void);
128extern void kvmppc_load_up_vsx(void);
122 129
123static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) 130static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
124{ 131{
diff --git a/arch/powerpc/include/asm/kvm_book3s_64_asm.h b/arch/powerpc/include/asm/kvm_book3s_64_asm.h
index 2e06ee8184e..183461b4840 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64_asm.h
@@ -20,6 +20,8 @@
20#ifndef __ASM_KVM_BOOK3S_ASM_H__ 20#ifndef __ASM_KVM_BOOK3S_ASM_H__
21#define __ASM_KVM_BOOK3S_ASM_H__ 21#define __ASM_KVM_BOOK3S_ASM_H__
22 22
23#ifdef __ASSEMBLY__
24
23#ifdef CONFIG_KVM_BOOK3S_64_HANDLER 25#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
24 26
25#include <asm/kvm_asm.h> 27#include <asm/kvm_asm.h>
@@ -55,4 +57,20 @@ kvmppc_resume_\intno:
55 57
56#endif /* CONFIG_KVM_BOOK3S_64_HANDLER */ 58#endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
57 59
60#else /*__ASSEMBLY__ */
61
62struct kvmppc_book3s_shadow_vcpu {
63 ulong gpr[14];
64 u32 cr;
65 u32 xer;
66 ulong host_r1;
67 ulong host_r2;
68 ulong handler;
69 ulong scratch0;
70 ulong scratch1;
71 ulong vmhandler;
72};
73
74#endif /*__ASSEMBLY__ */
75
58#endif /* __ASM_KVM_BOOK3S_ASM_H__ */ 76#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index 9d497ce4972..7fea26fffb2 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -52,9 +52,12 @@ struct kvmppc_vcpu_e500 {
52 u32 mas5; 52 u32 mas5;
53 u32 mas6; 53 u32 mas6;
54 u32 mas7; 54 u32 mas7;
55 u32 l1csr0;
55 u32 l1csr1; 56 u32 l1csr1;
56 u32 hid0; 57 u32 hid0;
57 u32 hid1; 58 u32 hid1;
59 u32 tlb0cfg;
60 u32 tlb1cfg;
58 61
59 struct kvm_vcpu vcpu; 62 struct kvm_vcpu vcpu;
60}; 63};
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1201f62d0d7..5e5bae7e152 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -167,23 +167,40 @@ struct kvm_vcpu_arch {
167 ulong trampoline_lowmem; 167 ulong trampoline_lowmem;
168 ulong trampoline_enter; 168 ulong trampoline_enter;
169 ulong highmem_handler; 169 ulong highmem_handler;
170 ulong rmcall;
170 ulong host_paca_phys; 171 ulong host_paca_phys;
171 struct kvmppc_mmu mmu; 172 struct kvmppc_mmu mmu;
172#endif 173#endif
173 174
174 u64 fpr[32];
175 ulong gpr[32]; 175 ulong gpr[32];
176 176
177 u64 fpr[32];
178 u32 fpscr;
179
180#ifdef CONFIG_ALTIVEC
181 vector128 vr[32];
182 vector128 vscr;
183#endif
184
185#ifdef CONFIG_VSX
186 u64 vsr[32];
187#endif
188
177 ulong pc; 189 ulong pc;
178 u32 cr;
179 ulong ctr; 190 ulong ctr;
180 ulong lr; 191 ulong lr;
192
193#ifdef CONFIG_BOOKE
181 ulong xer; 194 ulong xer;
195 u32 cr;
196#endif
182 197
183 ulong msr; 198 ulong msr;
184#ifdef CONFIG_PPC64 199#ifdef CONFIG_PPC64
185 ulong shadow_msr; 200 ulong shadow_msr;
201 ulong shadow_srr1;
186 ulong hflags; 202 ulong hflags;
203 ulong guest_owned_ext;
187#endif 204#endif
188 u32 mmucr; 205 u32 mmucr;
189 ulong sprg0; 206 ulong sprg0;
@@ -242,6 +259,8 @@ struct kvm_vcpu_arch {
242#endif 259#endif
243 ulong fault_dear; 260 ulong fault_dear;
244 ulong fault_esr; 261 ulong fault_esr;
262 ulong queued_dear;
263 ulong queued_esr;
245 gpa_t paddr_accessed; 264 gpa_t paddr_accessed;
246 265
247 u8 io_gpr; /* GPR used as IO source/target */ 266 u8 io_gpr; /* GPR used as IO source/target */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 269ee46ab02..e2642829e43 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -28,6 +28,9 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/kvm_types.h> 29#include <linux/kvm_types.h>
30#include <linux/kvm_host.h> 30#include <linux/kvm_host.h>
31#ifdef CONFIG_PPC_BOOK3S
32#include <asm/kvm_book3s.h>
33#endif
31 34
32enum emulation_result { 35enum emulation_result {
33 EMULATE_DONE, /* no further processing */ 36 EMULATE_DONE, /* no further processing */
@@ -80,8 +83,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
80 83
81extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); 84extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
82extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); 85extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
83extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); 86extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
84extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); 87extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
88extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
85extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 89extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
86 struct kvm_interrupt *irq); 90 struct kvm_interrupt *irq);
87 91
@@ -95,4 +99,81 @@ extern void kvmppc_booke_exit(void);
95 99
96extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); 100extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
97 101
102#ifdef CONFIG_PPC_BOOK3S
103
104/* We assume we're always acting on the current vcpu */
105
106static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
107{
108 if ( num < 14 ) {
109 get_paca()->shadow_vcpu.gpr[num] = val;
110 to_book3s(vcpu)->shadow_vcpu.gpr[num] = val;
111 } else
112 vcpu->arch.gpr[num] = val;
113}
114
115static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
116{
117 if ( num < 14 )
118 return get_paca()->shadow_vcpu.gpr[num];
119 else
120 return vcpu->arch.gpr[num];
121}
122
123static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
124{
125 get_paca()->shadow_vcpu.cr = val;
126 to_book3s(vcpu)->shadow_vcpu.cr = val;
127}
128
129static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
130{
131 return get_paca()->shadow_vcpu.cr;
132}
133
134static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
135{
136 get_paca()->shadow_vcpu.xer = val;
137 to_book3s(vcpu)->shadow_vcpu.xer = val;
138}
139
140static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
141{
142 return get_paca()->shadow_vcpu.xer;
143}
144
145#else
146
147static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
148{
149 vcpu->arch.gpr[num] = val;
150}
151
152static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
153{
154 return vcpu->arch.gpr[num];
155}
156
157static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
158{
159 vcpu->arch.cr = val;
160}
161
162static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
163{
164 return vcpu->arch.cr;
165}
166
167static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
168{
169 vcpu->arch.xer = val;
170}
171
172static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
173{
174 return vcpu->arch.xer;
175}
176
177#endif
178
98#endif /* __POWERPC_KVM_PPC_H__ */ 179#endif /* __POWERPC_KVM_PPC_H__ */
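
The accessors above split guest register storage on Book3S: volatile GPRs (r0-r13), CR and XER are read from the PACA shadow_vcpu, with writes also mirrored into the vcpu's Book3S shadow copy, while everything else stays in vcpu->arch. Below is a minimal userspace sketch of that split; the types and names (toy_vcpu, toy_shadow) are illustrative stand-ins, not the kernel's structures, and the mirroring step is left out for brevity.

#include <stdio.h>

typedef unsigned long ulong;

/* Illustrative stand-ins, not the kernel's structures. */
struct toy_shadow { ulong gpr[14]; };
struct toy_vcpu   { struct toy_shadow shadow; ulong gpr[32]; };

static void toy_set_gpr(struct toy_vcpu *vcpu, int num, ulong val)
{
	if (num < 14)
		vcpu->shadow.gpr[num] = val;	/* volatile GPRs go to the shadow area */
	else
		vcpu->gpr[num] = val;		/* the rest stay in the vcpu itself */
}

static ulong toy_get_gpr(struct toy_vcpu *vcpu, int num)
{
	return (num < 14) ? vcpu->shadow.gpr[num] : vcpu->gpr[num];
}

int main(void)
{
	struct toy_vcpu vcpu = { { { 0 } }, { 0 } };

	toy_set_gpr(&vcpu, 3, 0x1234);		/* r3: routed to the shadow copy */
	toy_set_gpr(&vcpu, 20, 42);		/* r20: stays in the plain array */
	printf("r3=%lx r20=%lu\n", toy_get_gpr(&vcpu, 3), toy_get_gpr(&vcpu, 20));
	return 0;
}

Keeping the low GPRs next to the PACA means the assembly world-switch code can address them at fixed offsets from r13 instead of chasing the vcpu pointer on every exit.
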
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 5e9b4ef7141..d8a693109c8 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -19,6 +19,9 @@
19#include <asm/mmu.h> 19#include <asm/mmu.h>
20#include <asm/page.h> 20#include <asm/page.h>
21#include <asm/exception-64e.h> 21#include <asm/exception-64e.h>
22#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
23#include <asm/kvm_book3s_64_asm.h>
24#endif
22 25
23register struct paca_struct *local_paca asm("r13"); 26register struct paca_struct *local_paca asm("r13");
24 27
@@ -135,6 +138,8 @@ struct paca_struct {
135 u64 esid; 138 u64 esid;
136 u64 vsid; 139 u64 vsid;
137 } kvm_slb[64]; /* guest SLB */ 140 } kvm_slb[64]; /* guest SLB */
141 /* We use this to store guest state in */
142 struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
138 u8 kvm_slb_max; /* highest used guest slb entry */ 143 u8 kvm_slb_max; /* highest used guest slb entry */
139 u8 kvm_in_guest; /* are we inside the guest? */ 144 u8 kvm_in_guest; /* are we inside the guest? */
140#endif 145#endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index bc8dd53f718..5572e86223f 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -426,6 +426,10 @@
426#define SRR1_WAKEMT 0x00280000 /* mtctrl */ 426#define SRR1_WAKEMT 0x00280000 /* mtctrl */
427#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ 427#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */
428#define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */ 428#define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */
429#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
430#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
431#define SRR1_PROGTRAP 0x00020000 /* Trap */
432#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
429#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ 433#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
430#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ 434#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
431 435
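
The new SRR1_PROG* bits describe why a program interrupt was taken; the Book3S exit handler later in this series masks them out of shadow_srr1 (flags = shadow_srr1 & 0x1f0000) and hands them to kvmppc_core_queue_program(). The sketch below only decodes those bits from a raw SRR1 value; the mask matches the handler's, but the helper name and messages are illustrative, not kernel code.

#include <stdio.h>
#include <stdint.h>

#define SRR1_PROGFPE	0x00100000	/* Floating Point Enabled */
#define SRR1_PROGPRIV	0x00040000	/* Privileged instruction */
#define SRR1_PROGTRAP	0x00020000	/* Trap */
#define SRR1_PROGADDR	0x00010000	/* SRR0 contains subsequent addr */

static void decode_prog_flags(uint64_t srr1)
{
	uint64_t flags = srr1 & 0x1f0000ull;	/* same mask the exit handler uses */

	if (flags & SRR1_PROGFPE)
		printf("floating point enabled program interrupt\n");
	if (flags & SRR1_PROGPRIV)
		printf("privileged instruction in problem state\n");
	if (flags & SRR1_PROGTRAP)
		printf("trap instruction\n");
	if (flags & SRR1_PROGADDR)
		printf("SRR0 holds a subsequent address\n");
}

int main(void)
{
	decode_prog_flags(SRR1_PROGTRAP);
	return 0;
}
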
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index a6c2b63227b..957ceb7059c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -194,6 +194,30 @@ int main(void)
194 DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); 194 DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest));
195 DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); 195 DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb));
196 DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); 196 DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max));
197 DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr));
198 DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer));
199 DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0]));
200 DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1]));
201 DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2]));
202 DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3]));
203 DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4]));
204 DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5]));
205 DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6]));
206 DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7]));
207 DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8]));
208 DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9]));
209 DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10]));
210 DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11]));
211 DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12]));
212 DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13]));
213 DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1));
214 DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2));
215 DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct,
216 shadow_vcpu.vmhandler));
217 DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct,
218 shadow_vcpu.scratch0));
219 DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct,
220 shadow_vcpu.scratch1));
197#endif 221#endif
198#endif /* CONFIG_PPC64 */ 222#endif /* CONFIG_PPC64 */
199 223
@@ -389,8 +413,6 @@ int main(void)
389 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 413 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
390 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 414 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
391 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 415 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
392 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
393 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
394 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); 416 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
395 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); 417 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
396 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); 418 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
@@ -411,11 +433,16 @@ int main(void)
411 DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); 433 DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
412 DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); 434 DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
413 DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); 435 DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
436 DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
414 DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); 437 DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
415 DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); 438 DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
416 DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); 439 DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
440 DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
417 DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); 441 DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
418#endif 442#else
443 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
444 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
445#endif /* CONFIG_PPC64 */
419#endif 446#endif
420#ifdef CONFIG_44x 447#ifdef CONFIG_44x
421 DEFINE(PGD_T_LOG2, PGD_T_LOG2); 448 DEFINE(PGD_T_LOG2, PGD_T_LOG2);
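
The asm-offsets.c additions turn shadow_vcpu field offsets into PACA_KVM_* constants that the entry/exit assembly can apply as fixed displacements from r13. The sketch below models only the mechanism: offsetof() over a simplified stand-in struct, with a DEFINE macro that just prints. The real file emits the values through inline-asm markers that the build turns into a generated header, and the field layout here is not the kernel's.

#include <stdio.h>
#include <stddef.h>

struct toy_shadow_vcpu {
	unsigned long gpr[14];
	unsigned int  cr;
	unsigned int  xer;
};

struct toy_paca {
	unsigned long other_state[8];		/* placeholder for everything else */
	struct toy_shadow_vcpu shadow_vcpu;
};

#define DEFINE(sym, val) \
	printf("#define %-16s %lu\n", #sym, (unsigned long)(val))

int main(void)
{
	DEFINE(PACA_KVM_CR, offsetof(struct toy_paca, shadow_vcpu.cr));
	DEFINE(PACA_KVM_R3, offsetof(struct toy_paca, shadow_vcpu.gpr[3]));
	return 0;
}
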
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 425451453e9..ab3e392ac63 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -107,6 +107,7 @@ EXPORT_SYMBOL(giveup_altivec);
107#endif /* CONFIG_ALTIVEC */ 107#endif /* CONFIG_ALTIVEC */
108#ifdef CONFIG_VSX 108#ifdef CONFIG_VSX
109EXPORT_SYMBOL(giveup_vsx); 109EXPORT_SYMBOL(giveup_vsx);
110EXPORT_SYMBOL_GPL(__giveup_vsx);
110#endif /* CONFIG_VSX */ 111#endif /* CONFIG_VSX */
111#ifdef CONFIG_SPE 112#ifdef CONFIG_SPE
112EXPORT_SYMBOL(giveup_spe); 113EXPORT_SYMBOL(giveup_spe);
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 61af58fcece..65ea083a5b2 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -65,13 +65,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
65 */ 65 */
66 switch (dcrn) { 66 switch (dcrn) {
67 case DCRN_CPR0_CONFIG_ADDR: 67 case DCRN_CPR0_CONFIG_ADDR:
68 vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; 68 kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
69 break; 69 break;
70 case DCRN_CPR0_CONFIG_DATA: 70 case DCRN_CPR0_CONFIG_DATA:
71 local_irq_disable(); 71 local_irq_disable();
72 mtdcr(DCRN_CPR0_CONFIG_ADDR, 72 mtdcr(DCRN_CPR0_CONFIG_ADDR,
73 vcpu->arch.cpr0_cfgaddr); 73 vcpu->arch.cpr0_cfgaddr);
74 vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); 74 kvmppc_set_gpr(vcpu, rt,
75 mfdcr(DCRN_CPR0_CONFIG_DATA));
75 local_irq_enable(); 76 local_irq_enable();
76 break; 77 break;
77 default: 78 default:
@@ -93,11 +94,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
93 /* emulate some access in kernel */ 94 /* emulate some access in kernel */
94 switch (dcrn) { 95 switch (dcrn) {
95 case DCRN_CPR0_CONFIG_ADDR: 96 case DCRN_CPR0_CONFIG_ADDR:
96 vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; 97 vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
97 break; 98 break;
98 default: 99 default:
99 run->dcr.dcrn = dcrn; 100 run->dcr.dcrn = dcrn;
100 run->dcr.data = vcpu->arch.gpr[rs]; 101 run->dcr.data = kvmppc_get_gpr(vcpu, rs);
101 run->dcr.is_write = 1; 102 run->dcr.is_write = 1;
102 vcpu->arch.dcr_needed = 1; 103 vcpu->arch.dcr_needed = 1;
103 kvmppc_account_exit(vcpu, DCR_EXITS); 104 kvmppc_account_exit(vcpu, DCR_EXITS);
@@ -146,13 +147,13 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
146 147
147 switch (sprn) { 148 switch (sprn) {
148 case SPRN_PID: 149 case SPRN_PID:
149 kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; 150 kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
150 case SPRN_MMUCR: 151 case SPRN_MMUCR:
151 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; 152 vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
152 case SPRN_CCR0: 153 case SPRN_CCR0:
153 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; 154 vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
154 case SPRN_CCR1: 155 case SPRN_CCR1:
155 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; 156 vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
156 default: 157 default:
157 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); 158 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
158 } 159 }
@@ -167,13 +168,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
167 168
168 switch (sprn) { 169 switch (sprn) {
169 case SPRN_PID: 170 case SPRN_PID:
170 vcpu->arch.gpr[rt] = vcpu->arch.pid; break; 171 kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
171 case SPRN_MMUCR: 172 case SPRN_MMUCR:
172 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; 173 kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
173 case SPRN_CCR0: 174 case SPRN_CCR0:
174 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; 175 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
175 case SPRN_CCR1: 176 case SPRN_CCR1:
176 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; 177 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
177 default: 178 default:
178 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); 179 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
179 } 180 }
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ff3cb63b811..2570fcc7665 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -439,7 +439,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
439 struct kvmppc_44x_tlbe *tlbe; 439 struct kvmppc_44x_tlbe *tlbe;
440 unsigned int gtlb_index; 440 unsigned int gtlb_index;
441 441
442 gtlb_index = vcpu->arch.gpr[ra]; 442 gtlb_index = kvmppc_get_gpr(vcpu, ra);
443 if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { 443 if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
444 printk("%s: index %d\n", __func__, gtlb_index); 444 printk("%s: index %d\n", __func__, gtlb_index);
445 kvmppc_dump_vcpu(vcpu); 445 kvmppc_dump_vcpu(vcpu);
@@ -455,15 +455,15 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
455 switch (ws) { 455 switch (ws) {
456 case PPC44x_TLB_PAGEID: 456 case PPC44x_TLB_PAGEID:
457 tlbe->tid = get_mmucr_stid(vcpu); 457 tlbe->tid = get_mmucr_stid(vcpu);
458 tlbe->word0 = vcpu->arch.gpr[rs]; 458 tlbe->word0 = kvmppc_get_gpr(vcpu, rs);
459 break; 459 break;
460 460
461 case PPC44x_TLB_XLAT: 461 case PPC44x_TLB_XLAT:
462 tlbe->word1 = vcpu->arch.gpr[rs]; 462 tlbe->word1 = kvmppc_get_gpr(vcpu, rs);
463 break; 463 break;
464 464
465 case PPC44x_TLB_ATTRIB: 465 case PPC44x_TLB_ATTRIB:
466 tlbe->word2 = vcpu->arch.gpr[rs]; 466 tlbe->word2 = kvmppc_get_gpr(vcpu, rs);
467 break; 467 break;
468 468
469 default: 469 default:
@@ -500,18 +500,20 @@ int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
500 unsigned int as = get_mmucr_sts(vcpu); 500 unsigned int as = get_mmucr_sts(vcpu);
501 unsigned int pid = get_mmucr_stid(vcpu); 501 unsigned int pid = get_mmucr_stid(vcpu);
502 502
503 ea = vcpu->arch.gpr[rb]; 503 ea = kvmppc_get_gpr(vcpu, rb);
504 if (ra) 504 if (ra)
505 ea += vcpu->arch.gpr[ra]; 505 ea += kvmppc_get_gpr(vcpu, ra);
506 506
507 gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); 507 gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
508 if (rc) { 508 if (rc) {
509 u32 cr = kvmppc_get_cr(vcpu);
510
509 if (gtlb_index < 0) 511 if (gtlb_index < 0)
510 vcpu->arch.cr &= ~0x20000000; 512 kvmppc_set_cr(vcpu, cr & ~0x20000000);
511 else 513 else
512 vcpu->arch.cr |= 0x20000000; 514 kvmppc_set_cr(vcpu, cr | 0x20000000);
513 } 515 }
514 vcpu->arch.gpr[rt] = gtlb_index; 516 kvmppc_set_gpr(vcpu, rt, gtlb_index);
515 517
516 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); 518 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
517 return EMULATE_DONE; 519 return EMULATE_DONE;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index fe037fdaf1b..60624cc9f4d 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
20 bool 20 bool
21 select PREEMPT_NOTIFIERS 21 select PREEMPT_NOTIFIERS
22 select ANON_INODES 22 select ANON_INODES
23 select KVM_MMIO
23 24
24config KVM_BOOK3S_64_HANDLER 25config KVM_BOOK3S_64_HANDLER
25 bool 26 bool
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3e294bd9b8c..9a271f0929c 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -33,12 +33,9 @@
33 33
34/* #define EXIT_DEBUG */ 34/* #define EXIT_DEBUG */
35/* #define EXIT_DEBUG_SIMPLE */ 35/* #define EXIT_DEBUG_SIMPLE */
36/* #define DEBUG_EXT */
36 37
37/* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0. 38static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
38 * When set, we retrigger a DEC interrupt after that if DEC <= 0.
39 * PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */
40
41/* #define AGGRESSIVE_DEC */
42 39
43struct kvm_stats_debugfs_item debugfs_entries[] = { 40struct kvm_stats_debugfs_item debugfs_entries[] = {
44 { "exits", VCPU_STAT(sum_exits) }, 41 { "exits", VCPU_STAT(sum_exits) },
@@ -72,16 +69,24 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
72void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 69void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
73{ 70{
74 memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); 71 memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
72 memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
73 sizeof(get_paca()->shadow_vcpu));
75 get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; 74 get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
76} 75}
77 76
78void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 77void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
79{ 78{
80 memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); 79 memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
80 memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
81 sizeof(get_paca()->shadow_vcpu));
81 to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; 82 to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
83
84 kvmppc_giveup_ext(vcpu, MSR_FP);
85 kvmppc_giveup_ext(vcpu, MSR_VEC);
86 kvmppc_giveup_ext(vcpu, MSR_VSX);
82} 87}
83 88
84#if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG) 89#if defined(EXIT_DEBUG)
85static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) 90static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
86{ 91{
87 u64 jd = mftb() - vcpu->arch.dec_jiffies; 92 u64 jd = mftb() - vcpu->arch.dec_jiffies;
@@ -89,6 +94,23 @@ static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
89} 94}
90#endif 95#endif
91 96
97static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
98{
99 vcpu->arch.shadow_msr = vcpu->arch.msr;
100 /* Guest MSR values */
101 vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
102 MSR_BE | MSR_DE;
103 /* Process MSR values */
104 vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
105 MSR_EE;
106 /* External providers the guest reserved */
107 vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
108 /* 64-bit Process MSR values */
109#ifdef CONFIG_PPC_BOOK3S_64
110 vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
111#endif
112}
113
92void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 114void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
93{ 115{
94 ulong old_msr = vcpu->arch.msr; 116 ulong old_msr = vcpu->arch.msr;
@@ -96,12 +118,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
96#ifdef EXIT_DEBUG 118#ifdef EXIT_DEBUG
97 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); 119 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
98#endif 120#endif
121
99 msr &= to_book3s(vcpu)->msr_mask; 122 msr &= to_book3s(vcpu)->msr_mask;
100 vcpu->arch.msr = msr; 123 vcpu->arch.msr = msr;
101 vcpu->arch.shadow_msr = msr | MSR_USER32; 124 kvmppc_recalc_shadow_msr(vcpu);
102 vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 |
103 MSR_USER64 | MSR_SE | MSR_BE | MSR_DE |
104 MSR_FE1);
105 125
106 if (msr & (MSR_WE|MSR_POW)) { 126 if (msr & (MSR_WE|MSR_POW)) {
107 if (!vcpu->arch.pending_exceptions) { 127 if (!vcpu->arch.pending_exceptions) {
@@ -125,11 +145,10 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
125 vcpu->arch.mmu.reset_msr(vcpu); 145 vcpu->arch.mmu.reset_msr(vcpu);
126} 146}
127 147
128void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) 148static int kvmppc_book3s_vec2irqprio(unsigned int vec)
129{ 149{
130 unsigned int prio; 150 unsigned int prio;
131 151
132 vcpu->stat.queue_intr++;
133 switch (vec) { 152 switch (vec) {
134 case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break; 153 case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break;
135 case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break; 154 case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break;
@@ -149,15 +168,31 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
149 default: prio = BOOK3S_IRQPRIO_MAX; break; 168 default: prio = BOOK3S_IRQPRIO_MAX; break;
150 } 169 }
151 170
152 set_bit(prio, &vcpu->arch.pending_exceptions); 171 return prio;
172}
173
174static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
175 unsigned int vec)
176{
177 clear_bit(kvmppc_book3s_vec2irqprio(vec),
178 &vcpu->arch.pending_exceptions);
179}
180
181void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
182{
183 vcpu->stat.queue_intr++;
184
185 set_bit(kvmppc_book3s_vec2irqprio(vec),
186 &vcpu->arch.pending_exceptions);
153#ifdef EXIT_DEBUG 187#ifdef EXIT_DEBUG
154 printk(KERN_INFO "Queueing interrupt %x\n", vec); 188 printk(KERN_INFO "Queueing interrupt %x\n", vec);
155#endif 189#endif
156} 190}
157 191
158 192
159void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) 193void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
160{ 194{
195 to_book3s(vcpu)->prog_flags = flags;
161 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); 196 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
162} 197}
163 198
@@ -171,6 +206,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
171 return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); 206 return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
172} 207}
173 208
209void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
210{
211 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
212}
213
174void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 214void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
175 struct kvm_interrupt *irq) 215 struct kvm_interrupt *irq)
176{ 216{
@@ -181,6 +221,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
181{ 221{
182 int deliver = 1; 222 int deliver = 1;
183 int vec = 0; 223 int vec = 0;
224 ulong flags = 0ULL;
184 225
185 switch (priority) { 226 switch (priority) {
186 case BOOK3S_IRQPRIO_DECREMENTER: 227 case BOOK3S_IRQPRIO_DECREMENTER:
@@ -214,6 +255,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
214 break; 255 break;
215 case BOOK3S_IRQPRIO_PROGRAM: 256 case BOOK3S_IRQPRIO_PROGRAM:
216 vec = BOOK3S_INTERRUPT_PROGRAM; 257 vec = BOOK3S_INTERRUPT_PROGRAM;
258 flags = to_book3s(vcpu)->prog_flags;
217 break; 259 break;
218 case BOOK3S_IRQPRIO_VSX: 260 case BOOK3S_IRQPRIO_VSX:
219 vec = BOOK3S_INTERRUPT_VSX; 261 vec = BOOK3S_INTERRUPT_VSX;
@@ -244,7 +286,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
244#endif 286#endif
245 287
246 if (deliver) 288 if (deliver)
247 kvmppc_inject_interrupt(vcpu, vec, 0ULL); 289 kvmppc_inject_interrupt(vcpu, vec, flags);
248 290
249 return deliver; 291 return deliver;
250} 292}
@@ -254,21 +296,15 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
254 unsigned long *pending = &vcpu->arch.pending_exceptions; 296 unsigned long *pending = &vcpu->arch.pending_exceptions;
255 unsigned int priority; 297 unsigned int priority;
256 298
257 /* XXX be more clever here - no need to mftb() on every entry */
258 /* Issue DEC again if it's still active */
259#ifdef AGGRESSIVE_DEC
260 if (vcpu->arch.msr & MSR_EE)
261 if (kvmppc_get_dec(vcpu) & 0x80000000)
262 kvmppc_core_queue_dec(vcpu);
263#endif
264
265#ifdef EXIT_DEBUG 299#ifdef EXIT_DEBUG
266 if (vcpu->arch.pending_exceptions) 300 if (vcpu->arch.pending_exceptions)
267 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); 301 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
268#endif 302#endif
269 priority = __ffs(*pending); 303 priority = __ffs(*pending);
270 while (priority <= (sizeof(unsigned int) * 8)) { 304 while (priority <= (sizeof(unsigned int) * 8)) {
271 if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) { 305 if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
306 (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
307 /* DEC interrupts get cleared by mtdec */
272 clear_bit(priority, &vcpu->arch.pending_exceptions); 308 clear_bit(priority, &vcpu->arch.pending_exceptions);
273 break; 309 break;
274 } 310 }
@@ -503,14 +539,14 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
503 /* Page not found in guest PTE entries */ 539 /* Page not found in guest PTE entries */
504 vcpu->arch.dear = vcpu->arch.fault_dear; 540 vcpu->arch.dear = vcpu->arch.fault_dear;
505 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; 541 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
506 vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); 542 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
507 kvmppc_book3s_queue_irqprio(vcpu, vec); 543 kvmppc_book3s_queue_irqprio(vcpu, vec);
508 } else if (page_found == -EPERM) { 544 } else if (page_found == -EPERM) {
509 /* Storage protection */ 545 /* Storage protection */
510 vcpu->arch.dear = vcpu->arch.fault_dear; 546 vcpu->arch.dear = vcpu->arch.fault_dear;
511 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; 547 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
512 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; 548 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
513 vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); 549 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
514 kvmppc_book3s_queue_irqprio(vcpu, vec); 550 kvmppc_book3s_queue_irqprio(vcpu, vec);
515 } else if (page_found == -EINVAL) { 551 } else if (page_found == -EINVAL) {
516 /* Page not found in guest SLB */ 552 /* Page not found in guest SLB */
@@ -532,13 +568,122 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
532 r = kvmppc_emulate_mmio(run, vcpu); 568 r = kvmppc_emulate_mmio(run, vcpu);
533 if ( r == RESUME_HOST_NV ) 569 if ( r == RESUME_HOST_NV )
534 r = RESUME_HOST; 570 r = RESUME_HOST;
535 if ( r == RESUME_GUEST_NV )
536 r = RESUME_GUEST;
537 } 571 }
538 572
539 return r; 573 return r;
540} 574}
541 575
576static inline int get_fpr_index(int i)
577{
578#ifdef CONFIG_VSX
579 i *= 2;
580#endif
581 return i;
582}
583
584/* Give up external provider (FPU, Altivec, VSX) */
585static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
586{
587 struct thread_struct *t = &current->thread;
588 u64 *vcpu_fpr = vcpu->arch.fpr;
589 u64 *vcpu_vsx = vcpu->arch.vsr;
590 u64 *thread_fpr = (u64*)t->fpr;
591 int i;
592
593 if (!(vcpu->arch.guest_owned_ext & msr))
594 return;
595
596#ifdef DEBUG_EXT
597 printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
598#endif
599
600 switch (msr) {
601 case MSR_FP:
602 giveup_fpu(current);
603 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
604 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
605
606 vcpu->arch.fpscr = t->fpscr.val;
607 break;
608 case MSR_VEC:
609#ifdef CONFIG_ALTIVEC
610 giveup_altivec(current);
611 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
612 vcpu->arch.vscr = t->vscr;
613#endif
614 break;
615 case MSR_VSX:
616#ifdef CONFIG_VSX
617 __giveup_vsx(current);
618 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
619 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
620#endif
621 break;
622 default:
623 BUG();
624 }
625
626 vcpu->arch.guest_owned_ext &= ~msr;
627 current->thread.regs->msr &= ~msr;
628 kvmppc_recalc_shadow_msr(vcpu);
629}
630
631/* Handle external providers (FPU, Altivec, VSX) */
632static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
633 ulong msr)
634{
635 struct thread_struct *t = &current->thread;
636 u64 *vcpu_fpr = vcpu->arch.fpr;
637 u64 *vcpu_vsx = vcpu->arch.vsr;
638 u64 *thread_fpr = (u64*)t->fpr;
639 int i;
640
641 if (!(vcpu->arch.msr & msr)) {
642 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
643 return RESUME_GUEST;
644 }
645
646#ifdef DEBUG_EXT
647 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
648#endif
649
650 current->thread.regs->msr |= msr;
651
652 switch (msr) {
653 case MSR_FP:
654 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
655 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
656
657 t->fpscr.val = vcpu->arch.fpscr;
658 t->fpexc_mode = 0;
659 kvmppc_load_up_fpu();
660 break;
661 case MSR_VEC:
662#ifdef CONFIG_ALTIVEC
663 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
664 t->vscr = vcpu->arch.vscr;
665 t->vrsave = -1;
666 kvmppc_load_up_altivec();
667#endif
668 break;
669 case MSR_VSX:
670#ifdef CONFIG_VSX
671 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
672 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
673 kvmppc_load_up_vsx();
674#endif
675 break;
676 default:
677 BUG();
678 }
679
680 vcpu->arch.guest_owned_ext |= msr;
681
682 kvmppc_recalc_shadow_msr(vcpu);
683
684 return RESUME_GUEST;
685}
686
542int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 687int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
543 unsigned int exit_nr) 688 unsigned int exit_nr)
544{ 689{
@@ -563,7 +708,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
563 case BOOK3S_INTERRUPT_INST_STORAGE: 708 case BOOK3S_INTERRUPT_INST_STORAGE:
564 vcpu->stat.pf_instruc++; 709 vcpu->stat.pf_instruc++;
565 /* only care about PTEG not found errors, but leave NX alone */ 710 /* only care about PTEG not found errors, but leave NX alone */
566 if (vcpu->arch.shadow_msr & 0x40000000) { 711 if (vcpu->arch.shadow_srr1 & 0x40000000) {
567 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); 712 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
568 vcpu->stat.sp_instruc++; 713 vcpu->stat.sp_instruc++;
569 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 714 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -575,7 +720,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
575 */ 720 */
576 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 721 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
577 } else { 722 } else {
578 vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000); 723 vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
579 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 724 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
580 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 725 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
581 r = RESUME_GUEST; 726 r = RESUME_GUEST;
@@ -621,6 +766,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
621 case BOOK3S_INTERRUPT_PROGRAM: 766 case BOOK3S_INTERRUPT_PROGRAM:
622 { 767 {
623 enum emulation_result er; 768 enum emulation_result er;
769 ulong flags;
770
771 flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
624 772
625 if (vcpu->arch.msr & MSR_PR) { 773 if (vcpu->arch.msr & MSR_PR) {
626#ifdef EXIT_DEBUG 774#ifdef EXIT_DEBUG
@@ -628,7 +776,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
628#endif 776#endif
629 if ((vcpu->arch.last_inst & 0xff0007ff) != 777 if ((vcpu->arch.last_inst & 0xff0007ff) !=
630 (INS_DCBZ & 0xfffffff7)) { 778 (INS_DCBZ & 0xfffffff7)) {
631 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 779 kvmppc_core_queue_program(vcpu, flags);
632 r = RESUME_GUEST; 780 r = RESUME_GUEST;
633 break; 781 break;
634 } 782 }
@@ -638,12 +786,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
638 er = kvmppc_emulate_instruction(run, vcpu); 786 er = kvmppc_emulate_instruction(run, vcpu);
639 switch (er) { 787 switch (er) {
640 case EMULATE_DONE: 788 case EMULATE_DONE:
641 r = RESUME_GUEST; 789 r = RESUME_GUEST_NV;
642 break; 790 break;
643 case EMULATE_FAIL: 791 case EMULATE_FAIL:
644 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 792 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
645 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 793 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
646 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 794 kvmppc_core_queue_program(vcpu, flags);
647 r = RESUME_GUEST; 795 r = RESUME_GUEST;
648 break; 796 break;
649 default: 797 default:
@@ -653,23 +801,30 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
653 } 801 }
654 case BOOK3S_INTERRUPT_SYSCALL: 802 case BOOK3S_INTERRUPT_SYSCALL:
655#ifdef EXIT_DEBUG 803#ifdef EXIT_DEBUG
656 printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]); 804 printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
657#endif 805#endif
658 vcpu->stat.syscall_exits++; 806 vcpu->stat.syscall_exits++;
659 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 807 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
660 r = RESUME_GUEST; 808 r = RESUME_GUEST;
661 break; 809 break;
662 case BOOK3S_INTERRUPT_MACHINE_CHECK:
663 case BOOK3S_INTERRUPT_FP_UNAVAIL: 810 case BOOK3S_INTERRUPT_FP_UNAVAIL:
664 case BOOK3S_INTERRUPT_TRACE: 811 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
812 break;
665 case BOOK3S_INTERRUPT_ALTIVEC: 813 case BOOK3S_INTERRUPT_ALTIVEC:
814 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
815 break;
666 case BOOK3S_INTERRUPT_VSX: 816 case BOOK3S_INTERRUPT_VSX:
817 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX);
818 break;
819 case BOOK3S_INTERRUPT_MACHINE_CHECK:
820 case BOOK3S_INTERRUPT_TRACE:
667 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 821 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
668 r = RESUME_GUEST; 822 r = RESUME_GUEST;
669 break; 823 break;
670 default: 824 default:
671 /* Ugh - bork here! What did we get? */ 825 /* Ugh - bork here! What did we get? */
672 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr); 826 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
827 exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
673 r = RESUME_HOST; 828 r = RESUME_HOST;
674 BUG(); 829 BUG();
675 break; 830 break;
@@ -712,10 +867,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
712 int i; 867 int i;
713 868
714 regs->pc = vcpu->arch.pc; 869 regs->pc = vcpu->arch.pc;
715 regs->cr = vcpu->arch.cr; 870 regs->cr = kvmppc_get_cr(vcpu);
716 regs->ctr = vcpu->arch.ctr; 871 regs->ctr = vcpu->arch.ctr;
717 regs->lr = vcpu->arch.lr; 872 regs->lr = vcpu->arch.lr;
718 regs->xer = vcpu->arch.xer; 873 regs->xer = kvmppc_get_xer(vcpu);
719 regs->msr = vcpu->arch.msr; 874 regs->msr = vcpu->arch.msr;
720 regs->srr0 = vcpu->arch.srr0; 875 regs->srr0 = vcpu->arch.srr0;
721 regs->srr1 = vcpu->arch.srr1; 876 regs->srr1 = vcpu->arch.srr1;
@@ -729,7 +884,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
729 regs->sprg7 = vcpu->arch.sprg6; 884 regs->sprg7 = vcpu->arch.sprg6;
730 885
731 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 886 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
732 regs->gpr[i] = vcpu->arch.gpr[i]; 887 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
733 888
734 return 0; 889 return 0;
735} 890}
@@ -739,10 +894,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
739 int i; 894 int i;
740 895
741 vcpu->arch.pc = regs->pc; 896 vcpu->arch.pc = regs->pc;
742 vcpu->arch.cr = regs->cr; 897 kvmppc_set_cr(vcpu, regs->cr);
743 vcpu->arch.ctr = regs->ctr; 898 vcpu->arch.ctr = regs->ctr;
744 vcpu->arch.lr = regs->lr; 899 vcpu->arch.lr = regs->lr;
745 vcpu->arch.xer = regs->xer; 900 kvmppc_set_xer(vcpu, regs->xer);
746 kvmppc_set_msr(vcpu, regs->msr); 901 kvmppc_set_msr(vcpu, regs->msr);
747 vcpu->arch.srr0 = regs->srr0; 902 vcpu->arch.srr0 = regs->srr0;
748 vcpu->arch.srr1 = regs->srr1; 903 vcpu->arch.srr1 = regs->srr1;
@@ -754,8 +909,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
754 vcpu->arch.sprg6 = regs->sprg5; 909 vcpu->arch.sprg6 = regs->sprg5;
755 vcpu->arch.sprg7 = regs->sprg6; 910 vcpu->arch.sprg7 = regs->sprg6;
756 911
757 for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) 912 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
758 vcpu->arch.gpr[i] = regs->gpr[i]; 913 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
759 914
760 return 0; 915 return 0;
761} 916}
@@ -850,7 +1005,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
850 int is_dirty = 0; 1005 int is_dirty = 0;
851 int r, n; 1006 int r, n;
852 1007
853 down_write(&kvm->slots_lock); 1008 mutex_lock(&kvm->slots_lock);
854 1009
855 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1010 r = kvm_get_dirty_log(kvm, log, &is_dirty);
856 if (r) 1011 if (r)
@@ -858,7 +1013,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
858 1013
859 /* If nothing is dirty, don't bother messing with page tables. */ 1014 /* If nothing is dirty, don't bother messing with page tables. */
860 if (is_dirty) { 1015 if (is_dirty) {
861 memslot = &kvm->memslots[log->slot]; 1016 memslot = &kvm->memslots->memslots[log->slot];
862 1017
863 ga = memslot->base_gfn << PAGE_SHIFT; 1018 ga = memslot->base_gfn << PAGE_SHIFT;
864 ga_end = ga + (memslot->npages << PAGE_SHIFT); 1019 ga_end = ga + (memslot->npages << PAGE_SHIFT);
@@ -872,7 +1027,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
872 1027
873 r = 0; 1028 r = 0;
874out: 1029out:
875 up_write(&kvm->slots_lock); 1030 mutex_unlock(&kvm->slots_lock);
876 return r; 1031 return r;
877} 1032}
878 1033
@@ -910,6 +1065,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
910 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; 1065 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
911 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; 1066 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
912 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; 1067 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
1068 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
913 1069
914 vcpu->arch.shadow_msr = MSR_USER64; 1070 vcpu->arch.shadow_msr = MSR_USER64;
915 1071
@@ -943,6 +1099,10 @@ extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
943int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1099int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
944{ 1100{
945 int ret; 1101 int ret;
1102 struct thread_struct ext_bkp;
1103 bool save_vec = current->thread.used_vr;
1104 bool save_vsx = current->thread.used_vsr;
1105 ulong ext_msr;
946 1106
947 /* No need to go into the guest when all we do is going out */ 1107 /* No need to go into the guest when all we do is going out */
948 if (signal_pending(current)) { 1108 if (signal_pending(current)) {
@@ -950,6 +1110,35 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
950 return -EINTR; 1110 return -EINTR;
951 } 1111 }
952 1112
1113 /* Save FPU state in stack */
1114 if (current->thread.regs->msr & MSR_FP)
1115 giveup_fpu(current);
1116 memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
1117 ext_bkp.fpscr = current->thread.fpscr;
1118 ext_bkp.fpexc_mode = current->thread.fpexc_mode;
1119
1120#ifdef CONFIG_ALTIVEC
1121 /* Save Altivec state in stack */
1122 if (save_vec) {
1123 if (current->thread.regs->msr & MSR_VEC)
1124 giveup_altivec(current);
1125 memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
1126 ext_bkp.vscr = current->thread.vscr;
1127 ext_bkp.vrsave = current->thread.vrsave;
1128 }
1129 ext_bkp.used_vr = current->thread.used_vr;
1130#endif
1131
1132#ifdef CONFIG_VSX
1133 /* Save VSX state in stack */
1134 if (save_vsx && (current->thread.regs->msr & MSR_VSX))
1135 __giveup_vsx(current);
1136 ext_bkp.used_vsr = current->thread.used_vsr;
1137#endif
1138
1139 /* Remember the MSR with disabled extensions */
1140 ext_msr = current->thread.regs->msr;
1141
953 /* XXX we get called with irq disabled - change that! */ 1142 /* XXX we get called with irq disabled - change that! */
954 local_irq_enable(); 1143 local_irq_enable();
955 1144
@@ -957,6 +1146,32 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
957 1146
958 local_irq_disable(); 1147 local_irq_disable();
959 1148
1149 current->thread.regs->msr = ext_msr;
1150
1151 /* Make sure we save the guest FPU/Altivec/VSX state */
1152 kvmppc_giveup_ext(vcpu, MSR_FP);
1153 kvmppc_giveup_ext(vcpu, MSR_VEC);
1154 kvmppc_giveup_ext(vcpu, MSR_VSX);
1155
1156 /* Restore FPU state from stack */
1157 memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
1158 current->thread.fpscr = ext_bkp.fpscr;
1159 current->thread.fpexc_mode = ext_bkp.fpexc_mode;
1160
1161#ifdef CONFIG_ALTIVEC
1162 /* Restore Altivec state from stack */
1163 if (save_vec && current->thread.used_vr) {
1164 memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
1165 current->thread.vscr = ext_bkp.vscr;
1166 current->thread.vrsave= ext_bkp.vrsave;
1167 }
1168 current->thread.used_vr = ext_bkp.used_vr;
1169#endif
1170
1171#ifdef CONFIG_VSX
1172 current->thread.used_vsr = ext_bkp.used_vsr;
1173#endif
1174
960 return ret; 1175 return ret;
961} 1176}
962 1177
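
kvmppc_handle_ext() and kvmppc_giveup_ext() above implement lazy ownership of the FPU, Altivec and VSX units: an *_UNAVAIL exit loads the guest state into the hardware and marks the unit guest-owned, and vcpu_put or the run-loop exit saves it back and clears the bit. The toy model below keeps only that ownership bitmask; the register save/restore, the real MSR bit values and the shadow-MSR recalculation are elided, and all names are illustrative.

#include <stdio.h>

#define TOY_FP	0x1
#define TOY_VEC	0x2
#define TOY_VSX	0x4

struct toy_vcpu { unsigned int guest_owned_ext; };

static void toy_giveup_ext(struct toy_vcpu *vcpu, unsigned int unit)
{
	if (!(vcpu->guest_owned_ext & unit))
		return;				/* guest never owned it: nothing to save */
	/* ...save the guest register file back into the vcpu here... */
	vcpu->guest_owned_ext &= ~unit;
}

static void toy_handle_ext(struct toy_vcpu *vcpu, unsigned int unit)
{
	/* ...load the guest register file into the hardware unit here... */
	vcpu->guest_owned_ext |= unit;
}

int main(void)
{
	struct toy_vcpu vcpu = { 0 };

	toy_handle_ext(&vcpu, TOY_FP);		/* guest hit an FP unavailable exit */
	toy_giveup_ext(&vcpu, TOY_FP);		/* heavyweight exit: hand FP back */
	toy_giveup_ext(&vcpu, TOY_VEC);		/* never owned, so a no-op */
	printf("owned=%#x\n", vcpu.guest_owned_ext);
	return 0;
}
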
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_64_emulate.c
index 1027eac6d47..2b0ee7e040c 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -65,11 +65,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
65 case 31: 65 case 31:
66 switch (get_xop(inst)) { 66 switch (get_xop(inst)) {
67 case OP_31_XOP_MFMSR: 67 case OP_31_XOP_MFMSR:
68 vcpu->arch.gpr[get_rt(inst)] = vcpu->arch.msr; 68 kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr);
69 break; 69 break;
70 case OP_31_XOP_MTMSRD: 70 case OP_31_XOP_MTMSRD:
71 { 71 {
72 ulong rs = vcpu->arch.gpr[get_rs(inst)]; 72 ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
73 if (inst & 0x10000) { 73 if (inst & 0x10000) {
74 vcpu->arch.msr &= ~(MSR_RI | MSR_EE); 74 vcpu->arch.msr &= ~(MSR_RI | MSR_EE);
75 vcpu->arch.msr |= rs & (MSR_RI | MSR_EE); 75 vcpu->arch.msr |= rs & (MSR_RI | MSR_EE);
@@ -78,30 +78,30 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
78 break; 78 break;
79 } 79 }
80 case OP_31_XOP_MTMSR: 80 case OP_31_XOP_MTMSR:
81 kvmppc_set_msr(vcpu, vcpu->arch.gpr[get_rs(inst)]); 81 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
82 break; 82 break;
83 case OP_31_XOP_MFSRIN: 83 case OP_31_XOP_MFSRIN:
84 { 84 {
85 int srnum; 85 int srnum;
86 86
87 srnum = (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf; 87 srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
88 if (vcpu->arch.mmu.mfsrin) { 88 if (vcpu->arch.mmu.mfsrin) {
89 u32 sr; 89 u32 sr;
90 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); 90 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
91 vcpu->arch.gpr[get_rt(inst)] = sr; 91 kvmppc_set_gpr(vcpu, get_rt(inst), sr);
92 } 92 }
93 break; 93 break;
94 } 94 }
95 case OP_31_XOP_MTSRIN: 95 case OP_31_XOP_MTSRIN:
96 vcpu->arch.mmu.mtsrin(vcpu, 96 vcpu->arch.mmu.mtsrin(vcpu,
97 (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf, 97 (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
98 vcpu->arch.gpr[get_rs(inst)]); 98 kvmppc_get_gpr(vcpu, get_rs(inst)));
99 break; 99 break;
100 case OP_31_XOP_TLBIE: 100 case OP_31_XOP_TLBIE:
101 case OP_31_XOP_TLBIEL: 101 case OP_31_XOP_TLBIEL:
102 { 102 {
103 bool large = (inst & 0x00200000) ? true : false; 103 bool large = (inst & 0x00200000) ? true : false;
104 ulong addr = vcpu->arch.gpr[get_rb(inst)]; 104 ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
105 vcpu->arch.mmu.tlbie(vcpu, addr, large); 105 vcpu->arch.mmu.tlbie(vcpu, addr, large);
106 break; 106 break;
107 } 107 }
@@ -111,14 +111,16 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
111 if (!vcpu->arch.mmu.slbmte) 111 if (!vcpu->arch.mmu.slbmte)
112 return EMULATE_FAIL; 112 return EMULATE_FAIL;
113 113
114 vcpu->arch.mmu.slbmte(vcpu, vcpu->arch.gpr[get_rs(inst)], 114 vcpu->arch.mmu.slbmte(vcpu,
115 vcpu->arch.gpr[get_rb(inst)]); 115 kvmppc_get_gpr(vcpu, get_rs(inst)),
116 kvmppc_get_gpr(vcpu, get_rb(inst)));
116 break; 117 break;
117 case OP_31_XOP_SLBIE: 118 case OP_31_XOP_SLBIE:
118 if (!vcpu->arch.mmu.slbie) 119 if (!vcpu->arch.mmu.slbie)
119 return EMULATE_FAIL; 120 return EMULATE_FAIL;
120 121
121 vcpu->arch.mmu.slbie(vcpu, vcpu->arch.gpr[get_rb(inst)]); 122 vcpu->arch.mmu.slbie(vcpu,
123 kvmppc_get_gpr(vcpu, get_rb(inst)));
122 break; 124 break;
123 case OP_31_XOP_SLBIA: 125 case OP_31_XOP_SLBIA:
124 if (!vcpu->arch.mmu.slbia) 126 if (!vcpu->arch.mmu.slbia)
@@ -132,9 +134,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
132 } else { 134 } else {
133 ulong t, rb; 135 ulong t, rb;
134 136
135 rb = vcpu->arch.gpr[get_rb(inst)]; 137 rb = kvmppc_get_gpr(vcpu, get_rb(inst));
136 t = vcpu->arch.mmu.slbmfee(vcpu, rb); 138 t = vcpu->arch.mmu.slbmfee(vcpu, rb);
137 vcpu->arch.gpr[get_rt(inst)] = t; 139 kvmppc_set_gpr(vcpu, get_rt(inst), t);
138 } 140 }
139 break; 141 break;
140 case OP_31_XOP_SLBMFEV: 142 case OP_31_XOP_SLBMFEV:
@@ -143,20 +145,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
143 } else { 145 } else {
144 ulong t, rb; 146 ulong t, rb;
145 147
146 rb = vcpu->arch.gpr[get_rb(inst)]; 148 rb = kvmppc_get_gpr(vcpu, get_rb(inst));
147 t = vcpu->arch.mmu.slbmfev(vcpu, rb); 149 t = vcpu->arch.mmu.slbmfev(vcpu, rb);
148 vcpu->arch.gpr[get_rt(inst)] = t; 150 kvmppc_set_gpr(vcpu, get_rt(inst), t);
149 } 151 }
150 break; 152 break;
151 case OP_31_XOP_DCBZ: 153 case OP_31_XOP_DCBZ:
152 { 154 {
153 ulong rb = vcpu->arch.gpr[get_rb(inst)]; 155 ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
154 ulong ra = 0; 156 ulong ra = 0;
155 ulong addr; 157 ulong addr;
156 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; 158 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
157 159
158 if (get_ra(inst)) 160 if (get_ra(inst))
159 ra = vcpu->arch.gpr[get_ra(inst)]; 161 ra = kvmppc_get_gpr(vcpu, get_ra(inst));
160 162
161 addr = (ra + rb) & ~31ULL; 163 addr = (ra + rb) & ~31ULL;
162 if (!(vcpu->arch.msr & MSR_SF)) 164 if (!(vcpu->arch.msr & MSR_SF))
@@ -233,43 +235,44 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
233int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 235int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
234{ 236{
235 int emulated = EMULATE_DONE; 237 int emulated = EMULATE_DONE;
238 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
236 239
237 switch (sprn) { 240 switch (sprn) {
238 case SPRN_SDR1: 241 case SPRN_SDR1:
239 to_book3s(vcpu)->sdr1 = vcpu->arch.gpr[rs]; 242 to_book3s(vcpu)->sdr1 = spr_val;
240 break; 243 break;
241 case SPRN_DSISR: 244 case SPRN_DSISR:
242 to_book3s(vcpu)->dsisr = vcpu->arch.gpr[rs]; 245 to_book3s(vcpu)->dsisr = spr_val;
243 break; 246 break;
244 case SPRN_DAR: 247 case SPRN_DAR:
245 vcpu->arch.dear = vcpu->arch.gpr[rs]; 248 vcpu->arch.dear = spr_val;
246 break; 249 break;
247 case SPRN_HIOR: 250 case SPRN_HIOR:
248 to_book3s(vcpu)->hior = vcpu->arch.gpr[rs]; 251 to_book3s(vcpu)->hior = spr_val;
249 break; 252 break;
250 case SPRN_IBAT0U ... SPRN_IBAT3L: 253 case SPRN_IBAT0U ... SPRN_IBAT3L:
251 case SPRN_IBAT4U ... SPRN_IBAT7L: 254 case SPRN_IBAT4U ... SPRN_IBAT7L:
252 case SPRN_DBAT0U ... SPRN_DBAT3L: 255 case SPRN_DBAT0U ... SPRN_DBAT3L:
253 case SPRN_DBAT4U ... SPRN_DBAT7L: 256 case SPRN_DBAT4U ... SPRN_DBAT7L:
254 kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]); 257 kvmppc_write_bat(vcpu, sprn, (u32)spr_val);
255 /* BAT writes happen so rarely that we're ok to flush 258 /* BAT writes happen so rarely that we're ok to flush
256 * everything here */ 259 * everything here */
257 kvmppc_mmu_pte_flush(vcpu, 0, 0); 260 kvmppc_mmu_pte_flush(vcpu, 0, 0);
258 break; 261 break;
259 case SPRN_HID0: 262 case SPRN_HID0:
260 to_book3s(vcpu)->hid[0] = vcpu->arch.gpr[rs]; 263 to_book3s(vcpu)->hid[0] = spr_val;
261 break; 264 break;
262 case SPRN_HID1: 265 case SPRN_HID1:
263 to_book3s(vcpu)->hid[1] = vcpu->arch.gpr[rs]; 266 to_book3s(vcpu)->hid[1] = spr_val;
264 break; 267 break;
265 case SPRN_HID2: 268 case SPRN_HID2:
266 to_book3s(vcpu)->hid[2] = vcpu->arch.gpr[rs]; 269 to_book3s(vcpu)->hid[2] = spr_val;
267 break; 270 break;
268 case SPRN_HID4: 271 case SPRN_HID4:
269 to_book3s(vcpu)->hid[4] = vcpu->arch.gpr[rs]; 272 to_book3s(vcpu)->hid[4] = spr_val;
270 break; 273 break;
271 case SPRN_HID5: 274 case SPRN_HID5:
272 to_book3s(vcpu)->hid[5] = vcpu->arch.gpr[rs]; 275 to_book3s(vcpu)->hid[5] = spr_val;
273 /* guest HID5 set can change is_dcbz32 */ 276 /* guest HID5 set can change is_dcbz32 */
274 if (vcpu->arch.mmu.is_dcbz32(vcpu) && 277 if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
275 (mfmsr() & MSR_HV)) 278 (mfmsr() & MSR_HV))
@@ -299,38 +302,38 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
299 302
300 switch (sprn) { 303 switch (sprn) {
301 case SPRN_SDR1: 304 case SPRN_SDR1:
302 vcpu->arch.gpr[rt] = to_book3s(vcpu)->sdr1; 305 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
303 break; 306 break;
304 case SPRN_DSISR: 307 case SPRN_DSISR:
305 vcpu->arch.gpr[rt] = to_book3s(vcpu)->dsisr; 308 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr);
306 break; 309 break;
307 case SPRN_DAR: 310 case SPRN_DAR:
308 vcpu->arch.gpr[rt] = vcpu->arch.dear; 311 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear);
309 break; 312 break;
310 case SPRN_HIOR: 313 case SPRN_HIOR:
311 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hior; 314 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
312 break; 315 break;
313 case SPRN_HID0: 316 case SPRN_HID0:
314 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[0]; 317 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
315 break; 318 break;
316 case SPRN_HID1: 319 case SPRN_HID1:
317 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[1]; 320 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
318 break; 321 break;
319 case SPRN_HID2: 322 case SPRN_HID2:
320 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[2]; 323 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
321 break; 324 break;
322 case SPRN_HID4: 325 case SPRN_HID4:
323 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[4]; 326 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
324 break; 327 break;
325 case SPRN_HID5: 328 case SPRN_HID5:
326 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[5]; 329 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
327 break; 330 break;
328 case SPRN_THRM1: 331 case SPRN_THRM1:
329 case SPRN_THRM2: 332 case SPRN_THRM2:
330 case SPRN_THRM3: 333 case SPRN_THRM3:
331 case SPRN_CTRLF: 334 case SPRN_CTRLF:
332 case SPRN_CTRLT: 335 case SPRN_CTRLT:
333 vcpu->arch.gpr[rt] = 0; 336 kvmppc_set_gpr(vcpu, rt, 0);
334 break; 337 break;
335 default: 338 default:
336 printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); 339 printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
diff --git a/arch/powerpc/kvm/book3s_64_exports.c b/arch/powerpc/kvm/book3s_64_exports.c
index 5b2db38ed86..1dd5a1ddfd0 100644
--- a/arch/powerpc/kvm/book3s_64_exports.c
+++ b/arch/powerpc/kvm/book3s_64_exports.c
@@ -22,3 +22,11 @@
22 22
23EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter); 23EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
24EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem); 24EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
25EXPORT_SYMBOL_GPL(kvmppc_rmcall);
26EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
27#ifdef CONFIG_ALTIVEC
28EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
29#endif
30#ifdef CONFIG_VSX
31EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
32#endif
diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_64_interrupts.S
index 7b55d8094c8..c1584d0cbce 100644
--- a/arch/powerpc/kvm/book3s_64_interrupts.S
+++ b/arch/powerpc/kvm/book3s_64_interrupts.S
@@ -28,11 +28,6 @@
28#define ULONG_SIZE 8 28#define ULONG_SIZE 8
29#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) 29#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
30 30
31.macro mfpaca tmp_reg, src_reg, offset, vcpu_reg
32 ld \tmp_reg, (PACA_EXMC+\offset)(r13)
33 std \tmp_reg, VCPU_GPR(\src_reg)(\vcpu_reg)
34.endm
35
36.macro DISABLE_INTERRUPTS 31.macro DISABLE_INTERRUPTS
37 mfmsr r0 32 mfmsr r0
38 rldicl r0,r0,48,1 33 rldicl r0,r0,48,1
@@ -40,6 +35,26 @@
40 mtmsrd r0,1 35 mtmsrd r0,1
41.endm 36.endm
42 37
38#define VCPU_LOAD_NVGPRS(vcpu) \
39 ld r14, VCPU_GPR(r14)(vcpu); \
40 ld r15, VCPU_GPR(r15)(vcpu); \
41 ld r16, VCPU_GPR(r16)(vcpu); \
42 ld r17, VCPU_GPR(r17)(vcpu); \
43 ld r18, VCPU_GPR(r18)(vcpu); \
44 ld r19, VCPU_GPR(r19)(vcpu); \
45 ld r20, VCPU_GPR(r20)(vcpu); \
46 ld r21, VCPU_GPR(r21)(vcpu); \
47 ld r22, VCPU_GPR(r22)(vcpu); \
48 ld r23, VCPU_GPR(r23)(vcpu); \
49 ld r24, VCPU_GPR(r24)(vcpu); \
50 ld r25, VCPU_GPR(r25)(vcpu); \
51 ld r26, VCPU_GPR(r26)(vcpu); \
52 ld r27, VCPU_GPR(r27)(vcpu); \
53 ld r28, VCPU_GPR(r28)(vcpu); \
54 ld r29, VCPU_GPR(r29)(vcpu); \
55 ld r30, VCPU_GPR(r30)(vcpu); \
56 ld r31, VCPU_GPR(r31)(vcpu); \
57
43/***************************************************************************** 58/*****************************************************************************
44 * * 59 * *
45 * Guest entry / exit code that is in kernel module memory (highmem) * 60 * Guest entry / exit code that is in kernel module memory (highmem) *
@@ -67,61 +82,32 @@ kvm_start_entry:
67 SAVE_NVGPRS(r1) 82 SAVE_NVGPRS(r1)
68 83
69 /* Save LR */ 84 /* Save LR */
70 mflr r14 85 std r0, _LINK(r1)
71 std r14, _LINK(r1)
72
73/* XXX optimize non-volatile loading away */
74kvm_start_lightweight:
75 86
76 DISABLE_INTERRUPTS 87 /* Load non-volatile guest state from the vcpu */
88 VCPU_LOAD_NVGPRS(r4)
77 89
78 /* Save R1/R2 in the PACA */ 90 /* Save R1/R2 in the PACA */
79 std r1, PACAR1(r13) 91 std r1, PACA_KVM_HOST_R1(r13)
80 std r2, (PACA_EXMC+EX_SRR0)(r13) 92 std r2, PACA_KVM_HOST_R2(r13)
93
94 /* XXX swap in/out on load? */
81 ld r3, VCPU_HIGHMEM_HANDLER(r4) 95 ld r3, VCPU_HIGHMEM_HANDLER(r4)
82 std r3, PACASAVEDMSR(r13) 96 std r3, PACA_KVM_VMHANDLER(r13)
83 97
84 /* Load non-volatile guest state from the vcpu */ 98kvm_start_lightweight:
85 ld r14, VCPU_GPR(r14)(r4)
86 ld r15, VCPU_GPR(r15)(r4)
87 ld r16, VCPU_GPR(r16)(r4)
88 ld r17, VCPU_GPR(r17)(r4)
89 ld r18, VCPU_GPR(r18)(r4)
90 ld r19, VCPU_GPR(r19)(r4)
91 ld r20, VCPU_GPR(r20)(r4)
92 ld r21, VCPU_GPR(r21)(r4)
93 ld r22, VCPU_GPR(r22)(r4)
94 ld r23, VCPU_GPR(r23)(r4)
95 ld r24, VCPU_GPR(r24)(r4)
96 ld r25, VCPU_GPR(r25)(r4)
97 ld r26, VCPU_GPR(r26)(r4)
98 ld r27, VCPU_GPR(r27)(r4)
99 ld r28, VCPU_GPR(r28)(r4)
100 ld r29, VCPU_GPR(r29)(r4)
101 ld r30, VCPU_GPR(r30)(r4)
102 ld r31, VCPU_GPR(r31)(r4)
103 99
104 ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */ 100 ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */
105 ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ 101 ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
106 102
107 ld r3, VCPU_TRAMPOLINE_ENTER(r4) 103 /* Load some guest state in the respective registers */
108 mtsrr0 r3 104 ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */
109 105 /* will be swapped in by rmcall */
110 LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
111 mtsrr1 r3
112
113 /* Load guest state in the respective registers */
114 lwz r3, VCPU_CR(r4) /* r3 = vcpu->arch.cr */
115 stw r3, (PACA_EXMC + EX_CCR)(r13)
116
117 ld r3, VCPU_CTR(r4) /* r3 = vcpu->arch.ctr */
118 mtctr r3 /* CTR = r3 */
119 106
120 ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */ 107 ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */
121 mtlr r3 /* LR = r3 */ 108 mtlr r3 /* LR = r3 */
122 109
123 ld r3, VCPU_XER(r4) /* r3 = vcpu->arch.xer */ 110 DISABLE_INTERRUPTS
124 std r3, (PACA_EXMC + EX_R3)(r13)
125 111
126 /* Some guests may need to have dcbz set to 32 byte length. 112 /* Some guests may need to have dcbz set to 32 byte length.
127 * 113 *
@@ -141,36 +127,15 @@ kvm_start_lightweight:
141 mtspr SPRN_HID5,r3 127 mtspr SPRN_HID5,r3
142 128
143no_dcbz32_on: 129no_dcbz32_on:
144 /* Load guest GPRs */ 130
145 131 ld r6, VCPU_RMCALL(r4)
146 ld r3, VCPU_GPR(r9)(r4) 132 mtctr r6
147 std r3, (PACA_EXMC + EX_R9)(r13) 133
148 ld r3, VCPU_GPR(r10)(r4) 134 ld r3, VCPU_TRAMPOLINE_ENTER(r4)
149 std r3, (PACA_EXMC + EX_R10)(r13) 135 LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
150 ld r3, VCPU_GPR(r11)(r4)
151 std r3, (PACA_EXMC + EX_R11)(r13)
152 ld r3, VCPU_GPR(r12)(r4)
153 std r3, (PACA_EXMC + EX_R12)(r13)
154 ld r3, VCPU_GPR(r13)(r4)
155 std r3, (PACA_EXMC + EX_R13)(r13)
156
157 ld r0, VCPU_GPR(r0)(r4)
158 ld r1, VCPU_GPR(r1)(r4)
159 ld r2, VCPU_GPR(r2)(r4)
160 ld r3, VCPU_GPR(r3)(r4)
161 ld r5, VCPU_GPR(r5)(r4)
162 ld r6, VCPU_GPR(r6)(r4)
163 ld r7, VCPU_GPR(r7)(r4)
164 ld r8, VCPU_GPR(r8)(r4)
165 ld r4, VCPU_GPR(r4)(r4)
166
167 /* This sets the Magic value for the trampoline */
168
169 li r11, 1
170 stb r11, PACA_KVM_IN_GUEST(r13)
171 136
 172 	/* Jump to SLB patching handler and into our guest */ 137
173 RFI 138 bctr
174 139
175/* 140/*
176 * This is the handler in module memory. It gets jumped at from the 141 * This is the handler in module memory. It gets jumped at from the
@@ -184,125 +149,70 @@ kvmppc_handler_highmem:
184 /* 149 /*
185 * Register usage at this point: 150 * Register usage at this point:
186 * 151 *
187 * R00 = guest R13 152 * R0 = guest last inst
188 * R01 = host R1 153 * R1 = host R1
189 * R02 = host R2 154 * R2 = host R2
190 * R10 = guest PC 155 * R3 = guest PC
191 * R11 = guest MSR 156 * R4 = guest MSR
192 * R12 = exit handler id 157 * R5 = guest DAR
193 * R13 = PACA 158 * R6 = guest DSISR
194 * PACA.exmc.R9 = guest R1 159 * R13 = PACA
195 * PACA.exmc.R10 = guest R10 160 * PACA.KVM.* = guest *
196 * PACA.exmc.R11 = guest R11
197 * PACA.exmc.R12 = guest R12
198 * PACA.exmc.R13 = guest R2
199 * PACA.exmc.DAR = guest DAR
200 * PACA.exmc.DSISR = guest DSISR
201 * PACA.exmc.LR = guest instruction
202 * PACA.exmc.CCR = guest CR
203 * PACA.exmc.SRR0 = guest R0
204 * 161 *
205 */ 162 */
206 163
207 std r3, (PACA_EXMC+EX_R3)(r13) 164 /* R7 = vcpu */
165 ld r7, GPR4(r1)
208 166
209 /* save the exit id in R3 */ 167 /* Now save the guest state */
210 mr r3, r12
211 168
212 /* R12 = vcpu */ 169 stw r0, VCPU_LAST_INST(r7)
213 ld r12, GPR4(r1)
214 170
215 /* Now save the guest state */ 171 std r3, VCPU_PC(r7)
172 std r4, VCPU_SHADOW_SRR1(r7)
173 std r5, VCPU_FAULT_DEAR(r7)
174 std r6, VCPU_FAULT_DSISR(r7)
216 175
217 std r0, VCPU_GPR(r13)(r12) 176 ld r5, VCPU_HFLAGS(r7)
218 std r4, VCPU_GPR(r4)(r12)
219 std r5, VCPU_GPR(r5)(r12)
220 std r6, VCPU_GPR(r6)(r12)
221 std r7, VCPU_GPR(r7)(r12)
222 std r8, VCPU_GPR(r8)(r12)
223 std r9, VCPU_GPR(r9)(r12)
224
225 /* get registers from PACA */
226 mfpaca r5, r0, EX_SRR0, r12
227 mfpaca r5, r3, EX_R3, r12
228 mfpaca r5, r1, EX_R9, r12
229 mfpaca r5, r10, EX_R10, r12
230 mfpaca r5, r11, EX_R11, r12
231 mfpaca r5, r12, EX_R12, r12
232 mfpaca r5, r2, EX_R13, r12
233
234 lwz r5, (PACA_EXMC+EX_LR)(r13)
235 stw r5, VCPU_LAST_INST(r12)
236
237 lwz r5, (PACA_EXMC+EX_CCR)(r13)
238 stw r5, VCPU_CR(r12)
239
240 ld r5, VCPU_HFLAGS(r12)
241 rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ 177 rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
242 beq no_dcbz32_off 178 beq no_dcbz32_off
243 179
180 li r4, 0
244 mfspr r5,SPRN_HID5 181 mfspr r5,SPRN_HID5
245 rldimi r5,r5,6,56 182 rldimi r5,r4,6,56
246 mtspr SPRN_HID5,r5 183 mtspr SPRN_HID5,r5
247 184
248no_dcbz32_off: 185no_dcbz32_off:
249 186
250 /* XXX maybe skip on lightweight? */ 187 std r14, VCPU_GPR(r14)(r7)
251 std r14, VCPU_GPR(r14)(r12) 188 std r15, VCPU_GPR(r15)(r7)
252 std r15, VCPU_GPR(r15)(r12) 189 std r16, VCPU_GPR(r16)(r7)
253 std r16, VCPU_GPR(r16)(r12) 190 std r17, VCPU_GPR(r17)(r7)
254 std r17, VCPU_GPR(r17)(r12) 191 std r18, VCPU_GPR(r18)(r7)
255 std r18, VCPU_GPR(r18)(r12) 192 std r19, VCPU_GPR(r19)(r7)
256 std r19, VCPU_GPR(r19)(r12) 193 std r20, VCPU_GPR(r20)(r7)
257 std r20, VCPU_GPR(r20)(r12) 194 std r21, VCPU_GPR(r21)(r7)
258 std r21, VCPU_GPR(r21)(r12) 195 std r22, VCPU_GPR(r22)(r7)
259 std r22, VCPU_GPR(r22)(r12) 196 std r23, VCPU_GPR(r23)(r7)
260 std r23, VCPU_GPR(r23)(r12) 197 std r24, VCPU_GPR(r24)(r7)
261 std r24, VCPU_GPR(r24)(r12) 198 std r25, VCPU_GPR(r25)(r7)
262 std r25, VCPU_GPR(r25)(r12) 199 std r26, VCPU_GPR(r26)(r7)
263 std r26, VCPU_GPR(r26)(r12) 200 std r27, VCPU_GPR(r27)(r7)
264 std r27, VCPU_GPR(r27)(r12) 201 std r28, VCPU_GPR(r28)(r7)
265 std r28, VCPU_GPR(r28)(r12) 202 std r29, VCPU_GPR(r29)(r7)
266 std r29, VCPU_GPR(r29)(r12) 203 std r30, VCPU_GPR(r30)(r7)
267 std r30, VCPU_GPR(r30)(r12) 204 std r31, VCPU_GPR(r31)(r7)
268 std r31, VCPU_GPR(r31)(r12) 205
269 206 /* Save guest CTR */
270 /* Restore non-volatile host registers (r14 - r31) */
271 REST_NVGPRS(r1)
272
273 /* Save guest PC (R10) */
274 std r10, VCPU_PC(r12)
275
276 /* Save guest msr (R11) */
277 std r11, VCPU_SHADOW_MSR(r12)
278
279 /* Save guest CTR (in R12) */
280 mfctr r5 207 mfctr r5
281 std r5, VCPU_CTR(r12) 208 std r5, VCPU_CTR(r7)
282 209
283 /* Save guest LR */ 210 /* Save guest LR */
284 mflr r5 211 mflr r5
285 std r5, VCPU_LR(r12) 212 std r5, VCPU_LR(r7)
286
287 /* Save guest XER */
288 mfxer r5
289 std r5, VCPU_XER(r12)
290
291 /* Save guest DAR */
292 ld r5, (PACA_EXMC+EX_DAR)(r13)
293 std r5, VCPU_FAULT_DEAR(r12)
294
295 /* Save guest DSISR */
296 lwz r5, (PACA_EXMC+EX_DSISR)(r13)
297 std r5, VCPU_FAULT_DSISR(r12)
298 213
299 /* Restore host msr -> SRR1 */ 214 /* Restore host msr -> SRR1 */
300 ld r7, VCPU_HOST_MSR(r12) 215 ld r6, VCPU_HOST_MSR(r7)
301 mtsrr1 r7
302
303 /* Restore host IP -> SRR0 */
304 ld r6, VCPU_HOST_RETIP(r12)
305 mtsrr0 r6
306 216
307 /* 217 /*
308 * For some interrupts, we need to call the real Linux 218 * For some interrupts, we need to call the real Linux
@@ -314,13 +224,14 @@ no_dcbz32_off:
314 * r3 = address of interrupt handler (exit reason) 224 * r3 = address of interrupt handler (exit reason)
315 */ 225 */
316 226
317 cmpwi r3, BOOK3S_INTERRUPT_EXTERNAL 227 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
318 beq call_linux_handler 228 beq call_linux_handler
319 cmpwi r3, BOOK3S_INTERRUPT_DECREMENTER 229 cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
320 beq call_linux_handler 230 beq call_linux_handler
321 231
322 /* Back to Interruptable Mode! (goto kvm_return_point) */ 232 /* Back to EE=1 */
323 RFI 233 mtmsr r6
234 b kvm_return_point
324 235
325call_linux_handler: 236call_linux_handler:
326 237
@@ -333,16 +244,22 @@ call_linux_handler:
333 * interrupt handler! 244 * interrupt handler!
334 * 245 *
335 * R3 still contains the exit code, 246 * R3 still contains the exit code,
336 * R6 VCPU_HOST_RETIP and 247 * R5 VCPU_HOST_RETIP and
337 * R7 VCPU_HOST_MSR 248 * R6 VCPU_HOST_MSR
338 */ 249 */
339 250
340 mtlr r3 251 /* Restore host IP -> SRR0 */
252 ld r5, VCPU_HOST_RETIP(r7)
253
254 /* XXX Better move to a safe function?
255 * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
341 256
342 ld r5, VCPU_TRAMPOLINE_LOWMEM(r12) 257 mtlr r12
343 mtsrr0 r5 258
344 LOAD_REG_IMMEDIATE(r5, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 259 ld r4, VCPU_TRAMPOLINE_LOWMEM(r7)
345 mtsrr1 r5 260 mtsrr0 r4
261 LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
262 mtsrr1 r3
346 263
347 RFI 264 RFI
348 265
@@ -351,42 +268,51 @@ kvm_return_point:
351 268
352 /* Jump back to lightweight entry if we're supposed to */ 269 /* Jump back to lightweight entry if we're supposed to */
353 /* go back into the guest */ 270 /* go back into the guest */
354 mr r5, r3 271
272 /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
273 mr r5, r12
274
355 /* Restore r3 (kvm_run) and r4 (vcpu) */ 275 /* Restore r3 (kvm_run) and r4 (vcpu) */
356 REST_2GPRS(3, r1) 276 REST_2GPRS(3, r1)
357 bl KVMPPC_HANDLE_EXIT 277 bl KVMPPC_HANDLE_EXIT
358 278
359#if 0 /* XXX get lightweight exits back */ 279 /* If RESUME_GUEST, get back in the loop */
360 cmpwi r3, RESUME_GUEST 280 cmpwi r3, RESUME_GUEST
361 bne kvm_exit_heavyweight 281 beq kvm_loop_lightweight
362 282
363 /* put VCPU and KVM_RUN back into place and roll again! */ 283 cmpwi r3, RESUME_GUEST_NV
364 REST_2GPRS(3, r1) 284 beq kvm_loop_heavyweight
365 b kvm_start_lightweight
366 285
367kvm_exit_heavyweight: 286kvm_exit_loop:
368 /* Restore non-volatile host registers */
369 ld r14, _LINK(r1)
370 mtlr r14
371 REST_NVGPRS(r1)
372 287
373 addi r1, r1, SWITCH_FRAME_SIZE
374#else
375 ld r4, _LINK(r1) 288 ld r4, _LINK(r1)
376 mtlr r4 289 mtlr r4
377 290
378 cmpwi r3, RESUME_GUEST 291 /* Restore non-volatile host registers (r14 - r31) */
379 bne kvm_exit_heavyweight 292 REST_NVGPRS(r1)
293
294 addi r1, r1, SWITCH_FRAME_SIZE
295 blr
296
297kvm_loop_heavyweight:
298
299 ld r4, _LINK(r1)
300 std r4, (16 + SWITCH_FRAME_SIZE)(r1)
380 301
302 /* Load vcpu and cpu_run */
381 REST_2GPRS(3, r1) 303 REST_2GPRS(3, r1)
382 304
383 addi r1, r1, SWITCH_FRAME_SIZE 305 /* Load non-volatile guest state from the vcpu */
306 VCPU_LOAD_NVGPRS(r4)
384 307
385 b kvm_start_entry 308 /* Jump back into the beginning of this function */
309 b kvm_start_lightweight
386 310
387kvm_exit_heavyweight: 311kvm_loop_lightweight:
388 312
389 addi r1, r1, SWITCH_FRAME_SIZE 313 /* We'll need the vcpu pointer */
390#endif 314 REST_GPR(4, r1)
315
316 /* Jump back into the beginning of this function */
317 b kvm_start_lightweight
391 318
392 blr
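
The rewritten exit path above drops the RFI-based return and the old #if 0 block: the return value of KVMPPC_HANDLE_EXIT now decides directly whether to re-enter the guest, and the two loop labels differ only in whether the guest's non-volatile GPRs are reloaded from the vcpu. A rough C sketch of that decision follows; the helper name is invented for illustration and RESUME_GUEST/RESUME_GUEST_NV are the exit codes already tested above.

/* Hypothetical helper mirroring kvm_return_point; not kernel code. */
static int reenter_guest(int exit_code, int *reload_nvgprs)
{
	if (exit_code == RESUME_GUEST) {
		*reload_nvgprs = 0;	/* kvm_loop_lightweight */
		return 1;
	}
	if (exit_code == RESUME_GUEST_NV) {
		*reload_nvgprs = 1;	/* kvm_loop_heavyweight: VCPU_LOAD_NVGPRS */
		return 1;
	}
	return 0;			/* kvm_exit_loop: restore host state, blr */
}
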
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index e4beeb371a7..512dcff7755 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -54,7 +54,7 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
54 if (!vcpu_book3s->slb[i].valid) 54 if (!vcpu_book3s->slb[i].valid)
55 continue; 55 continue;
56 56
57 if (vcpu_book3s->slb[i].large) 57 if (vcpu_book3s->slb[i].tb)
58 cmp_esid = esid_1t; 58 cmp_esid = esid_1t;
59 59
60 if (vcpu_book3s->slb[i].esid == cmp_esid) 60 if (vcpu_book3s->slb[i].esid == cmp_esid)
@@ -65,9 +65,10 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
65 eaddr, esid, esid_1t); 65 eaddr, esid, esid_1t);
66 for (i = 0; i < vcpu_book3s->slb_nr; i++) { 66 for (i = 0; i < vcpu_book3s->slb_nr; i++) {
67 if (vcpu_book3s->slb[i].vsid) 67 if (vcpu_book3s->slb[i].vsid)
68 dprintk(" %d: %c%c %llx %llx\n", i, 68 dprintk(" %d: %c%c%c %llx %llx\n", i,
69 vcpu_book3s->slb[i].valid ? 'v' : ' ', 69 vcpu_book3s->slb[i].valid ? 'v' : ' ',
70 vcpu_book3s->slb[i].large ? 'l' : ' ', 70 vcpu_book3s->slb[i].large ? 'l' : ' ',
71 vcpu_book3s->slb[i].tb ? 't' : ' ',
71 vcpu_book3s->slb[i].esid, 72 vcpu_book3s->slb[i].esid,
72 vcpu_book3s->slb[i].vsid); 73 vcpu_book3s->slb[i].vsid);
73 } 74 }
@@ -84,7 +85,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
84 if (!slb) 85 if (!slb)
85 return 0; 86 return 0;
86 87
87 if (slb->large) 88 if (slb->tb)
88 return (((u64)eaddr >> 12) & 0xfffffff) | 89 return (((u64)eaddr >> 12) & 0xfffffff) |
89 (((u64)slb->vsid) << 28); 90 (((u64)slb->vsid) << 28);
90 91
@@ -309,7 +310,8 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
309 slbe = &vcpu_book3s->slb[slb_nr]; 310 slbe = &vcpu_book3s->slb[slb_nr];
310 311
311 slbe->large = (rs & SLB_VSID_L) ? 1 : 0; 312 slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
312 slbe->esid = slbe->large ? esid_1t : esid; 313 slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
314 slbe->esid = slbe->tb ? esid_1t : esid;
313 slbe->vsid = rs >> 12; 315 slbe->vsid = rs >> 12;
314 slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; 316 slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
315 slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; 317 slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;
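
The book3s_64_mmu.c change above records the segment-size bit (SLB_VSID_B_1T) in a new tb flag and uses it, rather than the large-page bit, to choose between the 256M and 1T ESID. A hedged C sketch of the slbmte decoding, with the struct trimmed to the fields visible in the diff (SLB_VSID_* and SLB_ESID_V are the kernel's existing mask macros):

/* Simplified stand-in for struct kvmppc_slb; illustration only. */
struct slb_sketch {
	int valid, large, tb, Ks;
	u64 esid, vsid;
};

static void slbmte_sketch(struct slb_sketch *slbe, u64 rs, u64 rb,
			  u64 esid, u64 esid_1t)
{
	slbe->large = (rs & SLB_VSID_L) ? 1 : 0;	/* page size    */
	slbe->tb    = (rs & SLB_VSID_B_1T) ? 1 : 0;	/* segment size */
	slbe->esid  = slbe->tb ? esid_1t : esid;	/* 1T vs 256M   */
	slbe->vsid  = rs >> 12;
	slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
	slbe->Ks    = (rs & SLB_VSID_KS) ? 1 : 0;
}
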
diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_64_rmhandlers.S
index fb7dd2e9ac8..c83c60ad96c 100644
--- a/arch/powerpc/kvm/book3s_64_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_64_rmhandlers.S
@@ -45,36 +45,25 @@ kvmppc_trampoline_\intno:
45 * To distinguish, we check a magic byte in the PACA 45 * To distinguish, we check a magic byte in the PACA
46 */ 46 */
47 mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */ 47 mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */
48 std r12, (PACA_EXMC + EX_R12)(r13) 48 std r12, PACA_KVM_SCRATCH0(r13)
49 mfcr r12 49 mfcr r12
50 stw r12, (PACA_EXMC + EX_CCR)(r13) 50 stw r12, PACA_KVM_SCRATCH1(r13)
51 lbz r12, PACA_KVM_IN_GUEST(r13) 51 lbz r12, PACA_KVM_IN_GUEST(r13)
52 cmpwi r12, 0 52 cmpwi r12, KVM_GUEST_MODE_NONE
53 bne ..kvmppc_handler_hasmagic_\intno 53 bne ..kvmppc_handler_hasmagic_\intno
54 /* No KVM guest? Then jump back to the Linux handler! */ 54 /* No KVM guest? Then jump back to the Linux handler! */
55 lwz r12, (PACA_EXMC + EX_CCR)(r13) 55 lwz r12, PACA_KVM_SCRATCH1(r13)
56 mtcr r12 56 mtcr r12
57 ld r12, (PACA_EXMC + EX_R12)(r13) 57 ld r12, PACA_KVM_SCRATCH0(r13)
58 mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ 58 mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
59 b kvmppc_resume_\intno /* Get back original handler */ 59 b kvmppc_resume_\intno /* Get back original handler */
60 60
61 /* Now we know we're handling a KVM guest */ 61 /* Now we know we're handling a KVM guest */
62..kvmppc_handler_hasmagic_\intno: 62..kvmppc_handler_hasmagic_\intno:
63 /* Unset guest state */
64 li r12, 0
65 stb r12, PACA_KVM_IN_GUEST(r13)
66 63
67 std r1, (PACA_EXMC+EX_R9)(r13) 64 /* Should we just skip the faulting instruction? */
68 std r10, (PACA_EXMC+EX_R10)(r13) 65 cmpwi r12, KVM_GUEST_MODE_SKIP
69 std r11, (PACA_EXMC+EX_R11)(r13) 66 beq kvmppc_handler_skip_ins
70 std r2, (PACA_EXMC+EX_R13)(r13)
71
72 mfsrr0 r10
73 mfsrr1 r11
74
75 /* Restore R1/R2 so we can handle faults */
76 ld r1, PACAR1(r13)
77 ld r2, (PACA_EXMC+EX_SRR0)(r13)
78 67
79 /* Let's store which interrupt we're handling */ 68 /* Let's store which interrupt we're handling */
80 li r12, \intno 69 li r12, \intno
@@ -102,23 +91,107 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
102INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX 91INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
103 92
104/* 93/*
94 * Bring us back to the faulting code, but skip the
95 * faulting instruction.
96 *
97 * This is a generic exit path from the interrupt
98 * trampolines above.
99 *
100 * Input Registers:
101 *
102 * R12 = free
103 * R13 = PACA
104 * PACA.KVM.SCRATCH0 = guest R12
105 * PACA.KVM.SCRATCH1 = guest CR
106 * SPRG_SCRATCH0 = guest R13
107 *
108 */
109kvmppc_handler_skip_ins:
110
111 /* Patch the IP to the next instruction */
112 mfsrr0 r12
113 addi r12, r12, 4
114 mtsrr0 r12
115
116 /* Clean up all state */
117 lwz r12, PACA_KVM_SCRATCH1(r13)
118 mtcr r12
119 ld r12, PACA_KVM_SCRATCH0(r13)
120 mfspr r13, SPRN_SPRG_SCRATCH0
121
122 /* And get back into the code */
123 RFI
124
125/*
105 * This trampoline brings us back to a real mode handler 126 * This trampoline brings us back to a real mode handler
106 * 127 *
107 * Input Registers: 128 * Input Registers:
108 * 129 *
109 * R6 = SRR0 130 * R5 = SRR0
110 * R7 = SRR1 131 * R6 = SRR1
111 * LR = real-mode IP 132 * LR = real-mode IP
112 * 133 *
113 */ 134 */
114.global kvmppc_handler_lowmem_trampoline 135.global kvmppc_handler_lowmem_trampoline
115kvmppc_handler_lowmem_trampoline: 136kvmppc_handler_lowmem_trampoline:
116 137
117 mtsrr0 r6 138 mtsrr0 r5
118 mtsrr1 r7 139 mtsrr1 r6
119 blr 140 blr
120kvmppc_handler_lowmem_trampoline_end: 141kvmppc_handler_lowmem_trampoline_end:
121 142
143/*
144 * Call a function in real mode
145 *
146 * Input Registers:
147 *
148 * R3 = function
149 * R4 = MSR
150 * R5 = CTR
151 *
152 */
153_GLOBAL(kvmppc_rmcall)
154 mtmsr r4 /* Disable relocation, so mtsrr
155 doesn't get interrupted */
156 mtctr r5
157 mtsrr0 r3
158 mtsrr1 r4
159 RFI
160
161/*
162 * Activate current's external feature (FPU/Altivec/VSX)
163 */
164#define define_load_up(what) \
165 \
166_GLOBAL(kvmppc_load_up_ ## what); \
167 subi r1, r1, INT_FRAME_SIZE; \
168 mflr r3; \
169 std r3, _LINK(r1); \
170 mfmsr r4; \
171 std r31, GPR3(r1); \
172 mr r31, r4; \
173 li r5, MSR_DR; \
174 oris r5, r5, MSR_EE@h; \
175 andc r4, r4, r5; \
176 mtmsr r4; \
177 \
178 bl .load_up_ ## what; \
179 \
180 mtmsr r31; \
181 ld r3, _LINK(r1); \
182 ld r31, GPR3(r1); \
183 addi r1, r1, INT_FRAME_SIZE; \
184 mtlr r3; \
185 blr
186
187define_load_up(fpu)
188#ifdef CONFIG_ALTIVEC
189define_load_up(altivec)
190#endif
191#ifdef CONFIG_VSX
192define_load_up(vsx)
193#endif
194
122.global kvmppc_trampoline_lowmem 195.global kvmppc_trampoline_lowmem
123kvmppc_trampoline_lowmem: 196kvmppc_trampoline_lowmem:
124 .long kvmppc_handler_lowmem_trampoline - _stext 197 .long kvmppc_handler_lowmem_trampoline - _stext
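
The new define_load_up() macro above gives KVM small wrappers (kvmppc_load_up_fpu and friends) around the kernel's load_up_* helpers, running them with external interrupts and data relocation masked. Each expansion behaves roughly like the C sketch below; mfmsr()/mtmsr() stand in for the instructions, only the FPU case is shown, and the real asm additionally spills r31 and LR to a stack frame.

/* Sketch of one define_load_up(fpu) expansion; not the literal asm. */
void kvmppc_load_up_fpu_sketch(void)
{
	unsigned long msr = mfmsr();		/* save the current MSR         */

	mtmsr(msr & ~(MSR_EE | MSR_DR));	/* no interrupts, no data reloc */
	load_up_fpu();				/* hand the FPU to current      */
	mtmsr(msr);				/* restore the original MSR     */
}
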
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index ecd237a03fd..35b76272218 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -31,7 +31,7 @@
31#define REBOLT_SLB_ENTRY(num) \ 31#define REBOLT_SLB_ENTRY(num) \
32 ld r10, SHADOW_SLB_ESID(num)(r11); \ 32 ld r10, SHADOW_SLB_ESID(num)(r11); \
33 cmpdi r10, 0; \ 33 cmpdi r10, 0; \
34 beq slb_exit_skip_1; \ 34 beq slb_exit_skip_ ## num; \
35 oris r10, r10, SLB_ESID_V@h; \ 35 oris r10, r10, SLB_ESID_V@h; \
36 ld r9, SHADOW_SLB_VSID(num)(r11); \ 36 ld r9, SHADOW_SLB_VSID(num)(r11); \
37 slbmte r9, r10; \ 37 slbmte r9, r10; \
@@ -51,23 +51,21 @@ kvmppc_handler_trampoline_enter:
51 * 51 *
52 * MSR = ~IR|DR 52 * MSR = ~IR|DR
53 * R13 = PACA 53 * R13 = PACA
54 * R1 = host R1
55 * R2 = host R2
54 * R9 = guest IP 56 * R9 = guest IP
55 * R10 = guest MSR 57 * R10 = guest MSR
56 * R11 = free 58 * all other GPRS = free
57 * R12 = free 59 * PACA[KVM_CR] = guest CR
58 * PACA[PACA_EXMC + EX_R9] = guest R9 60 * PACA[KVM_XER] = guest XER
59 * PACA[PACA_EXMC + EX_R10] = guest R10
60 * PACA[PACA_EXMC + EX_R11] = guest R11
61 * PACA[PACA_EXMC + EX_R12] = guest R12
62 * PACA[PACA_EXMC + EX_R13] = guest R13
63 * PACA[PACA_EXMC + EX_CCR] = guest CR
64 * PACA[PACA_EXMC + EX_R3] = guest XER
65 */ 61 */
66 62
67 mtsrr0 r9 63 mtsrr0 r9
68 mtsrr1 r10 64 mtsrr1 r10
69 65
70 mtspr SPRN_SPRG_SCRATCH0, r0 66 /* Activate guest mode, so faults get handled by KVM */
67 li r11, KVM_GUEST_MODE_GUEST
68 stb r11, PACA_KVM_IN_GUEST(r13)
71 69
72 /* Remove LPAR shadow entries */ 70 /* Remove LPAR shadow entries */
73 71
@@ -131,20 +129,27 @@ slb_do_enter:
131 129
132 /* Enter guest */ 130 /* Enter guest */
133 131
134 mfspr r0, SPRN_SPRG_SCRATCH0 132 ld r0, (PACA_KVM_R0)(r13)
135 133 ld r1, (PACA_KVM_R1)(r13)
136 ld r9, (PACA_EXMC+EX_R9)(r13) 134 ld r2, (PACA_KVM_R2)(r13)
137 ld r10, (PACA_EXMC+EX_R10)(r13) 135 ld r3, (PACA_KVM_R3)(r13)
138 ld r12, (PACA_EXMC+EX_R12)(r13) 136 ld r4, (PACA_KVM_R4)(r13)
139 137 ld r5, (PACA_KVM_R5)(r13)
140 lwz r11, (PACA_EXMC+EX_CCR)(r13) 138 ld r6, (PACA_KVM_R6)(r13)
139 ld r7, (PACA_KVM_R7)(r13)
140 ld r8, (PACA_KVM_R8)(r13)
141 ld r9, (PACA_KVM_R9)(r13)
142 ld r10, (PACA_KVM_R10)(r13)
143 ld r12, (PACA_KVM_R12)(r13)
144
145 lwz r11, (PACA_KVM_CR)(r13)
141 mtcr r11 146 mtcr r11
142 147
143 ld r11, (PACA_EXMC+EX_R3)(r13) 148 ld r11, (PACA_KVM_XER)(r13)
144 mtxer r11 149 mtxer r11
145 150
146 ld r11, (PACA_EXMC+EX_R11)(r13) 151 ld r11, (PACA_KVM_R11)(r13)
147 ld r13, (PACA_EXMC+EX_R13)(r13) 152 ld r13, (PACA_KVM_R13)(r13)
148 153
149 RFI 154 RFI
150kvmppc_handler_trampoline_enter_end: 155kvmppc_handler_trampoline_enter_end:
@@ -162,28 +167,54 @@ kvmppc_handler_trampoline_exit:
162 167
163 /* Register usage at this point: 168 /* Register usage at this point:
164 * 169 *
165 * SPRG_SCRATCH0 = guest R13 170 * SPRG_SCRATCH0 = guest R13
166 * R01 = host R1 171 * R12 = exit handler id
167 * R02 = host R2 172 * R13 = PACA
168 * R10 = guest PC 173 * PACA.KVM.SCRATCH0 = guest R12
169 * R11 = guest MSR 174 * PACA.KVM.SCRATCH1 = guest CR
170 * R12 = exit handler id
171 * R13 = PACA
172 * PACA.exmc.CCR = guest CR
173 * PACA.exmc.R9 = guest R1
174 * PACA.exmc.R10 = guest R10
175 * PACA.exmc.R11 = guest R11
176 * PACA.exmc.R12 = guest R12
177 * PACA.exmc.R13 = guest R2
178 * 175 *
179 */ 176 */
180 177
181 /* Save registers */ 178 /* Save registers */
182 179
183 std r0, (PACA_EXMC+EX_SRR0)(r13) 180 std r0, PACA_KVM_R0(r13)
184 std r9, (PACA_EXMC+EX_R3)(r13) 181 std r1, PACA_KVM_R1(r13)
185 std r10, (PACA_EXMC+EX_LR)(r13) 182 std r2, PACA_KVM_R2(r13)
186 std r11, (PACA_EXMC+EX_DAR)(r13) 183 std r3, PACA_KVM_R3(r13)
184 std r4, PACA_KVM_R4(r13)
185 std r5, PACA_KVM_R5(r13)
186 std r6, PACA_KVM_R6(r13)
187 std r7, PACA_KVM_R7(r13)
188 std r8, PACA_KVM_R8(r13)
189 std r9, PACA_KVM_R9(r13)
190 std r10, PACA_KVM_R10(r13)
191 std r11, PACA_KVM_R11(r13)
192
193 /* Restore R1/R2 so we can handle faults */
194 ld r1, PACA_KVM_HOST_R1(r13)
195 ld r2, PACA_KVM_HOST_R2(r13)
196
197 /* Save guest PC and MSR in GPRs */
198 mfsrr0 r3
199 mfsrr1 r4
200
201 /* Get scratch'ed off registers */
202 mfspr r9, SPRN_SPRG_SCRATCH0
203 std r9, PACA_KVM_R13(r13)
204
205 ld r8, PACA_KVM_SCRATCH0(r13)
206 std r8, PACA_KVM_R12(r13)
207
208 lwz r7, PACA_KVM_SCRATCH1(r13)
209 stw r7, PACA_KVM_CR(r13)
210
211 /* Save more register state */
212
213 mfxer r6
214 stw r6, PACA_KVM_XER(r13)
215
216 mfdar r5
217 mfdsisr r6
187 218
188 /* 219 /*
189 * In order for us to easily get the last instruction, 220 * In order for us to easily get the last instruction,
@@ -202,17 +233,28 @@ kvmppc_handler_trampoline_exit:
202 233
203ld_last_inst: 234ld_last_inst:
204 /* Save off the guest instruction we're at */ 235 /* Save off the guest instruction we're at */
236
237 /* Set guest mode to 'jump over instruction' so if lwz faults
238 * we'll just continue at the next IP. */
239 li r9, KVM_GUEST_MODE_SKIP
240 stb r9, PACA_KVM_IN_GUEST(r13)
241
205 /* 1) enable paging for data */ 242 /* 1) enable paging for data */
206 mfmsr r9 243 mfmsr r9
207 ori r11, r9, MSR_DR /* Enable paging for data */ 244 ori r11, r9, MSR_DR /* Enable paging for data */
208 mtmsr r11 245 mtmsr r11
209 /* 2) fetch the instruction */ 246 /* 2) fetch the instruction */
210 lwz r0, 0(r10) 247 li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */
248 lwz r0, 0(r3)
211 /* 3) disable paging again */ 249 /* 3) disable paging again */
212 mtmsr r9 250 mtmsr r9
213 251
214no_ld_last_inst: 252no_ld_last_inst:
215 253
254 /* Unset guest mode */
255 li r9, KVM_GUEST_MODE_NONE
256 stb r9, PACA_KVM_IN_GUEST(r13)
257
216 /* Restore bolted entries from the shadow and fix it along the way */ 258 /* Restore bolted entries from the shadow and fix it along the way */
217 259
218 /* We don't store anything in entry 0, so we don't need to take care of it */ 260 /* We don't store anything in entry 0, so we don't need to take care of it */
@@ -233,29 +275,27 @@ no_ld_last_inst:
233 275
234slb_do_exit: 276slb_do_exit:
235 277
236 /* Restore registers */ 278 /* Register usage at this point:
237 279 *
238 ld r11, (PACA_EXMC+EX_DAR)(r13) 280 * R0 = guest last inst
239 ld r10, (PACA_EXMC+EX_LR)(r13) 281 * R1 = host R1
240 ld r9, (PACA_EXMC+EX_R3)(r13) 282 * R2 = host R2
241 283 * R3 = guest PC
242 /* Save last inst */ 284 * R4 = guest MSR
243 stw r0, (PACA_EXMC+EX_LR)(r13) 285 * R5 = guest DAR
244 286 * R6 = guest DSISR
245 /* Save DAR and DSISR before going to paged mode */ 287 * R12 = exit handler id
246 mfdar r0 288 * R13 = PACA
247 std r0, (PACA_EXMC+EX_DAR)(r13) 289 * PACA.KVM.* = guest *
248 mfdsisr r0 290 *
249 stw r0, (PACA_EXMC+EX_DSISR)(r13) 291 */
250 292
251 /* RFI into the highmem handler */ 293 /* RFI into the highmem handler */
252 mfmsr r0 294 mfmsr r7
253 ori r0, r0, MSR_IR|MSR_DR|MSR_RI /* Enable paging */ 295 ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */
254 mtsrr1 r0 296 mtsrr1 r7
255 ld r0, PACASAVEDMSR(r13) /* Highmem handler address */ 297 ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */
256 mtsrr0 r0 298 mtsrr0 r8
257
258 mfspr r0, SPRN_SPRG_SCRATCH0
259 299
260 RFI 300 RFI
261kvmppc_handler_trampoline_exit_end: 301kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 06f5a9ecc42..4d686cc6b26 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -69,10 +69,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
69 69
70 for (i = 0; i < 32; i += 4) { 70 for (i = 0; i < 32; i += 4) {
71 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, 71 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
72 vcpu->arch.gpr[i], 72 kvmppc_get_gpr(vcpu, i),
73 vcpu->arch.gpr[i+1], 73 kvmppc_get_gpr(vcpu, i+1),
74 vcpu->arch.gpr[i+2], 74 kvmppc_get_gpr(vcpu, i+2),
75 vcpu->arch.gpr[i+3]); 75 kvmppc_get_gpr(vcpu, i+3));
76 } 76 }
77} 77}
78 78
@@ -82,8 +82,32 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
82 set_bit(priority, &vcpu->arch.pending_exceptions); 82 set_bit(priority, &vcpu->arch.pending_exceptions);
83} 83}
84 84
85void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) 85static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
86 ulong dear_flags, ulong esr_flags)
86{ 87{
88 vcpu->arch.queued_dear = dear_flags;
89 vcpu->arch.queued_esr = esr_flags;
90 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
91}
92
93static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
94 ulong dear_flags, ulong esr_flags)
95{
96 vcpu->arch.queued_dear = dear_flags;
97 vcpu->arch.queued_esr = esr_flags;
98 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
99}
100
101static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
102 ulong esr_flags)
103{
104 vcpu->arch.queued_esr = esr_flags;
105 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
106}
107
108void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
109{
110 vcpu->arch.queued_esr = esr_flags;
87 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); 111 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
88} 112}
89 113
@@ -97,6 +121,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
97 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 121 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
98} 122}
99 123
124void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
125{
126 clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
127}
128
100void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 129void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
101 struct kvm_interrupt *irq) 130 struct kvm_interrupt *irq)
102{ 131{
@@ -109,14 +138,19 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
109{ 138{
110 int allowed = 0; 139 int allowed = 0;
111 ulong msr_mask; 140 ulong msr_mask;
141 bool update_esr = false, update_dear = false;
112 142
113 switch (priority) { 143 switch (priority) {
114 case BOOKE_IRQPRIO_PROGRAM:
115 case BOOKE_IRQPRIO_DTLB_MISS: 144 case BOOKE_IRQPRIO_DTLB_MISS:
116 case BOOKE_IRQPRIO_ITLB_MISS:
117 case BOOKE_IRQPRIO_SYSCALL:
118 case BOOKE_IRQPRIO_DATA_STORAGE: 145 case BOOKE_IRQPRIO_DATA_STORAGE:
146 update_dear = true;
147 /* fall through */
119 case BOOKE_IRQPRIO_INST_STORAGE: 148 case BOOKE_IRQPRIO_INST_STORAGE:
149 case BOOKE_IRQPRIO_PROGRAM:
150 update_esr = true;
151 /* fall through */
152 case BOOKE_IRQPRIO_ITLB_MISS:
153 case BOOKE_IRQPRIO_SYSCALL:
120 case BOOKE_IRQPRIO_FP_UNAVAIL: 154 case BOOKE_IRQPRIO_FP_UNAVAIL:
121 case BOOKE_IRQPRIO_SPE_UNAVAIL: 155 case BOOKE_IRQPRIO_SPE_UNAVAIL:
122 case BOOKE_IRQPRIO_SPE_FP_DATA: 156 case BOOKE_IRQPRIO_SPE_FP_DATA:
@@ -151,6 +185,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
151 vcpu->arch.srr0 = vcpu->arch.pc; 185 vcpu->arch.srr0 = vcpu->arch.pc;
152 vcpu->arch.srr1 = vcpu->arch.msr; 186 vcpu->arch.srr1 = vcpu->arch.msr;
153 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; 187 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
188 if (update_esr == true)
189 vcpu->arch.esr = vcpu->arch.queued_esr;
190 if (update_dear == true)
191 vcpu->arch.dear = vcpu->arch.queued_dear;
154 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); 192 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
155 193
156 clear_bit(priority, &vcpu->arch.pending_exceptions); 194 clear_bit(priority, &vcpu->arch.pending_exceptions);
@@ -223,8 +261,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
223 if (vcpu->arch.msr & MSR_PR) { 261 if (vcpu->arch.msr & MSR_PR) {
224 /* Program traps generated by user-level software must be handled 262 /* Program traps generated by user-level software must be handled
225 * by the guest kernel. */ 263 * by the guest kernel. */
226 vcpu->arch.esr = vcpu->arch.fault_esr; 264 kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
227 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
228 r = RESUME_GUEST; 265 r = RESUME_GUEST;
229 kvmppc_account_exit(vcpu, USR_PR_INST); 266 kvmppc_account_exit(vcpu, USR_PR_INST);
230 break; 267 break;
@@ -280,16 +317,14 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
280 break; 317 break;
281 318
282 case BOOKE_INTERRUPT_DATA_STORAGE: 319 case BOOKE_INTERRUPT_DATA_STORAGE:
283 vcpu->arch.dear = vcpu->arch.fault_dear; 320 kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
284 vcpu->arch.esr = vcpu->arch.fault_esr; 321 vcpu->arch.fault_esr);
285 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
286 kvmppc_account_exit(vcpu, DSI_EXITS); 322 kvmppc_account_exit(vcpu, DSI_EXITS);
287 r = RESUME_GUEST; 323 r = RESUME_GUEST;
288 break; 324 break;
289 325
290 case BOOKE_INTERRUPT_INST_STORAGE: 326 case BOOKE_INTERRUPT_INST_STORAGE:
291 vcpu->arch.esr = vcpu->arch.fault_esr; 327 kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
292 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
293 kvmppc_account_exit(vcpu, ISI_EXITS); 328 kvmppc_account_exit(vcpu, ISI_EXITS);
294 r = RESUME_GUEST; 329 r = RESUME_GUEST;
295 break; 330 break;
@@ -310,9 +345,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
310 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); 345 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
311 if (gtlb_index < 0) { 346 if (gtlb_index < 0) {
312 /* The guest didn't have a mapping for it. */ 347 /* The guest didn't have a mapping for it. */
313 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); 348 kvmppc_core_queue_dtlb_miss(vcpu,
314 vcpu->arch.dear = vcpu->arch.fault_dear; 349 vcpu->arch.fault_dear,
315 vcpu->arch.esr = vcpu->arch.fault_esr; 350 vcpu->arch.fault_esr);
316 kvmppc_mmu_dtlb_miss(vcpu); 351 kvmppc_mmu_dtlb_miss(vcpu);
317 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); 352 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
318 r = RESUME_GUEST; 353 r = RESUME_GUEST;
@@ -426,7 +461,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
426{ 461{
427 vcpu->arch.pc = 0; 462 vcpu->arch.pc = 0;
428 vcpu->arch.msr = 0; 463 vcpu->arch.msr = 0;
429 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ 464 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
430 465
431 vcpu->arch.shadow_pid = 1; 466 vcpu->arch.shadow_pid = 1;
432 467
@@ -444,10 +479,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
444 int i; 479 int i;
445 480
446 regs->pc = vcpu->arch.pc; 481 regs->pc = vcpu->arch.pc;
447 regs->cr = vcpu->arch.cr; 482 regs->cr = kvmppc_get_cr(vcpu);
448 regs->ctr = vcpu->arch.ctr; 483 regs->ctr = vcpu->arch.ctr;
449 regs->lr = vcpu->arch.lr; 484 regs->lr = vcpu->arch.lr;
450 regs->xer = vcpu->arch.xer; 485 regs->xer = kvmppc_get_xer(vcpu);
451 regs->msr = vcpu->arch.msr; 486 regs->msr = vcpu->arch.msr;
452 regs->srr0 = vcpu->arch.srr0; 487 regs->srr0 = vcpu->arch.srr0;
453 regs->srr1 = vcpu->arch.srr1; 488 regs->srr1 = vcpu->arch.srr1;
@@ -461,7 +496,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
461 regs->sprg7 = vcpu->arch.sprg6; 496 regs->sprg7 = vcpu->arch.sprg6;
462 497
463 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 498 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
464 regs->gpr[i] = vcpu->arch.gpr[i]; 499 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
465 500
466 return 0; 501 return 0;
467} 502}
@@ -471,10 +506,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
471 int i; 506 int i;
472 507
473 vcpu->arch.pc = regs->pc; 508 vcpu->arch.pc = regs->pc;
474 vcpu->arch.cr = regs->cr; 509 kvmppc_set_cr(vcpu, regs->cr);
475 vcpu->arch.ctr = regs->ctr; 510 vcpu->arch.ctr = regs->ctr;
476 vcpu->arch.lr = regs->lr; 511 vcpu->arch.lr = regs->lr;
477 vcpu->arch.xer = regs->xer; 512 kvmppc_set_xer(vcpu, regs->xer);
478 kvmppc_set_msr(vcpu, regs->msr); 513 kvmppc_set_msr(vcpu, regs->msr);
479 vcpu->arch.srr0 = regs->srr0; 514 vcpu->arch.srr0 = regs->srr0;
480 vcpu->arch.srr1 = regs->srr1; 515 vcpu->arch.srr1 = regs->srr1;
@@ -486,8 +521,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
486 vcpu->arch.sprg6 = regs->sprg5; 521 vcpu->arch.sprg6 = regs->sprg5;
487 vcpu->arch.sprg7 = regs->sprg6; 522 vcpu->arch.sprg7 = regs->sprg6;
488 523
489 for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) 524 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
490 vcpu->arch.gpr[i] = regs->gpr[i]; 525 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
491 526
492 return 0; 527 return 0;
493} 528}
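
Throughout these booke.c hunks (and the rest of the series) direct vcpu->arch.gpr[] accesses give way to kvmppc_get_gpr()/kvmppc_set_gpr(), plus kvmppc_get_cr()/kvmppc_set_cr() and kvmppc_get_xer()/kvmppc_set_xer() for CR and XER. Minimal accessors with the call signatures used here would look like the sketch below; the bodies are an assumption, the real definitions are introduced elsewhere in the series.

static inline unsigned long kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
	return vcpu->arch.gpr[num];	/* assumed: plain array read  */
}

static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num,
				  unsigned long val)
{
	vcpu->arch.gpr[num] = val;	/* assumed: plain array write */
}
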
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index aebc65e93f4..cbc790ee192 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -62,20 +62,20 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
62 62
63 case OP_31_XOP_MFMSR: 63 case OP_31_XOP_MFMSR:
64 rt = get_rt(inst); 64 rt = get_rt(inst);
65 vcpu->arch.gpr[rt] = vcpu->arch.msr; 65 kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr);
66 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); 66 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
67 break; 67 break;
68 68
69 case OP_31_XOP_MTMSR: 69 case OP_31_XOP_MTMSR:
70 rs = get_rs(inst); 70 rs = get_rs(inst);
71 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); 71 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
72 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); 72 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
73 break; 73 break;
74 74
75 case OP_31_XOP_WRTEE: 75 case OP_31_XOP_WRTEE:
76 rs = get_rs(inst); 76 rs = get_rs(inst);
77 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) 77 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
78 | (vcpu->arch.gpr[rs] & MSR_EE); 78 | (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
79 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); 79 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
80 break; 80 break;
81 81
@@ -101,22 +101,23 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
101int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 101int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
102{ 102{
103 int emulated = EMULATE_DONE; 103 int emulated = EMULATE_DONE;
104 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
104 105
105 switch (sprn) { 106 switch (sprn) {
106 case SPRN_DEAR: 107 case SPRN_DEAR:
107 vcpu->arch.dear = vcpu->arch.gpr[rs]; break; 108 vcpu->arch.dear = spr_val; break;
108 case SPRN_ESR: 109 case SPRN_ESR:
109 vcpu->arch.esr = vcpu->arch.gpr[rs]; break; 110 vcpu->arch.esr = spr_val; break;
110 case SPRN_DBCR0: 111 case SPRN_DBCR0:
111 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; 112 vcpu->arch.dbcr0 = spr_val; break;
112 case SPRN_DBCR1: 113 case SPRN_DBCR1:
113 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; 114 vcpu->arch.dbcr1 = spr_val; break;
114 case SPRN_DBSR: 115 case SPRN_DBSR:
115 vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break; 116 vcpu->arch.dbsr &= ~spr_val; break;
116 case SPRN_TSR: 117 case SPRN_TSR:
117 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; 118 vcpu->arch.tsr &= ~spr_val; break;
118 case SPRN_TCR: 119 case SPRN_TCR:
119 vcpu->arch.tcr = vcpu->arch.gpr[rs]; 120 vcpu->arch.tcr = spr_val;
120 kvmppc_emulate_dec(vcpu); 121 kvmppc_emulate_dec(vcpu);
121 break; 122 break;
122 123
@@ -124,64 +125,64 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
124 * loaded into the real SPRGs when resuming the 125 * loaded into the real SPRGs when resuming the
125 * guest. */ 126 * guest. */
126 case SPRN_SPRG4: 127 case SPRN_SPRG4:
127 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; 128 vcpu->arch.sprg4 = spr_val; break;
128 case SPRN_SPRG5: 129 case SPRN_SPRG5:
129 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; 130 vcpu->arch.sprg5 = spr_val; break;
130 case SPRN_SPRG6: 131 case SPRN_SPRG6:
131 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; 132 vcpu->arch.sprg6 = spr_val; break;
132 case SPRN_SPRG7: 133 case SPRN_SPRG7:
133 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; 134 vcpu->arch.sprg7 = spr_val; break;
134 135
135 case SPRN_IVPR: 136 case SPRN_IVPR:
136 vcpu->arch.ivpr = vcpu->arch.gpr[rs]; 137 vcpu->arch.ivpr = spr_val;
137 break; 138 break;
138 case SPRN_IVOR0: 139 case SPRN_IVOR0:
139 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; 140 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
140 break; 141 break;
141 case SPRN_IVOR1: 142 case SPRN_IVOR1:
142 vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; 143 vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val;
143 break; 144 break;
144 case SPRN_IVOR2: 145 case SPRN_IVOR2:
145 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; 146 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
146 break; 147 break;
147 case SPRN_IVOR3: 148 case SPRN_IVOR3:
148 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; 149 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
149 break; 150 break;
150 case SPRN_IVOR4: 151 case SPRN_IVOR4:
151 vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; 152 vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val;
152 break; 153 break;
153 case SPRN_IVOR5: 154 case SPRN_IVOR5:
154 vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; 155 vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val;
155 break; 156 break;
156 case SPRN_IVOR6: 157 case SPRN_IVOR6:
157 vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; 158 vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val;
158 break; 159 break;
159 case SPRN_IVOR7: 160 case SPRN_IVOR7:
160 vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; 161 vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val;
161 break; 162 break;
162 case SPRN_IVOR8: 163 case SPRN_IVOR8:
163 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; 164 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
164 break; 165 break;
165 case SPRN_IVOR9: 166 case SPRN_IVOR9:
166 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; 167 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
167 break; 168 break;
168 case SPRN_IVOR10: 169 case SPRN_IVOR10:
169 vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; 170 vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val;
170 break; 171 break;
171 case SPRN_IVOR11: 172 case SPRN_IVOR11:
172 vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; 173 vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val;
173 break; 174 break;
174 case SPRN_IVOR12: 175 case SPRN_IVOR12:
175 vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; 176 vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val;
176 break; 177 break;
177 case SPRN_IVOR13: 178 case SPRN_IVOR13:
178 vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; 179 vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val;
179 break; 180 break;
180 case SPRN_IVOR14: 181 case SPRN_IVOR14:
181 vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; 182 vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val;
182 break; 183 break;
183 case SPRN_IVOR15: 184 case SPRN_IVOR15:
184 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; 185 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
185 break; 186 break;
186 187
187 default: 188 default:
@@ -197,65 +198,65 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
197 198
198 switch (sprn) { 199 switch (sprn) {
199 case SPRN_IVPR: 200 case SPRN_IVPR:
200 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; 201 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
201 case SPRN_DEAR: 202 case SPRN_DEAR:
202 vcpu->arch.gpr[rt] = vcpu->arch.dear; break; 203 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break;
203 case SPRN_ESR: 204 case SPRN_ESR:
204 vcpu->arch.gpr[rt] = vcpu->arch.esr; break; 205 kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
205 case SPRN_DBCR0: 206 case SPRN_DBCR0:
206 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; 207 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
207 case SPRN_DBCR1: 208 case SPRN_DBCR1:
208 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; 209 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
209 case SPRN_DBSR: 210 case SPRN_DBSR:
210 vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break; 211 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
211 212
212 case SPRN_IVOR0: 213 case SPRN_IVOR0:
213 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; 214 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
214 break; 215 break;
215 case SPRN_IVOR1: 216 case SPRN_IVOR1:
216 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; 217 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
217 break; 218 break;
218 case SPRN_IVOR2: 219 case SPRN_IVOR2:
219 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; 220 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
220 break; 221 break;
221 case SPRN_IVOR3: 222 case SPRN_IVOR3:
222 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; 223 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
223 break; 224 break;
224 case SPRN_IVOR4: 225 case SPRN_IVOR4:
225 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; 226 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
226 break; 227 break;
227 case SPRN_IVOR5: 228 case SPRN_IVOR5:
228 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; 229 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
229 break; 230 break;
230 case SPRN_IVOR6: 231 case SPRN_IVOR6:
231 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; 232 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
232 break; 233 break;
233 case SPRN_IVOR7: 234 case SPRN_IVOR7:
234 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; 235 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
235 break; 236 break;
236 case SPRN_IVOR8: 237 case SPRN_IVOR8:
237 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; 238 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
238 break; 239 break;
239 case SPRN_IVOR9: 240 case SPRN_IVOR9:
240 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; 241 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
241 break; 242 break;
242 case SPRN_IVOR10: 243 case SPRN_IVOR10:
243 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; 244 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
244 break; 245 break;
245 case SPRN_IVOR11: 246 case SPRN_IVOR11:
246 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; 247 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
247 break; 248 break;
248 case SPRN_IVOR12: 249 case SPRN_IVOR12:
249 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; 250 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
250 break; 251 break;
251 case SPRN_IVOR13: 252 case SPRN_IVOR13:
252 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; 253 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
253 break; 254 break;
254 case SPRN_IVOR14: 255 case SPRN_IVOR14:
255 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; 256 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
256 break; 257 break;
257 case SPRN_IVOR15: 258 case SPRN_IVOR15:
258 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 259 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
259 break; 260 break;
260 261
261 default: 262 default:
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 64949eef43f..efa1198940a 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -60,6 +60,12 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
60 60
61 kvmppc_e500_tlb_setup(vcpu_e500); 61 kvmppc_e500_tlb_setup(vcpu_e500);
62 62
63 /* Registers init */
64 vcpu->arch.pvr = mfspr(SPRN_PVR);
65
66 /* Since booke kvm only support one core, update all vcpus' PIR to 0 */
67 vcpu->vcpu_id = 0;
68
63 return 0; 69 return 0;
64} 70}
65 71
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index be95b8d8e3b..8e3edfbc963 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -74,54 +74,59 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
74{ 74{
75 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 75 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
76 int emulated = EMULATE_DONE; 76 int emulated = EMULATE_DONE;
77 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
77 78
78 switch (sprn) { 79 switch (sprn) {
79 case SPRN_PID: 80 case SPRN_PID:
80 vcpu_e500->pid[0] = vcpu->arch.shadow_pid = 81 vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
81 vcpu->arch.pid = vcpu->arch.gpr[rs]; 82 vcpu->arch.pid = spr_val;
82 break; 83 break;
83 case SPRN_PID1: 84 case SPRN_PID1:
84 vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break; 85 vcpu_e500->pid[1] = spr_val; break;
85 case SPRN_PID2: 86 case SPRN_PID2:
86 vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break; 87 vcpu_e500->pid[2] = spr_val; break;
87 case SPRN_MAS0: 88 case SPRN_MAS0:
88 vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break; 89 vcpu_e500->mas0 = spr_val; break;
89 case SPRN_MAS1: 90 case SPRN_MAS1:
90 vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break; 91 vcpu_e500->mas1 = spr_val; break;
91 case SPRN_MAS2: 92 case SPRN_MAS2:
92 vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break; 93 vcpu_e500->mas2 = spr_val; break;
93 case SPRN_MAS3: 94 case SPRN_MAS3:
94 vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break; 95 vcpu_e500->mas3 = spr_val; break;
95 case SPRN_MAS4: 96 case SPRN_MAS4:
96 vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break; 97 vcpu_e500->mas4 = spr_val; break;
97 case SPRN_MAS6: 98 case SPRN_MAS6:
98 vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break; 99 vcpu_e500->mas6 = spr_val; break;
99 case SPRN_MAS7: 100 case SPRN_MAS7:
100 vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break; 101 vcpu_e500->mas7 = spr_val; break;
102 case SPRN_L1CSR0:
103 vcpu_e500->l1csr0 = spr_val;
104 vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
105 break;
101 case SPRN_L1CSR1: 106 case SPRN_L1CSR1:
102 vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break; 107 vcpu_e500->l1csr1 = spr_val; break;
103 case SPRN_HID0: 108 case SPRN_HID0:
104 vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break; 109 vcpu_e500->hid0 = spr_val; break;
105 case SPRN_HID1: 110 case SPRN_HID1:
106 vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break; 111 vcpu_e500->hid1 = spr_val; break;
107 112
108 case SPRN_MMUCSR0: 113 case SPRN_MMUCSR0:
109 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, 114 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
110 vcpu->arch.gpr[rs]); 115 spr_val);
111 break; 116 break;
112 117
113 /* extra exceptions */ 118 /* extra exceptions */
114 case SPRN_IVOR32: 119 case SPRN_IVOR32:
115 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs]; 120 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
116 break; 121 break;
117 case SPRN_IVOR33: 122 case SPRN_IVOR33:
118 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs]; 123 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val;
119 break; 124 break;
120 case SPRN_IVOR34: 125 case SPRN_IVOR34:
121 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs]; 126 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
122 break; 127 break;
123 case SPRN_IVOR35: 128 case SPRN_IVOR35:
124 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs]; 129 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
125 break; 130 break;
126 131
127 default: 132 default:
@@ -138,63 +143,57 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
138 143
139 switch (sprn) { 144 switch (sprn) {
140 case SPRN_PID: 145 case SPRN_PID:
141 vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break; 146 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
142 case SPRN_PID1: 147 case SPRN_PID1:
143 vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break; 148 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
144 case SPRN_PID2: 149 case SPRN_PID2:
145 vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break; 150 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
146 case SPRN_MAS0: 151 case SPRN_MAS0:
147 vcpu->arch.gpr[rt] = vcpu_e500->mas0; break; 152 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
148 case SPRN_MAS1: 153 case SPRN_MAS1:
149 vcpu->arch.gpr[rt] = vcpu_e500->mas1; break; 154 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
150 case SPRN_MAS2: 155 case SPRN_MAS2:
151 vcpu->arch.gpr[rt] = vcpu_e500->mas2; break; 156 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
152 case SPRN_MAS3: 157 case SPRN_MAS3:
153 vcpu->arch.gpr[rt] = vcpu_e500->mas3; break; 158 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
154 case SPRN_MAS4: 159 case SPRN_MAS4:
155 vcpu->arch.gpr[rt] = vcpu_e500->mas4; break; 160 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
156 case SPRN_MAS6: 161 case SPRN_MAS6:
157 vcpu->arch.gpr[rt] = vcpu_e500->mas6; break; 162 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
158 case SPRN_MAS7: 163 case SPRN_MAS7:
159 vcpu->arch.gpr[rt] = vcpu_e500->mas7; break; 164 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
160 165
161 case SPRN_TLB0CFG: 166 case SPRN_TLB0CFG:
162 vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG); 167 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
163 vcpu->arch.gpr[rt] &= ~0xfffUL;
164 vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
165 break;
166
167 case SPRN_TLB1CFG: 168 case SPRN_TLB1CFG:
168 vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG); 169 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
169 vcpu->arch.gpr[rt] &= ~0xfffUL; 170 case SPRN_L1CSR0:
170 vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1]; 171 kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
171 break;
172
173 case SPRN_L1CSR1: 172 case SPRN_L1CSR1:
174 vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break; 173 kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
175 case SPRN_HID0: 174 case SPRN_HID0:
176 vcpu->arch.gpr[rt] = vcpu_e500->hid0; break; 175 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
177 case SPRN_HID1: 176 case SPRN_HID1:
178 vcpu->arch.gpr[rt] = vcpu_e500->hid1; break; 177 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
179 178
180 case SPRN_MMUCSR0: 179 case SPRN_MMUCSR0:
181 vcpu->arch.gpr[rt] = 0; break; 180 kvmppc_set_gpr(vcpu, rt, 0); break;
182 181
183 case SPRN_MMUCFG: 182 case SPRN_MMUCFG:
184 vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break; 183 kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
185 184
186 /* extra exceptions */ 185 /* extra exceptions */
187 case SPRN_IVOR32: 186 case SPRN_IVOR32:
188 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; 187 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
189 break; 188 break;
190 case SPRN_IVOR33: 189 case SPRN_IVOR33:
191 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; 190 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
192 break; 191 break;
193 case SPRN_IVOR34: 192 case SPRN_IVOR34:
194 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; 193 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
195 break; 194 break;
196 case SPRN_IVOR35: 195 case SPRN_IVOR35:
197 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; 196 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
198 break; 197 break;
199 default: 198 default:
200 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); 199 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index fb1e1dc11ba..0d772e6b631 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -417,7 +417,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
417 int esel, tlbsel; 417 int esel, tlbsel;
418 gva_t ea; 418 gva_t ea;
419 419
420 ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb]; 420 ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
421 421
422 ia = (ea >> 2) & 0x1; 422 ia = (ea >> 2) & 0x1;
423 423
@@ -470,7 +470,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
470 struct tlbe *gtlbe = NULL; 470 struct tlbe *gtlbe = NULL;
471 gva_t ea; 471 gva_t ea;
472 472
473 ea = vcpu->arch.gpr[rb]; 473 ea = kvmppc_get_gpr(vcpu, rb);
474 474
475 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 475 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
476 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); 476 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
@@ -728,6 +728,12 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
728 if (vcpu_e500->shadow_pages[1] == NULL) 728 if (vcpu_e500->shadow_pages[1] == NULL)
729 goto err_out_page0; 729 goto err_out_page0;
730 730
731 /* Init TLB configuration register */
732 vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
733 vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
734 vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
735 vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1];
736
731 return 0; 737 return 0;
732 738
733err_out_page0: 739err_out_page0:
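
The e500 hunks above stop recomputing the guest TLBnCFG on every emulated mfspr: kvmppc_e500_tlb_init() now builds the guest-visible value once (the host register with its entry-count field replaced by the guest TLB size) and the mfspr emulation simply returns the cached word. A sketch of the init-time computation, with field names taken from the diff and types assumed:

static void e500_cache_tlbcfg(struct kvmppc_vcpu_e500 *vcpu_e500)
{
	/* host value with the low 12-bit entry count swapped for the
	 * guest TLB sizes, as in kvmppc_e500_tlb_init() above */
	vcpu_e500->tlb0cfg = (mfspr(SPRN_TLB0CFG) & ~0xfffUL)
			   | vcpu_e500->guest_tlb_size[0];
	vcpu_e500->tlb1cfg = (mfspr(SPRN_TLB1CFG) & ~0xfffUL)
			   | vcpu_e500->guest_tlb_size[1];
}
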
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 4a9ac6640fa..cb72a65f4ec 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -83,6 +83,9 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
83 83
84 pr_debug("mtDEC: %x\n", vcpu->arch.dec); 84 pr_debug("mtDEC: %x\n", vcpu->arch.dec);
85#ifdef CONFIG_PPC64 85#ifdef CONFIG_PPC64
86 /* mtdec lowers the interrupt line when positive. */
87 kvmppc_core_dequeue_dec(vcpu);
88
86 /* POWER4+ triggers a dec interrupt if the value is < 0 */ 89 /* POWER4+ triggers a dec interrupt if the value is < 0 */
87 if (vcpu->arch.dec & 0x80000000) { 90 if (vcpu->arch.dec & 0x80000000) {
88 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 91 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
@@ -140,14 +143,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
140 143
141 pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); 144 pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
142 145
146 /* Try again next time */
147 if (inst == KVM_INST_FETCH_FAILED)
148 return EMULATE_DONE;
149
143 switch (get_op(inst)) { 150 switch (get_op(inst)) {
144 case OP_TRAP: 151 case OP_TRAP:
145#ifdef CONFIG_PPC64 152#ifdef CONFIG_PPC64
146 case OP_TRAP_64: 153 case OP_TRAP_64:
154 kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
147#else 155#else
148 vcpu->arch.esr |= ESR_PTR; 156 kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
149#endif 157#endif
150 kvmppc_core_queue_program(vcpu);
151 advance = 0; 158 advance = 0;
152 break; 159 break;
153 160
@@ -167,14 +174,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
167 case OP_31_XOP_STWX: 174 case OP_31_XOP_STWX:
168 rs = get_rs(inst); 175 rs = get_rs(inst);
169 emulated = kvmppc_handle_store(run, vcpu, 176 emulated = kvmppc_handle_store(run, vcpu,
170 vcpu->arch.gpr[rs], 177 kvmppc_get_gpr(vcpu, rs),
171 4, 1); 178 4, 1);
172 break; 179 break;
173 180
174 case OP_31_XOP_STBX: 181 case OP_31_XOP_STBX:
175 rs = get_rs(inst); 182 rs = get_rs(inst);
176 emulated = kvmppc_handle_store(run, vcpu, 183 emulated = kvmppc_handle_store(run, vcpu,
177 vcpu->arch.gpr[rs], 184 kvmppc_get_gpr(vcpu, rs),
178 1, 1); 185 1, 1);
179 break; 186 break;
180 187
@@ -183,14 +190,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
183 ra = get_ra(inst); 190 ra = get_ra(inst);
184 rb = get_rb(inst); 191 rb = get_rb(inst);
185 192
186 ea = vcpu->arch.gpr[rb]; 193 ea = kvmppc_get_gpr(vcpu, rb);
187 if (ra) 194 if (ra)
188 ea += vcpu->arch.gpr[ra]; 195 ea += kvmppc_get_gpr(vcpu, ra);
189 196
190 emulated = kvmppc_handle_store(run, vcpu, 197 emulated = kvmppc_handle_store(run, vcpu,
191 vcpu->arch.gpr[rs], 198 kvmppc_get_gpr(vcpu, rs),
192 1, 1); 199 1, 1);
193 vcpu->arch.gpr[rs] = ea; 200 kvmppc_set_gpr(vcpu, rs, ea);
194 break; 201 break;
195 202
196 case OP_31_XOP_LHZX: 203 case OP_31_XOP_LHZX:
@@ -203,12 +210,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
203 ra = get_ra(inst); 210 ra = get_ra(inst);
204 rb = get_rb(inst); 211 rb = get_rb(inst);
205 212
206 ea = vcpu->arch.gpr[rb]; 213 ea = kvmppc_get_gpr(vcpu, rb);
207 if (ra) 214 if (ra)
208 ea += vcpu->arch.gpr[ra]; 215 ea += kvmppc_get_gpr(vcpu, ra);
209 216
210 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 217 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
211 vcpu->arch.gpr[ra] = ea; 218 kvmppc_set_gpr(vcpu, ra, ea);
212 break; 219 break;
213 220
214 case OP_31_XOP_MFSPR: 221 case OP_31_XOP_MFSPR:
@@ -217,47 +224,49 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
217 224
218 switch (sprn) { 225 switch (sprn) {
219 case SPRN_SRR0: 226 case SPRN_SRR0:
220 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; 227 kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break;
221 case SPRN_SRR1: 228 case SPRN_SRR1:
222 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; 229 kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break;
223 case SPRN_PVR: 230 case SPRN_PVR:
224 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; 231 kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
225 case SPRN_PIR: 232 case SPRN_PIR:
226 vcpu->arch.gpr[rt] = vcpu->vcpu_id; break; 233 kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
227 case SPRN_MSSSR0: 234 case SPRN_MSSSR0:
228 vcpu->arch.gpr[rt] = 0; break; 235 kvmppc_set_gpr(vcpu, rt, 0); break;
229 236
230 /* Note: mftb and TBRL/TBWL are user-accessible, so 237 /* Note: mftb and TBRL/TBWL are user-accessible, so
231 * the guest can always access the real TB anyways. 238 * the guest can always access the real TB anyways.
232 * In fact, we probably will never see these traps. */ 239 * In fact, we probably will never see these traps. */
233 case SPRN_TBWL: 240 case SPRN_TBWL:
234 vcpu->arch.gpr[rt] = get_tb() >> 32; break; 241 kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
235 case SPRN_TBWU: 242 case SPRN_TBWU:
236 vcpu->arch.gpr[rt] = get_tb(); break; 243 kvmppc_set_gpr(vcpu, rt, get_tb()); break;
237 244
238 case SPRN_SPRG0: 245 case SPRN_SPRG0:
239 vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break; 246 kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break;
240 case SPRN_SPRG1: 247 case SPRN_SPRG1:
241 vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break; 248 kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break;
242 case SPRN_SPRG2: 249 case SPRN_SPRG2:
243 vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break; 250 kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break;
244 case SPRN_SPRG3: 251 case SPRN_SPRG3:
245 vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break; 252 kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break;
246 /* Note: SPRG4-7 are user-readable, so we don't get 253 /* Note: SPRG4-7 are user-readable, so we don't get
247 * a trap. */ 254 * a trap. */
248 255
249 case SPRN_DEC: 256 case SPRN_DEC:
250 { 257 {
251 u64 jd = get_tb() - vcpu->arch.dec_jiffies; 258 u64 jd = get_tb() - vcpu->arch.dec_jiffies;
252 vcpu->arch.gpr[rt] = vcpu->arch.dec - jd; 259 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
253 pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, vcpu->arch.gpr[rt]); 260 pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n",
261 vcpu->arch.dec, jd,
262 kvmppc_get_gpr(vcpu, rt));
254 break; 263 break;
255 } 264 }
256 default: 265 default:
257 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); 266 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
258 if (emulated == EMULATE_FAIL) { 267 if (emulated == EMULATE_FAIL) {
259 printk("mfspr: unknown spr %x\n", sprn); 268 printk("mfspr: unknown spr %x\n", sprn);
260 vcpu->arch.gpr[rt] = 0; 269 kvmppc_set_gpr(vcpu, rt, 0);
261 } 270 }
262 break; 271 break;
263 } 272 }
@@ -269,7 +278,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
269 rb = get_rb(inst); 278 rb = get_rb(inst);
270 279
271 emulated = kvmppc_handle_store(run, vcpu, 280 emulated = kvmppc_handle_store(run, vcpu,
272 vcpu->arch.gpr[rs], 281 kvmppc_get_gpr(vcpu, rs),
273 2, 1); 282 2, 1);
274 break; 283 break;
275 284
@@ -278,14 +287,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
278 ra = get_ra(inst); 287 ra = get_ra(inst);
279 rb = get_rb(inst); 288 rb = get_rb(inst);
280 289
281 ea = vcpu->arch.gpr[rb]; 290 ea = kvmppc_get_gpr(vcpu, rb);
282 if (ra) 291 if (ra)
283 ea += vcpu->arch.gpr[ra]; 292 ea += kvmppc_get_gpr(vcpu, ra);
284 293
285 emulated = kvmppc_handle_store(run, vcpu, 294 emulated = kvmppc_handle_store(run, vcpu,
286 vcpu->arch.gpr[rs], 295 kvmppc_get_gpr(vcpu, rs),
287 2, 1); 296 2, 1);
288 vcpu->arch.gpr[ra] = ea; 297 kvmppc_set_gpr(vcpu, ra, ea);
289 break; 298 break;
290 299
291 case OP_31_XOP_MTSPR: 300 case OP_31_XOP_MTSPR:
@@ -293,9 +302,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
293 rs = get_rs(inst); 302 rs = get_rs(inst);
294 switch (sprn) { 303 switch (sprn) {
295 case SPRN_SRR0: 304 case SPRN_SRR0:
296 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; 305 vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break;
297 case SPRN_SRR1: 306 case SPRN_SRR1:
298 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; 307 vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break;
299 308
300 /* XXX We need to context-switch the timebase for 309 /* XXX We need to context-switch the timebase for
301 * watchdog and FIT. */ 310 * watchdog and FIT. */
@@ -305,18 +314,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
305 case SPRN_MSSSR0: break; 314 case SPRN_MSSSR0: break;
306 315
307 case SPRN_DEC: 316 case SPRN_DEC:
308 vcpu->arch.dec = vcpu->arch.gpr[rs]; 317 vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
309 kvmppc_emulate_dec(vcpu); 318 kvmppc_emulate_dec(vcpu);
310 break; 319 break;
311 320
312 case SPRN_SPRG0: 321 case SPRN_SPRG0:
313 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; 322 vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break;
314 case SPRN_SPRG1: 323 case SPRN_SPRG1:
315 vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break; 324 vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break;
316 case SPRN_SPRG2: 325 case SPRN_SPRG2:
317 vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break; 326 vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break;
318 case SPRN_SPRG3: 327 case SPRN_SPRG3:
319 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; 328 vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break;
320 329
321 default: 330 default:
322 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); 331 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
@@ -348,7 +357,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
348 rb = get_rb(inst); 357 rb = get_rb(inst);
349 358
350 emulated = kvmppc_handle_store(run, vcpu, 359 emulated = kvmppc_handle_store(run, vcpu,
351 vcpu->arch.gpr[rs], 360 kvmppc_get_gpr(vcpu, rs),
352 4, 0); 361 4, 0);
353 break; 362 break;
354 363
@@ -363,7 +372,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
363 rb = get_rb(inst); 372 rb = get_rb(inst);
364 373
365 emulated = kvmppc_handle_store(run, vcpu, 374 emulated = kvmppc_handle_store(run, vcpu,
366 vcpu->arch.gpr[rs], 375 kvmppc_get_gpr(vcpu, rs),
367 2, 0); 376 2, 0);
368 break; 377 break;
369 378
@@ -382,7 +391,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
382 ra = get_ra(inst); 391 ra = get_ra(inst);
383 rt = get_rt(inst); 392 rt = get_rt(inst);
384 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 393 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
385 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 394 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
386 break; 395 break;
387 396
388 case OP_LBZ: 397 case OP_LBZ:
@@ -394,35 +403,39 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
394 ra = get_ra(inst); 403 ra = get_ra(inst);
395 rt = get_rt(inst); 404 rt = get_rt(inst);
396 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 405 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
397 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 406 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
398 break; 407 break;
399 408
400 case OP_STW: 409 case OP_STW:
401 rs = get_rs(inst); 410 rs = get_rs(inst);
402 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 411 emulated = kvmppc_handle_store(run, vcpu,
412 kvmppc_get_gpr(vcpu, rs),
403 4, 1); 413 4, 1);
404 break; 414 break;
405 415
406 case OP_STWU: 416 case OP_STWU:
407 ra = get_ra(inst); 417 ra = get_ra(inst);
408 rs = get_rs(inst); 418 rs = get_rs(inst);
409 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 419 emulated = kvmppc_handle_store(run, vcpu,
420 kvmppc_get_gpr(vcpu, rs),
410 4, 1); 421 4, 1);
411 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 422 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
412 break; 423 break;
413 424
414 case OP_STB: 425 case OP_STB:
415 rs = get_rs(inst); 426 rs = get_rs(inst);
416 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 427 emulated = kvmppc_handle_store(run, vcpu,
428 kvmppc_get_gpr(vcpu, rs),
417 1, 1); 429 1, 1);
418 break; 430 break;
419 431
420 case OP_STBU: 432 case OP_STBU:
421 ra = get_ra(inst); 433 ra = get_ra(inst);
422 rs = get_rs(inst); 434 rs = get_rs(inst);
423 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 435 emulated = kvmppc_handle_store(run, vcpu,
436 kvmppc_get_gpr(vcpu, rs),
424 1, 1); 437 1, 1);
425 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 438 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
426 break; 439 break;
427 440
428 case OP_LHZ: 441 case OP_LHZ:
@@ -434,21 +447,23 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
434 ra = get_ra(inst); 447 ra = get_ra(inst);
435 rt = get_rt(inst); 448 rt = get_rt(inst);
436 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 449 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
437 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 450 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
438 break; 451 break;
439 452
440 case OP_STH: 453 case OP_STH:
441 rs = get_rs(inst); 454 rs = get_rs(inst);
442 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 455 emulated = kvmppc_handle_store(run, vcpu,
456 kvmppc_get_gpr(vcpu, rs),
443 2, 1); 457 2, 1);
444 break; 458 break;
445 459
446 case OP_STHU: 460 case OP_STHU:
447 ra = get_ra(inst); 461 ra = get_ra(inst);
448 rs = get_rs(inst); 462 rs = get_rs(inst);
449 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 463 emulated = kvmppc_handle_store(run, vcpu,
464 kvmppc_get_gpr(vcpu, rs),
450 2, 1); 465 2, 1);
451 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 466 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
452 break; 467 break;
453 468
454 default: 469 default:
@@ -461,6 +476,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
461 advance = 0; 476 advance = 0;
462 printk(KERN_ERR "Couldn't emulate instruction 0x%08x " 477 printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
463 "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); 478 "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
479 kvmppc_core_queue_program(vcpu, 0);
464 } 480 }
465 } 481 }
466 482
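
The hunks above convert the emulator from poking vcpu->arch.gpr[] directly to going through accessor helpers. For orientation only, the simplest form such helpers could take is sketched below; the real kvmppc_get_gpr()/kvmppc_set_gpr() added elsewhere in this series (asm/kvm_ppc.h in the diffstat) may wrap additional state.

/*
 * Hedged sketch of the GPR accessors the emulator now calls; the actual
 * definitions live in arch/powerpc/include/asm/kvm_ppc.h and may differ.
 */
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
	vcpu->arch.gpr[num] = val;
}

static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
	return vcpu->arch.gpr[num];
}

With the register file behind accessors, a later change can relocate frequently used GPRs (e.g. into shadow state) without touching every emulation case again.
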
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f06cf93b178..51aedd7f16b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -137,6 +137,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
137{ 137{
138 kvmppc_free_vcpus(kvm); 138 kvmppc_free_vcpus(kvm);
139 kvm_free_physmem(kvm); 139 kvm_free_physmem(kvm);
140 cleanup_srcu_struct(&kvm->srcu);
140 kfree(kvm); 141 kfree(kvm);
141} 142}
142 143
@@ -165,14 +166,24 @@ long kvm_arch_dev_ioctl(struct file *filp,
165 return -EINVAL; 166 return -EINVAL;
166} 167}
167 168
168int kvm_arch_set_memory_region(struct kvm *kvm, 169int kvm_arch_prepare_memory_region(struct kvm *kvm,
169 struct kvm_userspace_memory_region *mem, 170 struct kvm_memory_slot *memslot,
170 struct kvm_memory_slot old, 171 struct kvm_memory_slot old,
171 int user_alloc) 172 struct kvm_userspace_memory_region *mem,
173 int user_alloc)
172{ 174{
173 return 0; 175 return 0;
174} 176}
175 177
178void kvm_arch_commit_memory_region(struct kvm *kvm,
179 struct kvm_userspace_memory_region *mem,
180 struct kvm_memory_slot old,
181 int user_alloc)
182{
183 return;
184}
185
186
176void kvm_arch_flush_shadow(struct kvm *kvm) 187void kvm_arch_flush_shadow(struct kvm *kvm)
177{ 188{
178} 189}
@@ -260,34 +271,35 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
260static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 271static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
261 struct kvm_run *run) 272 struct kvm_run *run)
262{ 273{
263 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 274 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
264 *gpr = run->dcr.data;
265} 275}
266 276
267static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 277static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
268 struct kvm_run *run) 278 struct kvm_run *run)
269{ 279{
270 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 280 ulong gpr;
271 281
272 if (run->mmio.len > sizeof(*gpr)) { 282 if (run->mmio.len > sizeof(gpr)) {
273 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); 283 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
274 return; 284 return;
275 } 285 }
276 286
277 if (vcpu->arch.mmio_is_bigendian) { 287 if (vcpu->arch.mmio_is_bigendian) {
278 switch (run->mmio.len) { 288 switch (run->mmio.len) {
279 case 4: *gpr = *(u32 *)run->mmio.data; break; 289 case 4: gpr = *(u32 *)run->mmio.data; break;
280 case 2: *gpr = *(u16 *)run->mmio.data; break; 290 case 2: gpr = *(u16 *)run->mmio.data; break;
281 case 1: *gpr = *(u8 *)run->mmio.data; break; 291 case 1: gpr = *(u8 *)run->mmio.data; break;
282 } 292 }
283 } else { 293 } else {
284 /* Convert BE data from userland back to LE. */ 294 /* Convert BE data from userland back to LE. */
285 switch (run->mmio.len) { 295 switch (run->mmio.len) {
286 case 4: *gpr = ld_le32((u32 *)run->mmio.data); break; 296 case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
287 case 2: *gpr = ld_le16((u16 *)run->mmio.data); break; 297 case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
288 case 1: *gpr = *(u8 *)run->mmio.data; break; 298 case 1: gpr = *(u8 *)run->mmio.data; break;
289 } 299 }
290 } 300 }
301
302 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
291} 303}
292 304
293int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 305int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
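
kvm_arch_set_memory_region() is split into a prepare step that may still fail and a commit step that runs once the new memslot layout is visible. A hedged sketch of the assumed calling sequence on the generic side (virt/kvm/kvm_main.c); the function name and error handling here are illustrative, not the actual implementation:

/* Hedged sketch: assumed two-phase flow in generic KVM code. */
static int example_set_memory_region(struct kvm *kvm,
				     struct kvm_userspace_memory_region *mem,
				     int user_alloc)
{
	struct kvm_memory_slot old = {}, new = {};
	int r;

	/* ... validate mem and build the new slot in 'new' ... */

	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
	if (r)
		return r;	/* nothing published yet, easy to back out */

	/* ... publish the updated memslots array under SRCU ... */

	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
	return 0;
}

The s390 hunk further down moves the per-vcpu SIE-block reload request into the commit hook for the same reason: it must only run once the slot change can no longer fail.
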
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b037d95eead..64c00227b99 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -451,7 +451,7 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu)
451 nid = of_node_to_nid_single(cpu); 451 nid = of_node_to_nid_single(cpu);
452 452
453 if (nid < 0 || !node_online(nid)) 453 if (nid < 0 || !node_online(nid))
454 nid = any_online_node(NODE_MASK_ALL); 454 nid = first_online_node;
455out: 455out:
456 map_cpu_to_node(lcpu, nid); 456 map_cpu_to_node(lcpu, nid);
457 457
@@ -1114,7 +1114,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
1114 int nid, found = 0; 1114 int nid, found = 0;
1115 1115
1116 if (!numa_enabled || (min_common_depth < 0)) 1116 if (!numa_enabled || (min_common_depth < 0))
1117 return any_online_node(NODE_MASK_ALL); 1117 return first_online_node;
1118 1118
1119 memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 1119 memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
1120 if (memory) { 1120 if (memory) {
@@ -1125,7 +1125,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
1125 } 1125 }
1126 1126
1127 if (nid < 0 || !node_online(nid)) 1127 if (nid < 0 || !node_online(nid))
1128 nid = any_online_node(NODE_MASK_ALL); 1128 nid = first_online_node;
1129 1129
1130 if (NODE_DATA(nid)->node_spanned_pages) 1130 if (NODE_DATA(nid)->node_spanned_pages)
1131 return nid; 1131 return nid;
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 341aff2687a..cd128b07bed 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -288,46 +288,30 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
288 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 288 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
289 sb->s_magic = HYPFS_MAGIC; 289 sb->s_magic = HYPFS_MAGIC;
290 sb->s_op = &hypfs_s_ops; 290 sb->s_op = &hypfs_s_ops;
291 if (hypfs_parse_options(data, sb)) { 291 if (hypfs_parse_options(data, sb))
292 rc = -EINVAL; 292 return -EINVAL;
293 goto err_alloc;
294 }
295 root_inode = hypfs_make_inode(sb, S_IFDIR | 0755); 293 root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
296 if (!root_inode) { 294 if (!root_inode)
297 rc = -ENOMEM; 295 return -ENOMEM;
298 goto err_alloc;
299 }
300 root_inode->i_op = &simple_dir_inode_operations; 296 root_inode->i_op = &simple_dir_inode_operations;
301 root_inode->i_fop = &simple_dir_operations; 297 root_inode->i_fop = &simple_dir_operations;
302 root_dentry = d_alloc_root(root_inode); 298 sb->s_root = root_dentry = d_alloc_root(root_inode);
303 if (!root_dentry) { 299 if (!root_dentry) {
304 iput(root_inode); 300 iput(root_inode);
305 rc = -ENOMEM; 301 return -ENOMEM;
306 goto err_alloc;
307 } 302 }
308 if (MACHINE_IS_VM) 303 if (MACHINE_IS_VM)
309 rc = hypfs_vm_create_files(sb, root_dentry); 304 rc = hypfs_vm_create_files(sb, root_dentry);
310 else 305 else
311 rc = hypfs_diag_create_files(sb, root_dentry); 306 rc = hypfs_diag_create_files(sb, root_dentry);
312 if (rc) 307 if (rc)
313 goto err_tree; 308 return rc;
314 sbi->update_file = hypfs_create_update_file(sb, root_dentry); 309 sbi->update_file = hypfs_create_update_file(sb, root_dentry);
315 if (IS_ERR(sbi->update_file)) { 310 if (IS_ERR(sbi->update_file))
316 rc = PTR_ERR(sbi->update_file); 311 return PTR_ERR(sbi->update_file);
317 goto err_tree;
318 }
319 hypfs_update_update(sb); 312 hypfs_update_update(sb);
320 sb->s_root = root_dentry;
321 pr_info("Hypervisor filesystem mounted\n"); 313 pr_info("Hypervisor filesystem mounted\n");
322 return 0; 314 return 0;
323
324err_tree:
325 hypfs_delete_tree(root_dentry);
326 d_genocide(root_dentry);
327 dput(root_dentry);
328err_alloc:
329 kfree(sbi);
330 return rc;
331} 315}
332 316
333static int hypfs_get_super(struct file_system_type *fst, int flags, 317static int hypfs_get_super(struct file_system_type *fst, int flags,
@@ -340,12 +324,12 @@ static void hypfs_kill_super(struct super_block *sb)
340{ 324{
341 struct hypfs_sb_info *sb_info = sb->s_fs_info; 325 struct hypfs_sb_info *sb_info = sb->s_fs_info;
342 326
343 if (sb->s_root) { 327 if (sb->s_root)
344 hypfs_delete_tree(sb->s_root); 328 hypfs_delete_tree(sb->s_root);
329 if (sb_info->update_file)
345 hypfs_remove(sb_info->update_file); 330 hypfs_remove(sb_info->update_file);
346 kfree(sb->s_fs_info); 331 kfree(sb->s_fs_info);
347 sb->s_fs_info = NULL; 332 sb->s_fs_info = NULL;
348 }
349 kill_litter_super(sb); 333 kill_litter_super(sb);
350} 334}
351 335
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3fa0a10e466..49292869a5c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -242,6 +242,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
242 kvm_free_physmem(kvm); 242 kvm_free_physmem(kvm);
243 free_page((unsigned long)(kvm->arch.sca)); 243 free_page((unsigned long)(kvm->arch.sca));
244 debug_unregister(kvm->arch.dbf); 244 debug_unregister(kvm->arch.dbf);
245 cleanup_srcu_struct(&kvm->srcu);
245 kfree(kvm); 246 kfree(kvm);
246} 247}
247 248
@@ -690,14 +691,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
690} 691}
691 692
692/* Section: memory related */ 693/* Section: memory related */
693int kvm_arch_set_memory_region(struct kvm *kvm, 694int kvm_arch_prepare_memory_region(struct kvm *kvm,
694 struct kvm_userspace_memory_region *mem, 695 struct kvm_memory_slot *memslot,
695 struct kvm_memory_slot old, 696 struct kvm_memory_slot old,
696 int user_alloc) 697 struct kvm_userspace_memory_region *mem,
698 int user_alloc)
697{ 699{
698 int i;
699 struct kvm_vcpu *vcpu;
700
701 /* A few sanity checks. We can have exactly one memory slot which has 700 /* A few sanity checks. We can have exactly one memory slot which has
702 to start at guest virtual zero and which has to be located at a 701 to start at guest virtual zero and which has to be located at a
703 page boundary in userland and which has to end at a page boundary. 702 page boundary in userland and which has to end at a page boundary.
@@ -720,14 +719,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
720 if (!user_alloc) 719 if (!user_alloc)
721 return -EINVAL; 720 return -EINVAL;
722 721
722 return 0;
723}
724
725void kvm_arch_commit_memory_region(struct kvm *kvm,
726 struct kvm_userspace_memory_region *mem,
727 struct kvm_memory_slot old,
728 int user_alloc)
729{
730 int i;
731 struct kvm_vcpu *vcpu;
732
723 /* request update of sie control block for all available vcpus */ 733 /* request update of sie control block for all available vcpus */
724 kvm_for_each_vcpu(i, vcpu, kvm) { 734 kvm_for_each_vcpu(i, vcpu, kvm) {
725 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 735 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
726 continue; 736 continue;
727 kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP); 737 kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
728 } 738 }
729
730 return 0;
731} 739}
732 740
733void kvm_arch_flush_shadow(struct kvm *kvm) 741void kvm_arch_flush_shadow(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 06cce8285ba..60f09ab3672 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -67,10 +67,14 @@ static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
67 67
68static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) 68static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
69{ 69{
70 int idx;
70 struct kvm_memory_slot *mem; 71 struct kvm_memory_slot *mem;
72 struct kvm_memslots *memslots;
71 73
72 down_read(&vcpu->kvm->slots_lock); 74 idx = srcu_read_lock(&vcpu->kvm->srcu);
73 mem = &vcpu->kvm->memslots[0]; 75 memslots = rcu_dereference(vcpu->kvm->memslots);
76
77 mem = &memslots->memslots[0];
74 78
75 vcpu->arch.sie_block->gmsor = mem->userspace_addr; 79 vcpu->arch.sie_block->gmsor = mem->userspace_addr;
76 vcpu->arch.sie_block->gmslm = 80 vcpu->arch.sie_block->gmslm =
@@ -78,7 +82,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
78 (mem->npages << PAGE_SHIFT) + 82 (mem->npages << PAGE_SHIFT) +
79 VIRTIODESCSPACE - 1ul; 83 VIRTIODESCSPACE - 1ul;
80 84
81 up_read(&vcpu->kvm->slots_lock); 85 srcu_read_unlock(&vcpu->kvm->srcu, idx);
82} 86}
83 87
84/* implemented in priv.c */ 88/* implemented in priv.c */
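
Here the slots_lock rwsem reader is replaced by an SRCU read-side section plus an RCU-dereferenced memslots pointer. A generic sketch of the reader pattern, assuming only the fields shown in the hunk:

/* Hedged sketch of the SRCU reader pattern used above. */
static void example_read_memslots(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);	/* enter read-side section */
	slots = rcu_dereference(kvm->memslots);	/* snapshot the current array */

	/* ... read slots->memslots[i]; the array stays valid until unlock ... */

	srcu_read_unlock(&kvm->srcu, idx);	/* writers may now retire the old array */
}
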
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index be300aaca6f..7da0fc94a01 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -419,6 +419,9 @@ static struct i2c_board_info migor_i2c_devices[] = {
419 I2C_BOARD_INFO("migor_ts", 0x51), 419 I2C_BOARD_INFO("migor_ts", 0x51),
420 .irq = 38, /* IRQ6 */ 420 .irq = 38, /* IRQ6 */
421 }, 421 },
422 {
423 I2C_BOARD_INFO("wm8978", 0x1a),
424 },
422}; 425};
423 426
424static struct i2c_board_info migor_i2c_camera[] = { 427static struct i2c_board_info migor_i2c_camera[] = {
@@ -619,6 +622,19 @@ static int __init migor_devices_setup(void)
619 622
620 platform_resource_setup_memory(&migor_ceu_device, "ceu", 4 << 20); 623 platform_resource_setup_memory(&migor_ceu_device, "ceu", 4 << 20);
621 624
625 /* SIU: Port B */
626 gpio_request(GPIO_FN_SIUBOLR, NULL);
627 gpio_request(GPIO_FN_SIUBOBT, NULL);
628 gpio_request(GPIO_FN_SIUBISLD, NULL);
629 gpio_request(GPIO_FN_SIUBOSLD, NULL);
630 gpio_request(GPIO_FN_SIUMCKB, NULL);
631
632 /*
633 * The original driver sets SIUB OLR/OBT, ILR/IBT, and SIUA OLR/OBT to
634 * output. Need only SIUB, set to output for master mode (table 34.2)
635 */
636 __raw_writew(__raw_readw(PORT_MSELCRA) | 1, PORT_MSELCRA);
637
622 i2c_register_board_info(0, migor_i2c_devices, 638 i2c_register_board_info(0, migor_i2c_devices,
623 ARRAY_SIZE(migor_i2c_devices)); 639 ARRAY_SIZE(migor_i2c_devices));
624 640
diff --git a/arch/sh/boot/compressed/cache.c b/arch/sh/boot/compressed/cache.c
index e27fc74f228..d0b77b68a4d 100644
--- a/arch/sh/boot/compressed/cache.c
+++ b/arch/sh/boot/compressed/cache.c
@@ -5,7 +5,7 @@ int cache_control(unsigned int command)
5 5
6 for (i = 0; i < (32 * 1024); i += 32) { 6 for (i = 0; i < (32 * 1024); i += 32) {
7 (void)*p; 7 (void)*p;
8 p += (32 / sizeof (int)); 8 p += (32 / sizeof(int));
9 } 9 }
10 10
11 return 0; 11 return 0;
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index da3ebec921a..1f4e562c5e8 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -86,8 +86,8 @@ extern void copy_from_user_page(struct vm_area_struct *vma,
86 struct page *page, unsigned long vaddr, void *dst, const void *src, 86 struct page *page, unsigned long vaddr, void *dst, const void *src,
87 unsigned long len); 87 unsigned long len);
88 88
89#define flush_cache_vmap(start, end) flush_cache_all() 89#define flush_cache_vmap(start, end) local_flush_cache_all(NULL)
90#define flush_cache_vunmap(start, end) flush_cache_all() 90#define flush_cache_vunmap(start, end) local_flush_cache_all(NULL)
91 91
92#define flush_dcache_mmap_lock(mapping) do { } while (0) 92#define flush_dcache_mmap_lock(mapping) do { } while (0)
93#define flush_dcache_mmap_unlock(mapping) do { } while (0) 93#define flush_dcache_mmap_unlock(mapping) do { } while (0)
diff --git a/arch/sh/include/asm/dma-register.h b/arch/sh/include/asm/dma-register.h
new file mode 100644
index 00000000000..51cd78feacf
--- /dev/null
+++ b/arch/sh/include/asm/dma-register.h
@@ -0,0 +1,51 @@
1/*
2 * Common header for the legacy SH DMA driver and the new dmaengine driver
3 *
4 * extracted from arch/sh/include/asm/dma-sh.h:
5 *
6 * Copyright (C) 2000 Takashi YOSHII
7 * Copyright (C) 2003 Paul Mundt
8 *
9 * This file is subject to the terms and conditions of the GNU General Public
10 * License. See the file "COPYING" in the main directory of this archive
11 * for more details.
12 */
13#ifndef DMA_REGISTER_H
14#define DMA_REGISTER_H
15
16/* DMA register */
17#define SAR 0x00
18#define DAR 0x04
19#define TCR 0x08
20#define CHCR 0x0C
21#define DMAOR 0x40
22
23/* DMAOR definitions */
24#define DMAOR_AE 0x00000004
25#define DMAOR_NMIF 0x00000002
26#define DMAOR_DME 0x00000001
27
28/* Definitions for the SuperH DMAC */
29#define REQ_L 0x00000000
30#define REQ_E 0x00080000
31#define RACK_H 0x00000000
32#define RACK_L 0x00040000
33#define ACK_R 0x00000000
34#define ACK_W 0x00020000
35#define ACK_H 0x00000000
36#define ACK_L 0x00010000
37#define DM_INC 0x00004000
38#define DM_DEC 0x00008000
39#define DM_FIX 0x0000c000
40#define SM_INC 0x00001000
41#define SM_DEC 0x00002000
42#define SM_FIX 0x00003000
43#define RS_IN 0x00000200
44#define RS_OUT 0x00000300
45#define TS_BLK 0x00000040
46#define TM_BUR 0x00000020
47#define CHCR_DE 0x00000001
48#define CHCR_TE 0x00000002
49#define CHCR_IE 0x00000004
50
51#endif
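
These CHCR/DMAOR bit definitions are shared by both DMA drivers; board and CPU code OR them together with the per-CPU TS_INDEX2VAL() from <cpu/dma-register.h> to build a channel configuration. A hedged example in the style of the SH7722 slave table added later in this series; the 0x800 request-select bits are copied from that table and are platform-specific:

/* Hedged example: CHCR value for a peripheral-to-memory (RX) transfer. */
u32 chcr = DM_INC			/* destination (memory) increments */
	 | SM_FIX			/* source (FIFO) address is fixed */
	 | 0x800			/* request select, platform-specific */
	 | TS_INDEX2VAL(XMIT_SZ_8BIT);	/* 8-bit units per transfer */
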
diff --git a/arch/sh/include/asm/dma-sh.h b/arch/sh/include/asm/dma-sh.h
index e934a2e6665..f3acb8e34c6 100644
--- a/arch/sh/include/asm/dma-sh.h
+++ b/arch/sh/include/asm/dma-sh.h
@@ -11,7 +11,8 @@
11#ifndef __DMA_SH_H 11#ifndef __DMA_SH_H
12#define __DMA_SH_H 12#define __DMA_SH_H
13 13
14#include <asm/dma.h> 14#include <asm/dma-register.h>
15#include <cpu/dma-register.h>
15#include <cpu/dma.h> 16#include <cpu/dma.h>
16 17
17/* DMAOR control: The DMAOR access size is different by CPU.*/ 18
@@ -53,34 +54,6 @@ static int dmte_irq_map[] __maybe_unused = {
53#endif 54#endif
54}; 55};
55 56
56/* Definitions for the SuperH DMAC */
57#define REQ_L 0x00000000
58#define REQ_E 0x00080000
59#define RACK_H 0x00000000
60#define RACK_L 0x00040000
61#define ACK_R 0x00000000
62#define ACK_W 0x00020000
63#define ACK_H 0x00000000
64#define ACK_L 0x00010000
65#define DM_INC 0x00004000
66#define DM_DEC 0x00008000
67#define DM_FIX 0x0000c000
68#define SM_INC 0x00001000
69#define SM_DEC 0x00002000
70#define SM_FIX 0x00003000
71#define RS_IN 0x00000200
72#define RS_OUT 0x00000300
73#define TS_BLK 0x00000040
74#define TM_BUR 0x00000020
75#define CHCR_DE 0x00000001
76#define CHCR_TE 0x00000002
77#define CHCR_IE 0x00000004
78
79/* DMAOR definitions */
80#define DMAOR_AE 0x00000004
81#define DMAOR_NMIF 0x00000002
82#define DMAOR_DME 0x00000001
83
84/* 57/*
85 * Define the default configuration for dual address memory-memory transfer. 58 * Define the default configuration for dual address memory-memory transfer.
86 * The 0x400 value represents auto-request, external->external. 59 * The 0x400 value represents auto-request, external->external.
@@ -111,61 +84,4 @@ static u32 dma_base_addr[] __maybe_unused = {
111#endif 84#endif
112}; 85};
113 86
114/* DMA register */
115#define SAR 0x00
116#define DAR 0x04
117#define TCR 0x08
118#define CHCR 0x0C
119#define DMAOR 0x40
120
121/*
122 * for dma engine
123 *
124 * SuperH DMA mode
125 */
126#define SHDMA_MIX_IRQ (1 << 1)
127#define SHDMA_DMAOR1 (1 << 2)
128#define SHDMA_DMAE1 (1 << 3)
129
130enum sh_dmae_slave_chan_id {
131 SHDMA_SLAVE_SCIF0_TX,
132 SHDMA_SLAVE_SCIF0_RX,
133 SHDMA_SLAVE_SCIF1_TX,
134 SHDMA_SLAVE_SCIF1_RX,
135 SHDMA_SLAVE_SCIF2_TX,
136 SHDMA_SLAVE_SCIF2_RX,
137 SHDMA_SLAVE_SCIF3_TX,
138 SHDMA_SLAVE_SCIF3_RX,
139 SHDMA_SLAVE_SCIF4_TX,
140 SHDMA_SLAVE_SCIF4_RX,
141 SHDMA_SLAVE_SCIF5_TX,
142 SHDMA_SLAVE_SCIF5_RX,
143 SHDMA_SLAVE_SIUA_TX,
144 SHDMA_SLAVE_SIUA_RX,
145 SHDMA_SLAVE_SIUB_TX,
146 SHDMA_SLAVE_SIUB_RX,
147 SHDMA_SLAVE_NUMBER, /* Must stay last */
148};
149
150struct sh_dmae_slave_config {
151 enum sh_dmae_slave_chan_id slave_id;
152 dma_addr_t addr;
153 u32 chcr;
154 char mid_rid;
155};
156
157struct sh_dmae_pdata {
158 unsigned int mode;
159 struct sh_dmae_slave_config *config;
160 int config_num;
161};
162
163struct device;
164
165struct sh_dmae_slave {
166 enum sh_dmae_slave_chan_id slave_id; /* Set by the platform */
167 struct device *dma_dev; /* Set by the platform */
168 struct sh_dmae_slave_config *config; /* Set by the driver */
169};
170
171#endif /* __DMA_SH_H */ 87#endif /* __DMA_SH_H */
diff --git a/arch/sh/include/asm/dmaengine.h b/arch/sh/include/asm/dmaengine.h
new file mode 100644
index 00000000000..bf2f30cf0a2
--- /dev/null
+++ b/arch/sh/include/asm/dmaengine.h
@@ -0,0 +1,93 @@
1/*
2 * Header for the new SH dmaengine driver
3 *
4 * Copyright (C) 2010 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef ASM_DMAENGINE_H
11#define ASM_DMAENGINE_H
12
13#include <linux/dmaengine.h>
14#include <linux/list.h>
15
16#include <asm/dma-register.h>
17
18#define SH_DMAC_MAX_CHANNELS 6
19
20enum sh_dmae_slave_chan_id {
21 SHDMA_SLAVE_SCIF0_TX,
22 SHDMA_SLAVE_SCIF0_RX,
23 SHDMA_SLAVE_SCIF1_TX,
24 SHDMA_SLAVE_SCIF1_RX,
25 SHDMA_SLAVE_SCIF2_TX,
26 SHDMA_SLAVE_SCIF2_RX,
27 SHDMA_SLAVE_SCIF3_TX,
28 SHDMA_SLAVE_SCIF3_RX,
29 SHDMA_SLAVE_SCIF4_TX,
30 SHDMA_SLAVE_SCIF4_RX,
31 SHDMA_SLAVE_SCIF5_TX,
32 SHDMA_SLAVE_SCIF5_RX,
33 SHDMA_SLAVE_SIUA_TX,
34 SHDMA_SLAVE_SIUA_RX,
35 SHDMA_SLAVE_SIUB_TX,
36 SHDMA_SLAVE_SIUB_RX,
37 SHDMA_SLAVE_NUMBER, /* Must stay last */
38};
39
40struct sh_dmae_slave_config {
41 enum sh_dmae_slave_chan_id slave_id;
42 dma_addr_t addr;
43 u32 chcr;
44 char mid_rid;
45};
46
47struct sh_dmae_channel {
48 unsigned int offset;
49 unsigned int dmars;
50 unsigned int dmars_bit;
51};
52
53struct sh_dmae_pdata {
54 struct sh_dmae_slave_config *slave;
55 int slave_num;
56 struct sh_dmae_channel *channel;
57 int channel_num;
58 unsigned int ts_low_shift;
59 unsigned int ts_low_mask;
60 unsigned int ts_high_shift;
61 unsigned int ts_high_mask;
62 unsigned int *ts_shift;
63 int ts_shift_num;
64 u16 dmaor_init;
65};
66
67struct device;
68
69/* Used by slave DMA clients to request DMA to/from a specific peripheral */
70struct sh_dmae_slave {
71 enum sh_dmae_slave_chan_id slave_id; /* Set by the platform */
72 struct device *dma_dev; /* Set by the platform */
73 struct sh_dmae_slave_config *config; /* Set by the driver */
74};
75
76struct sh_dmae_regs {
77 u32 sar; /* SAR / source address */
78 u32 dar; /* DAR / destination address */
79 u32 tcr; /* TCR / transfer count */
80};
81
82struct sh_desc {
83 struct sh_dmae_regs hw;
84 struct list_head node;
85 struct dma_async_tx_descriptor async_tx;
86 enum dma_data_direction direction;
87 dma_cookie_t cookie;
88 size_t partial;
89 int chunks;
90 int mark;
91};
92
93#endif
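
struct sh_dmae_slave is what a peripheral driver hands to the dmaengine core so the shdma driver can look up the matching sh_dmae_slave_config. A hedged usage sketch; the filter function and the assumption that shdma picks the descriptor up via chan->private are illustrative only:

/* Hedged sketch: requesting a slave channel through the dmaengine API. */
static bool shdma_filter_sketch(struct dma_chan *chan, void *arg)
{
	chan->private = arg;		/* hand the sh_dmae_slave to the driver */
	return true;
}

static struct dma_chan *siu_request_tx_chan(struct device *dmac_dev)
{
	static struct sh_dmae_slave param = {
		.slave_id = SHDMA_SLAVE_SIUA_TX,
	};
	dma_cap_mask_t mask;

	param.dma_dev = dmac_dev;	/* the platform DMAC device */

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	return dma_request_channel(mask, shdma_filter_sketch, &param);
}
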
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 7dab7b23a5e..f689554e17c 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -291,21 +291,21 @@ unsigned long long poke_real_address_q(unsigned long long addr,
291 * doesn't exist, so everything must go through page tables. 291 * doesn't exist, so everything must go through page tables.
292 */ 292 */
293#ifdef CONFIG_MMU 293#ifdef CONFIG_MMU
294void __iomem *__ioremap_caller(unsigned long offset, unsigned long size, 294void __iomem *__ioremap_caller(phys_addr_t offset, unsigned long size,
295 pgprot_t prot, void *caller); 295 pgprot_t prot, void *caller);
296void __iounmap(void __iomem *addr); 296void __iounmap(void __iomem *addr);
297 297
298static inline void __iomem * 298static inline void __iomem *
299__ioremap(unsigned long offset, unsigned long size, pgprot_t prot) 299__ioremap(phys_addr_t offset, unsigned long size, pgprot_t prot)
300{ 300{
301 return __ioremap_caller(offset, size, prot, __builtin_return_address(0)); 301 return __ioremap_caller(offset, size, prot, __builtin_return_address(0));
302} 302}
303 303
304static inline void __iomem * 304static inline void __iomem *
305__ioremap_29bit(unsigned long offset, unsigned long size, pgprot_t prot) 305__ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot)
306{ 306{
307#ifdef CONFIG_29BIT 307#ifdef CONFIG_29BIT
308 unsigned long last_addr = offset + size - 1; 308 phys_addr_t last_addr = offset + size - 1;
309 309
310 /* 310 /*
311 * For P1 and P2 space this is trivial, as everything is already 311 * For P1 and P2 space this is trivial, as everything is already
@@ -329,7 +329,7 @@ __ioremap_29bit(unsigned long offset, unsigned long size, pgprot_t prot)
329} 329}
330 330
331static inline void __iomem * 331static inline void __iomem *
332__ioremap_mode(unsigned long offset, unsigned long size, pgprot_t prot) 332__ioremap_mode(phys_addr_t offset, unsigned long size, pgprot_t prot)
333{ 333{
334 void __iomem *ret; 334 void __iomem *ret;
335 335
@@ -349,35 +349,32 @@ __ioremap_mode(unsigned long offset, unsigned long size, pgprot_t prot)
349#define __iounmap(addr) do { } while (0) 349#define __iounmap(addr) do { } while (0)
350#endif /* CONFIG_MMU */ 350#endif /* CONFIG_MMU */
351 351
352static inline void __iomem * 352static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size)
353ioremap(unsigned long offset, unsigned long size)
354{ 353{
355 return __ioremap_mode(offset, size, PAGE_KERNEL_NOCACHE); 354 return __ioremap_mode(offset, size, PAGE_KERNEL_NOCACHE);
356} 355}
357 356
358static inline void __iomem * 357static inline void __iomem *
359ioremap_cache(unsigned long offset, unsigned long size) 358ioremap_cache(phys_addr_t offset, unsigned long size)
360{ 359{
361 return __ioremap_mode(offset, size, PAGE_KERNEL); 360 return __ioremap_mode(offset, size, PAGE_KERNEL);
362} 361}
363 362
364#ifdef CONFIG_HAVE_IOREMAP_PROT 363#ifdef CONFIG_HAVE_IOREMAP_PROT
365static inline void __iomem * 364static inline void __iomem *
366ioremap_prot(resource_size_t offset, unsigned long size, unsigned long flags) 365ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long flags)
367{ 366{
368 return __ioremap_mode(offset, size, __pgprot(flags)); 367 return __ioremap_mode(offset, size, __pgprot(flags));
369} 368}
370#endif 369#endif
371 370
372#ifdef CONFIG_IOREMAP_FIXED 371#ifdef CONFIG_IOREMAP_FIXED
373extern void __iomem *ioremap_fixed(resource_size_t, unsigned long, 372extern void __iomem *ioremap_fixed(phys_addr_t, unsigned long, pgprot_t);
374 unsigned long, pgprot_t);
375extern int iounmap_fixed(void __iomem *); 373extern int iounmap_fixed(void __iomem *);
376extern void ioremap_fixed_init(void); 374extern void ioremap_fixed_init(void);
377#else 375#else
378static inline void __iomem * 376static inline void __iomem *
379ioremap_fixed(resource_size_t phys_addr, unsigned long offset, 377ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot)
380 unsigned long size, pgprot_t prot)
381{ 378{
382 BUG(); 379 BUG();
383 return NULL; 380 return NULL;
diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h
index 15a05b615ba..19fe84550b4 100644
--- a/arch/sh/include/asm/mmu.h
+++ b/arch/sh/include/asm/mmu.h
@@ -55,19 +55,29 @@ typedef struct {
55 55
56#ifdef CONFIG_PMB 56#ifdef CONFIG_PMB
57/* arch/sh/mm/pmb.c */ 57/* arch/sh/mm/pmb.c */
58long pmb_remap(unsigned long virt, unsigned long phys,
59 unsigned long size, pgprot_t prot);
60void pmb_unmap(unsigned long addr);
61void pmb_init(void);
62bool __in_29bit_mode(void); 58bool __in_29bit_mode(void);
59
60void pmb_init(void);
61int pmb_bolt_mapping(unsigned long virt, phys_addr_t phys,
62 unsigned long size, pgprot_t prot);
63void __iomem *pmb_remap_caller(phys_addr_t phys, unsigned long size,
64 pgprot_t prot, void *caller);
65int pmb_unmap(void __iomem *addr);
66
63#else 67#else
64static inline long pmb_remap(unsigned long virt, unsigned long phys, 68
65 unsigned long size, pgprot_t prot) 69static inline void __iomem *
70pmb_remap_caller(phys_addr_t phys, unsigned long size,
71 pgprot_t prot, void *caller)
72{
73 return NULL;
74}
75
76static inline int pmb_unmap(void __iomem *addr)
66{ 77{
67 return -EINVAL; 78 return -EINVAL;
68} 79}
69 80
70#define pmb_unmap(addr) do { } while (0)
71#define pmb_init(addr) do { } while (0) 81#define pmb_init(addr) do { } while (0)
72 82
73#ifdef CONFIG_29BIT 83#ifdef CONFIG_29BIT
@@ -77,6 +87,13 @@ static inline long pmb_remap(unsigned long virt, unsigned long phys,
77#endif 87#endif
78 88
79#endif /* CONFIG_PMB */ 89#endif /* CONFIG_PMB */
90
91static inline void __iomem *
92pmb_remap(phys_addr_t phys, unsigned long size, pgprot_t prot)
93{
94 return pmb_remap_caller(phys, size, prot, __builtin_return_address(0));
95}
96
80#endif /* __ASSEMBLY__ */ 97#endif /* __ASSEMBLY__ */
81 98
82#endif /* __MMU_H */ 99#endif /* __MMU_H */
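
pmb_remap() now returns a void __iomem * cookie (or NULL) instead of a long, and pmb_unmap() consumes that cookie rather than a bare virtual address, so ioremap() can forward PMB-backed mappings directly. A hedged caller sketch under the new prototypes:

/* Hedged example of a caller adapted to the new PMB interface. */
static int __init example_map_region(phys_addr_t phys, unsigned long size)
{
	void __iomem *vaddr;

	vaddr = pmb_remap(phys, size, PAGE_KERNEL_NOCACHE);
	if (!vaddr)
		return -ENOMEM;		/* no PMB entry could be set up */

	/* ... access the region via __raw_readl()/__raw_writel() ... */

	pmb_unmap(vaddr);		/* -EINVAL if vaddr is not PMB-backed */
	return 0;
}
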
diff --git a/arch/sh/include/asm/siu.h b/arch/sh/include/asm/siu.h
index 57565a3b551..f1b1e6944a5 100644
--- a/arch/sh/include/asm/siu.h
+++ b/arch/sh/include/asm/siu.h
@@ -11,7 +11,7 @@
11#ifndef ASM_SIU_H 11#ifndef ASM_SIU_H
12#define ASM_SIU_H 12#define ASM_SIU_H
13 13
14#include <asm/dma-sh.h> 14#include <asm/dmaengine.h>
15 15
16struct device; 16struct device;
17 17
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 37cdadd975a..88e734069fa 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -35,7 +35,7 @@
35 35
36#define pcibus_to_node(bus) ((void)(bus), -1) 36#define pcibus_to_node(bus) ((void)(bus), -1)
37#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ 37#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
38 CPU_MASK_ALL_PTR : \ 38 cpu_all_mask : \
39 cpumask_of_node(pcibus_to_node(bus))) 39 cpumask_of_node(pcibus_to_node(bus)))
40 40
41#endif 41#endif
diff --git a/arch/sh/include/cpu-sh3/cpu/dma-register.h b/arch/sh/include/cpu-sh3/cpu/dma-register.h
new file mode 100644
index 00000000000..2349e488c9a
--- /dev/null
+++ b/arch/sh/include/cpu-sh3/cpu/dma-register.h
@@ -0,0 +1,41 @@
1/*
2 * SH3 CPU-specific DMA definitions, used by both DMA drivers
3 *
4 * Copyright (C) 2010 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef CPU_DMA_REGISTER_H
11#define CPU_DMA_REGISTER_H
12
13#define CHCR_TS_LOW_MASK 0x18
14#define CHCR_TS_LOW_SHIFT 3
15#define CHCR_TS_HIGH_MASK 0
16#define CHCR_TS_HIGH_SHIFT 0
17
18#define DMAOR_INIT DMAOR_DME
19
20/*
21 * The SuperH DMAC supports a number of transmit sizes, we list them here,
22 * with their respective values as they appear in the CHCR registers.
23 */
24enum {
25 XMIT_SZ_8BIT,
26 XMIT_SZ_16BIT,
27 XMIT_SZ_32BIT,
28 XMIT_SZ_128BIT,
29};
30
31/* log2(size / 8) - used to calculate number of transfers */
32#define TS_SHIFT { \
33 [XMIT_SZ_8BIT] = 0, \
34 [XMIT_SZ_16BIT] = 1, \
35 [XMIT_SZ_32BIT] = 2, \
36 [XMIT_SZ_128BIT] = 4, \
37}
38
39#define TS_INDEX2VAL(i) (((i) & 3) << CHCR_TS_LOW_SHIFT)
40
41#endif
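
TS_SHIFT maps each transmit-size index to log2(bytes per hardware transfer), which is how a driver turns a byte length into a TCR transfer count. A small hedged illustration of that arithmetic, mirroring the ts_shift[] arrays added to the SoC setup files later in this series:

/* Hedged sketch: bytes -> number of hardware transfers. */
static const unsigned int ts_shift_tbl[] = TS_SHIFT;

static u32 bytes_to_transfers(size_t len, int xmit_sz)
{
	/* e.g. XMIT_SZ_32BIT -> shift 2 -> 4 bytes per transfer */
	return len >> ts_shift_tbl[xmit_sz];
}
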
diff --git a/arch/sh/include/cpu-sh3/cpu/dma.h b/arch/sh/include/cpu-sh3/cpu/dma.h
index 207811a7a65..24e28b91c9d 100644
--- a/arch/sh/include/cpu-sh3/cpu/dma.h
+++ b/arch/sh/include/cpu-sh3/cpu/dma.h
@@ -20,31 +20,4 @@
20#define TS_32 0x00000010 20#define TS_32 0x00000010
21#define TS_128 0x00000018 21#define TS_128 0x00000018
22 22
23#define CHCR_TS_LOW_MASK 0x18
24#define CHCR_TS_LOW_SHIFT 3
25#define CHCR_TS_HIGH_MASK 0
26#define CHCR_TS_HIGH_SHIFT 0
27
28#define DMAOR_INIT DMAOR_DME
29
30/*
31 * The SuperH DMAC supports a number of transmit sizes, we list them here,
32 * with their respective values as they appear in the CHCR registers.
33 */
34enum {
35 XMIT_SZ_8BIT,
36 XMIT_SZ_16BIT,
37 XMIT_SZ_32BIT,
38 XMIT_SZ_128BIT,
39};
40
41#define TS_SHIFT { \
42 [XMIT_SZ_8BIT] = 0, \
43 [XMIT_SZ_16BIT] = 1, \
44 [XMIT_SZ_32BIT] = 2, \
45 [XMIT_SZ_128BIT] = 4, \
46}
47
48#define TS_INDEX2VAL(i) (((i) & 3) << CHCR_TS_LOW_SHIFT)
49
50#endif /* __ASM_CPU_SH3_DMA_H */ 23#endif /* __ASM_CPU_SH3_DMA_H */
diff --git a/arch/sh/include/cpu-sh4/cpu/dma-register.h b/arch/sh/include/cpu-sh4/cpu/dma-register.h
new file mode 100644
index 00000000000..55f9fec082d
--- /dev/null
+++ b/arch/sh/include/cpu-sh4/cpu/dma-register.h
@@ -0,0 +1,112 @@
1/*
2 * SH4 CPU-specific DMA definitions, used by both DMA drivers
3 *
4 * Copyright (C) 2010 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef CPU_DMA_REGISTER_H
11#define CPU_DMA_REGISTER_H
12
13/* SH7751/7760/7780 DMA IRQ sources */
14
15#ifdef CONFIG_CPU_SH4A
16
17#define DMAOR_INIT DMAOR_DME
18
19#if defined(CONFIG_CPU_SUBTYPE_SH7343) || \
20 defined(CONFIG_CPU_SUBTYPE_SH7730)
21#define CHCR_TS_LOW_MASK 0x00000018
22#define CHCR_TS_LOW_SHIFT 3
23#define CHCR_TS_HIGH_MASK 0
24#define CHCR_TS_HIGH_SHIFT 0
25#elif defined(CONFIG_CPU_SUBTYPE_SH7722) || \
26 defined(CONFIG_CPU_SUBTYPE_SH7724)
27#define CHCR_TS_LOW_MASK 0x00000018
28#define CHCR_TS_LOW_SHIFT 3
29#define CHCR_TS_HIGH_MASK 0x00300000
30#define CHCR_TS_HIGH_SHIFT (20 - 2) /* 2 bits for shifted low TS */
31#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \
32 defined(CONFIG_CPU_SUBTYPE_SH7764)
33#define CHCR_TS_LOW_MASK 0x00000018
34#define CHCR_TS_LOW_SHIFT 3
35#define CHCR_TS_HIGH_MASK 0
36#define CHCR_TS_HIGH_SHIFT 0
37#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
38#define CHCR_TS_LOW_MASK 0x00000018
39#define CHCR_TS_LOW_SHIFT 3
40#define CHCR_TS_HIGH_MASK 0
41#define CHCR_TS_HIGH_SHIFT 0
42#elif defined(CONFIG_CPU_SUBTYPE_SH7780)
43#define CHCR_TS_LOW_MASK 0x00000018
44#define CHCR_TS_LOW_SHIFT 3
45#define CHCR_TS_HIGH_MASK 0
46#define CHCR_TS_HIGH_SHIFT 0
47#else /* SH7785 */
48#define CHCR_TS_LOW_MASK 0x00000018
49#define CHCR_TS_LOW_SHIFT 3
50#define CHCR_TS_HIGH_MASK 0
51#define CHCR_TS_HIGH_SHIFT 0
52#endif
53
54/* Transmit sizes and respective CHCR register values */
55enum {
56 XMIT_SZ_8BIT = 0,
57 XMIT_SZ_16BIT = 1,
58 XMIT_SZ_32BIT = 2,
59 XMIT_SZ_64BIT = 7,
60 XMIT_SZ_128BIT = 3,
61 XMIT_SZ_256BIT = 4,
62 XMIT_SZ_128BIT_BLK = 0xb,
63 XMIT_SZ_256BIT_BLK = 0xc,
64};
65
66/* log2(size / 8) - used to calculate number of transfers */
67#define TS_SHIFT { \
68 [XMIT_SZ_8BIT] = 0, \
69 [XMIT_SZ_16BIT] = 1, \
70 [XMIT_SZ_32BIT] = 2, \
71 [XMIT_SZ_64BIT] = 3, \
72 [XMIT_SZ_128BIT] = 4, \
73 [XMIT_SZ_256BIT] = 5, \
74 [XMIT_SZ_128BIT_BLK] = 4, \
75 [XMIT_SZ_256BIT_BLK] = 5, \
76}
77
78#define TS_INDEX2VAL(i) ((((i) & 3) << CHCR_TS_LOW_SHIFT) | \
79 ((((i) >> 2) & 3) << CHCR_TS_HIGH_SHIFT))
80
81#else /* CONFIG_CPU_SH4A */
82
83#define DMAOR_INIT (0x8000 | DMAOR_DME)
84
85#define CHCR_TS_LOW_MASK 0x70
86#define CHCR_TS_LOW_SHIFT 4
87#define CHCR_TS_HIGH_MASK 0
88#define CHCR_TS_HIGH_SHIFT 0
89
90/* Transmit sizes and respective CHCR register values */
91enum {
92 XMIT_SZ_8BIT = 1,
93 XMIT_SZ_16BIT = 2,
94 XMIT_SZ_32BIT = 3,
95 XMIT_SZ_64BIT = 0,
96 XMIT_SZ_256BIT = 4,
97};
98
99/* log2(size / 8) - used to calculate number of transfers */
100#define TS_SHIFT { \
101 [XMIT_SZ_8BIT] = 0, \
102 [XMIT_SZ_16BIT] = 1, \
103 [XMIT_SZ_32BIT] = 2, \
104 [XMIT_SZ_64BIT] = 3, \
105 [XMIT_SZ_256BIT] = 5, \
106}
107
108#define TS_INDEX2VAL(i) (((i) & 7) << CHCR_TS_LOW_SHIFT)
109
110#endif /* CONFIG_CPU_SH4A */
111
112#endif
diff --git a/arch/sh/include/cpu-sh4/cpu/dma-sh4a.h b/arch/sh/include/cpu-sh4/cpu/dma-sh4a.h
index e734ea47d8a..9647e681fd2 100644
--- a/arch/sh/include/cpu-sh4/cpu/dma-sh4a.h
+++ b/arch/sh/include/cpu-sh4/cpu/dma-sh4a.h
@@ -8,20 +8,12 @@
8#define DMAE0_IRQ 78 /* DMA Error IRQ*/ 8#define DMAE0_IRQ 78 /* DMA Error IRQ*/
9#define SH_DMAC_BASE0 0xFE008020 9#define SH_DMAC_BASE0 0xFE008020
10#define SH_DMARS_BASE0 0xFE009000 10#define SH_DMARS_BASE0 0xFE009000
11#define CHCR_TS_LOW_MASK 0x00000018
12#define CHCR_TS_LOW_SHIFT 3
13#define CHCR_TS_HIGH_MASK 0
14#define CHCR_TS_HIGH_SHIFT 0
15#elif defined(CONFIG_CPU_SUBTYPE_SH7722) 11#elif defined(CONFIG_CPU_SUBTYPE_SH7722)
16#define DMTE0_IRQ 48 12#define DMTE0_IRQ 48
17#define DMTE4_IRQ 76 13#define DMTE4_IRQ 76
18#define DMAE0_IRQ 78 /* DMA Error IRQ*/ 14#define DMAE0_IRQ 78 /* DMA Error IRQ*/
19#define SH_DMAC_BASE0 0xFE008020 15#define SH_DMAC_BASE0 0xFE008020
20#define SH_DMARS_BASE0 0xFE009000 16#define SH_DMARS_BASE0 0xFE009000
21#define CHCR_TS_LOW_MASK 0x00000018
22#define CHCR_TS_LOW_SHIFT 3
23#define CHCR_TS_HIGH_MASK 0x00300000
24#define CHCR_TS_HIGH_SHIFT 20
25#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \ 17#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \
26 defined(CONFIG_CPU_SUBTYPE_SH7764) 18 defined(CONFIG_CPU_SUBTYPE_SH7764)
27#define DMTE0_IRQ 34 19#define DMTE0_IRQ 34
@@ -29,10 +21,6 @@
29#define DMAE0_IRQ 38 21#define DMAE0_IRQ 38
30#define SH_DMAC_BASE0 0xFF608020 22#define SH_DMAC_BASE0 0xFF608020
31#define SH_DMARS_BASE0 0xFF609000 23#define SH_DMARS_BASE0 0xFF609000
32#define CHCR_TS_LOW_MASK 0x00000018
33#define CHCR_TS_LOW_SHIFT 3
34#define CHCR_TS_HIGH_MASK 0
35#define CHCR_TS_HIGH_SHIFT 0
36#elif defined(CONFIG_CPU_SUBTYPE_SH7723) 24#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
37#define DMTE0_IRQ 48 /* DMAC0A*/ 25#define DMTE0_IRQ 48 /* DMAC0A*/
38#define DMTE4_IRQ 76 /* DMAC0B */ 26#define DMTE4_IRQ 76 /* DMAC0B */
@@ -46,10 +34,6 @@
46#define SH_DMAC_BASE0 0xFE008020 34#define SH_DMAC_BASE0 0xFE008020
47#define SH_DMAC_BASE1 0xFDC08020 35#define SH_DMAC_BASE1 0xFDC08020
48#define SH_DMARS_BASE0 0xFDC09000 36#define SH_DMARS_BASE0 0xFDC09000
49#define CHCR_TS_LOW_MASK 0x00000018
50#define CHCR_TS_LOW_SHIFT 3
51#define CHCR_TS_HIGH_MASK 0
52#define CHCR_TS_HIGH_SHIFT 0
53#elif defined(CONFIG_CPU_SUBTYPE_SH7724) 37#elif defined(CONFIG_CPU_SUBTYPE_SH7724)
54#define DMTE0_IRQ 48 /* DMAC0A*/ 38#define DMTE0_IRQ 48 /* DMAC0A*/
55#define DMTE4_IRQ 76 /* DMAC0B */ 39#define DMTE4_IRQ 76 /* DMAC0B */
@@ -64,10 +48,6 @@
64#define SH_DMAC_BASE1 0xFDC08020 48#define SH_DMAC_BASE1 0xFDC08020
65#define SH_DMARS_BASE0 0xFE009000 49#define SH_DMARS_BASE0 0xFE009000
66#define SH_DMARS_BASE1 0xFDC09000 50#define SH_DMARS_BASE1 0xFDC09000
67#define CHCR_TS_LOW_MASK 0x00000018
68#define CHCR_TS_LOW_SHIFT 3
69#define CHCR_TS_HIGH_MASK 0x00600000
70#define CHCR_TS_HIGH_SHIFT 21
71#elif defined(CONFIG_CPU_SUBTYPE_SH7780) 51#elif defined(CONFIG_CPU_SUBTYPE_SH7780)
72#define DMTE0_IRQ 34 52#define DMTE0_IRQ 34
73#define DMTE4_IRQ 44 53#define DMTE4_IRQ 44
@@ -80,10 +60,6 @@
80#define SH_DMAC_BASE0 0xFC808020 60#define SH_DMAC_BASE0 0xFC808020
81#define SH_DMAC_BASE1 0xFC818020 61#define SH_DMAC_BASE1 0xFC818020
82#define SH_DMARS_BASE0 0xFC809000 62#define SH_DMARS_BASE0 0xFC809000
83#define CHCR_TS_LOW_MASK 0x00000018
84#define CHCR_TS_LOW_SHIFT 3
85#define CHCR_TS_HIGH_MASK 0
86#define CHCR_TS_HIGH_SHIFT 0
87#else /* SH7785 */ 63#else /* SH7785 */
88#define DMTE0_IRQ 33 64#define DMTE0_IRQ 33
89#define DMTE4_IRQ 37 65#define DMTE4_IRQ 37
@@ -97,10 +73,6 @@
97#define SH_DMAC_BASE0 0xFC808020 73#define SH_DMAC_BASE0 0xFC808020
98#define SH_DMAC_BASE1 0xFCC08020 74#define SH_DMAC_BASE1 0xFCC08020
99#define SH_DMARS_BASE0 0xFC809000 75#define SH_DMARS_BASE0 0xFC809000
100#define CHCR_TS_LOW_MASK 0x00000018
101#define CHCR_TS_LOW_SHIFT 3
102#define CHCR_TS_HIGH_MASK 0
103#define CHCR_TS_HIGH_SHIFT 0
104#endif 76#endif
105 77
106#define REQ_HE 0x000000C0 78#define REQ_HE 0x000000C0
@@ -108,38 +80,4 @@
108#define REQ_LE 0x00000040 80#define REQ_LE 0x00000040
109#define TM_BURST 0x00000020 81#define TM_BURST 0x00000020
110 82
111/*
112 * The SuperH DMAC supports a number of transmit sizes, we list them here,
113 * with their respective values as they appear in the CHCR registers.
114 *
115 * Defaults to a 64-bit transfer size.
116 */
117enum {
118 XMIT_SZ_8BIT = 0,
119 XMIT_SZ_16BIT = 1,
120 XMIT_SZ_32BIT = 2,
121 XMIT_SZ_64BIT = 7,
122 XMIT_SZ_128BIT = 3,
123 XMIT_SZ_256BIT = 4,
124 XMIT_SZ_128BIT_BLK = 0xb,
125 XMIT_SZ_256BIT_BLK = 0xc,
126};
127
128/*
129 * The DMA count is defined as the number of bytes to transfer.
130 */
131#define TS_SHIFT { \
132 [XMIT_SZ_8BIT] = 0, \
133 [XMIT_SZ_16BIT] = 1, \
134 [XMIT_SZ_32BIT] = 2, \
135 [XMIT_SZ_64BIT] = 3, \
136 [XMIT_SZ_128BIT] = 4, \
137 [XMIT_SZ_256BIT] = 5, \
138 [XMIT_SZ_128BIT_BLK] = 4, \
139 [XMIT_SZ_256BIT_BLK] = 5, \
140}
141
142#define TS_INDEX2VAL(i) ((((i) & 3) << CHCR_TS_LOW_SHIFT) | \
143 ((((i) >> 2) & 3) << CHCR_TS_HIGH_SHIFT))
144
145#endif /* __ASM_SH_CPU_SH4_DMA_SH7780_H */ 83#endif /* __ASM_SH_CPU_SH4_DMA_SH7780_H */
diff --git a/arch/sh/include/cpu-sh4/cpu/dma.h b/arch/sh/include/cpu-sh4/cpu/dma.h
index 114a369705b..ca747e93c2e 100644
--- a/arch/sh/include/cpu-sh4/cpu/dma.h
+++ b/arch/sh/include/cpu-sh4/cpu/dma.h
@@ -5,9 +5,8 @@
5 5
6#ifdef CONFIG_CPU_SH4A 6#ifdef CONFIG_CPU_SH4A
7 7
8#define DMAOR_INIT (DMAOR_DME)
9
10#include <cpu/dma-sh4a.h> 8#include <cpu/dma-sh4a.h>
9
11#else /* CONFIG_CPU_SH4A */ 10#else /* CONFIG_CPU_SH4A */
12/* 11/*
13 * SH7750/SH7751/SH7760 12 * SH7750/SH7751/SH7760
@@ -17,7 +16,6 @@
17#define DMTE6_IRQ 46 16#define DMTE6_IRQ 46
18#define DMAE0_IRQ 38 17#define DMAE0_IRQ 38
19 18
20#define DMAOR_INIT (0x8000|DMAOR_DME)
21#define SH_DMAC_BASE0 0xffa00000 19#define SH_DMAC_BASE0 0xffa00000
22#define SH_DMAC_BASE1 0xffa00070 20#define SH_DMAC_BASE1 0xffa00070
23/* Definitions for the SuperH DMAC */ 21/* Definitions for the SuperH DMAC */
@@ -27,40 +25,8 @@
27#define TS_32 0x00000030 25#define TS_32 0x00000030
28#define TS_64 0x00000000 26#define TS_64 0x00000000
29 27
30#define CHCR_TS_LOW_MASK 0x70
31#define CHCR_TS_LOW_SHIFT 4
32#define CHCR_TS_HIGH_MASK 0
33#define CHCR_TS_HIGH_SHIFT 0
34
35#define DMAOR_COD 0x00000008 28#define DMAOR_COD 0x00000008
36 29
37/*
38 * The SuperH DMAC supports a number of transmit sizes, we list them here,
39 * with their respective values as they appear in the CHCR registers.
40 *
41 * Defaults to a 64-bit transfer size.
42 */
43enum {
44 XMIT_SZ_8BIT = 1,
45 XMIT_SZ_16BIT = 2,
46 XMIT_SZ_32BIT = 3,
47 XMIT_SZ_64BIT = 0,
48 XMIT_SZ_256BIT = 4,
49};
50
51/*
52 * The DMA count is defined as the number of bytes to transfer.
53 */
54#define TS_SHIFT { \
55 [XMIT_SZ_8BIT] = 0, \
56 [XMIT_SZ_16BIT] = 1, \
57 [XMIT_SZ_32BIT] = 2, \
58 [XMIT_SZ_64BIT] = 3, \
59 [XMIT_SZ_256BIT] = 5, \
60}
61
62#define TS_INDEX2VAL(i) (((i) & 7) << CHCR_TS_LOW_SHIFT)
63
64#endif 30#endif
65 31
66#endif /* __ASM_CPU_SH4_DMA_H */ 32#endif /* __ASM_CPU_SH4_DMA_H */
diff --git a/arch/sh/include/mach-migor/mach/migor.h b/arch/sh/include/mach-migor/mach/migor.h
index cee6cb88e02..42fccf93412 100644
--- a/arch/sh/include/mach-migor/mach/migor.h
+++ b/arch/sh/include/mach-migor/mach/migor.h
@@ -1,6 +1,7 @@
1#ifndef __ASM_SH_MIGOR_H 1#ifndef __ASM_SH_MIGOR_H
2#define __ASM_SH_MIGOR_H 2#define __ASM_SH_MIGOR_H
3 3
4#define PORT_MSELCRA 0xa4050180
4#define PORT_MSELCRB 0xa4050182 5#define PORT_MSELCRB 0xa4050182
5#define BSC_CS4BCR 0xfec10010 6#define BSC_CS4BCR 0xfec10010
6#define BSC_CS6ABCR 0xfec1001c 7#define BSC_CS6ABCR 0xfec1001c
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index ef3f9782780..fd7e3639e84 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -7,19 +7,167 @@
7 * License. See the file "COPYING" in the main directory of this archive 7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details. 8 * for more details.
9 */ 9 */
10#include <linux/platform_device.h>
11#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/mm.h>
12#include <linux/platform_device.h>
12#include <linux/serial.h> 13#include <linux/serial.h>
13#include <linux/serial_sci.h> 14#include <linux/serial_sci.h>
14#include <linux/mm.h> 15#include <linux/sh_timer.h>
15#include <linux/uio_driver.h> 16#include <linux/uio_driver.h>
16#include <linux/usb/m66592.h> 17#include <linux/usb/m66592.h>
17#include <linux/sh_timer.h> 18
18#include <asm/clock.h> 19#include <asm/clock.h>
20#include <asm/dmaengine.h>
19#include <asm/mmzone.h> 21#include <asm/mmzone.h>
20#include <asm/dma-sh.h> 22#include <asm/siu.h>
23
24#include <cpu/dma-register.h>
21#include <cpu/sh7722.h> 25#include <cpu/sh7722.h>
22 26
27static struct sh_dmae_slave_config sh7722_dmae_slaves[] = {
28 {
29 .slave_id = SHDMA_SLAVE_SCIF0_TX,
30 .addr = 0xffe0000c,
31 .chcr = DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL(XMIT_SZ_8BIT),
32 .mid_rid = 0x21,
33 }, {
34 .slave_id = SHDMA_SLAVE_SCIF0_RX,
35 .addr = 0xffe00014,
36 .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_8BIT),
37 .mid_rid = 0x22,
38 }, {
39 .slave_id = SHDMA_SLAVE_SCIF1_TX,
40 .addr = 0xffe1000c,
41 .chcr = DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL(XMIT_SZ_8BIT),
42 .mid_rid = 0x25,
43 }, {
44 .slave_id = SHDMA_SLAVE_SCIF1_RX,
45 .addr = 0xffe10014,
46 .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_8BIT),
47 .mid_rid = 0x26,
48 }, {
49 .slave_id = SHDMA_SLAVE_SCIF2_TX,
50 .addr = 0xffe2000c,
51 .chcr = DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL(XMIT_SZ_8BIT),
52 .mid_rid = 0x29,
53 }, {
54 .slave_id = SHDMA_SLAVE_SCIF2_RX,
55 .addr = 0xffe20014,
56 .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_8BIT),
57 .mid_rid = 0x2a,
58 }, {
59 .slave_id = SHDMA_SLAVE_SIUA_TX,
60 .addr = 0xa454c098,
61 .chcr = DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT),
62 .mid_rid = 0xb1,
63 }, {
64 .slave_id = SHDMA_SLAVE_SIUA_RX,
65 .addr = 0xa454c090,
66 .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT),
67 .mid_rid = 0xb2,
68 }, {
69 .slave_id = SHDMA_SLAVE_SIUB_TX,
70 .addr = 0xa454c09c,
71 .chcr = DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT),
72 .mid_rid = 0xb5,
73 }, {
74 .slave_id = SHDMA_SLAVE_SIUB_RX,
75 .addr = 0xa454c094,
76 .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT),
77 .mid_rid = 0xb6,
78 },
79};
80
81static struct sh_dmae_channel sh7722_dmae_channels[] = {
82 {
83 .offset = 0,
84 .dmars = 0,
85 .dmars_bit = 0,
86 }, {
87 .offset = 0x10,
88 .dmars = 0,
89 .dmars_bit = 8,
90 }, {
91 .offset = 0x20,
92 .dmars = 4,
93 .dmars_bit = 0,
94 }, {
95 .offset = 0x30,
96 .dmars = 4,
97 .dmars_bit = 8,
98 }, {
99 .offset = 0x50,
100 .dmars = 8,
101 .dmars_bit = 0,
102 }, {
103 .offset = 0x60,
104 .dmars = 8,
105 .dmars_bit = 8,
106 }
107};
108
109static unsigned int ts_shift[] = TS_SHIFT;
110
111static struct sh_dmae_pdata dma_platform_data = {
112 .slave = sh7722_dmae_slaves,
113 .slave_num = ARRAY_SIZE(sh7722_dmae_slaves),
114 .channel = sh7722_dmae_channels,
115 .channel_num = ARRAY_SIZE(sh7722_dmae_channels),
116 .ts_low_shift = CHCR_TS_LOW_SHIFT,
117 .ts_low_mask = CHCR_TS_LOW_MASK,
118 .ts_high_shift = CHCR_TS_HIGH_SHIFT,
119 .ts_high_mask = CHCR_TS_HIGH_MASK,
120 .ts_shift = ts_shift,
121 .ts_shift_num = ARRAY_SIZE(ts_shift),
122 .dmaor_init = DMAOR_INIT,
123};
124
125static struct resource sh7722_dmae_resources[] = {
126 [0] = {
127 /* Channel registers and DMAOR */
128 .start = 0xfe008020,
129 .end = 0xfe00808f,
130 .flags = IORESOURCE_MEM,
131 },
132 [1] = {
133 /* DMARSx */
134 .start = 0xfe009000,
135 .end = 0xfe00900b,
136 .flags = IORESOURCE_MEM,
137 },
138 {
139 /* DMA error IRQ */
140 .start = 78,
141 .end = 78,
142 .flags = IORESOURCE_IRQ,
143 },
144 {
145 /* IRQ for channels 0-3 */
146 .start = 48,
147 .end = 51,
148 .flags = IORESOURCE_IRQ,
149 },
150 {
151 /* IRQ for channels 4-5 */
152 .start = 76,
153 .end = 77,
154 .flags = IORESOURCE_IRQ,
155 },
156};
157
158struct platform_device dma_device = {
159 .name = "sh-dma-engine",
160 .id = -1,
161 .resource = sh7722_dmae_resources,
162 .num_resources = ARRAY_SIZE(sh7722_dmae_resources),
163 .dev = {
164 .platform_data = &dma_platform_data,
165 },
166 .archdata = {
167 .hwblk_id = HWBLK_DMAC,
168 },
169};
170
23/* Serial */ 171/* Serial */
24static struct plat_sci_port scif0_platform_data = { 172static struct plat_sci_port scif0_platform_data = {
25 .mapbase = 0xffe00000, 173 .mapbase = 0xffe00000,
@@ -388,15 +536,36 @@ static struct platform_device tmu2_device = {
388 }, 536 },
389}; 537};
390 538
391static struct sh_dmae_pdata dma_platform_data = { 539static struct siu_platform siu_platform_data = {
392 .mode = 0, 540 .dma_dev = &dma_device.dev,
541 .dma_slave_tx_a = SHDMA_SLAVE_SIUA_TX,
542 .dma_slave_rx_a = SHDMA_SLAVE_SIUA_RX,
543 .dma_slave_tx_b = SHDMA_SLAVE_SIUB_TX,
544 .dma_slave_rx_b = SHDMA_SLAVE_SIUB_RX,
393}; 545};
394 546
395static struct platform_device dma_device = { 547static struct resource siu_resources[] = {
396 .name = "sh-dma-engine", 548 [0] = {
549 .start = 0xa4540000,
550 .end = 0xa454c10f,
551 .flags = IORESOURCE_MEM,
552 },
553 [1] = {
554 .start = 108,
555 .flags = IORESOURCE_IRQ,
556 },
557};
558
559static struct platform_device siu_device = {
560 .name = "sh_siu",
397 .id = -1, 561 .id = -1,
398 .dev = { 562 .dev = {
399 .platform_data = &dma_platform_data, 563 .platform_data = &siu_platform_data,
564 },
565 .resource = siu_resources,
566 .num_resources = ARRAY_SIZE(siu_resources),
567 .archdata = {
568 .hwblk_id = HWBLK_SIU,
400 }, 569 },
401}; 570};
402 571
@@ -414,6 +583,7 @@ static struct platform_device *sh7722_devices[] __initdata = {
414 &vpu_device, 583 &vpu_device,
415 &veu_device, 584 &veu_device,
416 &jpu_device, 585 &jpu_device,
586 &siu_device,
417 &dma_device, 587 &dma_device,
418}; 588};
419 589
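
The new "sh-dma-engine" platform device packs the channel registers, the DMARS block and the various IRQs as positional resources, which is why the ordering comments in these setup files matter. A hedged sketch of how the driver side is assumed to consume them; the function and the lookup-by-index are illustrative only:

/* Hedged sketch: positional resource lookup in the shdma probe path. */
static int sh_dmae_probe_sketch(struct platform_device *pdev)
{
	struct sh_dmae_pdata *pdata = pdev->dev.platform_data;
	struct resource *chan_regs, *dmars;
	int errirq;

	chan_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); /* channels + DMAOR */
	dmars	  = platform_get_resource(pdev, IORESOURCE_MEM, 1); /* DMARSx block */
	errirq	  = platform_get_irq(pdev, 0);			    /* DMA error IRQ */

	if (!pdata || !chan_regs || errirq < 0)
		return -ENODEV;

	/* ... ioremap the blocks and register pdata->channel_num channels ... */
	return 0;
}
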
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 31e3451f7e3..e7fa2a92fc1 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -21,22 +21,189 @@
21#include <linux/sh_timer.h> 21#include <linux/sh_timer.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/notifier.h> 23#include <linux/notifier.h>
24
24#include <asm/suspend.h> 25#include <asm/suspend.h>
25#include <asm/clock.h> 26#include <asm/clock.h>
26#include <asm/dma-sh.h> 27#include <asm/dmaengine.h>
27#include <asm/mmzone.h> 28#include <asm/mmzone.h>
29
30#include <cpu/dma-register.h>
28#include <cpu/sh7724.h> 31#include <cpu/sh7724.h>
29 32
30/* DMA */ 33/* DMA */
31static struct sh_dmae_pdata dma_platform_data = { 34static struct sh_dmae_channel sh7724_dmae0_channels[] = {
32 .mode = SHDMA_DMAOR1, 35 {
36 .offset = 0,
37 .dmars = 0,
38 .dmars_bit = 0,
39 }, {
40 .offset = 0x10,
41 .dmars = 0,
42 .dmars_bit = 8,
43 }, {
44 .offset = 0x20,
45 .dmars = 4,
46 .dmars_bit = 0,
47 }, {
48 .offset = 0x30,
49 .dmars = 4,
50 .dmars_bit = 8,
51 }, {
52 .offset = 0x50,
53 .dmars = 8,
54 .dmars_bit = 0,
55 }, {
56 .offset = 0x60,
57 .dmars = 8,
58 .dmars_bit = 8,
59 }
60};
61
62static struct sh_dmae_channel sh7724_dmae1_channels[] = {
63 {
64 .offset = 0,
65 .dmars = 0,
66 .dmars_bit = 0,
67 }, {
68 .offset = 0x10,
69 .dmars = 0,
70 .dmars_bit = 8,
71 }, {
72 .offset = 0x20,
73 .dmars = 4,
74 .dmars_bit = 0,
75 }, {
76 .offset = 0x30,
77 .dmars = 4,
78 .dmars_bit = 8,
79 }, {
80 .offset = 0x50,
81 .dmars = 8,
82 .dmars_bit = 0,
83 }, {
84 .offset = 0x60,
85 .dmars = 8,
86 .dmars_bit = 8,
87 }
88};
89
90static unsigned int ts_shift[] = TS_SHIFT;
91
92static struct sh_dmae_pdata dma0_platform_data = {
93 .channel = sh7724_dmae0_channels,
94 .channel_num = ARRAY_SIZE(sh7724_dmae0_channels),
95 .ts_low_shift = CHCR_TS_LOW_SHIFT,
96 .ts_low_mask = CHCR_TS_LOW_MASK,
97 .ts_high_shift = CHCR_TS_HIGH_SHIFT,
98 .ts_high_mask = CHCR_TS_HIGH_MASK,
99 .ts_shift = ts_shift,
100 .ts_shift_num = ARRAY_SIZE(ts_shift),
101 .dmaor_init = DMAOR_INIT,
102};
103
104static struct sh_dmae_pdata dma1_platform_data = {
105 .channel = sh7724_dmae1_channels,
106 .channel_num = ARRAY_SIZE(sh7724_dmae1_channels),
107 .ts_low_shift = CHCR_TS_LOW_SHIFT,
108 .ts_low_mask = CHCR_TS_LOW_MASK,
109 .ts_high_shift = CHCR_TS_HIGH_SHIFT,
110 .ts_high_mask = CHCR_TS_HIGH_MASK,
111 .ts_shift = ts_shift,
112 .ts_shift_num = ARRAY_SIZE(ts_shift),
113 .dmaor_init = DMAOR_INIT,
114};
115
116/* Resource order important! */
117static struct resource sh7724_dmae0_resources[] = {
118 {
119 /* Channel registers and DMAOR */
120 .start = 0xfe008020,
121 .end = 0xfe00808f,
122 .flags = IORESOURCE_MEM,
123 },
124 {
125 /* DMARSx */
126 .start = 0xfe009000,
127 .end = 0xfe00900b,
128 .flags = IORESOURCE_MEM,
129 },
130 {
131 /* DMA error IRQ */
132 .start = 78,
133 .end = 78,
134 .flags = IORESOURCE_IRQ,
135 },
136 {
137 /* IRQ for channels 0-3 */
138 .start = 48,
139 .end = 51,
140 .flags = IORESOURCE_IRQ,
141 },
142 {
143 /* IRQ for channels 4-5 */
144 .start = 76,
145 .end = 77,
146 .flags = IORESOURCE_IRQ,
147 },
33}; 148};
34 149
35static struct platform_device dma_device = { 150/* Resource order important! */
36 .name = "sh-dma-engine", 151static struct resource sh7724_dmae1_resources[] = {
37 .id = -1, 152 {
38 .dev = { 153 /* Channel registers and DMAOR */
39 .platform_data = &dma_platform_data, 154 .start = 0xfdc08020,
155 .end = 0xfdc0808f,
156 .flags = IORESOURCE_MEM,
157 },
158 {
159 /* DMARSx */
160 .start = 0xfdc09000,
161 .end = 0xfdc0900b,
162 .flags = IORESOURCE_MEM,
163 },
164 {
165 /* DMA error IRQ */
166 .start = 74,
167 .end = 74,
168 .flags = IORESOURCE_IRQ,
169 },
170 {
171 /* IRQ for channels 0-3 */
172 .start = 40,
173 .end = 43,
174 .flags = IORESOURCE_IRQ,
175 },
176 {
177 /* IRQ for channels 4-5 */
178 .start = 72,
179 .end = 73,
180 .flags = IORESOURCE_IRQ,
181 },
182};
183
184static struct platform_device dma0_device = {
185 .name = "sh-dma-engine",
186 .id = 0,
187 .resource = sh7724_dmae0_resources,
188 .num_resources = ARRAY_SIZE(sh7724_dmae0_resources),
189 .dev = {
190 .platform_data = &dma0_platform_data,
191 },
192 .archdata = {
193 .hwblk_id = HWBLK_DMAC0,
194 },
195};
196
197static struct platform_device dma1_device = {
198 .name = "sh-dma-engine",
199 .id = 1,
200 .resource = sh7724_dmae1_resources,
201 .num_resources = ARRAY_SIZE(sh7724_dmae1_resources),
202 .dev = {
203 .platform_data = &dma1_platform_data,
204 },
205 .archdata = {
206 .hwblk_id = HWBLK_DMAC1,
40 }, 207 },
41}; 208};
42 209
@@ -663,7 +830,8 @@ static struct platform_device *sh7724_devices[] __initdata = {
663 &tmu3_device, 830 &tmu3_device,
664 &tmu4_device, 831 &tmu4_device,
665 &tmu5_device, 832 &tmu5_device,
666 &dma_device, 833 &dma0_device,
834 &dma1_device,
667 &rtc_device, 835 &rtc_device,
668 &iic0_device, 836 &iic0_device,
669 &iic1_device, 837 &iic1_device,
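
With two controllers, the platform device id (0/1) tells them apart, and the "Resource order important!" comment matters because a driver typically fetches the fixed slots by index. A hedged sketch of the probe-side lookups, assuming <linux/platform_device.h>; function and variable names are illustrative:

static int sh_dmae_probe_sketch(struct platform_device *pdev)
{
	/* MEM 0: channel registers + DMAOR; MEM 1: DMARS (may be absent) */
	struct resource *chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	struct resource *dmars = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	int errirq = platform_get_irq(pdev, 0);		/* DMA error interrupt */

	if (!chan || errirq < 0)
		return -ENODEV;
	if (!dmars)
		dev_dbg(&pdev->dev, "no DMARS block (e.g. SH7780/85 DMAC1)\n");

	/* ... ioremap the regions and request errirq here ... */
	return 0;
}
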
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index f8f21618d78..02e792c90de 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -13,7 +13,10 @@
13#include <linux/io.h> 13#include <linux/io.h>
14#include <linux/serial_sci.h> 14#include <linux/serial_sci.h>
15#include <linux/sh_timer.h> 15#include <linux/sh_timer.h>
16#include <asm/dma-sh.h> 16
17#include <asm/dmaengine.h>
18
19#include <cpu/dma-register.h>
17 20
18static struct plat_sci_port scif0_platform_data = { 21static struct plat_sci_port scif0_platform_data = {
19 .mapbase = 0xffe00000, 22 .mapbase = 0xffe00000,
@@ -247,15 +250,131 @@ static struct platform_device rtc_device = {
247 .resource = rtc_resources, 250 .resource = rtc_resources,
248}; 251};
249 252
250static struct sh_dmae_pdata dma_platform_data = { 253/* DMA */
251 .mode = (SHDMA_MIX_IRQ | SHDMA_DMAOR1), 254static struct sh_dmae_channel sh7780_dmae0_channels[] = {
255 {
256 .offset = 0,
257 .dmars = 0,
258 .dmars_bit = 0,
259 }, {
260 .offset = 0x10,
261 .dmars = 0,
262 .dmars_bit = 8,
263 }, {
264 .offset = 0x20,
265 .dmars = 4,
266 .dmars_bit = 0,
267 }, {
268 .offset = 0x30,
269 .dmars = 4,
270 .dmars_bit = 8,
271 }, {
272 .offset = 0x50,
273 .dmars = 8,
274 .dmars_bit = 0,
275 }, {
276 .offset = 0x60,
277 .dmars = 8,
278 .dmars_bit = 8,
279 }
280};
281
282static struct sh_dmae_channel sh7780_dmae1_channels[] = {
283 {
284 .offset = 0,
285 }, {
286 .offset = 0x10,
287 }, {
288 .offset = 0x20,
289 }, {
290 .offset = 0x30,
291 }, {
292 .offset = 0x50,
293 }, {
294 .offset = 0x60,
295 }
296};
297
298static unsigned int ts_shift[] = TS_SHIFT;
299
300static struct sh_dmae_pdata dma0_platform_data = {
301 .channel = sh7780_dmae0_channels,
302 .channel_num = ARRAY_SIZE(sh7780_dmae0_channels),
303 .ts_low_shift = CHCR_TS_LOW_SHIFT,
304 .ts_low_mask = CHCR_TS_LOW_MASK,
305 .ts_high_shift = CHCR_TS_HIGH_SHIFT,
306 .ts_high_mask = CHCR_TS_HIGH_MASK,
307 .ts_shift = ts_shift,
308 .ts_shift_num = ARRAY_SIZE(ts_shift),
309 .dmaor_init = DMAOR_INIT,
310};
311
312static struct sh_dmae_pdata dma1_platform_data = {
313 .channel = sh7780_dmae1_channels,
314 .channel_num = ARRAY_SIZE(sh7780_dmae1_channels),
315 .ts_low_shift = CHCR_TS_LOW_SHIFT,
316 .ts_low_mask = CHCR_TS_LOW_MASK,
317 .ts_high_shift = CHCR_TS_HIGH_SHIFT,
318 .ts_high_mask = CHCR_TS_HIGH_MASK,
319 .ts_shift = ts_shift,
320 .ts_shift_num = ARRAY_SIZE(ts_shift),
321 .dmaor_init = DMAOR_INIT,
252}; 322};
253 323
254static struct platform_device dma_device = { 324static struct resource sh7780_dmae0_resources[] = {
325 [0] = {
326 /* Channel registers and DMAOR */
327 .start = 0xfc808020,
328 .end = 0xfc80808f,
329 .flags = IORESOURCE_MEM,
330 },
331 [1] = {
332 /* DMARSx */
333 .start = 0xfc809000,
334 .end = 0xfc80900b,
335 .flags = IORESOURCE_MEM,
336 },
337 {
338 /* Real DMA error IRQ is 38, and channel IRQs are 34-37, 44-45 */
339 .start = 34,
340 .end = 34,
341 .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_SHAREABLE,
342 },
343};
344
345static struct resource sh7780_dmae1_resources[] = {
346 [0] = {
347 /* Channel registers and DMAOR */
348 .start = 0xfc818020,
349 .end = 0xfc81808f,
350 .flags = IORESOURCE_MEM,
351 },
352 /* DMAC1 has no DMARS */
353 {
354 /* Real DMA error IRQ is 38, and channel IRQs are 46-47, 92-95 */
355 .start = 46,
356 .end = 46,
357 .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_SHAREABLE,
358 },
359};
360
361static struct platform_device dma0_device = {
255 .name = "sh-dma-engine", 362 .name = "sh-dma-engine",
256 .id = -1, 363 .id = 0,
364 .resource = sh7780_dmae0_resources,
365 .num_resources = ARRAY_SIZE(sh7780_dmae0_resources),
257 .dev = { 366 .dev = {
258 .platform_data = &dma_platform_data, 367 .platform_data = &dma0_platform_data,
368 },
369};
370
371static struct platform_device dma1_device = {
372 .name = "sh-dma-engine",
373 .id = 1,
374 .resource = sh7780_dmae1_resources,
375 .num_resources = ARRAY_SIZE(sh7780_dmae1_resources),
376 .dev = {
377 .platform_data = &dma1_platform_data,
259 }, 378 },
260}; 379};
261 380
@@ -269,7 +388,8 @@ static struct platform_device *sh7780_devices[] __initdata = {
269 &tmu4_device, 388 &tmu4_device,
270 &tmu5_device, 389 &tmu5_device,
271 &rtc_device, 390 &rtc_device,
272 &dma_device, 391 &dma0_device,
392 &dma1_device,
273}; 393};
274 394
275static int __init sh7780_devices_setup(void) 395static int __init sh7780_devices_setup(void)
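
Each SH7780 DMAC above exposes a single IORESOURCE_IRQ marked IORESOURCE_IRQ_SHAREABLE. A hedged sketch of how a driver would request such an interrupt, assuming <linux/interrupt.h>; the handler and cookie names are made up, and IRQF_SHARED is the request-side counterpart of the shareable resource flag:

static int request_dmae_errirq(struct platform_device *pdev, int errirq,
			       irq_handler_t handler, void *cookie)
{
	/* Shared line: the same IRQ may serve several users on this DMAC. */
	return request_irq(errirq, handler, IRQF_SHARED,
			   dev_name(&pdev->dev), cookie);
}
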
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index 23448d8c671..1fcd88b1671 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -14,9 +14,12 @@
14#include <linux/io.h> 14#include <linux/io.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/sh_timer.h> 16#include <linux/sh_timer.h>
17#include <asm/dma-sh.h> 17
18#include <asm/dmaengine.h>
18#include <asm/mmzone.h> 19#include <asm/mmzone.h>
19 20
21#include <cpu/dma-register.h>
22
20static struct plat_sci_port scif0_platform_data = { 23static struct plat_sci_port scif0_platform_data = {
21 .mapbase = 0xffea0000, 24 .mapbase = 0xffea0000,
22 .flags = UPF_BOOT_AUTOCONF, 25 .flags = UPF_BOOT_AUTOCONF,
@@ -295,15 +298,131 @@ static struct platform_device tmu5_device = {
295 .num_resources = ARRAY_SIZE(tmu5_resources), 298 .num_resources = ARRAY_SIZE(tmu5_resources),
296}; 299};
297 300
298static struct sh_dmae_pdata dma_platform_data = { 301/* DMA */
299 .mode = (SHDMA_MIX_IRQ | SHDMA_DMAOR1), 302static struct sh_dmae_channel sh7785_dmae0_channels[] = {
303 {
304 .offset = 0,
305 .dmars = 0,
306 .dmars_bit = 0,
307 }, {
308 .offset = 0x10,
309 .dmars = 0,
310 .dmars_bit = 8,
311 }, {
312 .offset = 0x20,
313 .dmars = 4,
314 .dmars_bit = 0,
315 }, {
316 .offset = 0x30,
317 .dmars = 4,
318 .dmars_bit = 8,
319 }, {
320 .offset = 0x50,
321 .dmars = 8,
322 .dmars_bit = 0,
323 }, {
324 .offset = 0x60,
325 .dmars = 8,
326 .dmars_bit = 8,
327 }
328};
329
330static struct sh_dmae_channel sh7785_dmae1_channels[] = {
331 {
332 .offset = 0,
333 }, {
334 .offset = 0x10,
335 }, {
336 .offset = 0x20,
337 }, {
338 .offset = 0x30,
339 }, {
340 .offset = 0x50,
341 }, {
342 .offset = 0x60,
343 }
344};
345
346static unsigned int ts_shift[] = TS_SHIFT;
347
348static struct sh_dmae_pdata dma0_platform_data = {
349 .channel = sh7785_dmae0_channels,
350 .channel_num = ARRAY_SIZE(sh7785_dmae0_channels),
351 .ts_low_shift = CHCR_TS_LOW_SHIFT,
352 .ts_low_mask = CHCR_TS_LOW_MASK,
353 .ts_high_shift = CHCR_TS_HIGH_SHIFT,
354 .ts_high_mask = CHCR_TS_HIGH_MASK,
355 .ts_shift = ts_shift,
356 .ts_shift_num = ARRAY_SIZE(ts_shift),
357 .dmaor_init = DMAOR_INIT,
358};
359
360static struct sh_dmae_pdata dma1_platform_data = {
361 .channel = sh7785_dmae1_channels,
362 .channel_num = ARRAY_SIZE(sh7785_dmae1_channels),
363 .ts_low_shift = CHCR_TS_LOW_SHIFT,
364 .ts_low_mask = CHCR_TS_LOW_MASK,
365 .ts_high_shift = CHCR_TS_HIGH_SHIFT,
366 .ts_high_mask = CHCR_TS_HIGH_MASK,
367 .ts_shift = ts_shift,
368 .ts_shift_num = ARRAY_SIZE(ts_shift),
369 .dmaor_init = DMAOR_INIT,
300}; 370};
301 371
302static struct platform_device dma_device = { 372static struct resource sh7785_dmae0_resources[] = {
373 [0] = {
374 /* Channel registers and DMAOR */
375 .start = 0xfc808020,
376 .end = 0xfc80808f,
377 .flags = IORESOURCE_MEM,
378 },
379 [1] = {
380 /* DMARSx */
381 .start = 0xfc809000,
382 .end = 0xfc80900b,
383 .flags = IORESOURCE_MEM,
384 },
385 {
386 /* Real DMA error IRQ is 39, and channel IRQs are 33-38 */
387 .start = 33,
388 .end = 33,
389 .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_SHAREABLE,
390 },
391};
392
393static struct resource sh7785_dmae1_resources[] = {
394 [0] = {
395 /* Channel registers and DMAOR */
396 .start = 0xfcc08020,
397 .end = 0xfcc0808f,
398 .flags = IORESOURCE_MEM,
399 },
400 /* DMAC1 has no DMARS */
401 {
402 /* Real DMA error IRQ is 58, and channel IRQs are 52-57 */
403 .start = 52,
404 .end = 52,
405 .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_SHAREABLE,
406 },
407};
408
409static struct platform_device dma0_device = {
303 .name = "sh-dma-engine", 410 .name = "sh-dma-engine",
304 .id = -1, 411 .id = 0,
412 .resource = sh7785_dmae0_resources,
413 .num_resources = ARRAY_SIZE(sh7785_dmae0_resources),
305 .dev = { 414 .dev = {
306 .platform_data = &dma_platform_data, 415 .platform_data = &dma0_platform_data,
416 },
417};
418
419static struct platform_device dma1_device = {
420 .name = "sh-dma-engine",
421 .id = 1,
422 .resource = sh7785_dmae1_resources,
423 .num_resources = ARRAY_SIZE(sh7785_dmae1_resources),
424 .dev = {
425 .platform_data = &dma1_platform_data,
307 }, 426 },
308}; 427};
309 428
@@ -320,7 +439,8 @@ static struct platform_device *sh7785_devices[] __initdata = {
320 &tmu3_device, 439 &tmu3_device,
321 &tmu4_device, 440 &tmu4_device,
322 &tmu5_device, 441 &tmu5_device,
323 &dma_device, 442 &dma0_device,
443 &dma1_device,
324}; 444};
325 445
326static int __init sh7785_devices_setup(void) 446static int __init sh7785_devices_setup(void)
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index e2f1753d275..675eea7785d 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -143,26 +143,6 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
143 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); 143 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
144} 144}
145 145
146/*
147 * Store a breakpoint's encoded address, length, and type.
148 */
149static int arch_store_info(struct perf_event *bp)
150{
151 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
152
153 /*
154 * User-space requests will always have the address field populated
155 * For kernel-addresses, either the address or symbol name can be
156 * specified.
157 */
158 if (info->name)
159 info->address = (unsigned long)kallsyms_lookup_name(info->name);
160 if (info->address)
161 return 0;
162
163 return -EINVAL;
164}
165
166int arch_bp_generic_fields(int sh_len, int sh_type, 146int arch_bp_generic_fields(int sh_len, int sh_type,
167 int *gen_len, int *gen_type) 147 int *gen_len, int *gen_type)
168{ 148{
@@ -276,10 +256,12 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
276 return ret; 256 return ret;
277 } 257 }
278 258
279 ret = arch_store_info(bp); 259 /*
280 260 * For kernel-addresses, either the address or symbol name can be
281 if (ret < 0) 261 * specified.
282 return ret; 262 */
263 if (info->name)
264 info->address = (unsigned long)kallsyms_lookup_name(info->name);
283 265
284 /* 266 /*
285 * Check that the low-order bits of the address are appropriate 267 * Check that the low-order bits of the address are appropriate
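
For reference, a minimal sketch of the symbol-to-address lookup that is now done inline above, keeping the explicit failure check the removed arch_store_info() used to provide; it assumes struct arch_hw_breakpoint from <asm/hw_breakpoint.h> and is illustrative only:

static int resolve_bp_address(struct arch_hw_breakpoint *info)
{
	/* Kernel breakpoints may be given by symbol name instead of address. */
	if (info->name)
		info->address = kallsyms_lookup_name(info->name);

	return info->address ? 0 : -EINVAL;
}
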
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 3459e70eed7..8870d6ba64b 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -443,7 +443,7 @@ void __init setup_arch(char **cmdline_p)
443 443
444 nodes_clear(node_online_map); 444 nodes_clear(node_online_map);
445 445
446 /* Setup bootmem with available RAM */ 446 pmb_init();
447 lmb_init(); 447 lmb_init();
448 setup_memory(); 448 setup_memory();
449 sparse_init(); 449 sparse_init();
@@ -452,7 +452,6 @@ void __init setup_arch(char **cmdline_p)
452 conswitchp = &dummy_con; 452 conswitchp = &dummy_con;
453#endif 453#endif
454 paging_init(); 454 paging_init();
455 pmb_init();
456 455
457 ioremap_fixed_init(); 456 ioremap_fixed_init();
458 457
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 953fa161331..8a0072de2bc 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -39,12 +39,12 @@ static int null_rtc_set_time(const time_t secs)
39void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time; 39void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
40int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; 40int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
41 41
42#ifdef CONFIG_GENERIC_CMOS_UPDATE
43void read_persistent_clock(struct timespec *ts) 42void read_persistent_clock(struct timespec *ts)
44{ 43{
45 rtc_sh_get_time(ts); 44 rtc_sh_get_time(ts);
46} 45}
47 46
47#ifdef CONFIG_GENERIC_CMOS_UPDATE
48int update_persistent_clock(struct timespec now) 48int update_persistent_clock(struct timespec now)
49{ 49{
50 return rtc_sh_set_time(now.tv_sec); 50 return rtc_sh_set_time(now.tv_sec);
@@ -113,9 +113,5 @@ void __init time_init(void)
113 hwblk_init(); 113 hwblk_init();
114 clk_init(); 114 clk_init();
115 115
116 rtc_sh_get_time(&xtime);
117 set_normalized_timespec(&wall_to_monotonic,
118 -xtime.tv_sec, -xtime.tv_nsec);
119
120 late_time_init = sh_late_time_init; 116 late_time_init = sh_late_time_init;
121} 117}
diff --git a/arch/sh/lib/libgcc.h b/arch/sh/lib/libgcc.h
index 3f19d1c5d94..05909d58e2f 100644
--- a/arch/sh/lib/libgcc.h
+++ b/arch/sh/lib/libgcc.h
@@ -17,8 +17,7 @@ struct DWstruct {
17#error I feel sick. 17#error I feel sick.
18#endif 18#endif
19 19
20typedef union 20typedef union {
21{
22 struct DWstruct s; 21 struct DWstruct s;
23 long long ll; 22 long long ll;
24} DWunion; 23} DWunion;
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index c68d2d7d00a..1ab2385ecef 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -34,11 +34,12 @@
34 * caller shouldn't need to know that small detail. 34 * caller shouldn't need to know that small detail.
35 */ 35 */
36void __iomem * __init_refok 36void __iomem * __init_refok
37__ioremap_caller(unsigned long phys_addr, unsigned long size, 37__ioremap_caller(phys_addr_t phys_addr, unsigned long size,
38 pgprot_t pgprot, void *caller) 38 pgprot_t pgprot, void *caller)
39{ 39{
40 struct vm_struct *area; 40 struct vm_struct *area;
41 unsigned long offset, last_addr, addr, orig_addr; 41 unsigned long offset, last_addr, addr, orig_addr;
42 void __iomem *mapped;
42 43
43 /* Don't allow wraparound or zero size */ 44 /* Don't allow wraparound or zero size */
44 last_addr = phys_addr + size - 1; 45 last_addr = phys_addr + size - 1;
@@ -46,6 +47,20 @@ __ioremap_caller(unsigned long phys_addr, unsigned long size,
46 return NULL; 47 return NULL;
47 48
48 /* 49 /*
50 * If we can't yet use the regular approach, go the fixmap route.
51 */
52 if (!mem_init_done)
53 return ioremap_fixed(phys_addr, size, pgprot);
54
55 /*
56 * First try to remap through the PMB.
57 * PMB entries are all pre-faulted.
58 */
59 mapped = pmb_remap_caller(phys_addr, size, pgprot, caller);
60 if (mapped && !IS_ERR(mapped))
61 return mapped;
62
63 /*
49 * Mappings have to be page-aligned 64 * Mappings have to be page-aligned
50 */ 65 */
51 offset = phys_addr & ~PAGE_MASK; 66 offset = phys_addr & ~PAGE_MASK;
@@ -53,12 +68,6 @@ __ioremap_caller(unsigned long phys_addr, unsigned long size,
53 size = PAGE_ALIGN(last_addr+1) - phys_addr; 68 size = PAGE_ALIGN(last_addr+1) - phys_addr;
54 69
55 /* 70 /*
56 * If we can't yet use the regular approach, go the fixmap route.
57 */
58 if (!mem_init_done)
59 return ioremap_fixed(phys_addr, offset, size, pgprot);
60
61 /*
62 * Ok, go for it.. 71 * Ok, go for it..
63 */ 72 */
64 area = get_vm_area_caller(size, VM_IOREMAP, caller); 73 area = get_vm_area_caller(size, VM_IOREMAP, caller);
@@ -67,33 +76,10 @@ __ioremap_caller(unsigned long phys_addr, unsigned long size,
67 area->phys_addr = phys_addr; 76 area->phys_addr = phys_addr;
68 orig_addr = addr = (unsigned long)area->addr; 77 orig_addr = addr = (unsigned long)area->addr;
69 78
70#ifdef CONFIG_PMB 79 if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
71 /* 80 vunmap((void *)orig_addr);
72 * First try to remap through the PMB once a valid VMA has been 81 return NULL;
73 * established. Smaller allocations (or the rest of the size
74 * remaining after a PMB mapping due to the size not being
75 * perfectly aligned on a PMB size boundary) are then mapped
76 * through the UTLB using conventional page tables.
77 *
78 * PMB entries are all pre-faulted.
79 */
80 if (unlikely(phys_addr >= P1SEG)) {
81 unsigned long mapped;
82
83 mapped = pmb_remap(addr, phys_addr, size, pgprot);
84 if (likely(mapped)) {
85 addr += mapped;
86 phys_addr += mapped;
87 size -= mapped;
88 }
89 } 82 }
90#endif
91
92 if (likely(size))
93 if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
94 vunmap((void *)orig_addr);
95 return NULL;
96 }
97 83
98 return (void __iomem *)(offset + (char *)orig_addr); 84 return (void __iomem *)(offset + (char *)orig_addr);
99} 85}
@@ -133,23 +119,11 @@ void __iounmap(void __iomem *addr)
133 if (iounmap_fixed(addr) == 0) 119 if (iounmap_fixed(addr) == 0)
134 return; 120 return;
135 121
136#ifdef CONFIG_PMB
137 /* 122 /*
138 * Purge any PMB entries that may have been established for this 123 * If the PMB handled it, there's nothing else to do.
139 * mapping, then proceed with conventional VMA teardown.
140 *
141 * XXX: Note that due to the way that remove_vm_area() does
142 * matching of the resultant VMA, we aren't able to fast-forward
143 * the address past the PMB space until the end of the VMA where
144 * the page tables reside. As such, unmap_vm_area() will be
145 * forced to linearly scan over the area until it finds the page
146 * tables where PTEs that need to be unmapped actually reside,
147 * which is far from optimal. Perhaps we need to use a separate
148 * VMA for the PMB mappings?
149 * -- PFM.
150 */ 124 */
151 pmb_unmap(vaddr); 125 if (pmb_unmap(addr) == 0)
152#endif 126 return;
153 127
154 p = remove_vm_area((void *)(vaddr & PAGE_MASK)); 128 p = remove_vm_area((void *)(vaddr & PAGE_MASK));
155 if (!p) { 129 if (!p) {
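
Callers are unaffected by the reordering above (fixmap first, then PMB, then conventional page tables): the usual ioremap()/iounmap() pair keeps working. A small usage sketch; the physical range is arbitrary and purely illustrative:

static int __init ioremap_usage_sketch(void)
{
	void __iomem *regs = ioremap(0xfe008020, 0x70);	/* made-up range */

	if (!regs)
		return -ENOMEM;

	pr_info("reg0 = 0x%08x\n", readl(regs));	/* PMB or PTE backed, transparently */
	iounmap(regs);
	return 0;
}
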
diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c
index 0b78b1e20ef..7f682e5dafc 100644
--- a/arch/sh/mm/ioremap_fixed.c
+++ b/arch/sh/mm/ioremap_fixed.c
@@ -45,14 +45,21 @@ void __init ioremap_fixed_init(void)
45} 45}
46 46
47void __init __iomem * 47void __init __iomem *
48ioremap_fixed(resource_size_t phys_addr, unsigned long offset, 48ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot)
49 unsigned long size, pgprot_t prot)
50{ 49{
51 enum fixed_addresses idx0, idx; 50 enum fixed_addresses idx0, idx;
52 struct ioremap_map *map; 51 struct ioremap_map *map;
53 unsigned int nrpages; 52 unsigned int nrpages;
53 unsigned long offset;
54 int i, slot; 54 int i, slot;
55 55
56 /*
57 * Mappings have to be page-aligned
58 */
59 offset = phys_addr & ~PAGE_MASK;
60 phys_addr &= PAGE_MASK;
61 size = PAGE_ALIGN(phys_addr + size) - phys_addr;
62
56 slot = -1; 63 slot = -1;
57 for (i = 0; i < FIX_N_IOREMAPS; i++) { 64 for (i = 0; i < FIX_N_IOREMAPS; i++) {
58 map = &ioremap_maps[i]; 65 map = &ioremap_maps[i];
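
Worked example of the new page-alignment fix-up in ioremap_fixed(): for phys_addr = 0xfe008024 and size = 0x10 with 4 KiB pages, offset becomes 0x24, phys_addr is rounded down to 0xfe008000, and size becomes PAGE_ALIGN(0xfe008000 + 0x10) - 0xfe008000 = 0x1000, so the fixmap slot covers the whole page containing the requested registers while the sub-page offset (0x24) is carried in the local offset variable.
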
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 422e9272187..961b34085e3 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -74,6 +74,9 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
74 start_pfn = start >> PAGE_SHIFT; 74 start_pfn = start >> PAGE_SHIFT;
75 end_pfn = end >> PAGE_SHIFT; 75 end_pfn = end >> PAGE_SHIFT;
76 76
77 pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
78 PAGE_KERNEL);
79
77 lmb_add(start, end - start); 80 lmb_add(start, end - start);
78 81
79 __add_active_range(nid, start_pfn, end_pfn); 82 __add_active_range(nid, start_pfn, end_pfn);
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index 198bcff5e96..a4662e2782c 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -23,7 +23,8 @@
23#include <linux/err.h> 23#include <linux/err.h>
24#include <linux/io.h> 24#include <linux/io.h>
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/rwlock.h> 26#include <linux/vmalloc.h>
27#include <asm/cacheflush.h>
27#include <asm/sizes.h> 28#include <asm/sizes.h>
28#include <asm/system.h> 29#include <asm/system.h>
29#include <asm/uaccess.h> 30#include <asm/uaccess.h>
@@ -52,12 +53,24 @@ struct pmb_entry {
52 struct pmb_entry *link; 53 struct pmb_entry *link;
53}; 54};
54 55
56static struct {
57 unsigned long size;
58 int flag;
59} pmb_sizes[] = {
60 { .size = SZ_512M, .flag = PMB_SZ_512M, },
61 { .size = SZ_128M, .flag = PMB_SZ_128M, },
62 { .size = SZ_64M, .flag = PMB_SZ_64M, },
63 { .size = SZ_16M, .flag = PMB_SZ_16M, },
64};
65
55static void pmb_unmap_entry(struct pmb_entry *, int depth); 66static void pmb_unmap_entry(struct pmb_entry *, int depth);
56 67
57static DEFINE_RWLOCK(pmb_rwlock); 68static DEFINE_RWLOCK(pmb_rwlock);
58static struct pmb_entry pmb_entry_list[NR_PMB_ENTRIES]; 69static struct pmb_entry pmb_entry_list[NR_PMB_ENTRIES];
59static DECLARE_BITMAP(pmb_map, NR_PMB_ENTRIES); 70static DECLARE_BITMAP(pmb_map, NR_PMB_ENTRIES);
60 71
72static unsigned int pmb_iomapping_enabled;
73
61static __always_inline unsigned long mk_pmb_entry(unsigned int entry) 74static __always_inline unsigned long mk_pmb_entry(unsigned int entry)
62{ 75{
63 return (entry & PMB_E_MASK) << PMB_E_SHIFT; 76 return (entry & PMB_E_MASK) << PMB_E_SHIFT;
@@ -73,6 +86,142 @@ static __always_inline unsigned long mk_pmb_data(unsigned int entry)
73 return mk_pmb_entry(entry) | PMB_DATA; 86 return mk_pmb_entry(entry) | PMB_DATA;
74} 87}
75 88
89static __always_inline unsigned int pmb_ppn_in_range(unsigned long ppn)
90{
91 return ppn >= __pa(memory_start) && ppn < __pa(memory_end);
92}
93
94/*
95 * Ensure that the PMB entries match our cache configuration.
96 *
97 * When we are in 32-bit address extended mode, CCR.CB becomes
98 * invalid, so care must be taken to manually adjust cacheable
99 * translations.
100 */
101static __always_inline unsigned long pmb_cache_flags(void)
102{
103 unsigned long flags = 0;
104
105#if defined(CONFIG_CACHE_OFF)
106 flags |= PMB_WT | PMB_UB;
107#elif defined(CONFIG_CACHE_WRITETHROUGH)
108 flags |= PMB_C | PMB_WT | PMB_UB;
109#elif defined(CONFIG_CACHE_WRITEBACK)
110 flags |= PMB_C;
111#endif
112
113 return flags;
114}
115
116/*
117 * Convert typical pgprot value to the PMB equivalent
118 */
119static inline unsigned long pgprot_to_pmb_flags(pgprot_t prot)
120{
121 unsigned long pmb_flags = 0;
122 u64 flags = pgprot_val(prot);
123
124 if (flags & _PAGE_CACHABLE)
125 pmb_flags |= PMB_C;
126 if (flags & _PAGE_WT)
127 pmb_flags |= PMB_WT | PMB_UB;
128
129 return pmb_flags;
130}
131
132static inline bool pmb_can_merge(struct pmb_entry *a, struct pmb_entry *b)
133{
134 return (b->vpn == (a->vpn + a->size)) &&
135 (b->ppn == (a->ppn + a->size)) &&
136 (b->flags == a->flags);
137}
138
139static bool pmb_mapping_exists(unsigned long vaddr, phys_addr_t phys,
140 unsigned long size)
141{
142 int i;
143
144 read_lock(&pmb_rwlock);
145
146 for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
147 struct pmb_entry *pmbe, *iter;
148 unsigned long span;
149
150 if (!test_bit(i, pmb_map))
151 continue;
152
153 pmbe = &pmb_entry_list[i];
154
155 /*
156 * See if VPN and PPN are bounded by an existing mapping.
157 */
158 if ((vaddr < pmbe->vpn) || (vaddr >= (pmbe->vpn + pmbe->size)))
159 continue;
160 if ((phys < pmbe->ppn) || (phys >= (pmbe->ppn + pmbe->size)))
161 continue;
162
163 /*
164 * Now see if we're in range of a simple mapping.
165 */
166 if (size <= pmbe->size) {
167 read_unlock(&pmb_rwlock);
168 return true;
169 }
170
171 span = pmbe->size;
172
173 /*
174 * Finally for sizes that involve compound mappings, walk
175 * the chain.
176 */
177 for (iter = pmbe->link; iter; iter = iter->link)
178 span += iter->size;
179
180 /*
181 * Nothing else to do if the range requirements are met.
182 */
183 if (size <= span) {
184 read_unlock(&pmb_rwlock);
185 return true;
186 }
187 }
188
189 read_unlock(&pmb_rwlock);
190 return false;
191}
192
193static bool pmb_size_valid(unsigned long size)
194{
195 int i;
196
197 for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
198 if (pmb_sizes[i].size == size)
199 return true;
200
201 return false;
202}
203
204static inline bool pmb_addr_valid(unsigned long addr, unsigned long size)
205{
206 return (addr >= P1SEG && (addr + size - 1) < P3SEG);
207}
208
209static inline bool pmb_prot_valid(pgprot_t prot)
210{
211 return (pgprot_val(prot) & _PAGE_USER) == 0;
212}
213
214static int pmb_size_to_flags(unsigned long size)
215{
216 int i;
217
218 for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
219 if (pmb_sizes[i].size == size)
220 return pmb_sizes[i].flag;
221
222 return 0;
223}
224
76static int pmb_alloc_entry(void) 225static int pmb_alloc_entry(void)
77{ 226{
78 int pos; 227 int pos;
@@ -140,33 +289,22 @@ static void pmb_free(struct pmb_entry *pmbe)
140} 289}
141 290
142/* 291/*
143 * Ensure that the PMB entries match our cache configuration. 292 * Must be run uncached.
144 *
145 * When we are in 32-bit address extended mode, CCR.CB becomes
146 * invalid, so care must be taken to manually adjust cacheable
147 * translations.
148 */ 293 */
149static __always_inline unsigned long pmb_cache_flags(void) 294static void __set_pmb_entry(struct pmb_entry *pmbe)
150{ 295{
151 unsigned long flags = 0; 296 unsigned long addr, data;
152 297
153#if defined(CONFIG_CACHE_WRITETHROUGH) 298 addr = mk_pmb_addr(pmbe->entry);
154 flags |= PMB_C | PMB_WT | PMB_UB; 299 data = mk_pmb_data(pmbe->entry);
155#elif defined(CONFIG_CACHE_WRITEBACK)
156 flags |= PMB_C;
157#endif
158 300
159 return flags; 301 jump_to_uncached();
160}
161 302
162/* 303 /* Set V-bit */
163 * Must be run uncached. 304 __raw_writel(pmbe->vpn | PMB_V, addr);
164 */ 305 __raw_writel(pmbe->ppn | pmbe->flags | PMB_V, data);
165static void __set_pmb_entry(struct pmb_entry *pmbe) 306
166{ 307 back_to_cached();
167 writel_uncached(pmbe->vpn | PMB_V, mk_pmb_addr(pmbe->entry));
168 writel_uncached(pmbe->ppn | pmbe->flags | PMB_V,
169 mk_pmb_data(pmbe->entry));
170} 308}
171 309
172static void __clear_pmb_entry(struct pmb_entry *pmbe) 310static void __clear_pmb_entry(struct pmb_entry *pmbe)
@@ -194,144 +332,155 @@ static void set_pmb_entry(struct pmb_entry *pmbe)
194 spin_unlock_irqrestore(&pmbe->lock, flags); 332 spin_unlock_irqrestore(&pmbe->lock, flags);
195} 333}
196 334
197static struct { 335int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys,
198 unsigned long size; 336 unsigned long size, pgprot_t prot)
199 int flag;
200} pmb_sizes[] = {
201 { .size = SZ_512M, .flag = PMB_SZ_512M, },
202 { .size = SZ_128M, .flag = PMB_SZ_128M, },
203 { .size = SZ_64M, .flag = PMB_SZ_64M, },
204 { .size = SZ_16M, .flag = PMB_SZ_16M, },
205};
206
207long pmb_remap(unsigned long vaddr, unsigned long phys,
208 unsigned long size, pgprot_t prot)
209{ 337{
210 struct pmb_entry *pmbp, *pmbe; 338 struct pmb_entry *pmbp, *pmbe;
211 unsigned long wanted; 339 unsigned long orig_addr, orig_size;
212 int pmb_flags, i; 340 unsigned long flags, pmb_flags;
213 long err; 341 int i, mapped;
214 u64 flags;
215 342
216 flags = pgprot_val(prot); 343 if (!pmb_addr_valid(vaddr, size))
344 return -EFAULT;
345 if (pmb_mapping_exists(vaddr, phys, size))
346 return 0;
217 347
218 pmb_flags = PMB_WT | PMB_UB; 348 orig_addr = vaddr;
219 349 orig_size = size;
220 /* Convert typical pgprot value to the PMB equivalent */
221 if (flags & _PAGE_CACHABLE) {
222 pmb_flags |= PMB_C;
223 350
224 if ((flags & _PAGE_WT) == 0) 351 flush_tlb_kernel_range(vaddr, vaddr + size);
225 pmb_flags &= ~(PMB_WT | PMB_UB);
226 }
227 352
353 pmb_flags = pgprot_to_pmb_flags(prot);
228 pmbp = NULL; 354 pmbp = NULL;
229 wanted = size;
230 355
231again: 356 do {
232 for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++) { 357 for (i = mapped = 0; i < ARRAY_SIZE(pmb_sizes); i++) {
233 unsigned long flags; 358 if (size < pmb_sizes[i].size)
359 continue;
360
361 pmbe = pmb_alloc(vaddr, phys, pmb_flags |
362 pmb_sizes[i].flag, PMB_NO_ENTRY);
363 if (IS_ERR(pmbe)) {
364 pmb_unmap_entry(pmbp, mapped);
365 return PTR_ERR(pmbe);
366 }
234 367
235 if (size < pmb_sizes[i].size) 368 spin_lock_irqsave(&pmbe->lock, flags);
236 continue;
237 369
238 pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag, 370 pmbe->size = pmb_sizes[i].size;
239 PMB_NO_ENTRY);
240 if (IS_ERR(pmbe)) {
241 err = PTR_ERR(pmbe);
242 goto out;
243 }
244 371
245 spin_lock_irqsave(&pmbe->lock, flags); 372 __set_pmb_entry(pmbe);
246 373
247 __set_pmb_entry(pmbe); 374 phys += pmbe->size;
375 vaddr += pmbe->size;
376 size -= pmbe->size;
248 377
249 phys += pmb_sizes[i].size; 378 /*
250 vaddr += pmb_sizes[i].size; 379 * Link adjacent entries that span multiple PMB
251 size -= pmb_sizes[i].size; 380 * entries for easier tear-down.
381 */
382 if (likely(pmbp)) {
383 spin_lock(&pmbp->lock);
384 pmbp->link = pmbe;
385 spin_unlock(&pmbp->lock);
386 }
252 387
253 pmbe->size = pmb_sizes[i].size; 388 pmbp = pmbe;
254 389
255 /* 390 /*
256 * Link adjacent entries that span multiple PMB entries 391 * Instead of trying smaller sizes on every
257 * for easier tear-down. 392 * iteration (even if we succeed in allocating
258 */ 393 * space), try using pmb_sizes[i].size again.
259 if (likely(pmbp)) { 394 */
260 spin_lock(&pmbp->lock); 395 i--;
261 pmbp->link = pmbe; 396 mapped++;
262 spin_unlock(&pmbp->lock); 397
398 spin_unlock_irqrestore(&pmbe->lock, flags);
263 } 399 }
400 } while (size >= SZ_16M);
264 401
265 pmbp = pmbe; 402 flush_cache_vmap(orig_addr, orig_addr + orig_size);
266 403
267 /* 404 return 0;
268 * Instead of trying smaller sizes on every iteration 405}
269 * (even if we succeed in allocating space), try using
270 * pmb_sizes[i].size again.
271 */
272 i--;
273 406
274 spin_unlock_irqrestore(&pmbe->lock, flags); 407void __iomem *pmb_remap_caller(phys_addr_t phys, unsigned long size,
275 } 408 pgprot_t prot, void *caller)
409{
410 unsigned long vaddr;
411 phys_addr_t offset, last_addr;
412 phys_addr_t align_mask;
413 unsigned long aligned;
414 struct vm_struct *area;
415 int i, ret;
276 416
277 if (size >= SZ_16M) 417 if (!pmb_iomapping_enabled)
278 goto again; 418 return NULL;
279 419
280 return wanted - size; 420 /*
421 * Small mappings need to go through the TLB.
422 */
423 if (size < SZ_16M)
424 return ERR_PTR(-EINVAL);
425 if (!pmb_prot_valid(prot))
426 return ERR_PTR(-EINVAL);
281 427
282out: 428 for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
283 pmb_unmap_entry(pmbp, NR_PMB_ENTRIES); 429 if (size >= pmb_sizes[i].size)
430 break;
431
432 last_addr = phys + size;
433 align_mask = ~(pmb_sizes[i].size - 1);
434 offset = phys & ~align_mask;
435 phys &= align_mask;
436 aligned = ALIGN(last_addr, pmb_sizes[i].size) - phys;
437
438 /*
439 * XXX: This should really start from uncached_end, but this
440 * causes the MMU to reset, so for now we restrict it to the
441 * 0xb000...0xc000 range.
442 */
443 area = __get_vm_area_caller(aligned, VM_IOREMAP, 0xb0000000,
444 P3SEG, caller);
445 if (!area)
446 return NULL;
447
448 area->phys_addr = phys;
449 vaddr = (unsigned long)area->addr;
450
451 ret = pmb_bolt_mapping(vaddr, phys, size, prot);
452 if (unlikely(ret != 0))
453 return ERR_PTR(ret);
284 454
285 return err; 455 return (void __iomem *)(offset + (char *)vaddr);
286} 456}
287 457
288void pmb_unmap(unsigned long addr) 458int pmb_unmap(void __iomem *addr)
289{ 459{
290 struct pmb_entry *pmbe = NULL; 460 struct pmb_entry *pmbe = NULL;
291 int i; 461 unsigned long vaddr = (unsigned long __force)addr;
462 int i, found = 0;
292 463
293 read_lock(&pmb_rwlock); 464 read_lock(&pmb_rwlock);
294 465
295 for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) { 466 for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
296 if (test_bit(i, pmb_map)) { 467 if (test_bit(i, pmb_map)) {
297 pmbe = &pmb_entry_list[i]; 468 pmbe = &pmb_entry_list[i];
298 if (pmbe->vpn == addr) 469 if (pmbe->vpn == vaddr) {
470 found = 1;
299 break; 471 break;
472 }
300 } 473 }
301 } 474 }
302 475
303 read_unlock(&pmb_rwlock); 476 read_unlock(&pmb_rwlock);
304 477
305 pmb_unmap_entry(pmbe, NR_PMB_ENTRIES); 478 if (found) {
306} 479 pmb_unmap_entry(pmbe, NR_PMB_ENTRIES);
307 480 return 0;
308static bool pmb_can_merge(struct pmb_entry *a, struct pmb_entry *b) 481 }
309{
310 return (b->vpn == (a->vpn + a->size)) &&
311 (b->ppn == (a->ppn + a->size)) &&
312 (b->flags == a->flags);
313}
314
315static bool pmb_size_valid(unsigned long size)
316{
317 int i;
318
319 for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
320 if (pmb_sizes[i].size == size)
321 return true;
322
323 return false;
324}
325
326static int pmb_size_to_flags(unsigned long size)
327{
328 int i;
329
330 for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
331 if (pmb_sizes[i].size == size)
332 return pmb_sizes[i].flag;
333 482
334 return 0; 483 return -EINVAL;
335} 484}
336 485
337static void __pmb_unmap_entry(struct pmb_entry *pmbe, int depth) 486static void __pmb_unmap_entry(struct pmb_entry *pmbe, int depth)
@@ -351,6 +500,8 @@ static void __pmb_unmap_entry(struct pmb_entry *pmbe, int depth)
351 */ 500 */
352 __clear_pmb_entry(pmbe); 501 __clear_pmb_entry(pmbe);
353 502
503 flush_cache_vunmap(pmbe->vpn, pmbe->vpn + pmbe->size);
504
354 pmbe = pmblink->link; 505 pmbe = pmblink->link;
355 506
356 pmb_free(pmblink); 507 pmb_free(pmblink);
@@ -369,11 +520,6 @@ static void pmb_unmap_entry(struct pmb_entry *pmbe, int depth)
369 write_unlock_irqrestore(&pmb_rwlock, flags); 520 write_unlock_irqrestore(&pmb_rwlock, flags);
370} 521}
371 522
372static __always_inline unsigned int pmb_ppn_in_range(unsigned long ppn)
373{
374 return ppn >= __pa(memory_start) && ppn < __pa(memory_end);
375}
376
377static void __init pmb_notify(void) 523static void __init pmb_notify(void)
378{ 524{
379 int i; 525 int i;
@@ -625,6 +771,18 @@ static void __init pmb_resize(void)
625} 771}
626#endif 772#endif
627 773
774static int __init early_pmb(char *p)
775{
776 if (!p)
777 return 0;
778
779 if (strstr(p, "iomap"))
780 pmb_iomapping_enabled = 1;
781
782 return 0;
783}
784early_param("pmb", early_pmb);
785
628void __init pmb_init(void) 786void __init pmb_init(void)
629{ 787{
630 /* Synchronize software state */ 788 /* Synchronize software state */
@@ -713,7 +871,7 @@ static int __init pmb_debugfs_init(void)
713 871
714 return 0; 872 return 0;
715} 873}
716postcore_initcall(pmb_debugfs_init); 874subsys_initcall(pmb_debugfs_init);
717 875
718#ifdef CONFIG_PM 876#ifdef CONFIG_PM
719static int pmb_sysdev_suspend(struct sys_device *dev, pm_message_t state) 877static int pmb_sysdev_suspend(struct sys_device *dev, pm_message_t state)
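
Two points worth tracing through the new PMB code. First, the greedy walk over pmb_sizes[] in pmb_bolt_mapping(): a 96 MB request is covered by one 64 MB entry plus two 16 MB entries (512 MB and 128 MB are skipped because size < pmb_sizes[i].size, the 64 MB slot is retried via i-- and no longer fits, and the remaining 32 MB falls through to 16 MB twice); the entries are chained through ->link so pmb_unmap_entry() can tear them down in one pass. Second, pmb_remap_caller() stays inert unless the kernel is booted with something like

	pmb=iomap

on the command line, since that is what sets pmb_iomapping_enabled through the new early_param() handler; requests smaller than 16 MB still get ERR_PTR(-EINVAL) and fall back to the conventional page-table path in __ioremap_caller().
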
diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig
index 99a1f191497..6a8d078070c 100644
--- a/arch/sparc/configs/sparc32_defconfig
+++ b/arch/sparc/configs/sparc32_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.33-rc2 3# Linux kernel version: 2.6.33
4# Mon Jan 11 23:20:31 2010 4# Wed Mar 3 02:52:23 2010
5# 5#
6# CONFIG_64BIT is not set 6# CONFIG_64BIT is not set
7CONFIG_SPARC=y 7CONFIG_SPARC=y
@@ -9,6 +9,8 @@ CONFIG_SPARC32=y
9# CONFIG_SPARC64 is not set 9# CONFIG_SPARC64 is not set
10CONFIG_ARCH_DEFCONFIG="arch/sparc/configs/sparc32_defconfig" 10CONFIG_ARCH_DEFCONFIG="arch/sparc/configs/sparc32_defconfig"
11CONFIG_BITS=32 11CONFIG_BITS=32
12CONFIG_GENERIC_TIME=y
13CONFIG_ARCH_USES_GETTIMEOFFSET=y
12CONFIG_AUDIT_ARCH=y 14CONFIG_AUDIT_ARCH=y
13CONFIG_MMU=y 15CONFIG_MMU=y
14CONFIG_HIGHMEM=y 16CONFIG_HIGHMEM=y
@@ -48,11 +50,6 @@ CONFIG_RCU_FANOUT=32
48# CONFIG_TREE_RCU_TRACE is not set 50# CONFIG_TREE_RCU_TRACE is not set
49# CONFIG_IKCONFIG is not set 51# CONFIG_IKCONFIG is not set
50CONFIG_LOG_BUF_SHIFT=14 52CONFIG_LOG_BUF_SHIFT=14
51CONFIG_GROUP_SCHED=y
52CONFIG_FAIR_GROUP_SCHED=y
53CONFIG_RT_GROUP_SCHED=y
54CONFIG_USER_SCHED=y
55# CONFIG_CGROUP_SCHED is not set
56# CONFIG_CGROUPS is not set 53# CONFIG_CGROUPS is not set
57CONFIG_SYSFS_DEPRECATED=y 54CONFIG_SYSFS_DEPRECATED=y
58CONFIG_SYSFS_DEPRECATED_V2=y 55CONFIG_SYSFS_DEPRECATED_V2=y
@@ -68,6 +65,7 @@ CONFIG_INITRAMFS_SOURCE=""
68CONFIG_RD_GZIP=y 65CONFIG_RD_GZIP=y
69CONFIG_RD_BZIP2=y 66CONFIG_RD_BZIP2=y
70CONFIG_RD_LZMA=y 67CONFIG_RD_LZMA=y
68CONFIG_RD_LZO=y
71# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set 69# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
72CONFIG_SYSCTL=y 70CONFIG_SYSCTL=y
73CONFIG_ANON_INODES=y 71CONFIG_ANON_INODES=y
@@ -211,7 +209,6 @@ CONFIG_SBUSCHAR=y
211CONFIG_PCI=y 209CONFIG_PCI=y
212CONFIG_PCI_SYSCALL=y 210CONFIG_PCI_SYSCALL=y
213# CONFIG_ARCH_SUPPORTS_MSI is not set 211# CONFIG_ARCH_SUPPORTS_MSI is not set
214CONFIG_PCI_LEGACY=y
215# CONFIG_PCI_DEBUG is not set 212# CONFIG_PCI_DEBUG is not set
216# CONFIG_PCI_STUB is not set 213# CONFIG_PCI_STUB is not set
217# CONFIG_PCI_IOV is not set 214# CONFIG_PCI_IOV is not set
@@ -232,7 +229,6 @@ CONFIG_NET=y
232# Networking options 229# Networking options
233# 230#
234CONFIG_PACKET=y 231CONFIG_PACKET=y
235# CONFIG_PACKET_MMAP is not set
236CONFIG_UNIX=y 232CONFIG_UNIX=y
237CONFIG_XFRM=y 233CONFIG_XFRM=y
238CONFIG_XFRM_USER=m 234CONFIG_XFRM_USER=m
@@ -379,11 +375,13 @@ CONFIG_MISC_DEVICES=y
379# CONFIG_TIFM_CORE is not set 375# CONFIG_TIFM_CORE is not set
380# CONFIG_ENCLOSURE_SERVICES is not set 376# CONFIG_ENCLOSURE_SERVICES is not set
381# CONFIG_HP_ILO is not set 377# CONFIG_HP_ILO is not set
378# CONFIG_TI_DAC7512 is not set
382# CONFIG_C2PORT is not set 379# CONFIG_C2PORT is not set
383 380
384# 381#
385# EEPROM support 382# EEPROM support
386# 383#
384# CONFIG_EEPROM_AT25 is not set
387# CONFIG_EEPROM_93CX6 is not set 385# CONFIG_EEPROM_93CX6 is not set
388# CONFIG_CB710_CORE is not set 386# CONFIG_CB710_CORE is not set
389CONFIG_HAVE_IDE=y 387CONFIG_HAVE_IDE=y
@@ -507,7 +505,9 @@ CONFIG_SUNQE=m
507# CONFIG_SUNGEM is not set 505# CONFIG_SUNGEM is not set
508# CONFIG_CASSINI is not set 506# CONFIG_CASSINI is not set
509# CONFIG_NET_VENDOR_3COM is not set 507# CONFIG_NET_VENDOR_3COM is not set
508# CONFIG_ENC28J60 is not set
510# CONFIG_ETHOC is not set 509# CONFIG_ETHOC is not set
510# CONFIG_GRETH is not set
511# CONFIG_DNET is not set 511# CONFIG_DNET is not set
512# CONFIG_NET_TULIP is not set 512# CONFIG_NET_TULIP is not set
513# CONFIG_HP100 is not set 513# CONFIG_HP100 is not set
@@ -521,6 +521,7 @@ CONFIG_SUNQE=m
521# CONFIG_NET_PCI is not set 521# CONFIG_NET_PCI is not set
522# CONFIG_B44 is not set 522# CONFIG_B44 is not set
523# CONFIG_KS8842 is not set 523# CONFIG_KS8842 is not set
524# CONFIG_KS8851 is not set
524# CONFIG_KS8851_MLL is not set 525# CONFIG_KS8851_MLL is not set
525# CONFIG_ATL2 is not set 526# CONFIG_ATL2 is not set
526CONFIG_NETDEV_1000=y 527CONFIG_NETDEV_1000=y
@@ -563,6 +564,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
563# CONFIG_MLX4_CORE is not set 564# CONFIG_MLX4_CORE is not set
564# CONFIG_TEHUTI is not set 565# CONFIG_TEHUTI is not set
565# CONFIG_BNX2X is not set 566# CONFIG_BNX2X is not set
567# CONFIG_QLCNIC is not set
566# CONFIG_QLGE is not set 568# CONFIG_QLGE is not set
567# CONFIG_SFC is not set 569# CONFIG_SFC is not set
568# CONFIG_BE2NET is not set 570# CONFIG_BE2NET is not set
@@ -665,6 +667,7 @@ CONFIG_DEVKMEM=y
665# 667#
666# Non-8250 serial port support 668# Non-8250 serial port support
667# 669#
670# CONFIG_SERIAL_MAX3100 is not set
668CONFIG_SERIAL_SUNCORE=y 671CONFIG_SERIAL_SUNCORE=y
669CONFIG_SERIAL_SUNZILOG=y 672CONFIG_SERIAL_SUNZILOG=y
670CONFIG_SERIAL_SUNZILOG_CONSOLE=y 673CONFIG_SERIAL_SUNZILOG_CONSOLE=y
@@ -689,7 +692,23 @@ CONFIG_HW_RANDOM=m
689# CONFIG_TCG_TPM is not set 692# CONFIG_TCG_TPM is not set
690CONFIG_DEVPORT=y 693CONFIG_DEVPORT=y
691# CONFIG_I2C is not set 694# CONFIG_I2C is not set
692# CONFIG_SPI is not set 695CONFIG_SPI=y
696# CONFIG_SPI_DEBUG is not set
697CONFIG_SPI_MASTER=y
698
699#
700# SPI Master Controller Drivers
701#
702CONFIG_SPI_BITBANG=m
703CONFIG_SPI_XILINX=m
704CONFIG_SPI_XILINX_PLTFM=m
705# CONFIG_SPI_DESIGNWARE is not set
706
707#
708# SPI Protocol Masters
709#
710# CONFIG_SPI_SPIDEV is not set
711# CONFIG_SPI_TLE62X0 is not set
693 712
694# 713#
695# PPS support 714# PPS support
@@ -706,10 +725,13 @@ CONFIG_HWMON=y
706# 725#
707# Native drivers 726# Native drivers
708# 727#
728# CONFIG_SENSORS_ADCXX is not set
709# CONFIG_SENSORS_I5K_AMB is not set 729# CONFIG_SENSORS_I5K_AMB is not set
710# CONFIG_SENSORS_F71805F is not set 730# CONFIG_SENSORS_F71805F is not set
711# CONFIG_SENSORS_F71882FG is not set 731# CONFIG_SENSORS_F71882FG is not set
712# CONFIG_SENSORS_IT87 is not set 732# CONFIG_SENSORS_IT87 is not set
733# CONFIG_SENSORS_LM70 is not set
734# CONFIG_SENSORS_MAX1111 is not set
713# CONFIG_SENSORS_PC87360 is not set 735# CONFIG_SENSORS_PC87360 is not set
714# CONFIG_SENSORS_PC87427 is not set 736# CONFIG_SENSORS_PC87427 is not set
715# CONFIG_SENSORS_SIS5595 is not set 737# CONFIG_SENSORS_SIS5595 is not set
@@ -720,6 +742,7 @@ CONFIG_HWMON=y
720# CONFIG_SENSORS_VT8231 is not set 742# CONFIG_SENSORS_VT8231 is not set
721# CONFIG_SENSORS_W83627HF is not set 743# CONFIG_SENSORS_W83627HF is not set
722# CONFIG_SENSORS_W83627EHF is not set 744# CONFIG_SENSORS_W83627EHF is not set
745# CONFIG_SENSORS_LIS3_SPI is not set
723# CONFIG_THERMAL is not set 746# CONFIG_THERMAL is not set
724# CONFIG_WATCHDOG is not set 747# CONFIG_WATCHDOG is not set
725CONFIG_SSB_POSSIBLE=y 748CONFIG_SSB_POSSIBLE=y
@@ -736,6 +759,8 @@ CONFIG_SSB_POSSIBLE=y
736# CONFIG_MFD_SM501 is not set 759# CONFIG_MFD_SM501 is not set
737# CONFIG_HTC_PASIC3 is not set 760# CONFIG_HTC_PASIC3 is not set
738# CONFIG_MFD_TMIO is not set 761# CONFIG_MFD_TMIO is not set
762# CONFIG_MFD_MC13783 is not set
763# CONFIG_AB4500_CORE is not set
739# CONFIG_REGULATOR is not set 764# CONFIG_REGULATOR is not set
740# CONFIG_MEDIA_SUPPORT is not set 765# CONFIG_MEDIA_SUPPORT is not set
741 766
@@ -743,6 +768,7 @@ CONFIG_SSB_POSSIBLE=y
743# Graphics support 768# Graphics support
744# 769#
745CONFIG_VGA_ARB=y 770CONFIG_VGA_ARB=y
771CONFIG_VGA_ARB_MAX_GPUS=16
746# CONFIG_VGASTATE is not set 772# CONFIG_VGASTATE is not set
747# CONFIG_VIDEO_OUTPUT_CONTROL is not set 773# CONFIG_VIDEO_OUTPUT_CONTROL is not set
748# CONFIG_FB is not set 774# CONFIG_FB is not set
@@ -808,6 +834,14 @@ CONFIG_RTC_INTF_DEV=y
808# 834#
809# SPI RTC drivers 835# SPI RTC drivers
810# 836#
837# CONFIG_RTC_DRV_M41T94 is not set
838# CONFIG_RTC_DRV_DS1305 is not set
839# CONFIG_RTC_DRV_DS1390 is not set
840# CONFIG_RTC_DRV_MAX6902 is not set
841# CONFIG_RTC_DRV_R9701 is not set
842# CONFIG_RTC_DRV_RS5C348 is not set
843# CONFIG_RTC_DRV_DS3234 is not set
844# CONFIG_RTC_DRV_PCF2123 is not set
811 845
812# 846#
813# Platform RTC drivers 847# Platform RTC drivers
@@ -1180,9 +1214,11 @@ CONFIG_CRC32=y
1180CONFIG_LIBCRC32C=m 1214CONFIG_LIBCRC32C=m
1181CONFIG_ZLIB_INFLATE=y 1215CONFIG_ZLIB_INFLATE=y
1182CONFIG_ZLIB_DEFLATE=y 1216CONFIG_ZLIB_DEFLATE=y
1217CONFIG_LZO_DECOMPRESS=y
1183CONFIG_DECOMPRESS_GZIP=y 1218CONFIG_DECOMPRESS_GZIP=y
1184CONFIG_DECOMPRESS_BZIP2=y 1219CONFIG_DECOMPRESS_BZIP2=y
1185CONFIG_DECOMPRESS_LZMA=y 1220CONFIG_DECOMPRESS_LZMA=y
1221CONFIG_DECOMPRESS_LZO=y
1186CONFIG_HAS_IOMEM=y 1222CONFIG_HAS_IOMEM=y
1187CONFIG_HAS_IOPORT=y 1223CONFIG_HAS_IOPORT=y
1188CONFIG_HAS_DMA=y 1224CONFIG_HAS_DMA=y
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 41c5a56aa6f..56e3163673e 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.33-rc2 3# Linux kernel version: 2.6.33
4# Wed Jan 20 16:31:47 2010 4# Wed Mar 3 02:54:29 2010
5# 5#
6CONFIG_64BIT=y 6CONFIG_64BIT=y
7CONFIG_SPARC=y 7CONFIG_SPARC=y
@@ -55,14 +55,10 @@ CONFIG_TREE_RCU=y
55# CONFIG_RCU_TRACE is not set 55# CONFIG_RCU_TRACE is not set
56CONFIG_RCU_FANOUT=64 56CONFIG_RCU_FANOUT=64
57# CONFIG_RCU_FANOUT_EXACT is not set 57# CONFIG_RCU_FANOUT_EXACT is not set
58# CONFIG_RCU_FAST_NO_HZ is not set
58# CONFIG_TREE_RCU_TRACE is not set 59# CONFIG_TREE_RCU_TRACE is not set
59# CONFIG_IKCONFIG is not set 60# CONFIG_IKCONFIG is not set
60CONFIG_LOG_BUF_SHIFT=18 61CONFIG_LOG_BUF_SHIFT=18
61CONFIG_GROUP_SCHED=y
62CONFIG_FAIR_GROUP_SCHED=y
63CONFIG_RT_GROUP_SCHED=y
64CONFIG_USER_SCHED=y
65# CONFIG_CGROUP_SCHED is not set
66# CONFIG_CGROUPS is not set 62# CONFIG_CGROUPS is not set
67# CONFIG_SYSFS_DEPRECATED_V2 is not set 63# CONFIG_SYSFS_DEPRECATED_V2 is not set
68CONFIG_RELAY=y 64CONFIG_RELAY=y
@@ -77,6 +73,7 @@ CONFIG_INITRAMFS_SOURCE=""
77CONFIG_RD_GZIP=y 73CONFIG_RD_GZIP=y
78CONFIG_RD_BZIP2=y 74CONFIG_RD_BZIP2=y
79CONFIG_RD_LZMA=y 75CONFIG_RD_LZMA=y
76CONFIG_RD_LZO=y
80CONFIG_CC_OPTIMIZE_FOR_SIZE=y 77CONFIG_CC_OPTIMIZE_FOR_SIZE=y
81CONFIG_SYSCTL=y 78CONFIG_SYSCTL=y
82CONFIG_ANON_INODES=y 79CONFIG_ANON_INODES=y
@@ -105,7 +102,6 @@ CONFIG_PERF_USE_VMALLOC=y
105# Kernel Performance Events And Counters 102# Kernel Performance Events And Counters
106# 103#
107CONFIG_PERF_EVENTS=y 104CONFIG_PERF_EVENTS=y
108CONFIG_EVENT_PROFILE=y
109CONFIG_PERF_COUNTERS=y 105CONFIG_PERF_COUNTERS=y
110# CONFIG_DEBUG_PERF_USE_VMALLOC is not set 106# CONFIG_DEBUG_PERF_USE_VMALLOC is not set
111CONFIG_VM_EVENT_COUNTERS=y 107CONFIG_VM_EVENT_COUNTERS=y
@@ -266,7 +262,6 @@ CONFIG_PCI_DOMAINS=y
266CONFIG_PCI_SYSCALL=y 262CONFIG_PCI_SYSCALL=y
267CONFIG_ARCH_SUPPORTS_MSI=y 263CONFIG_ARCH_SUPPORTS_MSI=y
268CONFIG_PCI_MSI=y 264CONFIG_PCI_MSI=y
269# CONFIG_PCI_LEGACY is not set
270# CONFIG_PCI_DEBUG is not set 265# CONFIG_PCI_DEBUG is not set
271# CONFIG_PCI_STUB is not set 266# CONFIG_PCI_STUB is not set
272# CONFIG_PCI_IOV is not set 267# CONFIG_PCI_IOV is not set
@@ -290,7 +285,6 @@ CONFIG_NET=y
290# Networking options 285# Networking options
291# 286#
292CONFIG_PACKET=y 287CONFIG_PACKET=y
293CONFIG_PACKET_MMAP=y
294CONFIG_UNIX=y 288CONFIG_UNIX=y
295CONFIG_XFRM=y 289CONFIG_XFRM=y
296CONFIG_XFRM_USER=m 290CONFIG_XFRM_USER=m
@@ -425,10 +419,6 @@ CONFIG_BLK_DEV=y
425# CONFIG_BLK_DEV_COW_COMMON is not set 419# CONFIG_BLK_DEV_COW_COMMON is not set
426CONFIG_BLK_DEV_LOOP=m 420CONFIG_BLK_DEV_LOOP=m
427CONFIG_BLK_DEV_CRYPTOLOOP=m 421CONFIG_BLK_DEV_CRYPTOLOOP=m
428
429#
430# DRBD disabled because PROC_FS, INET or CONNECTOR not selected
431#
432# CONFIG_BLK_DEV_DRBD is not set 422# CONFIG_BLK_DEV_DRBD is not set
433CONFIG_BLK_DEV_NBD=m 423CONFIG_BLK_DEV_NBD=m
434# CONFIG_BLK_DEV_SX8 is not set 424# CONFIG_BLK_DEV_SX8 is not set
@@ -677,6 +667,7 @@ CONFIG_SUNGEM=m
677CONFIG_SUNVNET=m 667CONFIG_SUNVNET=m
678# CONFIG_NET_VENDOR_3COM is not set 668# CONFIG_NET_VENDOR_3COM is not set
679# CONFIG_ETHOC is not set 669# CONFIG_ETHOC is not set
670# CONFIG_GRETH is not set
680# CONFIG_DNET is not set 671# CONFIG_DNET is not set
681# CONFIG_NET_TULIP is not set 672# CONFIG_NET_TULIP is not set
682# CONFIG_HP100 is not set 673# CONFIG_HP100 is not set
@@ -691,6 +682,7 @@ CONFIG_NET_PCI=y
691# CONFIG_PCNET32 is not set 682# CONFIG_PCNET32 is not set
692# CONFIG_AMD8111_ETH is not set 683# CONFIG_AMD8111_ETH is not set
693# CONFIG_ADAPTEC_STARFIRE is not set 684# CONFIG_ADAPTEC_STARFIRE is not set
685# CONFIG_KSZ884X_PCI is not set
694# CONFIG_B44 is not set 686# CONFIG_B44 is not set
695# CONFIG_FORCEDETH is not set 687# CONFIG_FORCEDETH is not set
696# CONFIG_E100 is not set 688# CONFIG_E100 is not set
@@ -741,6 +733,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
741# CONFIG_CHELSIO_T3 is not set 733# CONFIG_CHELSIO_T3 is not set
742# CONFIG_ENIC is not set 734# CONFIG_ENIC is not set
743# CONFIG_IXGBE is not set 735# CONFIG_IXGBE is not set
736# CONFIG_IXGBEVF is not set
744# CONFIG_IXGB is not set 737# CONFIG_IXGB is not set
745# CONFIG_S2IO is not set 738# CONFIG_S2IO is not set
746# CONFIG_VXGE is not set 739# CONFIG_VXGE is not set
@@ -751,6 +744,7 @@ CONFIG_NIU=m
751# CONFIG_MLX4_CORE is not set 744# CONFIG_MLX4_CORE is not set
752# CONFIG_TEHUTI is not set 745# CONFIG_TEHUTI is not set
753# CONFIG_BNX2X is not set 746# CONFIG_BNX2X is not set
747# CONFIG_QLCNIC is not set
754# CONFIG_QLGE is not set 748# CONFIG_QLGE is not set
755# CONFIG_SFC is not set 749# CONFIG_SFC is not set
756# CONFIG_BE2NET is not set 750# CONFIG_BE2NET is not set
@@ -1028,6 +1022,7 @@ CONFIG_HWMON=y
1028# CONFIG_SENSORS_SMSC47M192 is not set 1022# CONFIG_SENSORS_SMSC47M192 is not set
1029# CONFIG_SENSORS_SMSC47B397 is not set 1023# CONFIG_SENSORS_SMSC47B397 is not set
1030# CONFIG_SENSORS_ADS7828 is not set 1024# CONFIG_SENSORS_ADS7828 is not set
1025# CONFIG_SENSORS_AMC6821 is not set
1031# CONFIG_SENSORS_THMC50 is not set 1026# CONFIG_SENSORS_THMC50 is not set
1032# CONFIG_SENSORS_TMP401 is not set 1027# CONFIG_SENSORS_TMP401 is not set
1033# CONFIG_SENSORS_TMP421 is not set 1028# CONFIG_SENSORS_TMP421 is not set
@@ -1076,6 +1071,7 @@ CONFIG_SSB_POSSIBLE=y
1076# Graphics support 1071# Graphics support
1077# 1072#
1078CONFIG_VGA_ARB=y 1073CONFIG_VGA_ARB=y
1074CONFIG_VGA_ARB_MAX_GPUS=16
1079# CONFIG_DRM is not set 1075# CONFIG_DRM is not set
1080# CONFIG_VGASTATE is not set 1076# CONFIG_VGASTATE is not set
1081# CONFIG_VIDEO_OUTPUT_CONTROL is not set 1077# CONFIG_VIDEO_OUTPUT_CONTROL is not set
@@ -1279,6 +1275,7 @@ CONFIG_SND_ALI5451=m
1279# CONFIG_SND_YMFPCI is not set 1275# CONFIG_SND_YMFPCI is not set
1280CONFIG_SND_USB=y 1276CONFIG_SND_USB=y
1281# CONFIG_SND_USB_AUDIO is not set 1277# CONFIG_SND_USB_AUDIO is not set
1278# CONFIG_SND_USB_UA101 is not set
1282# CONFIG_SND_USB_CAIAQ is not set 1279# CONFIG_SND_USB_CAIAQ is not set
1283CONFIG_SND_SPARC=y 1280CONFIG_SND_SPARC=y
1284# CONFIG_SND_SUN_AMD7930 is not set 1281# CONFIG_SND_SUN_AMD7930 is not set
@@ -1301,6 +1298,7 @@ CONFIG_USB_HIDDEV=y
1301# 1298#
1302# Special HID drivers 1299# Special HID drivers
1303# 1300#
1301# CONFIG_HID_3M_PCT is not set
1304CONFIG_HID_A4TECH=y 1302CONFIG_HID_A4TECH=y
1305CONFIG_HID_APPLE=y 1303CONFIG_HID_APPLE=y
1306CONFIG_HID_BELKIN=y 1304CONFIG_HID_BELKIN=y
@@ -1317,14 +1315,19 @@ CONFIG_HID_KENSINGTON=y
1317CONFIG_HID_LOGITECH=y 1315CONFIG_HID_LOGITECH=y
1318# CONFIG_LOGITECH_FF is not set 1316# CONFIG_LOGITECH_FF is not set
1319# CONFIG_LOGIRUMBLEPAD2_FF is not set 1317# CONFIG_LOGIRUMBLEPAD2_FF is not set
1318# CONFIG_LOGIG940_FF is not set
1320CONFIG_HID_MICROSOFT=y 1319CONFIG_HID_MICROSOFT=y
1320# CONFIG_HID_MOSART is not set
1321CONFIG_HID_MONTEREY=y 1321CONFIG_HID_MONTEREY=y
1322CONFIG_HID_NTRIG=y 1322CONFIG_HID_NTRIG=y
1323CONFIG_HID_ORTEK=y
1323CONFIG_HID_PANTHERLORD=y 1324CONFIG_HID_PANTHERLORD=y
1324# CONFIG_PANTHERLORD_FF is not set 1325# CONFIG_PANTHERLORD_FF is not set
1325CONFIG_HID_PETALYNX=y 1326CONFIG_HID_PETALYNX=y
1327# CONFIG_HID_QUANTA is not set
1326CONFIG_HID_SAMSUNG=y 1328CONFIG_HID_SAMSUNG=y
1327CONFIG_HID_SONY=y 1329CONFIG_HID_SONY=y
1330# CONFIG_HID_STANTUM is not set
1328CONFIG_HID_SUNPLUS=y 1331CONFIG_HID_SUNPLUS=y
1329CONFIG_HID_GREENASIA=y 1332CONFIG_HID_GREENASIA=y
1330# CONFIG_GREENASIA_FF is not set 1333# CONFIG_GREENASIA_FF is not set
@@ -1807,6 +1810,7 @@ CONFIG_CRYPTO_MANAGER=y
1807CONFIG_CRYPTO_MANAGER2=y 1810CONFIG_CRYPTO_MANAGER2=y
1808CONFIG_CRYPTO_GF128MUL=m 1811CONFIG_CRYPTO_GF128MUL=m
1809CONFIG_CRYPTO_NULL=m 1812CONFIG_CRYPTO_NULL=m
1813# CONFIG_CRYPTO_PCRYPT is not set
1810CONFIG_CRYPTO_WORKQUEUE=y 1814CONFIG_CRYPTO_WORKQUEUE=y
1811# CONFIG_CRYPTO_CRYPTD is not set 1815# CONFIG_CRYPTO_CRYPTD is not set
1812CONFIG_CRYPTO_AUTHENC=y 1816CONFIG_CRYPTO_AUTHENC=y
@@ -1904,9 +1908,11 @@ CONFIG_CRC32=y
1904CONFIG_LIBCRC32C=m 1908CONFIG_LIBCRC32C=m
1905CONFIG_ZLIB_INFLATE=y 1909CONFIG_ZLIB_INFLATE=y
1906CONFIG_ZLIB_DEFLATE=y 1910CONFIG_ZLIB_DEFLATE=y
1911CONFIG_LZO_DECOMPRESS=y
1907CONFIG_DECOMPRESS_GZIP=y 1912CONFIG_DECOMPRESS_GZIP=y
1908CONFIG_DECOMPRESS_BZIP2=y 1913CONFIG_DECOMPRESS_BZIP2=y
1909CONFIG_DECOMPRESS_LZMA=y 1914CONFIG_DECOMPRESS_LZMA=y
1915CONFIG_DECOMPRESS_LZO=y
1910CONFIG_HAS_IOMEM=y 1916CONFIG_HAS_IOMEM=y
1911CONFIG_HAS_IOPORT=y 1917CONFIG_HAS_IOPORT=y
1912CONFIG_HAS_DMA=y 1918CONFIG_HAS_DMA=y
diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h
index 679c7504625..2889574608d 100644
--- a/arch/sparc/include/asm/io_32.h
+++ b/arch/sparc/include/asm/io_32.h
@@ -249,10 +249,14 @@ extern void iounmap(volatile void __iomem *addr);
249 249
250#define ioread8(X) readb(X) 250#define ioread8(X) readb(X)
251#define ioread16(X) readw(X) 251#define ioread16(X) readw(X)
252#define ioread16be(X) __raw_readw(X)
252#define ioread32(X) readl(X) 253#define ioread32(X) readl(X)
254#define ioread32be(X) __raw_readl(X)
253#define iowrite8(val,X) writeb(val,X) 255#define iowrite8(val,X) writeb(val,X)
254#define iowrite16(val,X) writew(val,X) 256#define iowrite16(val,X) writew(val,X)
257#define iowrite16be(val,X) __raw_writew(val,X)
255#define iowrite32(val,X) writel(val,X) 258#define iowrite32(val,X) writel(val,X)
259#define iowrite32be(val,X) __raw_writel(val,X)
256 260
257static inline void ioread8_rep(void __iomem *port, void *buf, unsigned long count) 261static inline void ioread8_rep(void __iomem *port, void *buf, unsigned long count)
258{ 262{
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index 4aee21dc9c6..9517d063c79 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -468,10 +468,14 @@ static inline void iounmap(volatile void __iomem *addr)
468 468
469#define ioread8(X) readb(X) 469#define ioread8(X) readb(X)
470#define ioread16(X) readw(X) 470#define ioread16(X) readw(X)
471#define ioread16be(X) __raw_readw(X)
471#define ioread32(X) readl(X) 472#define ioread32(X) readl(X)
473#define ioread32be(X) __raw_readl(X)
472#define iowrite8(val,X) writeb(val,X) 474#define iowrite8(val,X) writeb(val,X)
473#define iowrite16(val,X) writew(val,X) 475#define iowrite16(val,X) writew(val,X)
476#define iowrite16be(val,X) __raw_writew(val,X)
474#define iowrite32(val,X) writel(val,X) 477#define iowrite32(val,X) writel(val,X)
478#define iowrite32be(val,X) __raw_writel(val,X)
475 479
476/* Create a virtual mapping cookie for an IO port range */ 480/* Create a virtual mapping cookie for an IO port range */
477extern void __iomem *ioport_map(unsigned long port, unsigned int nr); 481extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
diff --git a/arch/sparc/include/asm/perfctr.h b/arch/sparc/include/asm/perfctr.h
index 836873002b7..8d8720a8770 100644
--- a/arch/sparc/include/asm/perfctr.h
+++ b/arch/sparc/include/asm/perfctr.h
@@ -10,8 +10,8 @@
10 * from enumeration below. The meaning of further arguments 10 * from enumeration below. The meaning of further arguments
11 * are determined by the operation code. 11 * are determined by the operation code.
12 * 12 *
13 * int sys_perfctr(int opcode, unsigned long arg0, 13 * NOTE: This system call is no longer provided, use the perf_events
14 * unsigned long arg1, unsigned long arg2) 14 * infrastructure.
15 * 15 *
16 * Pointers which are passed by the user are pointers to 64-bit 16 * Pointers which are passed by the user are pointers to 64-bit
17 * integers. 17 * integers.
diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
index d47a98e6697..d24cfe16afc 100644
--- a/arch/sparc/include/asm/system_64.h
+++ b/arch/sparc/include/asm/system_64.h
@@ -143,15 +143,7 @@ do { \
143 * and 2 stores in this critical code path. -DaveM 143 * and 2 stores in this critical code path. -DaveM
144 */ 144 */
145#define switch_to(prev, next, last) \ 145#define switch_to(prev, next, last) \
146do { if (test_thread_flag(TIF_PERFCTR)) { \ 146do { flush_tlb_pending(); \
147 unsigned long __tmp; \
148 read_pcr(__tmp); \
149 current_thread_info()->pcr_reg = __tmp; \
150 read_pic(__tmp); \
151 current_thread_info()->kernel_cntd0 += (unsigned int)(__tmp);\
152 current_thread_info()->kernel_cntd1 += ((__tmp) >> 32); \
153 } \
154 flush_tlb_pending(); \
155 save_and_clear_fpu(); \ 147 save_and_clear_fpu(); \
156 /* If you are tempted to conditionalize the following */ \ 148 /* If you are tempted to conditionalize the following */ \
157 /* so that ASI is only written if it changes, think again. */ \ 149 /* so that ASI is only written if it changes, think again. */ \
@@ -197,11 +189,6 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
197 "l1", "l2", "l3", "l4", "l5", "l6", "l7", \ 189 "l1", "l2", "l3", "l4", "l5", "l6", "l7", \
198 "i0", "i1", "i2", "i3", "i4", "i5", \ 190 "i0", "i1", "i2", "i3", "i4", "i5", \
199 "o0", "o1", "o2", "o3", "o4", "o5", "o7"); \ 191 "o0", "o1", "o2", "o3", "o4", "o5", "o7"); \
200 /* If you fuck with this, update ret_from_syscall code too. */ \
201 if (test_thread_flag(TIF_PERFCTR)) { \
202 write_pcr(current_thread_info()->pcr_reg); \
203 reset_pic(); \
204 } \
205} while(0) 192} while(0)
206 193
207static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val) 194static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 39be9f256e5..9e2d9447f2a 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -58,11 +58,6 @@ struct thread_info {
58 unsigned long gsr[7]; 58 unsigned long gsr[7];
59 unsigned long xfsr[7]; 59 unsigned long xfsr[7];
60 60
61 __u64 __user *user_cntd0;
62 __u64 __user *user_cntd1;
63 __u64 kernel_cntd0, kernel_cntd1;
64 __u64 pcr_reg;
65
66 struct restart_block restart_block; 61 struct restart_block restart_block;
67 62
68 struct pt_regs *kern_una_regs; 63 struct pt_regs *kern_una_regs;
@@ -96,15 +91,10 @@ struct thread_info {
96#define TI_RWIN_SPTRS 0x000003c8 91#define TI_RWIN_SPTRS 0x000003c8
97#define TI_GSR 0x00000400 92#define TI_GSR 0x00000400
98#define TI_XFSR 0x00000438 93#define TI_XFSR 0x00000438
99#define TI_USER_CNTD0 0x00000470 94#define TI_RESTART_BLOCK 0x00000470
100#define TI_USER_CNTD1 0x00000478 95#define TI_KUNA_REGS 0x000004a0
101#define TI_KERN_CNTD0 0x00000480 96#define TI_KUNA_INSN 0x000004a8
102#define TI_KERN_CNTD1 0x00000488 97#define TI_FPREGS 0x000004c0
103#define TI_PCR 0x00000490
104#define TI_RESTART_BLOCK 0x00000498
105#define TI_KUNA_REGS 0x000004c8
106#define TI_KUNA_INSN 0x000004d0
107#define TI_FPREGS 0x00000500
108 98
109/* We embed this in the uppermost byte of thread_info->flags */ 99/* We embed this in the uppermost byte of thread_info->flags */
110#define FAULT_CODE_WRITE 0x01 /* Write access, implies D-TLB */ 100#define FAULT_CODE_WRITE 0x01 /* Write access, implies D-TLB */
@@ -199,7 +189,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
199 * 189 *
200 * On trap return we need to test several values: 190 * On trap return we need to test several values:
201 * 191 *
202 * user: need_resched, notify_resume, sigpending, wsaved, perfctr 192 * user: need_resched, notify_resume, sigpending, wsaved
203 * kernel: fpdepth 193 * kernel: fpdepth
204 * 194 *
205 * So to check for work in the kernel case we simply load the fpdepth 195 * So to check for work in the kernel case we simply load the fpdepth
@@ -220,7 +210,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
220#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ 210#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
221#define TIF_SIGPENDING 2 /* signal pending */ 211#define TIF_SIGPENDING 2 /* signal pending */
222#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ 212#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
223#define TIF_PERFCTR 4 /* performance counters active */ 213/* flag bit 4 is available */
224#define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */ 214#define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */
225/* flag bit 6 is available */ 215/* flag bit 6 is available */
226#define TIF_32BIT 7 /* 32-bit binary */ 216#define TIF_32BIT 7 /* 32-bit binary */
@@ -241,7 +231,6 @@ register struct thread_info *current_thread_info_reg asm("g6");
241#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) 231#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
242#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) 232#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
243#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) 233#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
244#define _TIF_PERFCTR (1<<TIF_PERFCTR)
245#define _TIF_UNALIGNED (1<<TIF_UNALIGNED) 234#define _TIF_UNALIGNED (1<<TIF_UNALIGNED)
246#define _TIF_32BIT (1<<TIF_32BIT) 235#define _TIF_32BIT (1<<TIF_32BIT)
247#define _TIF_SECCOMP (1<<TIF_SECCOMP) 236#define _TIF_SECCOMP (1<<TIF_SECCOMP)
@@ -252,7 +241,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
252 241
253#define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \ 242#define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \
254 _TIF_DO_NOTIFY_RESUME_MASK | \ 243 _TIF_DO_NOTIFY_RESUME_MASK | \
255 _TIF_NEED_RESCHED | _TIF_PERFCTR) 244 _TIF_NEED_RESCHED)
256#define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING) 245#define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING)
257 246
258/* 247/*
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 4f53a2395ac..c011b932bb1 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -48,7 +48,6 @@ extern void __init boot_cpu_id_too_large(int cpu);
48extern unsigned int dcache_parity_tl1_occurred; 48extern unsigned int dcache_parity_tl1_occurred;
49extern unsigned int icache_parity_tl1_occurred; 49extern unsigned int icache_parity_tl1_occurred;
50 50
51extern asmlinkage void update_perfctrs(void);
52extern asmlinkage void sparc_breakpoint(struct pt_regs *regs); 51extern asmlinkage void sparc_breakpoint(struct pt_regs *regs);
53extern void timer_interrupt(int irq, struct pt_regs *regs); 52extern void timer_interrupt(int irq, struct pt_regs *regs);
54 53
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index cb70476bd8f..a5cf3864b31 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -352,12 +352,6 @@ void exit_thread(void)
352 else 352 else
353 t->utraps[0]--; 353 t->utraps[0]--;
354 } 354 }
355
356 if (test_and_clear_thread_flag(TIF_PERFCTR)) {
357 t->user_cntd0 = t->user_cntd1 = NULL;
358 t->pcr_reg = 0;
359 write_pcr(0);
360 }
361} 355}
362 356
363void flush_thread(void) 357void flush_thread(void)
@@ -371,13 +365,6 @@ void flush_thread(void)
371 365
372 set_thread_wsaved(0); 366 set_thread_wsaved(0);
373 367
374 /* Turn off performance counters if on. */
375 if (test_and_clear_thread_flag(TIF_PERFCTR)) {
376 t->user_cntd0 = t->user_cntd1 = NULL;
377 t->pcr_reg = 0;
378 write_pcr(0);
379 }
380
381 /* Clear FPU register state. */ 368 /* Clear FPU register state. */
382 t->fpsaved[0] = 0; 369 t->fpsaved[0] = 0;
383 370
@@ -591,16 +578,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
591 t->kregs->u_regs[UREG_FP] = 578 t->kregs->u_regs[UREG_FP] =
592 ((unsigned long) child_sf) - STACK_BIAS; 579 ((unsigned long) child_sf) - STACK_BIAS;
593 580
594 /* Special case, if we are spawning a kernel thread from
595 * a userspace task (usermode helper, NFS or similar), we
596 * must disable performance counters in the child because
597 * the address space and protection realm are changing.
598 */
599 if (t->flags & _TIF_PERFCTR) {
600 t->user_cntd0 = t->user_cntd1 = NULL;
601 t->pcr_reg = 0;
602 t->flags &= ~_TIF_PERFCTR;
603 }
604 t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT); 581 t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT);
605 t->kregs->u_regs[UREG_G6] = (unsigned long) t; 582 t->kregs->u_regs[UREG_G6] = (unsigned long) t;
606 t->kregs->u_regs[UREG_G4] = (unsigned long) t->task; 583 t->kregs->u_regs[UREG_G4] = (unsigned long) t->task;
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index 1ddec403f51..83f1873c6c1 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -65,48 +65,6 @@ __handle_user_windows:
65 ba,pt %xcc, __handle_user_windows_continue 65 ba,pt %xcc, __handle_user_windows_continue
66 66
67 andn %l1, %l4, %l1 67 andn %l1, %l4, %l1
68__handle_perfctrs:
69 call update_perfctrs
70 wrpr %g0, RTRAP_PSTATE, %pstate
71 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
72 ldub [%g6 + TI_WSAVED], %o2
73 brz,pt %o2, 1f
74 nop
75 /* Redo userwin+sched+sig checks */
76 call fault_in_user_windows
77
78 wrpr %g0, RTRAP_PSTATE, %pstate
79 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
80 ldx [%g6 + TI_FLAGS], %l0
81 andcc %l0, _TIF_NEED_RESCHED, %g0
82 be,pt %xcc, 1f
83
84 nop
85 call schedule
86 wrpr %g0, RTRAP_PSTATE, %pstate
87 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
88 ldx [%g6 + TI_FLAGS], %l0
891: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0
90
91 be,pt %xcc, __handle_perfctrs_continue
92 sethi %hi(TSTATE_PEF), %o0
93 mov %l5, %o1
94 add %sp, PTREGS_OFF, %o0
95 mov %l0, %o2
96 call do_notify_resume
97
98 wrpr %g0, RTRAP_PSTATE, %pstate
99 wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
100 /* Signal delivery can modify pt_regs tstate, so we must
101 * reload it.
102 */
103 ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
104 sethi %hi(0xf << 20), %l4
105 and %l1, %l4, %l4
106 andn %l1, %l4, %l1
107 ba,pt %xcc, __handle_perfctrs_continue
108
109 sethi %hi(TSTATE_PEF), %o0
110__handle_userfpu: 68__handle_userfpu:
111 rd %fprs, %l5 69 rd %fprs, %l5
112 andcc %l5, FPRS_FEF, %g0 70 andcc %l5, FPRS_FEF, %g0
@@ -191,9 +149,9 @@ rtrap_no_irq_enable:
191 * take until the next local IRQ before the signal/resched 149 * take until the next local IRQ before the signal/resched
192 * event would be handled. 150 * event would be handled.
193 * 151 *
194 * This also means that if we have to deal with performance 152 * This also means that if we have to deal with user
195 * counters or user windows, we have to redo all of these 153 * windows, we have to redo all of these sched+signal checks
196 * sched+signal checks with IRQs disabled. 154 * with IRQs disabled.
197 */ 155 */
198to_user: wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate 156to_user: wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
199 wrpr 0, %pil 157 wrpr 0, %pil
@@ -214,12 +172,8 @@ __handle_signal_continue:
214 brnz,pn %o2, __handle_user_windows 172 brnz,pn %o2, __handle_user_windows
215 nop 173 nop
216__handle_user_windows_continue: 174__handle_user_windows_continue:
217 ldx [%g6 + TI_FLAGS], %l5
218 andcc %l5, _TIF_PERFCTR, %g0
219 sethi %hi(TSTATE_PEF), %o0 175 sethi %hi(TSTATE_PEF), %o0
220 bne,pn %xcc, __handle_perfctrs 176 andcc %l1, %o0, %g0
221__handle_perfctrs_continue:
222 andcc %l1, %o0, %g0
223 177
224 /* This fpdepth clear is necessary for non-syscall rtraps only */ 178 /* This fpdepth clear is necessary for non-syscall rtraps only */
225user_nowork: 179user_nowork:
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index e7061138c98..46a76ba3fb4 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -51,7 +51,6 @@ SIGN1(sys32_exit_group, sys_exit_group, %o0)
51SIGN1(sys32_wait4, compat_sys_wait4, %o2) 51SIGN1(sys32_wait4, compat_sys_wait4, %o2)
52SIGN1(sys32_creat, sys_creat, %o1) 52SIGN1(sys32_creat, sys_creat, %o1)
53SIGN1(sys32_mknod, sys_mknod, %o1) 53SIGN1(sys32_mknod, sys_mknod, %o1)
54SIGN1(sys32_perfctr, sys_perfctr, %o0)
55SIGN1(sys32_umount, sys_umount, %o1) 54SIGN1(sys32_umount, sys_umount, %o1)
56SIGN1(sys32_signal, sys_signal, %o0) 55SIGN1(sys32_signal, sys_signal, %o0)
57SIGN1(sys32_access, sys_access, %o1) 56SIGN1(sys32_access, sys_access, %o1)
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index d77f5431694..cb1bef6f14b 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -27,7 +27,6 @@
27 27
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/utrap.h> 29#include <asm/utrap.h>
30#include <asm/perfctr.h>
31#include <asm/unistd.h> 30#include <asm/unistd.h>
32 31
33#include "entry.h" 32#include "entry.h"
@@ -766,109 +765,6 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act,
766 return ret; 765 return ret;
767} 766}
768 767
769/* Invoked by rtrap code to update performance counters in
770 * user space.
771 */
772asmlinkage void update_perfctrs(void)
773{
774 unsigned long pic, tmp;
775
776 read_pic(pic);
777 tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
778 __put_user(tmp, current_thread_info()->user_cntd0);
779 tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
780 __put_user(tmp, current_thread_info()->user_cntd1);
781 reset_pic();
782}
783
784SYSCALL_DEFINE4(perfctr, int, opcode, unsigned long, arg0,
785 unsigned long, arg1, unsigned long, arg2)
786{
787 int err = 0;
788
789 switch(opcode) {
790 case PERFCTR_ON:
791 current_thread_info()->pcr_reg = arg2;
792 current_thread_info()->user_cntd0 = (u64 __user *) arg0;
793 current_thread_info()->user_cntd1 = (u64 __user *) arg1;
794 current_thread_info()->kernel_cntd0 =
795 current_thread_info()->kernel_cntd1 = 0;
796 write_pcr(arg2);
797 reset_pic();
798 set_thread_flag(TIF_PERFCTR);
799 break;
800
801 case PERFCTR_OFF:
802 err = -EINVAL;
803 if (test_thread_flag(TIF_PERFCTR)) {
804 current_thread_info()->user_cntd0 =
805 current_thread_info()->user_cntd1 = NULL;
806 current_thread_info()->pcr_reg = 0;
807 write_pcr(0);
808 clear_thread_flag(TIF_PERFCTR);
809 err = 0;
810 }
811 break;
812
813 case PERFCTR_READ: {
814 unsigned long pic, tmp;
815
816 if (!test_thread_flag(TIF_PERFCTR)) {
817 err = -EINVAL;
818 break;
819 }
820 read_pic(pic);
821 tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
822 err |= __put_user(tmp, current_thread_info()->user_cntd0);
823 tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
824 err |= __put_user(tmp, current_thread_info()->user_cntd1);
825 reset_pic();
826 break;
827 }
828
829 case PERFCTR_CLRPIC:
830 if (!test_thread_flag(TIF_PERFCTR)) {
831 err = -EINVAL;
832 break;
833 }
834 current_thread_info()->kernel_cntd0 =
835 current_thread_info()->kernel_cntd1 = 0;
836 reset_pic();
837 break;
838
839 case PERFCTR_SETPCR: {
840 u64 __user *user_pcr = (u64 __user *)arg0;
841
842 if (!test_thread_flag(TIF_PERFCTR)) {
843 err = -EINVAL;
844 break;
845 }
846 err |= __get_user(current_thread_info()->pcr_reg, user_pcr);
847 write_pcr(current_thread_info()->pcr_reg);
848 current_thread_info()->kernel_cntd0 =
849 current_thread_info()->kernel_cntd1 = 0;
850 reset_pic();
851 break;
852 }
853
854 case PERFCTR_GETPCR: {
855 u64 __user *user_pcr = (u64 __user *)arg0;
856
857 if (!test_thread_flag(TIF_PERFCTR)) {
858 err = -EINVAL;
859 break;
860 }
861 err |= __put_user(current_thread_info()->pcr_reg, user_pcr);
862 break;
863 }
864
865 default:
866 err = -EINVAL;
867 break;
868 };
869 return err;
870}
871
872/* 768/*
873 * Do a system call from kernel instead of calling sys_execve so we 769 * Do a system call from kernel instead of calling sys_execve so we
874 * end up with proper pt_regs. 770 * end up with proper pt_regs.
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index dc4a458f74d..1d7e274f3f2 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -110,31 +110,12 @@ sys_clone:
110 110
111 .globl ret_from_syscall 111 .globl ret_from_syscall
112ret_from_syscall: 112ret_from_syscall:
113 /* Clear current_thread_info()->new_child, and 113 /* Clear current_thread_info()->new_child. */
114 * check performance counter stuff too.
115 */
116 stb %g0, [%g6 + TI_NEW_CHILD] 114 stb %g0, [%g6 + TI_NEW_CHILD]
117 ldx [%g6 + TI_FLAGS], %l0 115 ldx [%g6 + TI_FLAGS], %l0
118 call schedule_tail 116 call schedule_tail
119 mov %g7, %o0 117 mov %g7, %o0
120 andcc %l0, _TIF_PERFCTR, %g0 118 ba,pt %xcc, ret_sys_call
121 be,pt %icc, 1f
122 nop
123 ldx [%g6 + TI_PCR], %o7
124 wr %g0, %o7, %pcr
125
126 /* Blackbird errata workaround. See commentary in
127 * smp.c:smp_percpu_timer_interrupt() for more
128 * information.
129 */
130 ba,pt %xcc, 99f
131 nop
132
133 .align 64
13499: wr %g0, %g0, %pic
135 rd %pic, %g0
136
1371: ba,pt %xcc, ret_sys_call
138 ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0 119 ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
139 120
140 .globl sparc_exit 121 .globl sparc_exit
diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h
index d2f999ae2b8..68312fe8da7 100644
--- a/arch/sparc/kernel/systbls.h
+++ b/arch/sparc/kernel/systbls.h
@@ -36,8 +36,6 @@ extern asmlinkage long sys_rt_sigaction(int sig,
36 struct sigaction __user *oact, 36 struct sigaction __user *oact,
37 void __user *restorer, 37 void __user *restorer,
38 size_t sigsetsize); 38 size_t sigsetsize);
39extern asmlinkage long sys_perfctr(int opcode, unsigned long arg0,
40 unsigned long arg1, unsigned long arg2);
41 39
42extern asmlinkage void sparc64_set_context(struct pt_regs *regs); 40extern asmlinkage void sparc64_set_context(struct pt_regs *regs);
43extern asmlinkage void sparc64_get_context(struct pt_regs *regs); 41extern asmlinkage void sparc64_get_context(struct pt_regs *regs);
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index e575b46bd7a..17614251fb6 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -21,7 +21,7 @@ sys_call_table32:
21/*0*/ .word sys_restart_syscall, sys32_exit, sys_fork, sys_read, sys_write 21/*0*/ .word sys_restart_syscall, sys32_exit, sys_fork, sys_read, sys_write
22/*5*/ .word sys32_open, sys_close, sys32_wait4, sys32_creat, sys_link 22/*5*/ .word sys32_open, sys_close, sys32_wait4, sys32_creat, sys_link
23/*10*/ .word sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys32_mknod 23/*10*/ .word sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys32_mknod
24/*15*/ .word sys_chmod, sys_lchown16, sys_brk, sys32_perfctr, sys32_lseek 24/*15*/ .word sys_chmod, sys_lchown16, sys_brk, sys_nis_syscall, sys32_lseek
25/*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16 25/*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16
26/*25*/ .word sys32_vmsplice, compat_sys_ptrace, sys_alarm, sys32_sigaltstack, sys_pause 26/*25*/ .word sys32_vmsplice, compat_sys_ptrace, sys_alarm, sys32_sigaltstack, sys_pause
27/*30*/ .word compat_sys_utime, sys_lchown, sys_fchown, sys32_access, sys32_nice 27/*30*/ .word compat_sys_utime, sys_lchown, sys_fchown, sys32_access, sys32_nice
@@ -96,7 +96,7 @@ sys_call_table:
96/*0*/ .word sys_restart_syscall, sparc_exit, sys_fork, sys_read, sys_write 96/*0*/ .word sys_restart_syscall, sparc_exit, sys_fork, sys_read, sys_write
97/*5*/ .word sys_open, sys_close, sys_wait4, sys_creat, sys_link 97/*5*/ .word sys_open, sys_close, sys_wait4, sys_creat, sys_link
98/*10*/ .word sys_unlink, sys_nis_syscall, sys_chdir, sys_chown, sys_mknod 98/*10*/ .word sys_unlink, sys_nis_syscall, sys_chdir, sys_chown, sys_mknod
99/*15*/ .word sys_chmod, sys_lchown, sys_brk, sys_perfctr, sys_lseek 99/*15*/ .word sys_chmod, sys_lchown, sys_brk, sys_nis_syscall, sys_lseek
100/*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid, sys_getuid 100/*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid, sys_getuid
101/*25*/ .word sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall 101/*25*/ .word sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall
102/*30*/ .word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice 102/*30*/ .word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 10f7bb9fc14..bdc05a21908 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2548,15 +2548,6 @@ void __init trap_init(void)
2548 rwbuf_stkptrs) || 2548 rwbuf_stkptrs) ||
2549 TI_GSR != offsetof(struct thread_info, gsr) || 2549 TI_GSR != offsetof(struct thread_info, gsr) ||
2550 TI_XFSR != offsetof(struct thread_info, xfsr) || 2550 TI_XFSR != offsetof(struct thread_info, xfsr) ||
2551 TI_USER_CNTD0 != offsetof(struct thread_info,
2552 user_cntd0) ||
2553 TI_USER_CNTD1 != offsetof(struct thread_info,
2554 user_cntd1) ||
2555 TI_KERN_CNTD0 != offsetof(struct thread_info,
2556 kernel_cntd0) ||
2557 TI_KERN_CNTD1 != offsetof(struct thread_info,
2558 kernel_cntd1) ||
2559 TI_PCR != offsetof(struct thread_info, pcr_reg) ||
2560 TI_PRE_COUNT != offsetof(struct thread_info, 2551 TI_PRE_COUNT != offsetof(struct thread_info,
2561 preempt_count) || 2552 preempt_count) ||
2562 TI_NEW_CHILD != offsetof(struct thread_info, new_child) || 2553 TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c
index 4b7c937bba6..2d8b70d397f 100644
--- a/arch/sparc/prom/p1275.c
+++ b/arch/sparc/prom/p1275.c
@@ -32,10 +32,9 @@ extern void prom_cif_interface(void);
32extern void prom_cif_callback(void); 32extern void prom_cif_callback(void);
33 33
34/* 34/*
35 * This provides SMP safety on the p1275buf. prom_callback() drops this lock 35 * This provides SMP safety on the p1275buf.
36 * to allow recursuve acquisition.
37 */ 36 */
38DEFINE_SPINLOCK(prom_entry_lock); 37DEFINE_RAW_SPINLOCK(prom_entry_lock);
39 38
40long p1275_cmd(const char *service, long fmt, ...) 39long p1275_cmd(const char *service, long fmt, ...)
41{ 40{
@@ -47,7 +46,9 @@ long p1275_cmd(const char *service, long fmt, ...)
47 46
48 p = p1275buf.prom_buffer; 47 p = p1275buf.prom_buffer;
49 48
50 spin_lock_irqsave(&prom_entry_lock, flags); 49 raw_local_save_flags(flags);
50 raw_local_irq_restore(PIL_NMI);
51 raw_spin_lock(&prom_entry_lock);
51 52
52 p1275buf.prom_args[0] = (unsigned long)p; /* service */ 53 p1275buf.prom_args[0] = (unsigned long)p; /* service */
53 strcpy (p, service); 54 strcpy (p, service);
@@ -139,7 +140,8 @@ long p1275_cmd(const char *service, long fmt, ...)
139 va_end(list); 140 va_end(list);
140 x = p1275buf.prom_args [nargs + 3]; 141 x = p1275buf.prom_args [nargs + 3];
141 142
142 spin_unlock_irqrestore(&prom_entry_lock, flags); 143 raw_spin_unlock(&prom_entry_lock);
144 raw_local_irq_restore(flags);
143 145
144 return x; 146 return x;
145} 147}
diff --git a/arch/um/.gitignore b/arch/um/.gitignore
new file mode 100644
index 00000000000..a73d3a1cc74
--- /dev/null
+++ b/arch/um/.gitignore
@@ -0,0 +1,3 @@
1kernel/config.c
2kernel/config.tmp
3kernel/vmlinux.lds
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index cf8a97f3451..64cda95f59c 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -18,10 +18,10 @@ static irqreturn_t line_interrupt(int irq, void *data)
18{ 18{
19 struct chan *chan = data; 19 struct chan *chan = data;
20 struct line *line = chan->line; 20 struct line *line = chan->line;
21 struct tty_struct *tty = line->tty; 21 struct tty_struct *tty;
22 22
23 if (line) 23 if (line)
24 chan_interrupt(&line->chan_list, &line->task, tty, irq); 24 chan_interrupt(&line->chan_list, &line->task, line->tty, irq);
25 return IRQ_HANDLED; 25 return IRQ_HANDLED;
26} 26}
27 27
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 3b3c36601a7..de317d0c329 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -140,7 +140,7 @@ void mconsole_proc(struct mc_request *req)
140 goto out; 140 goto out;
141 } 141 }
142 142
143 err = may_open(&nd.path, MAY_READ, FMODE_READ); 143 err = may_open(&nd.path, MAY_READ, O_RDONLY);
144 if (result) { 144 if (result) {
145 mconsole_reply(req, "Failed to open file", 1, 0); 145 mconsole_reply(req, "Failed to open file", 1, 0);
146 path_put(&nd.path); 146 path_put(&nd.path);
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 1b549bca464..804b28dd032 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -6,6 +6,8 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
6 ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \ 6 ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
7 sys_call_table.o tls.o 7 sys_call_table.o tls.o
8 8
9obj-$(CONFIG_BINFMT_ELF) += elfcore.o
10
9subarch-obj-y = lib/semaphore_32.o lib/string_32.o 11subarch-obj-y = lib/semaphore_32.o lib/string_32.o
10subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o 12subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
11subarch-obj-$(CONFIG_MODULES) += kernel/module.o 13subarch-obj-$(CONFIG_MODULES) += kernel/module.o
diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h
index 770885472ed..e64cd41d7ba 100644
--- a/arch/um/sys-i386/asm/elf.h
+++ b/arch/um/sys-i386/asm/elf.h
@@ -116,47 +116,4 @@ do { \
116 } \ 116 } \
117} while (0) 117} while (0)
118 118
119/*
120 * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
121 * extra segments containing the vsyscall DSO contents. Dumping its
122 * contents makes post-mortem fully interpretable later without matching up
123 * the same kernel and hardware config to see what PC values meant.
124 * Dumping its extra ELF program headers includes all the other information
125 * a debugger needs to easily find how the vsyscall DSO was being used.
126 */
127#define ELF_CORE_EXTRA_PHDRS \
128 (vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0 )
129
130#define ELF_CORE_WRITE_EXTRA_PHDRS \
131if ( vsyscall_ehdr ) { \
132 const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \
133 const struct elf_phdr *const phdrp = \
134 (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \
135 int i; \
136 Elf32_Off ofs = 0; \
137 for (i = 0; i < ehdrp->e_phnum; ++i) { \
138 struct elf_phdr phdr = phdrp[i]; \
139 if (phdr.p_type == PT_LOAD) { \
140 ofs = phdr.p_offset = offset; \
141 offset += phdr.p_filesz; \
142 } \
143 else \
144 phdr.p_offset += ofs; \
145 phdr.p_paddr = 0; /* match other core phdrs */ \
146 DUMP_WRITE(&phdr, sizeof(phdr)); \
147 } \
148}
149#define ELF_CORE_WRITE_EXTRA_DATA \
150if ( vsyscall_ehdr ) { \
151 const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \
152 const struct elf_phdr *const phdrp = \
153 (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \
154 int i; \
155 for (i = 0; i < ehdrp->e_phnum; ++i) { \
156 if (phdrp[i].p_type == PT_LOAD) \
157 DUMP_WRITE((void *) phdrp[i].p_vaddr, \
158 phdrp[i].p_filesz); \
159 } \
160}
161
162#endif 119#endif
diff --git a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c
new file mode 100644
index 00000000000..6bb49b687c9
--- /dev/null
+++ b/arch/um/sys-i386/elfcore.c
@@ -0,0 +1,83 @@
1#include <linux/elf.h>
2#include <linux/coredump.h>
3#include <linux/fs.h>
4#include <linux/mm.h>
5
6#include <asm/elf.h>
7
8
9Elf32_Half elf_core_extra_phdrs(void)
10{
11 return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
12}
13
14int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
15 unsigned long limit)
16{
17 if ( vsyscall_ehdr ) {
18 const struct elfhdr *const ehdrp =
19 (struct elfhdr *) vsyscall_ehdr;
20 const struct elf_phdr *const phdrp =
21 (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
22 int i;
23 Elf32_Off ofs = 0;
24
25 for (i = 0; i < ehdrp->e_phnum; ++i) {
26 struct elf_phdr phdr = phdrp[i];
27
28 if (phdr.p_type == PT_LOAD) {
29 ofs = phdr.p_offset = offset;
30 offset += phdr.p_filesz;
31 } else {
32 phdr.p_offset += ofs;
33 }
34 phdr.p_paddr = 0; /* match other core phdrs */
35 *size += sizeof(phdr);
36 if (*size > limit
37 || !dump_write(file, &phdr, sizeof(phdr)))
38 return 0;
39 }
40 }
41 return 1;
42}
43
44int elf_core_write_extra_data(struct file *file, size_t *size,
45 unsigned long limit)
46{
47 if ( vsyscall_ehdr ) {
48 const struct elfhdr *const ehdrp =
49 (struct elfhdr *) vsyscall_ehdr;
50 const struct elf_phdr *const phdrp =
51 (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
52 int i;
53
54 for (i = 0; i < ehdrp->e_phnum; ++i) {
55 if (phdrp[i].p_type == PT_LOAD) {
56 void *addr = (void *) phdrp[i].p_vaddr;
57 size_t filesz = phdrp[i].p_filesz;
58
59 *size += filesz;
60 if (*size > limit
61 || !dump_write(file, addr, filesz))
62 return 0;
63 }
64 }
65 }
66 return 1;
67}
68
69size_t elf_core_extra_data_size(void)
70{
71 if ( vsyscall_ehdr ) {
72 const struct elfhdr *const ehdrp =
73 (struct elfhdr *)vsyscall_ehdr;
74 const struct elf_phdr *const phdrp =
75 (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
76 int i;
77
78 for (i = 0; i < ehdrp->e_phnum; ++i)
79 if (phdrp[i].p_type == PT_LOAD)
80 return (size_t) phdrp[i].p_filesz;
81 }
82 return 0;
83}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 57ccdcec146..e9844037152 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,6 +31,7 @@ config X86
31 select ARCH_WANT_FRAME_POINTERS 31 select ARCH_WANT_FRAME_POINTERS
32 select HAVE_DMA_ATTRS 32 select HAVE_DMA_ATTRS
33 select HAVE_KRETPROBES 33 select HAVE_KRETPROBES
34 select HAVE_OPTPROBES
34 select HAVE_FTRACE_MCOUNT_RECORD 35 select HAVE_FTRACE_MCOUNT_RECORD
35 select HAVE_DYNAMIC_FTRACE 36 select HAVE_DYNAMIC_FTRACE
36 select HAVE_FUNCTION_TRACER 37 select HAVE_FUNCTION_TRACER
@@ -392,8 +393,12 @@ config X86_ELAN
392 393
393config X86_MRST 394config X86_MRST
394 bool "Moorestown MID platform" 395 bool "Moorestown MID platform"
396 depends on PCI
397 depends on PCI_GOANY
395 depends on X86_32 398 depends on X86_32
396 depends on X86_EXTENDED_PLATFORM 399 depends on X86_EXTENDED_PLATFORM
400 depends on X86_IO_APIC
401 select APB_TIMER
397 ---help--- 402 ---help---
398 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin 403 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
399 Internet Device(MID) platform. Moorestown consists of two chips: 404 Internet Device(MID) platform. Moorestown consists of two chips:
@@ -428,6 +433,7 @@ config X86_32_NON_STANDARD
428config X86_NUMAQ 433config X86_NUMAQ
429 bool "NUMAQ (IBM/Sequent)" 434 bool "NUMAQ (IBM/Sequent)"
430 depends on X86_32_NON_STANDARD 435 depends on X86_32_NON_STANDARD
436 depends on PCI
431 select NUMA 437 select NUMA
432 select X86_MPPARSE 438 select X86_MPPARSE
433 ---help--- 439 ---help---
@@ -628,6 +634,16 @@ config HPET_EMULATE_RTC
628 def_bool y 634 def_bool y
629 depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) 635 depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
630 636
637config APB_TIMER
638 def_bool y if MRST
639 prompt "Langwell APB Timer Support" if X86_MRST
640 help
641 APB timer is the replacement for the 8254 and HPET on X86 MID platforms.
642 The APBT provides a stable time base on SMP
643 systems, unlike the TSC, but it is more expensive to access,
644 as it is off-chip. APB timers are always running regardless of CPU
645 C states; they are used as per-CPU clockevent devices when possible.
646
631# Mark as embedded because too many people got it wrong. 647# Mark as embedded because too many people got it wrong.
632# The code disables itself when not needed. 648# The code disables itself when not needed.
633config DMI 649config DMI
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 9f828f87ca3..493092efaa3 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -11,6 +11,7 @@ header-y += sigcontext32.h
11header-y += ucontext.h 11header-y += ucontext.h
12header-y += processor-flags.h 12header-y += processor-flags.h
13header-y += hw_breakpoint.h 13header-y += hw_breakpoint.h
14header-y += hyperv.h
14 15
15unifdef-y += e820.h 16unifdef-y += e820.h
16unifdef-y += ist.h 17unifdef-y += ist.h
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index f1e253ceba4..b09ec55650b 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -165,10 +165,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
165 * invalid instruction possible) or if the instructions are changed from a 165 * invalid instruction possible) or if the instructions are changed from a
166 * consistent state to another consistent state atomically. 166 * consistent state to another consistent state atomically.
167 * More care must be taken when modifying code in the SMP case because of 167 * More care must be taken when modifying code in the SMP case because of
168 * Intel's errata. 168 * Intel's errata. text_poke_smp() takes care of that errata, but still
169 * doesn't support modifying NMI/MCE handler code.
169 * On the local CPU you need to be protected again NMI or MCE handlers seeing an 170 * On the local CPU you need to be protected again NMI or MCE handlers seeing an
170 * inconsistent instruction while you patch. 171 * inconsistent instruction while you patch.
171 */ 172 */
172extern void *text_poke(void *addr, const void *opcode, size_t len); 173extern void *text_poke(void *addr, const void *opcode, size_t len);
174extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
173 175
174#endif /* _ASM_X86_ALTERNATIVE_H */ 176#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
new file mode 100644
index 00000000000..c74a2eebe57
--- /dev/null
+++ b/arch/x86/include/asm/apb_timer.h
@@ -0,0 +1,70 @@
1/*
2 * apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare
3 *
4 * (C) Copyright 2009 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 * Note:
13 */
14
15#ifndef ASM_X86_APBT_H
16#define ASM_X86_APBT_H
17#include <linux/sfi.h>
18
19#ifdef CONFIG_APB_TIMER
20
21/* Langwell DW APB timer registers */
22#define APBTMR_N_LOAD_COUNT 0x00
23#define APBTMR_N_CURRENT_VALUE 0x04
24#define APBTMR_N_CONTROL 0x08
25#define APBTMR_N_EOI 0x0c
26#define APBTMR_N_INT_STATUS 0x10
27
28#define APBTMRS_INT_STATUS 0xa0
29#define APBTMRS_EOI 0xa4
30#define APBTMRS_RAW_INT_STATUS 0xa8
31#define APBTMRS_COMP_VERSION 0xac
32#define APBTMRS_REG_SIZE 0x14
33
34/* register bits */
35#define APBTMR_CONTROL_ENABLE (1<<0)
36#define APBTMR_CONTROL_MODE_PERIODIC (1<<1) /*1: periodic 0:free running */
37#define APBTMR_CONTROL_INT (1<<2)
38
39/* default memory mapped register base */
40#define LNW_SCU_ADDR 0xFF100000
41#define LNW_EXT_TIMER_OFFSET 0x1B800
42#define APBT_DEFAULT_BASE (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET)
43#define LNW_EXT_TIMER_PGOFFSET 0x800
44
45/* APBT clock speed range from PCLK to fabric base, 25-100MHz */
46#define APBT_MAX_FREQ 50
47#define APBT_MIN_FREQ 1
48#define APBT_MMAP_SIZE 1024
49
50#define APBT_DEV_USED 1
51
52extern void apbt_time_init(void);
53extern struct clock_event_device *global_clock_event;
54extern unsigned long apbt_quick_calibrate(void);
55extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
56extern void apbt_setup_secondary_clock(void);
57extern unsigned int boot_cpu_id;
58extern int disable_apbt_percpu;
59
60extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
61extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);
62extern int sfi_mtimer_num;
63
64#else /* CONFIG_APB_TIMER */
65
66static inline unsigned long apbt_quick_calibrate(void) {return 0; }
67static inline void apbt_time_init(void) { }
68
69#endif
70#endif /* ASM_X86_APBT_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eeac829a0f4..a929c9ede33 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -53,13 +53,6 @@ extern void threshold_interrupt(void);
53extern void call_function_interrupt(void); 53extern void call_function_interrupt(void);
54extern void call_function_single_interrupt(void); 54extern void call_function_single_interrupt(void);
55 55
56/* PIC specific functions */
57extern void disable_8259A_irq(unsigned int irq);
58extern void enable_8259A_irq(unsigned int irq);
59extern int i8259A_irq_pending(unsigned int irq);
60extern void make_8259A_irq(unsigned int irq);
61extern void init_8259A(int aeoi);
62
63/* IOAPIC */ 56/* IOAPIC */
64#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) 57#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
65extern unsigned long io_apic_irqs; 58extern unsigned long io_apic_irqs;
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h
new file mode 100644
index 00000000000..e153a2b3889
--- /dev/null
+++ b/arch/x86/include/asm/hyperv.h
@@ -0,0 +1,186 @@
1#ifndef _ASM_X86_KVM_HYPERV_H
2#define _ASM_X86_KVM_HYPERV_H
3
4#include <linux/types.h>
5
6/*
7 * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
8 * is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
9 */
10#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
11#define HYPERV_CPUID_INTERFACE 0x40000001
12#define HYPERV_CPUID_VERSION 0x40000002
13#define HYPERV_CPUID_FEATURES 0x40000003
14#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
15#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
16
17/*
18 * Feature identification. EAX indicates which features are available
19 * to the partition based upon the current partition privileges.
20 */
21
22/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
23#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0)
24/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
25#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
26/*
27 * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
28 * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
29 */
30#define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2)
31/*
32 * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
33 * HV_X64_MSR_STIMER3_COUNT) available
34 */
35#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3)
36/*
37 * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
38 * are available
39 */
40#define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4)
41/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
42#define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5)
43/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
44#define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6)
45/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
46#define HV_X64_MSR_RESET_AVAILABLE (1 << 7)
47 /*
48 * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
49 * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
50 * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
51 */
52#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8)
53
54/*
55 * Feature identification: EBX indicates which flags were specified at
56 * partition creation. The format is the same as the partition creation
57 * flag structure defined in section Partition Creation Flags.
58 */
59#define HV_X64_CREATE_PARTITIONS (1 << 0)
60#define HV_X64_ACCESS_PARTITION_ID (1 << 1)
61#define HV_X64_ACCESS_MEMORY_POOL (1 << 2)
62#define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3)
63#define HV_X64_POST_MESSAGES (1 << 4)
64#define HV_X64_SIGNAL_EVENTS (1 << 5)
65#define HV_X64_CREATE_PORT (1 << 6)
66#define HV_X64_CONNECT_PORT (1 << 7)
67#define HV_X64_ACCESS_STATS (1 << 8)
68#define HV_X64_DEBUGGING (1 << 11)
69#define HV_X64_CPU_POWER_MANAGEMENT (1 << 12)
70#define HV_X64_CONFIGURE_PROFILER (1 << 13)
71
72/*
73 * Feature identification. EDX indicates which miscellaneous features
74 * are available to the partition.
75 */
76/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
77#define HV_X64_MWAIT_AVAILABLE (1 << 0)
78/* Guest debugging support is available */
79#define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1)
80/* Performance Monitor support is available*/
81#define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2)
82/* Support for physical CPU dynamic partitioning events is available*/
83#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3)
84/*
85 * Support for passing hypercall input parameter block via XMM
86 * registers is available
87 */
88#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4)
89/* Support for a virtual guest idle state is available */
90#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5)
91
92/*
93 * Implementation recommendations. Indicates which behaviors the hypervisor
94 * recommends the OS implement for optimal performance.
95 */
96 /*
97 * Recommend using hypercall for address space switches rather
98 * than MOV to CR3 instruction
99 */
100#define HV_X64_MWAIT_RECOMMENDED (1 << 0)
101/* Recommend using hypercall for local TLB flushes rather
102 * than INVLPG or MOV to CR3 instructions */
103#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1)
104/*
105 * Recommend using hypercall for remote TLB flushes rather
106 * than inter-processor interrupts
107 */
108#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2)
109/*
110 * Recommend using MSRs for accessing APIC registers
111 * EOI, ICR and TPR rather than their memory-mapped counterparts
112 */
113#define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3)
114/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
115#define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4)
116/*
117 * Recommend using relaxed timing for this partition. If used,
118 * the VM should disable any watchdog timeouts that rely on the
119 * timely delivery of external interrupts
120 */
121#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5)
122
123/* MSR used to identify the guest OS. */
124#define HV_X64_MSR_GUEST_OS_ID 0x40000000
125
126/* MSR used to setup pages used to communicate with the hypervisor. */
127#define HV_X64_MSR_HYPERCALL 0x40000001
128
129/* MSR used to provide vcpu index */
130#define HV_X64_MSR_VP_INDEX 0x40000002
131
132/* Define the virtual APIC registers */
133#define HV_X64_MSR_EOI 0x40000070
134#define HV_X64_MSR_ICR 0x40000071
135#define HV_X64_MSR_TPR 0x40000072
136#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073
137
138/* Define synthetic interrupt controller model specific registers. */
139#define HV_X64_MSR_SCONTROL 0x40000080
140#define HV_X64_MSR_SVERSION 0x40000081
141#define HV_X64_MSR_SIEFP 0x40000082
142#define HV_X64_MSR_SIMP 0x40000083
143#define HV_X64_MSR_EOM 0x40000084
144#define HV_X64_MSR_SINT0 0x40000090
145#define HV_X64_MSR_SINT1 0x40000091
146#define HV_X64_MSR_SINT2 0x40000092
147#define HV_X64_MSR_SINT3 0x40000093
148#define HV_X64_MSR_SINT4 0x40000094
149#define HV_X64_MSR_SINT5 0x40000095
150#define HV_X64_MSR_SINT6 0x40000096
151#define HV_X64_MSR_SINT7 0x40000097
152#define HV_X64_MSR_SINT8 0x40000098
153#define HV_X64_MSR_SINT9 0x40000099
154#define HV_X64_MSR_SINT10 0x4000009A
155#define HV_X64_MSR_SINT11 0x4000009B
156#define HV_X64_MSR_SINT12 0x4000009C
157#define HV_X64_MSR_SINT13 0x4000009D
158#define HV_X64_MSR_SINT14 0x4000009E
159#define HV_X64_MSR_SINT15 0x4000009F
160
161
162#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
163#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
164#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
165 (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
166
167/* Declare the various hypercall operations. */
168#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008
169
170#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
171#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
172#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
173 (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
174
175#define HV_PROCESSOR_POWER_STATE_C0 0
176#define HV_PROCESSOR_POWER_STATE_C1 1
177#define HV_PROCESSOR_POWER_STATE_C2 2
178#define HV_PROCESSOR_POWER_STATE_C3 3
179
180/* hypercall status code */
181#define HV_STATUS_SUCCESS 0
182#define HV_STATUS_INVALID_HYPERCALL_CODE 2
183#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
184#define HV_STATUS_INVALID_ALIGNMENT 4
185
186#endif
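The *_PAGE_ADDRESS_SHIFT/MASK pairs near the bottom of this header encode a page-aligned guest-physical address in the upper bits of the MSR, with an enable flag in bit 0. A small standalone sketch of composing and decomposing such an MSR value (illustrative values, not kernel code):

#include <stdio.h>
#include <stdint.h>

#define HYPERCALL_ENABLE	0x00000001ull
#define PAGE_ADDRESS_SHIFT	12
#define PAGE_ADDRESS_MASK	(~((1ull << PAGE_ADDRESS_SHIFT) - 1))

int main(void)
{
	uint64_t gpa = 0x12345000ull;	/* page-aligned guest physical address */
	uint64_t msr = (gpa & PAGE_ADDRESS_MASK) | HYPERCALL_ENABLE;

	printf("msr value:    0x%016llx\n", (unsigned long long)msr);
	printf("enabled:      %llu\n", (unsigned long long)(msr & HYPERCALL_ENABLE));
	printf("page address: 0x%016llx\n", (unsigned long long)(msr & PAGE_ADDRESS_MASK));
	return 0;
}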
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 7ec65b18085..1655147646a 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -26,11 +26,6 @@ extern unsigned int cached_irq_mask;
26 26
27extern raw_spinlock_t i8259A_lock; 27extern raw_spinlock_t i8259A_lock;
28 28
29extern void init_8259A(int auto_eoi);
30extern void enable_8259A_irq(unsigned int irq);
31extern void disable_8259A_irq(unsigned int irq);
32extern unsigned int startup_8259A_irq(unsigned int irq);
33
34/* the PIC may need a careful delay on some platforms, hence specific calls */ 29/* the PIC may need a careful delay on some platforms, hence specific calls */
35static inline unsigned char inb_pic(unsigned int port) 30static inline unsigned char inb_pic(unsigned int port)
36{ 31{
@@ -57,7 +52,17 @@ static inline void outb_pic(unsigned char value, unsigned int port)
57 52
58extern struct irq_chip i8259A_chip; 53extern struct irq_chip i8259A_chip;
59 54
60extern void mask_8259A(void); 55struct legacy_pic {
61extern void unmask_8259A(void); 56 int nr_legacy_irqs;
57 struct irq_chip *chip;
58 void (*mask_all)(void);
59 void (*restore_mask)(void);
60 void (*init)(int auto_eoi);
61 int (*irq_pending)(unsigned int irq);
62 void (*make_irq)(unsigned int irq);
63};
64
65extern struct legacy_pic *legacy_pic;
66extern struct legacy_pic null_legacy_pic;
62 67
63#endif /* _ASM_X86_I8259_H */ 68#endif /* _ASM_X86_I8259_H */
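The new struct legacy_pic replaces the removed direct 8259A declarations with an ops table, so PIC-less platforms can install null_legacy_pic instead of sprinkling checks around every caller. A standalone model of that pattern, with invented names rather than the actual kernel symbols:

#include <stdio.h>

/* Ops-table model: callers go through one indirection instead of calling
 * the 8259A helpers directly, so a PIC-less platform just swaps the table. */
struct pic_ops {
	int nr_legacy_irqs;
	void (*init)(int auto_eoi);
	int  (*irq_pending)(unsigned int irq);
};

static void real_init(int auto_eoi)        { printf("8259A init, aeoi=%d\n", auto_eoi); }
static int  real_pending(unsigned int irq) { return irq == 0; }

static void null_init(int auto_eoi)        { (void)auto_eoi; /* nothing to do */ }
static int  null_pending(unsigned int irq) { (void)irq; return 0; }

static struct pic_ops default_pic = { 16, real_init, real_pending };
static struct pic_ops null_pic    = {  0, null_init, null_pending };
static struct pic_ops *pic = &default_pic;

int main(void)
{
	pic->init(0);			/* legacy platform: programs the PIC */
	pic = &null_pic;		/* e.g. selected at boot on a PIC-less board */
	pic->init(0);			/* now a harmless no-op */
	printf("irq0 pending: %d\n", pic->irq_pending(0));
	return 0;
}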
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 5f61f6e0ffd..35832a03a51 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -143,8 +143,6 @@ extern int noioapicreroute;
143/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ 143/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
144extern int timer_through_8259; 144extern int timer_through_8259;
145 145
146extern void io_apic_disable_legacy(void);
147
148/* 146/*
149 * If we use the IO-APIC for IRQ routing, disable automatic 147 * If we use the IO-APIC for IRQ routing, disable automatic
150 * assignment of PCI IRQ's. 148 * assignment of PCI IRQ's.
@@ -189,6 +187,7 @@ extern struct mp_ioapic_gsi mp_gsi_routing[];
189int mp_find_ioapic(int gsi); 187int mp_find_ioapic(int gsi);
190int mp_find_ioapic_pin(int ioapic, int gsi); 188int mp_find_ioapic_pin(int ioapic, int gsi);
191void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); 189void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
190extern void __init pre_init_apic_IRQ0(void);
192 191
193#else /* !CONFIG_X86_IO_APIC */ 192#else /* !CONFIG_X86_IO_APIC */
194 193
@@ -198,7 +197,11 @@ static const int timer_through_8259 = 0;
198static inline void ioapic_init_mappings(void) { } 197static inline void ioapic_init_mappings(void) { }
199static inline void ioapic_insert_resources(void) { } 198static inline void ioapic_insert_resources(void) { }
200static inline void probe_nr_irqs_gsi(void) { } 199static inline void probe_nr_irqs_gsi(void) { }
200static inline int mp_find_ioapic(int gsi) { return 0; }
201 201
202struct io_apic_irq_attr;
203static inline int io_apic_set_pci_routing(struct device *dev, int irq,
204 struct io_apic_irq_attr *irq_attr) { return 0; }
202#endif 205#endif
203 206
204#endif /* _ASM_X86_IO_APIC_H */ 207#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 262292729fc..5458380b6ef 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -48,6 +48,5 @@ extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
48extern int vector_used_by_percpu_irq(unsigned int vector); 48extern int vector_used_by_percpu_irq(unsigned int vector);
49 49
50extern void init_ISA_irqs(void); 50extern void init_ISA_irqs(void);
51extern int nr_legacy_irqs;
52 51
53#endif /* _ASM_X86_IRQ_H */ 52#endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4fe681de1e7..4ffa345a8cc 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -32,7 +32,10 @@ struct kprobe;
32 32
33typedef u8 kprobe_opcode_t; 33typedef u8 kprobe_opcode_t;
34#define BREAKPOINT_INSTRUCTION 0xcc 34#define BREAKPOINT_INSTRUCTION 0xcc
35#define RELATIVEJUMP_INSTRUCTION 0xe9 35#define RELATIVEJUMP_OPCODE 0xe9
36#define RELATIVEJUMP_SIZE 5
37#define RELATIVECALL_OPCODE 0xe8
38#define RELATIVE_ADDR_SIZE 4
36#define MAX_INSN_SIZE 16 39#define MAX_INSN_SIZE 16
37#define MAX_STACK_SIZE 64 40#define MAX_STACK_SIZE 64
38#define MIN_STACK_SIZE(ADDR) \ 41#define MIN_STACK_SIZE(ADDR) \
@@ -44,6 +47,17 @@ typedef u8 kprobe_opcode_t;
44 47
45#define flush_insn_slot(p) do { } while (0) 48#define flush_insn_slot(p) do { } while (0)
46 49
50/* optinsn template addresses */
51extern kprobe_opcode_t optprobe_template_entry;
52extern kprobe_opcode_t optprobe_template_val;
53extern kprobe_opcode_t optprobe_template_call;
54extern kprobe_opcode_t optprobe_template_end;
55#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
56#define MAX_OPTINSN_SIZE \
57 (((unsigned long)&optprobe_template_end - \
58 (unsigned long)&optprobe_template_entry) + \
59 MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
60
47extern const int kretprobe_blacklist_size; 61extern const int kretprobe_blacklist_size;
48 62
49void arch_remove_kprobe(struct kprobe *p); 63void arch_remove_kprobe(struct kprobe *p);
@@ -64,6 +78,21 @@ struct arch_specific_insn {
64 int boostable; 78 int boostable;
65}; 79};
66 80
81struct arch_optimized_insn {
82 /* copy of the original instructions */
83 kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
84 /* detour code buffer */
85 kprobe_opcode_t *insn;
86 /* the size of instructions copied to detour code buffer */
87 size_t size;
88};
89
90/* Return true (!0) if optinsn is prepared for optimization. */
91static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
92{
93 return optinsn->size;
94}
95
67struct prev_kprobe { 96struct prev_kprobe {
68 struct kprobe *kp; 97 struct kprobe *kp;
69 unsigned long status; 98 unsigned long status;
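The new RELATIVEJUMP_OPCODE/RELATIVEJUMP_SIZE constants reflect that an optimized probe patches the probed site with a 5-byte "jmp rel32" (0xe9 plus a 4-byte displacement) to its detour buffer, and the displacement is measured from the end of the jump instruction. A small standalone sketch of that encoding, using arbitrary example addresses:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	/* jmp rel32 is RELATIVEJUMP_SIZE (5) bytes: the 0xe9 opcode followed
	 * by RELATIVE_ADDR_SIZE (4) bytes of displacement, relative to the
	 * address of the next instruction. */
	uint8_t insn[5];
	uint32_t probed = 0x1000;	/* example address of the probed instruction */
	uint32_t detour = 0x5000;	/* example address of the detour buffer */
	int32_t rel = (int32_t)(detour - (probed + sizeof(insn)));

	insn[0] = 0xe9;			/* RELATIVEJUMP_OPCODE */
	memcpy(&insn[1], &rel, sizeof(rel));

	printf("jmp rel32 displacement: %d (0x%08x)\n", rel, (uint32_t)rel);
	return 0;
}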
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7c18e1230f5..7a6f54fa13b 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -54,13 +54,23 @@ struct x86_emulate_ctxt;
54struct x86_emulate_ops { 54struct x86_emulate_ops {
55 /* 55 /*
56 * read_std: Read bytes of standard (non-emulated/special) memory. 56 * read_std: Read bytes of standard (non-emulated/special) memory.
57 * Used for instruction fetch, stack operations, and others. 57 * Used for descriptor reading.
58 * @addr: [IN ] Linear address from which to read. 58 * @addr: [IN ] Linear address from which to read.
59 * @val: [OUT] Value read from memory, zero-extended to 'u_long'. 59 * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
60 * @bytes: [IN ] Number of bytes to read from memory. 60 * @bytes: [IN ] Number of bytes to read from memory.
61 */ 61 */
62 int (*read_std)(unsigned long addr, void *val, 62 int (*read_std)(unsigned long addr, void *val,
63 unsigned int bytes, struct kvm_vcpu *vcpu); 63 unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
64
65 /*
66 * fetch: Read bytes of standard (non-emulated/special) memory.
67 * Used for instruction fetch.
68 * @addr: [IN ] Linear address from which to read.
69 * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
70 * @bytes: [IN ] Number of bytes to read from memory.
71 */
72 int (*fetch)(unsigned long addr, void *val,
73 unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
64 74
65 /* 75 /*
66 * read_emulated: Read bytes from emulated/special memory area. 76 * read_emulated: Read bytes from emulated/special memory area.
@@ -74,7 +84,7 @@ struct x86_emulate_ops {
74 struct kvm_vcpu *vcpu); 84 struct kvm_vcpu *vcpu);
75 85
76 /* 86 /*
77 * write_emulated: Read bytes from emulated/special memory area. 87 * write_emulated: Write bytes to emulated/special memory area.
78 * @addr: [IN ] Linear address to which to write. 88 * @addr: [IN ] Linear address to which to write.
79 * @val: [IN ] Value to write to memory (low-order bytes used as 89 * @val: [IN ] Value to write to memory (low-order bytes used as
80 * required). 90 * required).
@@ -168,6 +178,7 @@ struct x86_emulate_ctxt {
168 178
169/* Execution mode, passed to the emulator. */ 179/* Execution mode, passed to the emulator. */
170#define X86EMUL_MODE_REAL 0 /* Real mode. */ 180#define X86EMUL_MODE_REAL 0 /* Real mode. */
181#define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */
171#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ 182#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
172#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ 183#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
173#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ 184#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
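The reworked callbacks above gain a u32 *error out-parameter (and instruction fetch is split from read_std), so the emulator can learn the exact fault code of a failed guest access rather than only that it failed. A standalone model of that out-parameter convention, with invented names and values:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define FAKE_PF_ERROR_CODE	0x04	/* e.g. a not-present fault on a user read */

/* Model of a read callback: returns 0 on success, -1 on failure, and reports
 * a fault code through *error, mirroring the new u32 *error argument above. */
static int read_guest(uint64_t addr, void *val, unsigned int bytes, uint32_t *error)
{
	if (addr >= 0x10000) {		/* pretend everything above 64 KiB faults */
		*error = FAKE_PF_ERROR_CODE;
		return -1;
	}
	memset(val, 0xab, bytes);
	return 0;
}

int main(void)
{
	uint8_t buf[4];
	uint32_t err = 0;

	if (read_guest(0x20000, buf, sizeof(buf), &err))
		printf("access failed, fault error code 0x%x\n", err);
	else
		printf("read ok\n");
	return 0;
}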
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4f865e8b854..06d9e79ca37 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -25,7 +25,7 @@
25#include <asm/mtrr.h> 25#include <asm/mtrr.h>
26#include <asm/msr-index.h> 26#include <asm/msr-index.h>
27 27
28#define KVM_MAX_VCPUS 16 28#define KVM_MAX_VCPUS 64
29#define KVM_MEMORY_SLOTS 32 29#define KVM_MEMORY_SLOTS 32
30/* memory slots that does not exposed to userspace */ 30/* memory slots that does not exposed to userspace */
31#define KVM_PRIVATE_MEM_SLOTS 4 31#define KVM_PRIVATE_MEM_SLOTS 4
@@ -38,19 +38,6 @@
38#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 38#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
39 0xFFFFFF0000000000ULL) 39 0xFFFFFF0000000000ULL)
40 40
41#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
42 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
43#define KVM_GUEST_CR0_MASK \
44 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
45#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
46 (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
47#define KVM_VM_CR0_ALWAYS_ON \
48 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
49#define KVM_GUEST_CR4_MASK \
50 (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
51#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
52#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
53
54#define INVALID_PAGE (~(hpa_t)0) 41#define INVALID_PAGE (~(hpa_t)0)
55#define UNMAPPED_GVA (~(gpa_t)0) 42#define UNMAPPED_GVA (~(gpa_t)0)
56 43
@@ -256,7 +243,8 @@ struct kvm_mmu {
256 void (*new_cr3)(struct kvm_vcpu *vcpu); 243 void (*new_cr3)(struct kvm_vcpu *vcpu);
257 int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); 244 int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
258 void (*free)(struct kvm_vcpu *vcpu); 245 void (*free)(struct kvm_vcpu *vcpu);
259 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); 246 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
247 u32 *error);
260 void (*prefetch_page)(struct kvm_vcpu *vcpu, 248 void (*prefetch_page)(struct kvm_vcpu *vcpu,
261 struct kvm_mmu_page *page); 249 struct kvm_mmu_page *page);
262 int (*sync_page)(struct kvm_vcpu *vcpu, 250 int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -282,13 +270,15 @@ struct kvm_vcpu_arch {
282 u32 regs_dirty; 270 u32 regs_dirty;
283 271
284 unsigned long cr0; 272 unsigned long cr0;
273 unsigned long cr0_guest_owned_bits;
285 unsigned long cr2; 274 unsigned long cr2;
286 unsigned long cr3; 275 unsigned long cr3;
287 unsigned long cr4; 276 unsigned long cr4;
277 unsigned long cr4_guest_owned_bits;
288 unsigned long cr8; 278 unsigned long cr8;
289 u32 hflags; 279 u32 hflags;
290 u64 pdptrs[4]; /* pae */ 280 u64 pdptrs[4]; /* pae */
291 u64 shadow_efer; 281 u64 efer;
292 u64 apic_base; 282 u64 apic_base;
293 struct kvm_lapic *apic; /* kernel irqchip context */ 283 struct kvm_lapic *apic; /* kernel irqchip context */
294 int32_t apic_arb_prio; 284 int32_t apic_arb_prio;
@@ -374,17 +364,27 @@ struct kvm_vcpu_arch {
374 /* used for guest single stepping over the given code position */ 364 /* used for guest single stepping over the given code position */
375 u16 singlestep_cs; 365 u16 singlestep_cs;
376 unsigned long singlestep_rip; 366 unsigned long singlestep_rip;
367 /* fields used by HYPER-V emulation */
368 u64 hv_vapic;
377}; 369};
378 370
379struct kvm_mem_alias { 371struct kvm_mem_alias {
380 gfn_t base_gfn; 372 gfn_t base_gfn;
381 unsigned long npages; 373 unsigned long npages;
382 gfn_t target_gfn; 374 gfn_t target_gfn;
375#define KVM_ALIAS_INVALID 1UL
376 unsigned long flags;
383}; 377};
384 378
385struct kvm_arch{ 379#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION
386 int naliases; 380
381struct kvm_mem_aliases {
387 struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; 382 struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
383 int naliases;
384};
385
386struct kvm_arch {
387 struct kvm_mem_aliases *aliases;
388 388
389 unsigned int n_free_mmu_pages; 389 unsigned int n_free_mmu_pages;
390 unsigned int n_requested_mmu_pages; 390 unsigned int n_requested_mmu_pages;
@@ -416,6 +416,10 @@ struct kvm_arch{
416 s64 kvmclock_offset; 416 s64 kvmclock_offset;
417 417
418 struct kvm_xen_hvm_config xen_hvm_config; 418 struct kvm_xen_hvm_config xen_hvm_config;
419
420 /* fields used by HYPER-V emulation */
421 u64 hv_guest_os_id;
422 u64 hv_hypercall;
419}; 423};
420 424
421struct kvm_vm_stat { 425struct kvm_vm_stat {
@@ -471,6 +475,7 @@ struct kvm_x86_ops {
471 int (*hardware_setup)(void); /* __init */ 475 int (*hardware_setup)(void); /* __init */
472 void (*hardware_unsetup)(void); /* __exit */ 476 void (*hardware_unsetup)(void); /* __exit */
473 bool (*cpu_has_accelerated_tpr)(void); 477 bool (*cpu_has_accelerated_tpr)(void);
478 void (*cpuid_update)(struct kvm_vcpu *vcpu);
474 479
475 /* Create, but do not attach this VCPU */ 480 /* Create, but do not attach this VCPU */
476 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); 481 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
@@ -492,6 +497,7 @@ struct kvm_x86_ops {
492 void (*set_segment)(struct kvm_vcpu *vcpu, 497 void (*set_segment)(struct kvm_vcpu *vcpu,
493 struct kvm_segment *var, int seg); 498 struct kvm_segment *var, int seg);
494 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); 499 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
500 void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
495 void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); 501 void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
496 void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); 502 void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
497 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); 503 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -501,12 +507,13 @@ struct kvm_x86_ops {
501 void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 507 void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
502 void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 508 void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
503 void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 509 void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
504 unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); 510 int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest);
505 void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, 511 int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value);
506 int *exception);
507 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 512 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
508 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 513 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
509 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 514 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
515 void (*fpu_activate)(struct kvm_vcpu *vcpu);
516 void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
510 517
511 void (*tlb_flush)(struct kvm_vcpu *vcpu); 518 void (*tlb_flush)(struct kvm_vcpu *vcpu);
512 519
@@ -531,7 +538,8 @@ struct kvm_x86_ops {
531 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 538 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
532 int (*get_tdp_level)(void); 539 int (*get_tdp_level)(void);
533 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 540 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
534 bool (*gb_page_enable)(void); 541 int (*get_lpage_level)(void);
542 bool (*rdtscp_supported)(void);
535 543
536 const struct trace_print_flags *exit_reasons_str; 544 const struct trace_print_flags *exit_reasons_str;
537}; 545};
@@ -606,8 +614,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
606 unsigned long value); 614 unsigned long value);
607 615
608void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 616void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
609int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 617int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
610 int type_bits, int seg);
611 618
612int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); 619int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
613 620
@@ -653,6 +660,10 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
653int kvm_mmu_load(struct kvm_vcpu *vcpu); 660int kvm_mmu_load(struct kvm_vcpu *vcpu);
654void kvm_mmu_unload(struct kvm_vcpu *vcpu); 661void kvm_mmu_unload(struct kvm_vcpu *vcpu);
655void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 662void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
663gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
664gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
665gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
666gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
656 667
657int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 668int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
658 669
@@ -666,6 +677,7 @@ void kvm_disable_tdp(void);
666 677
667int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 678int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
668int complete_pio(struct kvm_vcpu *vcpu); 679int complete_pio(struct kvm_vcpu *vcpu);
680bool kvm_check_iopl(struct kvm_vcpu *vcpu);
669 681
670struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); 682struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
671 683
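
The four kvm_mmu_gva_to_gpa_* declarations above return a guest-physical address and report the page-fault error code through *error. A hedged user-space mock of the calling pattern (the translate stub and every name below are hypothetical stand-ins, not the kernel API):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t gpa_t;
    typedef unsigned long gva_t;

    #define UNMAPPED_GVA        ((gpa_t)~0ULL)
    #define PFERR_PRESENT_MASK  (1U << 0)
    #define PFERR_WRITE_MASK    (1U << 1)

    /* Hypothetical stand-in for a gva->gpa write translation: fail odd addresses. */
    static gpa_t mock_gva_to_gpa_write(gva_t gva, uint32_t *error)
    {
            if (gva & 1) {
                    *error = PFERR_PRESENT_MASK | PFERR_WRITE_MASK;
                    return UNMAPPED_GVA;
            }
            *error = 0;
            return (gpa_t)gva;      /* identity map, for the sketch only */
    }

    int main(void)
    {
            uint32_t error;
            gpa_t gpa = mock_gva_to_gpa_write(0x1001, &error);

            if (gpa == UNMAPPED_GVA)
                    printf("fault, error code %#x -> caller would inject a page fault\n", error);
            else
                    printf("gpa %#llx\n", (unsigned long long)gpa);
            return 0;
    }
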
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c584076a47f..ffae1420e7d 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_KVM_PARA_H 2#define _ASM_X86_KVM_PARA_H
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <asm/hyperv.h>
5 6
6/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It 7/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
7 * should be used to determine that a VM is running under KVM. 8 * should be used to determine that a VM is running under KVM.
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
new file mode 100644
index 00000000000..451d30e7f62
--- /dev/null
+++ b/arch/x86/include/asm/mrst.h
@@ -0,0 +1,19 @@
1/*
2 * mrst.h: Intel Moorestown platform specific setup code
3 *
4 * (C) Copyright 2009 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11#ifndef _ASM_X86_MRST_H
12#define _ASM_X86_MRST_H
13extern int pci_mrst_init(void);
14int __init sfi_parse_mrtc(struct sfi_table_header *table);
15
16#define SFI_MTMR_MAX_NUM 8
17#define SFI_MRTC_MAX 8
18
19#endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
index 13370b95ea9..37c516545ec 100644
--- a/arch/x86/include/asm/numaq.h
+++ b/arch/x86/include/asm/numaq.h
@@ -30,6 +30,7 @@
30 30
31extern int found_numaq; 31extern int found_numaq;
32extern int get_memcfg_numaq(void); 32extern int get_memcfg_numaq(void);
33extern int pci_numaq_init(void);
33 34
34extern void *xquad_portio; 35extern void *xquad_portio;
35 36
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 3a57385d9fa..101229b0d8e 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -13,7 +13,6 @@ struct olpc_platform_t {
13 13
14#define OLPC_F_PRESENT 0x01 14#define OLPC_F_PRESENT 0x01
15#define OLPC_F_DCON 0x02 15#define OLPC_F_DCON 0x02
16#define OLPC_F_VSA 0x04
17 16
18#ifdef CONFIG_OLPC 17#ifdef CONFIG_OLPC
19 18
@@ -51,18 +50,6 @@ static inline int olpc_has_dcon(void)
51} 50}
52 51
53/* 52/*
54 * The VSA is software from AMD that typical Geode bioses will include.
55 * It is used to emulate the PCI bus, VGA, etc. OLPC's Open Firmware does
56 * not include the VSA; instead, PCI is emulated by the kernel.
57 *
58 * The VSA is described further in arch/x86/pci/olpc.c.
59 */
60static inline int olpc_has_vsa(void)
61{
62 return (olpc_platform_info.flags & OLPC_F_VSA) ? 1 : 0;
63}
64
65/*
66 * The "Mass Production" version of OLPC's XO is identified as being model 53 * The "Mass Production" version of OLPC's XO is identified as being model
67 * C2. During the prototype phase, the following models (in chronological 54 * C2. During the prototype phase, the following models (in chronological
68 * order) were created: A1, B1, B2, B3, B4, C1. The A1 through B2 models 55 * order) were created: A1, B1, B2, B3, B4, C1. The A1 through B2 models
@@ -87,13 +74,10 @@ static inline int olpc_has_dcon(void)
87 return 0; 74 return 0;
88} 75}
89 76
90static inline int olpc_has_vsa(void)
91{
92 return 0;
93}
94
95#endif 77#endif
96 78
79extern int pci_olpc_init(void);
80
97/* EC related functions */ 81/* EC related functions */
98 82
99extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, 83extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index b4a00dd4eed..3e002ca5a28 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -45,8 +45,15 @@ static inline int pci_proc_domain(struct pci_bus *bus)
45 45
46#ifdef CONFIG_PCI 46#ifdef CONFIG_PCI
47extern unsigned int pcibios_assign_all_busses(void); 47extern unsigned int pcibios_assign_all_busses(void);
48extern int pci_legacy_init(void);
49# ifdef CONFIG_ACPI
50# define x86_default_pci_init pci_acpi_init
51# else
52# define x86_default_pci_init pci_legacy_init
53# endif
48#else 54#else
49#define pcibios_assign_all_busses() 0 55# define pcibios_assign_all_busses() 0
56# define x86_default_pci_init NULL
50#endif 57#endif
51 58
52extern unsigned long pci_mem_start; 59extern unsigned long pci_mem_start;
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 05b58ccb2e8..1a0422348d6 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -83,7 +83,6 @@ struct irq_routing_table {
83 83
84extern unsigned int pcibios_irq_mask; 84extern unsigned int pcibios_irq_mask;
85 85
86extern int pcibios_scanned;
87extern spinlock_t pci_config_lock; 86extern spinlock_t pci_config_lock;
88 87
89extern int (*pcibios_enable_irq)(struct pci_dev *dev); 88extern int (*pcibios_enable_irq)(struct pci_dev *dev);
@@ -106,16 +105,15 @@ extern bool port_cf9_safe;
106extern int pci_direct_probe(void); 105extern int pci_direct_probe(void);
107extern void pci_direct_init(int type); 106extern void pci_direct_init(int type);
108extern void pci_pcbios_init(void); 107extern void pci_pcbios_init(void);
109extern int pci_olpc_init(void);
110extern void __init dmi_check_pciprobe(void); 108extern void __init dmi_check_pciprobe(void);
111extern void __init dmi_check_skip_isa_align(void); 109extern void __init dmi_check_skip_isa_align(void);
112 110
113/* some common used subsys_initcalls */ 111/* some common used subsys_initcalls */
114extern int __init pci_acpi_init(void); 112extern int __init pci_acpi_init(void);
115extern int __init pcibios_irq_init(void); 113extern void __init pcibios_irq_init(void);
116extern int __init pci_visws_init(void);
117extern int __init pci_numaq_init(void);
118extern int __init pcibios_init(void); 114extern int __init pcibios_init(void);
115extern int pci_legacy_init(void);
116extern void pcibios_fixup_irqs(void);
119 117
120/* pci-mmconfig.c */ 118/* pci-mmconfig.c */
121 119
@@ -183,3 +181,17 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
183{ 181{
184 asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); 182 asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
185} 183}
184
185#ifdef CONFIG_PCI
186# ifdef CONFIG_ACPI
187# define x86_default_pci_init pci_acpi_init
188# else
189# define x86_default_pci_init pci_legacy_init
190# endif
191# define x86_default_pci_init_irq pcibios_irq_init
192# define x86_default_pci_fixup_irqs pcibios_fixup_irqs
193#else
194# define x86_default_pci_init NULL
195# define x86_default_pci_init_irq NULL
196# define x86_default_pci_fixup_irqs NULL
197#endif
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 18e496c98ff..86b1506f417 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -37,10 +37,8 @@ void setup_bios_corruption_check(void);
37 37
38#ifdef CONFIG_X86_VISWS 38#ifdef CONFIG_X86_VISWS
39extern void visws_early_detect(void); 39extern void visws_early_detect(void);
40extern int is_visws_box(void);
41#else 40#else
42static inline void visws_early_detect(void) { } 41static inline void visws_early_detect(void) { }
43static inline int is_visws_box(void) { return 0; }
44#endif 42#endif
45 43
46extern unsigned long saved_video_mode; 44extern unsigned long saved_video_mode;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1fecb7e6113..38638cd2fa4 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -313,7 +313,7 @@ struct __attribute__ ((__packed__)) vmcb {
313 313
314#define SVM_EXIT_ERR -1 314#define SVM_EXIT_ERR -1
315 315
316#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */ 316#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
317 317
318#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" 318#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
319#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" 319#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
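
The SVM_CR0_SELECTIVE_MASK hunk above appears to be a value fix, not just a cleanup: with the CR0 bit positions PE=0, MP=1, TS=3, the old literal (1 << 3 | 1) encodes TS|PE (0x9), while the comment and the replacement intend TS|MP (0xa). A tiny sketch that makes the difference visible (constants re-declared locally):

    #include <stdio.h>

    #define X86_CR0_PE (1UL << 0)
    #define X86_CR0_MP (1UL << 1)
    #define X86_CR0_TS (1UL << 3)

    int main(void)
    {
            unsigned long old_mask = (1 << 3 | 1);            /* what the code had: TS|PE */
            unsigned long new_mask = X86_CR0_TS | X86_CR0_MP; /* what the comment meant */

            printf("old mask %#lx (TS|PE), new mask %#lx (TS|MP)\n",
                   old_mask, new_mask);
            return 0;
    }
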
diff --git a/arch/x86/include/asm/visws/cobalt.h b/arch/x86/include/asm/visws/cobalt.h
index 166adf61e77..2edb37637ea 100644
--- a/arch/x86/include/asm/visws/cobalt.h
+++ b/arch/x86/include/asm/visws/cobalt.h
@@ -122,4 +122,6 @@ extern char visws_board_type;
122 122
123extern char visws_board_rev; 123extern char visws_board_rev;
124 124
125extern int pci_visws_init(void);
126
125#endif /* _ASM_X86_VISWS_COBALT_H */ 127#endif /* _ASM_X86_VISWS_COBALT_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2b4945419a8..fb9a080740e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -53,6 +53,7 @@
53 */ 53 */
54#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 54#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
55#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 55#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
56#define SECONDARY_EXEC_RDTSCP 0x00000008
56#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 57#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
57#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 58#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
58#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 59#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
@@ -251,6 +252,7 @@ enum vmcs_field {
251#define EXIT_REASON_MSR_READ 31 252#define EXIT_REASON_MSR_READ 31
252#define EXIT_REASON_MSR_WRITE 32 253#define EXIT_REASON_MSR_WRITE 32
253#define EXIT_REASON_MWAIT_INSTRUCTION 36 254#define EXIT_REASON_MWAIT_INSTRUCTION 36
255#define EXIT_REASON_MONITOR_INSTRUCTION 39
254#define EXIT_REASON_PAUSE_INSTRUCTION 40 256#define EXIT_REASON_PAUSE_INSTRUCTION 40
255#define EXIT_REASON_MCE_DURING_VMENTRY 41 257#define EXIT_REASON_MCE_DURING_VMENTRY 41
256#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 258#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
@@ -362,6 +364,7 @@ enum vmcs_field {
362#define VMX_EPTP_UC_BIT (1ull << 8) 364#define VMX_EPTP_UC_BIT (1ull << 8)
363#define VMX_EPTP_WB_BIT (1ull << 14) 365#define VMX_EPTP_WB_BIT (1ull << 14)
364#define VMX_EPT_2MB_PAGE_BIT (1ull << 16) 366#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
367#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
365#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) 368#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
366#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) 369#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
367#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) 370#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
@@ -374,7 +377,7 @@ enum vmcs_field {
374#define VMX_EPT_READABLE_MASK 0x1ull 377#define VMX_EPT_READABLE_MASK 0x1ull
375#define VMX_EPT_WRITABLE_MASK 0x2ull 378#define VMX_EPT_WRITABLE_MASK 0x2ull
376#define VMX_EPT_EXECUTABLE_MASK 0x4ull 379#define VMX_EPT_EXECUTABLE_MASK 0x4ull
377#define VMX_EPT_IGMT_BIT (1ull << 6) 380#define VMX_EPT_IPAT_BIT (1ull << 6)
378 381
379#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 382#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
380 383
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 60cc3526908..519b54327d7 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -99,6 +99,20 @@ struct x86_init_iommu {
99}; 99};
100 100
101/** 101/**
102 * struct x86_init_pci - platform specific pci init functions
103 * @arch_init: platform specific pci arch init call
104 * @init: platform specific pci subsystem init
105 * @init_irq: platform specific pci irq init
106 * @fixup_irqs: platform specific pci irq fixup
107 */
108struct x86_init_pci {
109 int (*arch_init)(void);
110 int (*init)(void);
111 void (*init_irq)(void);
112 void (*fixup_irqs)(void);
113};
114
115/**
102 * struct x86_init_ops - functions for platform specific setup 116 * struct x86_init_ops - functions for platform specific setup
103 * 117 *
104 */ 118 */
@@ -110,6 +124,7 @@ struct x86_init_ops {
110 struct x86_init_paging paging; 124 struct x86_init_paging paging;
111 struct x86_init_timers timers; 125 struct x86_init_timers timers;
112 struct x86_init_iommu iommu; 126 struct x86_init_iommu iommu;
127 struct x86_init_pci pci;
113}; 128};
114 129
115/** 130/**
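
struct x86_init_pci above follows the usual x86_init pattern: a table of function pointers filled with platform defaults and overridden by platform code before the generic subsys_initcalls run. A minimal user-space sketch of that pattern (all names below are illustrative, not the kernel's):

    #include <stdio.h>

    struct pci_init_ops {
            int  (*arch_init)(void);
            int  (*init)(void);
            void (*init_irq)(void);
            void (*fixup_irqs)(void);
    };

    static int  default_pci_init(void)    { printf("legacy pci init\n"); return 0; }
    static void default_init_irq(void)    { printf("default irq init\n"); }
    static void default_fixup_irqs(void)  { printf("default irq fixup\n"); }

    /* Filled with defaults; a platform may override individual hooks early. */
    static struct pci_init_ops pci_ops = {
            .init       = default_pci_init,
            .init_irq   = default_init_irq,
            .fixup_irqs = default_fixup_irqs,
    };

    static int platform_pci_init(void) { printf("platform pci init\n"); return 0; }

    int main(void)
    {
            /* Platform setup swaps in its own hook, much like the acpi_boot_init()
             * hunk further down sets x86_init.pci.init = pci_acpi_init. */
            pci_ops.init = platform_pci_init;

            if (pci_ops.arch_init)          /* optional hook, may stay NULL */
                    pci_ops.arch_init();
            pci_ops.init();
            pci_ops.init_irq();
            pci_ops.fixup_irqs();
            return 0;
    }
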
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d87f09bc5a5..4c58352209e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -87,6 +87,7 @@ obj-$(CONFIG_VM86) += vm86_32.o
87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
88 88
89obj-$(CONFIG_HPET_TIMER) += hpet.o 89obj-$(CONFIG_HPET_TIMER) += hpet.o
90obj-$(CONFIG_APB_TIMER) += apb_timer.o
90 91
91obj-$(CONFIG_K8_NB) += k8.o 92obj-$(CONFIG_K8_NB) += k8.o
92obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o 93obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 738fcb60e70..a54d714545f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -35,6 +35,7 @@
35#include <linux/ioport.h> 35#include <linux/ioport.h>
36#include <linux/pci.h> 36#include <linux/pci.h>
37 37
38#include <asm/pci_x86.h>
38#include <asm/pgtable.h> 39#include <asm/pgtable.h>
39#include <asm/io_apic.h> 40#include <asm/io_apic.h>
40#include <asm/apic.h> 41#include <asm/apic.h>
@@ -1624,6 +1625,9 @@ int __init acpi_boot_init(void)
1624 1625
1625 acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); 1626 acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
1626 1627
1628 if (!acpi_noirq)
1629 x86_init.pci.init = pci_acpi_init;
1630
1627 return 0; 1631 return 0;
1628} 1632}
1629 1633
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e6ea0342c8f..3a4bf35c179 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -7,6 +7,7 @@
7#include <linux/mm.h> 7#include <linux/mm.h>
8#include <linux/vmalloc.h> 8#include <linux/vmalloc.h>
9#include <linux/memory.h> 9#include <linux/memory.h>
10#include <linux/stop_machine.h>
10#include <asm/alternative.h> 11#include <asm/alternative.h>
11#include <asm/sections.h> 12#include <asm/sections.h>
12#include <asm/pgtable.h> 13#include <asm/pgtable.h>
@@ -572,3 +573,62 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
572 local_irq_restore(flags); 573 local_irq_restore(flags);
573 return addr; 574 return addr;
574} 575}
576
577/*
578 * Cross-modifying kernel text with stop_machine().
579 * This code originally comes from immediate value.
580 */
581static atomic_t stop_machine_first;
582static int wrote_text;
583
584struct text_poke_params {
585 void *addr;
586 const void *opcode;
587 size_t len;
588};
589
590static int __kprobes stop_machine_text_poke(void *data)
591{
592 struct text_poke_params *tpp = data;
593
594 if (atomic_dec_and_test(&stop_machine_first)) {
595 text_poke(tpp->addr, tpp->opcode, tpp->len);
596 smp_wmb(); /* Make sure other cpus see that this has run */
597 wrote_text = 1;
598 } else {
599 while (!wrote_text)
600 cpu_relax();
601 smp_mb(); /* Load wrote_text before following execution */
602 }
603
604 flush_icache_range((unsigned long)tpp->addr,
605 (unsigned long)tpp->addr + tpp->len);
606 return 0;
607}
608
609/**
610 * text_poke_smp - Update instructions on a live kernel on SMP
611 * @addr: address to modify
612 * @opcode: source of the copy
613 * @len: length to copy
614 *
 615 * Modify a multi-byte instruction by using stop_machine() on SMP. This allows
 616 * callers to poke/set multi-byte text on SMP. Only modification of code that is
 617 * not executed from NMI/MCE context should be allowed, since stop_machine()
 618 * does _not_ protect code against NMI and MCE.
619 *
620 * Note: Must be called under get_online_cpus() and text_mutex.
621 */
622void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
623{
624 struct text_poke_params tpp;
625
626 tpp.addr = addr;
627 tpp.opcode = opcode;
628 tpp.len = len;
629 atomic_set(&stop_machine_first, 1);
630 wrote_text = 0;
631 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
632 return addr;
633}
634
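
text_poke_smp() above relies on a simple rendezvous: the first CPU through stop_machine_text_poke() performs the write, the others spin until wrote_text is set. A user-space sketch of the same pattern, with pthreads and C11 atomics standing in for stop_machine() and the kernel's atomics (an analogy only; it does not patch any code):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <string.h>

    static atomic_int stop_machine_first;
    static atomic_int wrote_text;
    static char text[16] = "old";

    static void *poke_worker(void *arg)
    {
            (void)arg;
            /* The thread that drops the counter from 1 to 0 is the single writer. */
            if (atomic_fetch_sub(&stop_machine_first, 1) == 1) {
                    strcpy(text, "new");
                    atomic_store(&wrote_text, 1);   /* publish, like smp_wmb() + flag */
            } else {
                    while (!atomic_load(&wrote_text))
                            ;                        /* spin, like the cpu_relax() loop */
            }
            return NULL;
    }

    int main(void)
    {
            pthread_t t[4];
            int i;

            atomic_store(&stop_machine_first, 1);
            atomic_store(&wrote_text, 0);
            for (i = 0; i < 4; i++)
                    pthread_create(&t[i], NULL, poke_worker, NULL);
            for (i = 0; i < 4; i++)
                    pthread_join(t[i], NULL);
            printf("text = %s\n", text);
            return 0;
    }
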
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
new file mode 100644
index 00000000000..4b7099526d2
--- /dev/null
+++ b/arch/x86/kernel/apb_timer.c
@@ -0,0 +1,784 @@
1/*
2 * apb_timer.c: Driver for Langwell APB timers
3 *
4 * (C) Copyright 2009 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 * Note:
13 * Langwell is the south complex of Intel Moorestown MID platform. There are
14 * eight external timers in total that can be used by the operating system.
15 * The timer information, such as frequency and addresses, is provided to the
16 * OS via SFI tables.
 17 * Timer interrupts are routed through a FW/HW-emulated IOAPIC, each with its
 18 * own redirection table entry (RTE).
 19 * Unlike HPET, there is no master counter; therefore one of the timers is
 20 * used as the clocksource. The overall allocation looks like:
 21 * - timers 0 .. NR_CPUS-1 for the per-CPU clockevents
 22 * - one timer for the clocksource
 23 * - one timer for the watchdog driver.
 24 * It is also worth noting that the APB timer does not support a true one-shot
 25 * mode; free-running mode is used here to emulate it.
 26 * The APB timer can also be used as a broadcast timer along with the per-CPU
 27 * local APIC timer, but by default it has a higher rating than the local APIC timers.
28 */
29
30#include <linux/clocksource.h>
31#include <linux/clockchips.h>
32#include <linux/delay.h>
33#include <linux/errno.h>
34#include <linux/init.h>
35#include <linux/sysdev.h>
36#include <linux/pm.h>
37#include <linux/pci.h>
38#include <linux/sfi.h>
39#include <linux/interrupt.h>
40#include <linux/cpu.h>
41#include <linux/irq.h>
42
43#include <asm/fixmap.h>
44#include <asm/apb_timer.h>
45
46#define APBT_MASK CLOCKSOURCE_MASK(32)
47#define APBT_SHIFT 22
48#define APBT_CLOCKEVENT_RATING 150
49#define APBT_CLOCKSOURCE_RATING 250
50#define APBT_MIN_DELTA_USEC 200
51
52#define EVT_TO_APBT_DEV(evt) container_of(evt, struct apbt_dev, evt)
53#define APBT_CLOCKEVENT0_NUM (0)
54#define APBT_CLOCKEVENT1_NUM (1)
55#define APBT_CLOCKSOURCE_NUM (2)
56
57static unsigned long apbt_address;
58static int apb_timer_block_enabled;
59static void __iomem *apbt_virt_address;
60static int phy_cs_timer_id;
61
62/*
63 * Common DW APB timer info
64 */
65static uint64_t apbt_freq;
66
67static void apbt_set_mode(enum clock_event_mode mode,
68 struct clock_event_device *evt);
69static int apbt_next_event(unsigned long delta,
70 struct clock_event_device *evt);
71static cycle_t apbt_read_clocksource(struct clocksource *cs);
72static void apbt_restart_clocksource(struct clocksource *cs);
73
74struct apbt_dev {
75 struct clock_event_device evt;
76 unsigned int num;
77 int cpu;
78 unsigned int irq;
79 unsigned int tick;
80 unsigned int count;
81 unsigned int flags;
82 char name[10];
83};
84
85int disable_apbt_percpu __cpuinitdata;
86
87static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev);
88
89#ifdef CONFIG_SMP
90static unsigned int apbt_num_timers_used;
91static struct apbt_dev *apbt_devs;
92#endif
93
94static inline unsigned long apbt_readl_reg(unsigned long a)
95{
96 return readl(apbt_virt_address + a);
97}
98
99static inline void apbt_writel_reg(unsigned long d, unsigned long a)
100{
101 writel(d, apbt_virt_address + a);
102}
103
104static inline unsigned long apbt_readl(int n, unsigned long a)
105{
106 return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE);
107}
108
109static inline void apbt_writel(int n, unsigned long d, unsigned long a)
110{
111 writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE);
112}
113
114static inline void apbt_set_mapping(void)
115{
116 struct sfi_timer_table_entry *mtmr;
117
118 if (apbt_virt_address) {
119 pr_debug("APBT base already mapped\n");
120 return;
121 }
122 mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
123 if (mtmr == NULL) {
124 printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
125 APBT_CLOCKEVENT0_NUM);
126 return;
127 }
128 apbt_address = (unsigned long)mtmr->phys_addr;
129 if (!apbt_address) {
130 printk(KERN_WARNING "No timer base from SFI, use default\n");
131 apbt_address = APBT_DEFAULT_BASE;
132 }
133 apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE);
134 if (apbt_virt_address) {
135 pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\
136 (void *)apbt_address, (void *)apbt_virt_address);
137 } else {
138 pr_debug("Failed mapping APBT phy address at %p\n",\
139 (void *)apbt_address);
140 goto panic_noapbt;
141 }
142 apbt_freq = mtmr->freq_hz / USEC_PER_SEC;
143 sfi_free_mtmr(mtmr);
144
145 /* Now figure out the physical timer id for clocksource device */
146 mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM);
147 if (mtmr == NULL)
148 goto panic_noapbt;
149
150 /* Now figure out the physical timer id */
151 phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff)
152 / APBTMRS_REG_SIZE;
153 pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id);
154 return;
155
156panic_noapbt:
157 panic("Failed to setup APB system timer\n");
158
159}
160
161static inline void apbt_clear_mapping(void)
162{
163 iounmap(apbt_virt_address);
164 apbt_virt_address = NULL;
165}
166
167/*
168 * APBT timer interrupt enable / disable
169 */
170static inline int is_apbt_capable(void)
171{
172 return apbt_virt_address ? 1 : 0;
173}
174
175static struct clocksource clocksource_apbt = {
176 .name = "apbt",
177 .rating = APBT_CLOCKSOURCE_RATING,
178 .read = apbt_read_clocksource,
179 .mask = APBT_MASK,
180 .shift = APBT_SHIFT,
181 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
182 .resume = apbt_restart_clocksource,
183};
184
185/* boot APB clock event device */
186static struct clock_event_device apbt_clockevent = {
187 .name = "apbt0",
188 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
189 .set_mode = apbt_set_mode,
190 .set_next_event = apbt_next_event,
191 .shift = APBT_SHIFT,
192 .irq = 0,
193 .rating = APBT_CLOCKEVENT_RATING,
194};
195
196/*
197 * if user does not want to use per CPU apb timer, just give it a lower rating
198 * than local apic timer and skip the late per cpu timer init.
199 */
200static inline int __init setup_x86_mrst_timer(char *arg)
201{
202 if (!arg)
203 return -EINVAL;
204
205 if (strcmp("apbt_only", arg) == 0)
206 disable_apbt_percpu = 0;
207 else if (strcmp("lapic_and_apbt", arg) == 0)
208 disable_apbt_percpu = 1;
209 else {
210 pr_warning("X86 MRST timer option %s not recognised"
211 " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
212 arg);
213 return -EINVAL;
214 }
215 return 0;
216}
217__setup("x86_mrst_timer=", setup_x86_mrst_timer);
218
219/*
 220 * Start counting down from 0xffff_ffff. This is done by toggling the enable bit
 221 * and then loading the initial load count with ~0.
222 */
223static void apbt_start_counter(int n)
224{
225 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
226
227 ctrl &= ~APBTMR_CONTROL_ENABLE;
228 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
229 apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT);
230 /* enable, mask interrupt */
231 ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC;
232 ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT);
233 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
234 /* read it once to get cached counter value initialized */
235 apbt_read_clocksource(&clocksource_apbt);
236}
237
238static irqreturn_t apbt_interrupt_handler(int irq, void *data)
239{
240 struct apbt_dev *dev = (struct apbt_dev *)data;
241 struct clock_event_device *aevt = &dev->evt;
242
243 if (!aevt->event_handler) {
244 printk(KERN_INFO "Spurious APBT timer interrupt on %d\n",
245 dev->num);
246 return IRQ_NONE;
247 }
248 aevt->event_handler(aevt);
249 return IRQ_HANDLED;
250}
251
252static void apbt_restart_clocksource(struct clocksource *cs)
253{
254 apbt_start_counter(phy_cs_timer_id);
255}
256
257/* Setup IRQ routing via IOAPIC */
258#ifdef CONFIG_SMP
259static void apbt_setup_irq(struct apbt_dev *adev)
260{
261 struct irq_chip *chip;
262 struct irq_desc *desc;
263
264 /* timer0 irq has been setup early */
265 if (adev->irq == 0)
266 return;
267 desc = irq_to_desc(adev->irq);
268 chip = get_irq_chip(adev->irq);
269 disable_irq(adev->irq);
270 desc->status |= IRQ_MOVE_PCNTXT;
271 irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
 272 /* APB timer irqs are set up as mp_irqs, the timer is edge triggered */
273 set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge");
274 enable_irq(adev->irq);
275 if (system_state == SYSTEM_BOOTING)
276 if (request_irq(adev->irq, apbt_interrupt_handler,
277 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
278 adev->name, adev)) {
279 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
280 adev->num);
281 }
282}
283#endif
284
285static void apbt_enable_int(int n)
286{
287 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
288 /* clear pending intr */
289 apbt_readl(n, APBTMR_N_EOI);
290 ctrl &= ~APBTMR_CONTROL_INT;
291 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
292}
293
294static void apbt_disable_int(int n)
295{
296 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
297
298 ctrl |= APBTMR_CONTROL_INT;
299 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
300}
301
302
303static int __init apbt_clockevent_register(void)
304{
305 struct sfi_timer_table_entry *mtmr;
306 struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev);
307
308 mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
309 if (mtmr == NULL) {
310 printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
311 APBT_CLOCKEVENT0_NUM);
312 return -ENODEV;
313 }
314
315 /*
316 * We need to calculate the scaled math multiplication factor for
317 * nanosecond to apbt tick conversion.
318 * mult = (nsec/cycle)*2^APBT_SHIFT
319 */
320 apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz
321 , NSEC_PER_SEC, APBT_SHIFT);
322
323 /* Calculate the min / max delta */
324 apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
325 &apbt_clockevent);
326 apbt_clockevent.min_delta_ns = clockevent_delta2ns(
327 APBT_MIN_DELTA_USEC*apbt_freq,
328 &apbt_clockevent);
329 /*
330 * Start apbt with the boot cpu mask and make it
331 * global if not used for per cpu timer.
332 */
333 apbt_clockevent.cpumask = cpumask_of(smp_processor_id());
334 adev->num = smp_processor_id();
335 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
336
337 if (disable_apbt_percpu) {
338 apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;
339 global_clock_event = &adev->evt;
340 printk(KERN_DEBUG "%s clockevent registered as global\n",
341 global_clock_event->name);
342 }
343
344 if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler,
345 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
346 apbt_clockevent.name, adev)) {
347 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
348 apbt_clockevent.irq);
349 }
350
351 clockevents_register_device(&adev->evt);
352 /* Start APBT 0 interrupts */
353 apbt_enable_int(APBT_CLOCKEVENT0_NUM);
354
355 sfi_free_mtmr(mtmr);
356 return 0;
357}
358
359#ifdef CONFIG_SMP
 360/* Should be called once per CPU, on the CPU being brought up */
361void apbt_setup_secondary_clock(void)
362{
363 struct apbt_dev *adev;
364 struct clock_event_device *aevt;
365 int cpu;
366
367 /* Don't register boot CPU clockevent */
368 cpu = smp_processor_id();
369 if (cpu == boot_cpu_id)
370 return;
371 /*
372 * We need to calculate the scaled math multiplication factor for
373 * nanosecond to apbt tick conversion.
374 * mult = (nsec/cycle)*2^APBT_SHIFT
375 */
376 printk(KERN_INFO "Init per CPU clockevent %d\n", cpu);
377 adev = &per_cpu(cpu_apbt_dev, cpu);
378 aevt = &adev->evt;
379
380 memcpy(aevt, &apbt_clockevent, sizeof(*aevt));
381 aevt->cpumask = cpumask_of(cpu);
382 aevt->name = adev->name;
383 aevt->mode = CLOCK_EVT_MODE_UNUSED;
384
385 printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n",
386 cpu, aevt->name, *(u32 *)aevt->cpumask);
387
388 apbt_setup_irq(adev);
389
390 clockevents_register_device(aevt);
391
392 apbt_enable_int(cpu);
393
394 return;
395}
396
397/*
 398 * This notifier handler processes CPU hotplug events. In case of S0i3, non-boot
 399 * CPUs are disabled/enabled frequently; for performance reasons, we keep the per-CPU
 400 * timer irq registered so that we do not need to do free_irq/request_irq on every transition.
 401 *
 402 * TODO: it might be more reliable to disable the per-CPU clockevent device directly,
 403 * without the notifier chain. Currently, CPU 0 may get interrupts from other
 404 * CPU timers during the offline process due to the ordering of notifications;
 405 * the extra interrupt is harmless.
406 */
407static int apbt_cpuhp_notify(struct notifier_block *n,
408 unsigned long action, void *hcpu)
409{
410 unsigned long cpu = (unsigned long)hcpu;
411 struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);
412
413 switch (action & 0xf) {
414 case CPU_DEAD:
415 apbt_disable_int(cpu);
416 if (system_state == SYSTEM_RUNNING)
417 pr_debug("skipping APBT CPU %lu offline\n", cpu);
418 else if (adev) {
419 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
420 free_irq(adev->irq, adev);
421 }
422 break;
423 default:
 424 pr_debug("APBT notified %lu, no action\n", action);
425 }
426 return NOTIFY_OK;
427}
428
429static __init int apbt_late_init(void)
430{
431 if (disable_apbt_percpu)
432 return 0;
433 /* This notifier should be called after workqueue is ready */
434 hotcpu_notifier(apbt_cpuhp_notify, -20);
435 return 0;
436}
437fs_initcall(apbt_late_init);
438#else
439
440void apbt_setup_secondary_clock(void) {}
441
442#endif /* CONFIG_SMP */
443
444static void apbt_set_mode(enum clock_event_mode mode,
445 struct clock_event_device *evt)
446{
447 unsigned long ctrl;
448 uint64_t delta;
449 int timer_num;
450 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt);
451
452 timer_num = adev->num;
453 pr_debug("%s CPU %d timer %d mode=%d\n",
454 __func__, first_cpu(*evt->cpumask), timer_num, mode);
455
456 switch (mode) {
457 case CLOCK_EVT_MODE_PERIODIC:
458 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult;
459 delta >>= apbt_clockevent.shift;
460 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
461 ctrl |= APBTMR_CONTROL_MODE_PERIODIC;
462 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
463 /*
 464 * Per DW APB databook p. 46, the timer has to be disabled before
 465 * loading the counter, otherwise it may cause a sync problem.
466 */
467 ctrl &= ~APBTMR_CONTROL_ENABLE;
468 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
469 udelay(1);
470 pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ);
471 apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT);
472 ctrl |= APBTMR_CONTROL_ENABLE;
473 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
474 break;
475 /* APB timer does not have one-shot mode, use free running mode */
476 case CLOCK_EVT_MODE_ONESHOT:
477 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
478 /*
 479 * Set free-running mode; this lets the timer reload the maximum
 480 * timeout, which gives enough time (~3 min at a 25 MHz clock) to rearm
 481 * the next event, thereby emulating one-shot mode.
482 */
483 ctrl &= ~APBTMR_CONTROL_ENABLE;
484 ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC;
485
486 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
487 /* write again to set free running mode */
488 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
489
490 /*
491 * DW APB p. 46, load counter with all 1s before starting free
492 * running mode.
493 */
494 apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT);
495 ctrl &= ~APBTMR_CONTROL_INT;
496 ctrl |= APBTMR_CONTROL_ENABLE;
497 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
498 break;
499
500 case CLOCK_EVT_MODE_UNUSED:
501 case CLOCK_EVT_MODE_SHUTDOWN:
502 apbt_disable_int(timer_num);
503 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
504 ctrl &= ~APBTMR_CONTROL_ENABLE;
505 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
506 break;
507
508 case CLOCK_EVT_MODE_RESUME:
509 apbt_enable_int(timer_num);
510 break;
511 }
512}
513
514static int apbt_next_event(unsigned long delta,
515 struct clock_event_device *evt)
516{
517 unsigned long ctrl;
518 int timer_num;
519
520 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt);
521
522 timer_num = adev->num;
523 /* Disable timer */
524 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
525 ctrl &= ~APBTMR_CONTROL_ENABLE;
526 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
527 /* write new count */
528 apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT);
529 ctrl |= APBTMR_CONTROL_ENABLE;
530 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
531 return 0;
532}
533
534/*
 535 * The APB timer clock is not in sync with pclk on Langwell, which translates to
 536 * unreliable read values caused by sampling error. The error does not add up
 537 * over time and only happens when a 0 is sampled as a 1 by mistake, so time
 538 * would appear to go backwards. The following code tries to prevent time from
 539 * traveling backwards. A little bit paranoid.
540 */
541static cycle_t apbt_read_clocksource(struct clocksource *cs)
542{
543 unsigned long t0, t1, t2;
544 static unsigned long last_read;
545
546bad_count:
547 t1 = apbt_readl(phy_cs_timer_id,
548 APBTMR_N_CURRENT_VALUE);
549 t2 = apbt_readl(phy_cs_timer_id,
550 APBTMR_N_CURRENT_VALUE);
551 if (unlikely(t1 < t2)) {
552 pr_debug("APBT: read current count error %lx:%lx:%lx\n",
553 t1, t2, t2 - t1);
554 goto bad_count;
555 }
556 /*
 557 * Check against the cached last read to make sure time does not go back;
 558 * it could be a normal rollover, but we do a triple check anyway.
559 */
560 if (unlikely(t2 > last_read)) {
561 /* check if we have a normal rollover */
562 unsigned long raw_intr_status =
563 apbt_readl_reg(APBTMRS_RAW_INT_STATUS);
564 /*
 565 * The cs timer interrupt is masked, but the raw intr bit is set when a
 566 * rollover occurs; we then read the EOI reg to clear it.
567 */
568 if (raw_intr_status & (1 << phy_cs_timer_id)) {
569 apbt_readl(phy_cs_timer_id, APBTMR_N_EOI);
570 goto out;
571 }
572 pr_debug("APB CS going back %lx:%lx:%lx ",
573 t2, last_read, t2 - last_read);
574bad_count_x3:
 575 pr_debug("triple check enforced\n");
576 t0 = apbt_readl(phy_cs_timer_id,
577 APBTMR_N_CURRENT_VALUE);
578 udelay(1);
579 t1 = apbt_readl(phy_cs_timer_id,
580 APBTMR_N_CURRENT_VALUE);
581 udelay(1);
582 t2 = apbt_readl(phy_cs_timer_id,
583 APBTMR_N_CURRENT_VALUE);
584 if ((t2 > t1) || (t1 > t0)) {
585 printk(KERN_ERR "Error: APB CS tripple check failed\n");
586 goto bad_count_x3;
587 }
588 }
589out:
590 last_read = t2;
591 return (cycle_t)~t2;
592}
593
594static int apbt_clocksource_register(void)
595{
596 u64 start, now;
597 cycle_t t1;
598
599 /* Start the counter, use timer 2 as source, timer 0/1 for event */
600 apbt_start_counter(phy_cs_timer_id);
601
602 /* Verify whether apbt counter works */
603 t1 = apbt_read_clocksource(&clocksource_apbt);
604 rdtscll(start);
605
606 /*
607 * We don't know the TSC frequency yet, but waiting for
608 * 200000 TSC cycles is safe:
609 * 4 GHz == 50us
610 * 1 GHz == 200us
611 */
612 do {
613 rep_nop();
614 rdtscll(now);
615 } while ((now - start) < 200000UL);
616
617 /* APBT is the only always on clocksource, it has to work! */
618 if (t1 == apbt_read_clocksource(&clocksource_apbt))
619 panic("APBT counter not counting. APBT disabled\n");
620
621 /*
 622 * Initialize and register the APBT clocksource:
 623 * convert the timer frequency to ns per clock cycle,
 624 * mult = (ns/cycle) * 2^APBT_SHIFT
625 */
626 clocksource_apbt.mult = div_sc(MSEC_PER_SEC,
627 (unsigned long) apbt_freq, APBT_SHIFT);
628 clocksource_register(&clocksource_apbt);
629
630 return 0;
631}
632
633/*
 634 * Early setup of the APBT timer: only timer 0 is used for booting, then we
 635 * switch to per-CPU timers if possible.
 636 * Sets apb_timer_block_enabled when the APBT clockevent is registered
 637 * successfully, and panics if setup fails, since this is the only platform
 638 * timer on Moorestown.
639 */
640void __init apbt_time_init(void)
641{
642#ifdef CONFIG_SMP
643 int i;
644 struct sfi_timer_table_entry *p_mtmr;
645 unsigned int percpu_timer;
646 struct apbt_dev *adev;
647#endif
648
649 if (apb_timer_block_enabled)
650 return;
651 apbt_set_mapping();
652 if (apbt_virt_address) {
653 pr_debug("Found APBT version 0x%lx\n",\
654 apbt_readl_reg(APBTMRS_COMP_VERSION));
655 } else
656 goto out_noapbt;
657 /*
658 * Read the frequency and check for a sane value, for ESL model
659 * we extend the possible clock range to allow time scaling.
660 */
661
662 if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) {
663 pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq);
664 goto out_noapbt;
665 }
666 if (apbt_clocksource_register()) {
667 pr_debug("APBT has failed to register clocksource\n");
668 goto out_noapbt;
669 }
670 if (!apbt_clockevent_register())
671 apb_timer_block_enabled = 1;
672 else {
673 pr_debug("APBT has failed to register clockevent\n");
674 goto out_noapbt;
675 }
676#ifdef CONFIG_SMP
 677 /* the kernel cmdline disabled the per-cpu apb timer, so we use lapic timers */
678 if (disable_apbt_percpu) {
679 printk(KERN_INFO "apbt: disabled per cpu timer\n");
680 return;
681 }
682 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
683 if (num_possible_cpus() <= sfi_mtimer_num) {
684 percpu_timer = 1;
685 apbt_num_timers_used = num_possible_cpus();
686 } else {
687 percpu_timer = 0;
688 apbt_num_timers_used = 1;
689 adev = &per_cpu(cpu_apbt_dev, 0);
690 adev->flags &= ~APBT_DEV_USED;
691 }
692 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
693
694 /* here we set up per CPU timer data structure */
695 apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used,
696 GFP_KERNEL);
697 if (!apbt_devs) {
698 printk(KERN_ERR "Failed to allocate APB timer devices\n");
699 return;
700 }
701 for (i = 0; i < apbt_num_timers_used; i++) {
702 adev = &per_cpu(cpu_apbt_dev, i);
703 adev->num = i;
704 adev->cpu = i;
705 p_mtmr = sfi_get_mtmr(i);
706 if (p_mtmr) {
707 adev->tick = p_mtmr->freq_hz;
708 adev->irq = p_mtmr->irq;
709 } else
710 printk(KERN_ERR "Failed to get timer for cpu %d\n", i);
711 adev->count = 0;
712 sprintf(adev->name, "apbt%d", i);
713 }
714#endif
715
716 return;
717
718out_noapbt:
719 apbt_clear_mapping();
720 apb_timer_block_enabled = 0;
721 panic("failed to enable APB timer\n");
722}
723
724static inline void apbt_disable(int n)
725{
726 if (is_apbt_capable()) {
727 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
728 ctrl &= ~APBTMR_CONTROL_ENABLE;
729 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
730 }
731}
732
733/* called before apb_timer_enable, use early map */
 734unsigned long apbt_quick_calibrate(void)
735{
736 int i, scale;
737 u64 old, new;
738 cycle_t t1, t2;
739 unsigned long khz = 0;
740 u32 loop, shift;
741
742 apbt_set_mapping();
743 apbt_start_counter(phy_cs_timer_id);
744
745 /* check if the timer can count down, otherwise return */
746 old = apbt_read_clocksource(&clocksource_apbt);
747 i = 10000;
748 while (--i) {
749 if (old != apbt_read_clocksource(&clocksource_apbt))
750 break;
751 }
752 if (!i)
753 goto failed;
754
755 /* count 16 ms */
756 loop = (apbt_freq * 1000) << 4;
757
758 /* restart the timer to ensure it won't get to 0 in the calibration */
759 apbt_start_counter(phy_cs_timer_id);
760
761 old = apbt_read_clocksource(&clocksource_apbt);
762 old += loop;
763
764 t1 = __native_read_tsc();
765
766 do {
767 new = apbt_read_clocksource(&clocksource_apbt);
768 } while (new < old);
769
770 t2 = __native_read_tsc();
771
772 shift = 5;
773 if (unlikely(loop >> shift == 0)) {
774 printk(KERN_INFO
775 "APBT TSC calibration failed, not enough resolution\n");
776 return 0;
777 }
778 scale = (int)div_u64((t2 - t1), loop >> shift);
779 khz = (scale * apbt_freq * 1000) >> shift;
780 printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz);
781 return khz;
782failed:
783 return 0;
784}
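
apbt_quick_calibrate() above estimates the TSC frequency from the number of TSC cycles elapsed while the APB timer counts down a known number of ticks: khz = (scale * apbt_freq * 1000) >> shift, with scale = (t2 - t1) / (loop >> shift). A small worked example with assumed numbers (a 25 MHz APB clock and a 1 GHz TSC), just to show the arithmetic:

    #include <stdio.h>

    int main(void)
    {
            unsigned long apbt_freq = 25;                  /* assumed: ticks per usec (25 MHz) */
            unsigned long loop = (apbt_freq * 1000) << 4;  /* ticks in 16 ms = 400000 */
            unsigned long long tsc_delta = 16000000ULL;    /* assumed: 1 GHz TSC over 16 ms */
            unsigned int shift = 5;

            unsigned long scale = (unsigned long)(tsc_delta / (loop >> shift));
            unsigned long khz = (scale * apbt_freq * 1000) >> shift;

            /* 400000 >> 5 = 12500; 16000000 / 12500 = 1280;
             * (1280 * 25 * 1000) >> 5 = 1000000 kHz, i.e. the assumed 1 GHz TSC. */
            printf("TSC freq calculated by APB timer is %lu khz\n", khz);
            return 0;
    }
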
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6e29b2a77aa..00187f1fcfb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1390,7 +1390,7 @@ void __init enable_IR_x2apic(void)
1390 } 1390 }
1391 1391
1392 local_irq_save(flags); 1392 local_irq_save(flags);
1393 mask_8259A(); 1393 legacy_pic->mask_all();
1394 mask_IO_APIC_setup(ioapic_entries); 1394 mask_IO_APIC_setup(ioapic_entries);
1395 1395
1396 if (dmar_table_init_ret) 1396 if (dmar_table_init_ret)
@@ -1422,7 +1422,7 @@ void __init enable_IR_x2apic(void)
1422nox2apic: 1422nox2apic:
1423 if (!ret) /* IR enabling failed */ 1423 if (!ret) /* IR enabling failed */
1424 restore_IO_APIC_setup(ioapic_entries); 1424 restore_IO_APIC_setup(ioapic_entries);
1425 unmask_8259A(); 1425 legacy_pic->restore_mask();
1426 local_irq_restore(flags); 1426 local_irq_restore(flags);
1427 1427
1428out: 1428out:
@@ -2018,7 +2018,7 @@ static int lapic_resume(struct sys_device *dev)
2018 } 2018 }
2019 2019
2020 mask_IO_APIC_setup(ioapic_entries); 2020 mask_IO_APIC_setup(ioapic_entries);
2021 mask_8259A(); 2021 legacy_pic->mask_all();
2022 } 2022 }
2023 2023
2024 if (x2apic_mode) 2024 if (x2apic_mode)
@@ -2062,7 +2062,7 @@ static int lapic_resume(struct sys_device *dev)
2062 2062
2063 if (intr_remapping_enabled) { 2063 if (intr_remapping_enabled) {
2064 reenable_intr_remapping(x2apic_mode); 2064 reenable_intr_remapping(x2apic_mode);
2065 unmask_8259A(); 2065 legacy_pic->restore_mask();
2066 restore_IO_APIC_setup(ioapic_entries); 2066 restore_IO_APIC_setup(ioapic_entries);
2067 free_ioapic_entries(ioapic_entries); 2067 free_ioapic_entries(ioapic_entries);
2068 } 2068 }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 14862f11cc4..e4e0ddcb154 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -143,12 +143,6 @@ static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
143static struct irq_cfg irq_cfgx[NR_IRQS]; 143static struct irq_cfg irq_cfgx[NR_IRQS];
144#endif 144#endif
145 145
146void __init io_apic_disable_legacy(void)
147{
148 nr_legacy_irqs = 0;
149 nr_irqs_gsi = 0;
150}
151
152int __init arch_early_irq_init(void) 146int __init arch_early_irq_init(void)
153{ 147{
154 struct irq_cfg *cfg; 148 struct irq_cfg *cfg;
@@ -157,6 +151,11 @@ int __init arch_early_irq_init(void)
157 int node; 151 int node;
158 int i; 152 int i;
159 153
154 if (!legacy_pic->nr_legacy_irqs) {
155 nr_irqs_gsi = 0;
156 io_apic_irqs = ~0UL;
157 }
158
160 cfg = irq_cfgx; 159 cfg = irq_cfgx;
161 count = ARRAY_SIZE(irq_cfgx); 160 count = ARRAY_SIZE(irq_cfgx);
162 node= cpu_to_node(boot_cpu_id); 161 node= cpu_to_node(boot_cpu_id);
@@ -170,7 +169,7 @@ int __init arch_early_irq_init(void)
170 * For legacy IRQ's, start with assigning irq0 to irq15 to 169 * For legacy IRQ's, start with assigning irq0 to irq15 to
171 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. 170 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
172 */ 171 */
173 if (i < nr_legacy_irqs) { 172 if (i < legacy_pic->nr_legacy_irqs) {
174 cfg[i].vector = IRQ0_VECTOR + i; 173 cfg[i].vector = IRQ0_VECTOR + i;
175 cpumask_set_cpu(0, cfg[i].domain); 174 cpumask_set_cpu(0, cfg[i].domain);
176 } 175 }
@@ -852,7 +851,7 @@ static int __init find_isa_irq_apic(int irq, int type)
852 */ 851 */
853static int EISA_ELCR(unsigned int irq) 852static int EISA_ELCR(unsigned int irq)
854{ 853{
855 if (irq < nr_legacy_irqs) { 854 if (irq < legacy_pic->nr_legacy_irqs) {
856 unsigned int port = 0x4d0 + (irq >> 3); 855 unsigned int port = 0x4d0 + (irq >> 3);
857 return (inb(port) >> (irq & 7)) & 1; 856 return (inb(port) >> (irq & 7)) & 1;
858 } 857 }
@@ -1439,7 +1438,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1439 * controllers like 8259. Now that IO-APIC can handle this irq, update 1438 * controllers like 8259. Now that IO-APIC can handle this irq, update
1440 * the cfg->domain. 1439 * the cfg->domain.
1441 */ 1440 */
1442 if (irq < nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) 1441 if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
1443 apic->vector_allocation_domain(0, cfg->domain); 1442 apic->vector_allocation_domain(0, cfg->domain);
1444 1443
1445 if (assign_irq_vector(irq, cfg, apic->target_cpus())) 1444 if (assign_irq_vector(irq, cfg, apic->target_cpus()))
@@ -1463,8 +1462,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1463 } 1462 }
1464 1463
1465 ioapic_register_intr(irq, desc, trigger); 1464 ioapic_register_intr(irq, desc, trigger);
1466 if (irq < nr_legacy_irqs) 1465 if (irq < legacy_pic->nr_legacy_irqs)
1467 disable_8259A_irq(irq); 1466 legacy_pic->chip->mask(irq);
1468 1467
1469 ioapic_write_entry(apic_id, pin, entry); 1468 ioapic_write_entry(apic_id, pin, entry);
1470} 1469}
@@ -1873,7 +1872,7 @@ __apicdebuginit(void) print_PIC(void)
1873 unsigned int v; 1872 unsigned int v;
1874 unsigned long flags; 1873 unsigned long flags;
1875 1874
1876 if (!nr_legacy_irqs) 1875 if (!legacy_pic->nr_legacy_irqs)
1877 return; 1876 return;
1878 1877
1879 printk(KERN_DEBUG "\nprinting PIC contents\n"); 1878 printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1957,7 +1956,7 @@ void __init enable_IO_APIC(void)
1957 nr_ioapic_registers[apic] = reg_01.bits.entries+1; 1956 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1958 } 1957 }
1959 1958
1960 if (!nr_legacy_irqs) 1959 if (!legacy_pic->nr_legacy_irqs)
1961 return; 1960 return;
1962 1961
1963 for(apic = 0; apic < nr_ioapics; apic++) { 1962 for(apic = 0; apic < nr_ioapics; apic++) {
@@ -2014,7 +2013,7 @@ void disable_IO_APIC(void)
2014 */ 2013 */
2015 clear_IO_APIC(); 2014 clear_IO_APIC();
2016 2015
2017 if (!nr_legacy_irqs) 2016 if (!legacy_pic->nr_legacy_irqs)
2018 return; 2017 return;
2019 2018
2020 /* 2019 /*
@@ -2247,9 +2246,9 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2247 struct irq_cfg *cfg; 2246 struct irq_cfg *cfg;
2248 2247
2249 raw_spin_lock_irqsave(&ioapic_lock, flags); 2248 raw_spin_lock_irqsave(&ioapic_lock, flags);
2250 if (irq < nr_legacy_irqs) { 2249 if (irq < legacy_pic->nr_legacy_irqs) {
2251 disable_8259A_irq(irq); 2250 legacy_pic->chip->mask(irq);
2252 if (i8259A_irq_pending(irq)) 2251 if (legacy_pic->irq_pending(irq))
2253 was_pending = 1; 2252 was_pending = 1;
2254 } 2253 }
2255 cfg = irq_cfg(irq); 2254 cfg = irq_cfg(irq);
@@ -2782,8 +2781,8 @@ static inline void init_IO_APIC_traps(void)
2782 * so default to an old-fashioned 8259 2781 * so default to an old-fashioned 8259
2783 * interrupt if we can.. 2782 * interrupt if we can..
2784 */ 2783 */
2785 if (irq < nr_legacy_irqs) 2784 if (irq < legacy_pic->nr_legacy_irqs)
2786 make_8259A_irq(irq); 2785 legacy_pic->make_irq(irq);
2787 else 2786 else
2788 /* Strange. Oh, well.. */ 2787 /* Strange. Oh, well.. */
2789 desc->chip = &no_irq_chip; 2788 desc->chip = &no_irq_chip;
@@ -2940,7 +2939,7 @@ static inline void __init check_timer(void)
2940 /* 2939 /*
2941 * get/set the timer IRQ vector: 2940 * get/set the timer IRQ vector:
2942 */ 2941 */
2943 disable_8259A_irq(0); 2942 legacy_pic->chip->mask(0);
2944 assign_irq_vector(0, cfg, apic->target_cpus()); 2943 assign_irq_vector(0, cfg, apic->target_cpus());
2945 2944
2946 /* 2945 /*
@@ -2953,7 +2952,7 @@ static inline void __init check_timer(void)
2953 * automatically. 2952 * automatically.
2954 */ 2953 */
2955 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2954 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2956 init_8259A(1); 2955 legacy_pic->init(1);
2957#ifdef CONFIG_X86_32 2956#ifdef CONFIG_X86_32
2958 { 2957 {
2959 unsigned int ver; 2958 unsigned int ver;
@@ -3012,7 +3011,7 @@ static inline void __init check_timer(void)
3012 if (timer_irq_works()) { 3011 if (timer_irq_works()) {
3013 if (nmi_watchdog == NMI_IO_APIC) { 3012 if (nmi_watchdog == NMI_IO_APIC) {
3014 setup_nmi(); 3013 setup_nmi();
3015 enable_8259A_irq(0); 3014 legacy_pic->chip->unmask(0);
3016 } 3015 }
3017 if (disable_timer_pin_1 > 0) 3016 if (disable_timer_pin_1 > 0)
3018 clear_IO_APIC_pin(0, pin1); 3017 clear_IO_APIC_pin(0, pin1);
@@ -3035,14 +3034,14 @@ static inline void __init check_timer(void)
3035 */ 3034 */
3036 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); 3035 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
3037 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 3036 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
3038 enable_8259A_irq(0); 3037 legacy_pic->chip->unmask(0);
3039 if (timer_irq_works()) { 3038 if (timer_irq_works()) {
3040 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 3039 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
3041 timer_through_8259 = 1; 3040 timer_through_8259 = 1;
3042 if (nmi_watchdog == NMI_IO_APIC) { 3041 if (nmi_watchdog == NMI_IO_APIC) {
3043 disable_8259A_irq(0); 3042 legacy_pic->chip->mask(0);
3044 setup_nmi(); 3043 setup_nmi();
3045 enable_8259A_irq(0); 3044 legacy_pic->chip->unmask(0);
3046 } 3045 }
3047 goto out; 3046 goto out;
3048 } 3047 }
@@ -3050,7 +3049,7 @@ static inline void __init check_timer(void)
3050 * Cleanup, just in case ... 3049 * Cleanup, just in case ...
3051 */ 3050 */
3052 local_irq_disable(); 3051 local_irq_disable();
3053 disable_8259A_irq(0); 3052 legacy_pic->chip->mask(0);
3054 clear_IO_APIC_pin(apic2, pin2); 3053 clear_IO_APIC_pin(apic2, pin2);
3055 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 3054 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
3056 } 3055 }
@@ -3069,22 +3068,22 @@ static inline void __init check_timer(void)
3069 3068
3070 lapic_register_intr(0, desc); 3069 lapic_register_intr(0, desc);
3071 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 3070 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
3072 enable_8259A_irq(0); 3071 legacy_pic->chip->unmask(0);
3073 3072
3074 if (timer_irq_works()) { 3073 if (timer_irq_works()) {
3075 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3074 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3076 goto out; 3075 goto out;
3077 } 3076 }
3078 local_irq_disable(); 3077 local_irq_disable();
3079 disable_8259A_irq(0); 3078 legacy_pic->chip->mask(0);
3080 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 3079 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3081 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 3080 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
3082 3081
3083 apic_printk(APIC_QUIET, KERN_INFO 3082 apic_printk(APIC_QUIET, KERN_INFO
3084 "...trying to set up timer as ExtINT IRQ...\n"); 3083 "...trying to set up timer as ExtINT IRQ...\n");
3085 3084
3086 init_8259A(0); 3085 legacy_pic->init(0);
3087 make_8259A_irq(0); 3086 legacy_pic->make_irq(0);
3088 apic_write(APIC_LVT0, APIC_DM_EXTINT); 3087 apic_write(APIC_LVT0, APIC_DM_EXTINT);
3089 3088
3090 unlock_ExtINT_logic(); 3089 unlock_ExtINT_logic();
@@ -3126,7 +3125,7 @@ void __init setup_IO_APIC(void)
3126 /* 3125 /*
3127 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 3126 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
3128 */ 3127 */
3129 io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; 3128 io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
3130 3129
3131 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 3130 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
3132 /* 3131 /*
@@ -3137,7 +3136,7 @@ void __init setup_IO_APIC(void)
3137 sync_Arb_IDs(); 3136 sync_Arb_IDs();
3138 setup_IO_APIC_irqs(); 3137 setup_IO_APIC_irqs();
3139 init_IO_APIC_traps(); 3138 init_IO_APIC_traps();
3140 if (nr_legacy_irqs) 3139 if (legacy_pic->nr_legacy_irqs)
3141 check_timer(); 3140 check_timer();
3142} 3141}
3143 3142
@@ -3928,7 +3927,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3928 /* 3927 /*
3929 * IRQs < 16 are already in the irq_2_pin[] map 3928 * IRQs < 16 are already in the irq_2_pin[] map
3930 */ 3929 */
3931 if (irq >= nr_legacy_irqs) { 3930 if (irq >= legacy_pic->nr_legacy_irqs) {
3932 cfg = desc->chip_data; 3931 cfg = desc->chip_data;
3933 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { 3932 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3934 printk(KERN_INFO "can not add pin %d for irq %d\n", 3933 printk(KERN_INFO "can not add pin %d for irq %d\n",
@@ -4302,3 +4301,24 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4302 4301
4303 nr_ioapics++; 4302 nr_ioapics++;
4304} 4303}
4304
4305/* Enable IOAPIC early just for system timer */
4306void __init pre_init_apic_IRQ0(void)
4307{
4308 struct irq_cfg *cfg;
4309 struct irq_desc *desc;
4310
4311 printk(KERN_INFO "Early APIC setup for system timer0\n");
4312#ifndef CONFIG_SMP
4313 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
4314#endif
4315 desc = irq_to_desc_alloc_node(0, 0);
4316
4317 setup_local_APIC();
4318
4319 cfg = irq_cfg(0);
4320 add_pin_to_irq_node(cfg, 0, 0, 0);
4321 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
4322
4323 setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
4324}
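The hunks above replace direct 8259A calls (disable_8259A_irq(), init_8259A(), make_8259A_irq(), i8259A_irq_pending()) with indirect calls through the new legacy_pic abstraction. A minimal sketch of the interface those call sites assume, inferred from the default_legacy_pic initializer added to i8259.c later in this patch; the real declaration lives in a header that is not shown here, so field types and ordering are assumptions:

	struct legacy_pic {
		int nr_legacy_irqs;                   /* 0 on platforms without a PC PIC */
		struct irq_chip *chip;                /* ->mask()/->unmask() replace {dis,en}able_8259A_irq() */
		void (*mask_all)(void);
		void (*restore_mask)(void);
		void (*init)(int auto_eoi);           /* replaces init_8259A() */
		int (*irq_pending)(unsigned int irq); /* replaces i8259A_irq_pending() */
		void (*make_irq)(unsigned int irq);   /* replaces make_8259A_irq() */
	};

	extern struct legacy_pic *legacy_pic;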
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index bd7c96b5e8d..8aa65adbd25 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -177,7 +177,7 @@ int __init check_nmi_watchdog(void)
177error: 177error:
178 if (nmi_watchdog == NMI_IO_APIC) { 178 if (nmi_watchdog == NMI_IO_APIC) {
179 if (!timer_through_8259) 179 if (!timer_through_8259)
180 disable_8259A_irq(0); 180 legacy_pic->chip->mask(0);
181 on_each_cpu(__acpi_nmi_disable, NULL, 1); 181 on_each_cpu(__acpi_nmi_disable, NULL, 1);
182 } 182 }
183 183
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 47dd856708e..3e28401f161 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -277,6 +277,7 @@ static __init void early_check_numaq(void)
277 x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; 277 x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
278 x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; 278 x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
279 x86_init.timers.tsc_pre_init = numaq_tsc_init; 279 x86_init.timers.tsc_pre_init = numaq_tsc_init;
280 x86_init.pci.init = pci_numaq_init;
280 } 281 }
281} 282}
282 283
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index f138c6c389b..870e6cc6ad2 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -10,6 +10,20 @@ if CPU_FREQ
10 10
11comment "CPUFreq processor drivers" 11comment "CPUFreq processor drivers"
12 12
13config X86_PCC_CPUFREQ
14 tristate "Processor Clocking Control interface driver"
15 depends on ACPI && ACPI_PROCESSOR
16 help
17 This driver adds support for the PCC interface.
18
19 For details, take a look at:
20 <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
21
22 To compile this driver as a module, choose M here: the
23 module will be called pcc-cpufreq.
24
25 If in doubt, say N.
26
13config X86_ACPI_CPUFREQ 27config X86_ACPI_CPUFREQ
14 tristate "ACPI Processor P-States driver" 28 tristate "ACPI Processor P-States driver"
15 select CPU_FREQ_TABLE 29 select CPU_FREQ_TABLE
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 509296df294..1840c0a5170 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -4,6 +4,7 @@
4 4
5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o 5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o 6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
7obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
7obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o 8obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
8obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o 9obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
9obj-$(CONFIG_X86_LONGHAUL) += longhaul.o 10obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
new file mode 100644
index 00000000000..ff36d2979a9
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -0,0 +1,620 @@
1/*
2 * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
3 *
4 * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
5 * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
6 * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
7 *
8 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2 of the License.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
17 * INFRINGEMENT. See the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 */
25
26#include <linux/kernel.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <linux/smp.h>
30#include <linux/sched.h>
31#include <linux/cpufreq.h>
32#include <linux/compiler.h>
33
34#include <linux/acpi.h>
35#include <linux/io.h>
36#include <linux/spinlock.h>
37#include <linux/uaccess.h>
38
39#include <acpi/processor.h>
40
41#define PCC_VERSION "1.00.00"
42#define POLL_LOOPS 300
43
44#define CMD_COMPLETE 0x1
45#define CMD_GET_FREQ 0x0
46#define CMD_SET_FREQ 0x1
47
48#define BUF_SZ 4
49
50#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
51 "pcc-cpufreq", msg)
52
53struct pcc_register_resource {
54 u8 descriptor;
55 u16 length;
56 u8 space_id;
57 u8 bit_width;
58 u8 bit_offset;
59 u8 access_size;
60 u64 address;
61} __attribute__ ((packed));
62
63struct pcc_memory_resource {
64 u8 descriptor;
65 u16 length;
66 u8 space_id;
67 u8 resource_usage;
68 u8 type_specific;
69 u64 granularity;
70 u64 minimum;
71 u64 maximum;
72 u64 translation_offset;
73 u64 address_length;
74} __attribute__ ((packed));
75
76static struct cpufreq_driver pcc_cpufreq_driver;
77
78struct pcc_header {
79 u32 signature;
80 u16 length;
81 u8 major;
82 u8 minor;
83 u32 features;
84 u16 command;
85 u16 status;
86 u32 latency;
87 u32 minimum_time;
88 u32 maximum_time;
89 u32 nominal;
90 u32 throttled_frequency;
91 u32 minimum_frequency;
92};
93
94static void __iomem *pcch_virt_addr;
95static struct pcc_header __iomem *pcch_hdr;
96
97static DEFINE_SPINLOCK(pcc_lock);
98
99static struct acpi_generic_address doorbell;
100
101static u64 doorbell_preserve;
102static u64 doorbell_write;
103
104static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
105 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
106
107struct pcc_cpu {
108 u32 input_offset;
109 u32 output_offset;
110};
111
112static struct pcc_cpu *pcc_cpu_info;
113
114static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
115{
116 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
117 policy->cpuinfo.max_freq);
118 return 0;
119}
120
121static inline void pcc_cmd(void)
122{
123 u64 doorbell_value;
124 int i;
125
126 acpi_read(&doorbell_value, &doorbell);
127 acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
128 &doorbell);
129
130 for (i = 0; i < POLL_LOOPS; i++) {
131 if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
132 break;
133 }
134}
135
136static inline void pcc_clear_mapping(void)
137{
138 if (pcch_virt_addr)
139 iounmap(pcch_virt_addr);
140 pcch_virt_addr = NULL;
141}
142
143static unsigned int pcc_get_freq(unsigned int cpu)
144{
145 struct pcc_cpu *pcc_cpu_data;
146 unsigned int curr_freq;
147 unsigned int freq_limit;
148 u16 status;
149 u32 input_buffer;
150 u32 output_buffer;
151
152 spin_lock(&pcc_lock);
153
154 dprintk("get: get_freq for CPU %d\n", cpu);
155 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
156
157 input_buffer = 0x1;
158 iowrite32(input_buffer,
159 (pcch_virt_addr + pcc_cpu_data->input_offset));
160 iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
161
162 pcc_cmd();
163
164 output_buffer =
165 ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
166
167 /* Clear the input buffer - we are done with the current command */
168 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
169
170 status = ioread16(&pcch_hdr->status);
171 if (status != CMD_COMPLETE) {
172 dprintk("get: FAILED: for CPU %d, status is %d\n",
173 cpu, status);
174 goto cmd_incomplete;
175 }
176 iowrite16(0, &pcch_hdr->status);
177 curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
178 / 100) * 1000);
179
180 dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
181 "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
182 cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
183 output_buffer, curr_freq);
184
185 freq_limit = (output_buffer >> 8) & 0xff;
186 if (freq_limit != 0xff) {
187 dprintk("get: frequency for cpu %d is being temporarily"
188 " capped at %d\n", cpu, curr_freq);
189 }
190
191 spin_unlock(&pcc_lock);
192 return curr_freq;
193
194cmd_incomplete:
195 iowrite16(0, &pcch_hdr->status);
196 spin_unlock(&pcc_lock);
197 return -EINVAL;
198}
199
200static int pcc_cpufreq_target(struct cpufreq_policy *policy,
201 unsigned int target_freq,
202 unsigned int relation)
203{
204 struct pcc_cpu *pcc_cpu_data;
205 struct cpufreq_freqs freqs;
206 u16 status;
207 u32 input_buffer;
208 int cpu;
209
210 spin_lock(&pcc_lock);
211 cpu = policy->cpu;
212 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
213
214 dprintk("target: CPU %d should go to target freq: %d "
215 "(virtual) input_offset is 0x%x\n",
216 cpu, target_freq,
217 (pcch_virt_addr + pcc_cpu_data->input_offset));
218
219 freqs.new = target_freq;
220 freqs.cpu = cpu;
221 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
222
223 input_buffer = 0x1 | (((target_freq * 100)
224 / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
225 iowrite32(input_buffer,
226 (pcch_virt_addr + pcc_cpu_data->input_offset));
227 iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
228
229 pcc_cmd();
230
231 /* Clear the input buffer - we are done with the current command */
232 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
233
234 status = ioread16(&pcch_hdr->status);
235 if (status != CMD_COMPLETE) {
236 dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
237 cpu, status);
238 goto cmd_incomplete;
239 }
240 iowrite16(0, &pcch_hdr->status);
241
242 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
243 dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
244 spin_unlock(&pcc_lock);
245
246 return 0;
247
248cmd_incomplete:
249 iowrite16(0, &pcch_hdr->status);
250 spin_unlock(&pcc_lock);
251 return -EINVAL;
252}
253
254static int pcc_get_offset(int cpu)
255{
256 acpi_status status;
257 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
258 union acpi_object *pccp, *offset;
259 struct pcc_cpu *pcc_cpu_data;
260 struct acpi_processor *pr;
261 int ret = 0;
262
263 pr = per_cpu(processors, cpu);
264 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
265
266 status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
267 if (ACPI_FAILURE(status))
268 return -ENODEV;
269
270 pccp = buffer.pointer;
271 if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
272 ret = -ENODEV;
273 goto out_free;
274 };
275
276 offset = &(pccp->package.elements[0]);
277 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
278 ret = -ENODEV;
279 goto out_free;
280 }
281
282 pcc_cpu_data->input_offset = offset->integer.value;
283
284 offset = &(pccp->package.elements[1]);
285 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
286 ret = -ENODEV;
287 goto out_free;
288 }
289
290 pcc_cpu_data->output_offset = offset->integer.value;
291
292 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
293 memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
294
295 dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
296 "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
297 cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
298out_free:
299 kfree(buffer.pointer);
300 return ret;
301}
302
303static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
304{
305 acpi_status status;
306 struct acpi_object_list input;
307 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
308 union acpi_object in_params[4];
309 union acpi_object *out_obj;
310 u32 capabilities[2];
311 u32 errors;
312 u32 supported;
313 int ret = 0;
314
315 input.count = 4;
316 input.pointer = in_params;
317 input.count = 4;
318 input.pointer = in_params;
319 in_params[0].type = ACPI_TYPE_BUFFER;
320 in_params[0].buffer.length = 16;
321 in_params[0].buffer.pointer = OSC_UUID;
322 in_params[1].type = ACPI_TYPE_INTEGER;
323 in_params[1].integer.value = 1;
324 in_params[2].type = ACPI_TYPE_INTEGER;
325 in_params[2].integer.value = 2;
326 in_params[3].type = ACPI_TYPE_BUFFER;
327 in_params[3].buffer.length = 8;
328 in_params[3].buffer.pointer = (u8 *)&capabilities;
329
330 capabilities[0] = OSC_QUERY_ENABLE;
331 capabilities[1] = 0x1;
332
333 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
334 if (ACPI_FAILURE(status))
335 return -ENODEV;
336
337 if (!output.length)
338 return -ENODEV;
339
340 out_obj = output.pointer;
341 if (out_obj->type != ACPI_TYPE_BUFFER) {
342 ret = -ENODEV;
343 goto out_free;
344 }
345
346 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
347 if (errors) {
348 ret = -ENODEV;
349 goto out_free;
350 }
351
352 supported = *((u32 *)(out_obj->buffer.pointer + 4));
353 if (!(supported & 0x1)) {
354 ret = -ENODEV;
355 goto out_free;
356 }
357
358 kfree(output.pointer);
359 capabilities[0] = 0x0;
360 capabilities[1] = 0x1;
361
362 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
363 if (ACPI_FAILURE(status))
364 return -ENODEV;
365
366 if (!output.length)
367 return -ENODEV;
368
369 out_obj = output.pointer;
370 if (out_obj->type != ACPI_TYPE_BUFFER) {
371 ret = -ENODEV;
372 goto out_free;
373 }
374
375 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
376 if (errors) {
377 ret = -ENODEV;
378 goto out_free;
379 }
380
381 supported = *((u32 *)(out_obj->buffer.pointer + 4));
382 if (!(supported & 0x1)) {
383 ret = -ENODEV;
384 goto out_free;
385 }
386
387out_free:
388 kfree(output.pointer);
389 return ret;
390}
391
392static int __init pcc_cpufreq_probe(void)
393{
394 acpi_status status;
395 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
396 struct pcc_memory_resource *mem_resource;
397 struct pcc_register_resource *reg_resource;
398 union acpi_object *out_obj, *member;
399 acpi_handle handle, osc_handle;
400 int ret = 0;
401
402 status = acpi_get_handle(NULL, "\\_SB", &handle);
403 if (ACPI_FAILURE(status))
404 return -ENODEV;
405
406 status = acpi_get_handle(handle, "_OSC", &osc_handle);
407 if (ACPI_SUCCESS(status)) {
408 ret = pcc_cpufreq_do_osc(&osc_handle);
409 if (ret)
410 dprintk("probe: _OSC evaluation did not succeed\n");
411 /* Firmware's use of _OSC is optional */
412 ret = 0;
413 }
414
415 status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
416 if (ACPI_FAILURE(status))
417 return -ENODEV;
418
419 out_obj = output.pointer;
420 if (out_obj->type != ACPI_TYPE_PACKAGE) {
421 ret = -ENODEV;
422 goto out_free;
423 }
424
425 member = &out_obj->package.elements[0];
426 if (member->type != ACPI_TYPE_BUFFER) {
427 ret = -ENODEV;
428 goto out_free;
429 }
430
431 mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
432
433 dprintk("probe: mem_resource descriptor: 0x%x,"
434 " length: %d, space_id: %d, resource_usage: %d,"
435 " type_specific: %d, granularity: 0x%llx,"
436 " minimum: 0x%llx, maximum: 0x%llx,"
437 " translation_offset: 0x%llx, address_length: 0x%llx\n",
438 mem_resource->descriptor, mem_resource->length,
439 mem_resource->space_id, mem_resource->resource_usage,
440 mem_resource->type_specific, mem_resource->granularity,
441 mem_resource->minimum, mem_resource->maximum,
442 mem_resource->translation_offset,
443 mem_resource->address_length);
444
445 if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
446 ret = -ENODEV;
447 goto out_free;
448 }
449
450 pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
451 mem_resource->address_length);
452 if (pcch_virt_addr == NULL) {
453 dprintk("probe: could not map shared mem region\n");
454 goto out_free;
455 }
456 pcch_hdr = pcch_virt_addr;
457
458 dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
459 dprintk("probe: PCCH header is at physical address: 0x%llx,"
460 " signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
461 " supported features: 0x%x, command field: 0x%x,"
462 " status field: 0x%x, nominal latency: %d us\n",
463 mem_resource->minimum, ioread32(&pcch_hdr->signature),
464 ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
465 ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
466 ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
467 ioread32(&pcch_hdr->latency));
468
469 dprintk("probe: min time between commands: %d us,"
470 " max time between commands: %d us,"
471 " nominal CPU frequency: %d MHz,"
472 " minimum CPU frequency: %d MHz,"
473 " minimum CPU frequency without throttling: %d MHz\n",
474 ioread32(&pcch_hdr->minimum_time),
475 ioread32(&pcch_hdr->maximum_time),
476 ioread32(&pcch_hdr->nominal),
477 ioread32(&pcch_hdr->throttled_frequency),
478 ioread32(&pcch_hdr->minimum_frequency));
479
480 member = &out_obj->package.elements[1];
481 if (member->type != ACPI_TYPE_BUFFER) {
482 ret = -ENODEV;
483 goto pcch_free;
484 }
485
486 reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
487
488 doorbell.space_id = reg_resource->space_id;
489 doorbell.bit_width = reg_resource->bit_width;
490 doorbell.bit_offset = reg_resource->bit_offset;
491 doorbell.access_width = 64;
492 doorbell.address = reg_resource->address;
493
494 dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
495 "bit_offset is %d, access_width is %d, address is 0x%llx\n",
496 doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
497 doorbell.access_width, reg_resource->address);
498
499 member = &out_obj->package.elements[2];
500 if (member->type != ACPI_TYPE_INTEGER) {
501 ret = -ENODEV;
502 goto pcch_free;
503 }
504
505 doorbell_preserve = member->integer.value;
506
507 member = &out_obj->package.elements[3];
508 if (member->type != ACPI_TYPE_INTEGER) {
509 ret = -ENODEV;
510 goto pcch_free;
511 }
512
513 doorbell_write = member->integer.value;
514
515 dprintk("probe: doorbell_preserve: 0x%llx,"
516 " doorbell_write: 0x%llx\n",
517 doorbell_preserve, doorbell_write);
518
519 pcc_cpu_info = alloc_percpu(struct pcc_cpu);
520 if (!pcc_cpu_info) {
521 ret = -ENOMEM;
522 goto pcch_free;
523 }
524
525 printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
526 " limits: %d MHz, %d MHz\n", PCC_VERSION,
527 ioread32(&pcch_hdr->minimum_frequency),
528 ioread32(&pcch_hdr->nominal));
529 kfree(output.pointer);
530 return ret;
531pcch_free:
532 pcc_clear_mapping();
533out_free:
534 kfree(output.pointer);
535 return ret;
536}
537
538static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
539{
540 unsigned int cpu = policy->cpu;
541 unsigned int result = 0;
542
543 if (!pcch_virt_addr) {
544 result = -1;
545 goto pcch_null;
546 }
547
548 result = pcc_get_offset(cpu);
549 if (result) {
550 dprintk("init: PCCP evaluation failed\n");
551 goto free;
552 }
553
554 policy->max = policy->cpuinfo.max_freq =
555 ioread32(&pcch_hdr->nominal) * 1000;
556 policy->min = policy->cpuinfo.min_freq =
557 ioread32(&pcch_hdr->minimum_frequency) * 1000;
558 policy->cur = pcc_get_freq(cpu);
559
560 dprintk("init: policy->max is %d, policy->min is %d\n",
561 policy->max, policy->min);
562
563 return 0;
564free:
565 pcc_clear_mapping();
566 free_percpu(pcc_cpu_info);
567pcch_null:
568 return result;
569}
570
571static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
572{
573 return 0;
574}
575
576static struct cpufreq_driver pcc_cpufreq_driver = {
577 .flags = CPUFREQ_CONST_LOOPS,
578 .get = pcc_get_freq,
579 .verify = pcc_cpufreq_verify,
580 .target = pcc_cpufreq_target,
581 .init = pcc_cpufreq_cpu_init,
582 .exit = pcc_cpufreq_cpu_exit,
583 .name = "pcc-cpufreq",
584 .owner = THIS_MODULE,
585};
586
587static int __init pcc_cpufreq_init(void)
588{
589 int ret;
590
591 if (acpi_disabled)
592 return 0;
593
594 ret = pcc_cpufreq_probe();
595 if (ret) {
596 dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
597 return ret;
598 }
599
600 ret = cpufreq_register_driver(&pcc_cpufreq_driver);
601
602 return ret;
603}
604
605static void __exit pcc_cpufreq_exit(void)
606{
607 cpufreq_unregister_driver(&pcc_cpufreq_driver);
608
609 pcc_clear_mapping();
610
611 free_percpu(pcc_cpu_info);
612}
613
614MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
615MODULE_VERSION(PCC_VERSION);
616MODULE_DESCRIPTION("Processor Clocking Control interface driver");
617MODULE_LICENSE("GPL");
618
619late_initcall(pcc_cpufreq_init);
620module_exit(pcc_cpufreq_exit);
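The PCC mailbox above exchanges frequencies as a percentage of the nominal frequency read from the PCCH header. A small illustrative snippet mirroring the conversions in pcc_get_freq() and pcc_cpufreq_target(); the numeric values are made up:

	unsigned int nominal_mhz = 2800;  /* ioread32(&pcch_hdr->nominal), illustrative */

	/* pcc_get_freq(): the low byte of the output buffer is percent-of-nominal. */
	u32 output_buffer = 50;                                          /* 50% */
	unsigned int cur_khz = ((nominal_mhz * (output_buffer & 0xff)) / 100) * 1000;
	/* cur_khz == 1400000, i.e. 1.4 GHz */

	/* pcc_cpufreq_target(): the requested kHz value is converted back to a
	 * percentage in bits 8-15 of the input buffer; the driver always sets bit 0. */
	unsigned int target_khz = 1400000;
	u32 input_buffer = 0x1 | (((target_khz * 100) / (nominal_mhz * 1000)) << 8);
	/* input_buffer == 0x3201 (0x32 == 50 percent, low bit set) */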
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index fe4622e8c83..79556bd9b60 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -145,6 +145,7 @@ struct set_mtrr_data {
145 145
146/** 146/**
147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs. 147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
148 * @info: pointer to mtrr configuration data
148 * 149 *
149 * Returns nothing. 150 * Returns nothing.
150 */ 151 */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 641ccb9dddb..b1fbdeecf6c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -676,7 +676,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
676 if (c->weight != w) 676 if (c->weight != w)
677 continue; 677 continue;
678 678
679 for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { 679 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
680 if (!test_bit(j, used_mask)) 680 if (!test_bit(j, used_mask))
681 break; 681 break;
682 } 682 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cf6590cf4a5..977e7544738 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -757,7 +757,7 @@ again:
757 757
758 inc_irq_stat(apic_perf_irqs); 758 inc_irq_stat(apic_perf_irqs);
759 ack = status; 759 ack = status;
760 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 760 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
761 struct perf_event *event = cpuc->events[bit]; 761 struct perf_event *event = cpuc->events[bit];
762 762
763 clear_bit(bit, (unsigned long *) &status); 763 clear_bit(bit, (unsigned long *) &status);
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 8c93a84bb62..fb725ee15f5 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -34,6 +34,12 @@
34static int i8259A_auto_eoi; 34static int i8259A_auto_eoi;
35DEFINE_RAW_SPINLOCK(i8259A_lock); 35DEFINE_RAW_SPINLOCK(i8259A_lock);
36static void mask_and_ack_8259A(unsigned int); 36static void mask_and_ack_8259A(unsigned int);
37static void mask_8259A(void);
38static void unmask_8259A(void);
39static void disable_8259A_irq(unsigned int irq);
40static void enable_8259A_irq(unsigned int irq);
41static void init_8259A(int auto_eoi);
42static int i8259A_irq_pending(unsigned int irq);
37 43
38struct irq_chip i8259A_chip = { 44struct irq_chip i8259A_chip = {
39 .name = "XT-PIC", 45 .name = "XT-PIC",
@@ -63,7 +69,7 @@ unsigned int cached_irq_mask = 0xffff;
63 */ 69 */
64unsigned long io_apic_irqs; 70unsigned long io_apic_irqs;
65 71
66void disable_8259A_irq(unsigned int irq) 72static void disable_8259A_irq(unsigned int irq)
67{ 73{
68 unsigned int mask = 1 << irq; 74 unsigned int mask = 1 << irq;
69 unsigned long flags; 75 unsigned long flags;
@@ -77,7 +83,7 @@ void disable_8259A_irq(unsigned int irq)
77 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 83 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
78} 84}
79 85
80void enable_8259A_irq(unsigned int irq) 86static void enable_8259A_irq(unsigned int irq)
81{ 87{
82 unsigned int mask = ~(1 << irq); 88 unsigned int mask = ~(1 << irq);
83 unsigned long flags; 89 unsigned long flags;
@@ -91,7 +97,7 @@ void enable_8259A_irq(unsigned int irq)
91 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 97 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
92} 98}
93 99
94int i8259A_irq_pending(unsigned int irq) 100static int i8259A_irq_pending(unsigned int irq)
95{ 101{
96 unsigned int mask = 1<<irq; 102 unsigned int mask = 1<<irq;
97 unsigned long flags; 103 unsigned long flags;
@@ -107,7 +113,7 @@ int i8259A_irq_pending(unsigned int irq)
107 return ret; 113 return ret;
108} 114}
109 115
110void make_8259A_irq(unsigned int irq) 116static void make_8259A_irq(unsigned int irq)
111{ 117{
112 disable_irq_nosync(irq); 118 disable_irq_nosync(irq);
113 io_apic_irqs &= ~(1<<irq); 119 io_apic_irqs &= ~(1<<irq);
@@ -281,7 +287,7 @@ static int __init i8259A_init_sysfs(void)
281 287
282device_initcall(i8259A_init_sysfs); 288device_initcall(i8259A_init_sysfs);
283 289
284void mask_8259A(void) 290static void mask_8259A(void)
285{ 291{
286 unsigned long flags; 292 unsigned long flags;
287 293
@@ -293,7 +299,7 @@ void mask_8259A(void)
293 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 299 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
294} 300}
295 301
296void unmask_8259A(void) 302static void unmask_8259A(void)
297{ 303{
298 unsigned long flags; 304 unsigned long flags;
299 305
@@ -305,7 +311,7 @@ void unmask_8259A(void)
305 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 311 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
306} 312}
307 313
308void init_8259A(int auto_eoi) 314static void init_8259A(int auto_eoi)
309{ 315{
310 unsigned long flags; 316 unsigned long flags;
311 317
@@ -358,3 +364,47 @@ void init_8259A(int auto_eoi)
358 364
359 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 365 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
360} 366}
367
368/*
369 * Make i8259 a driver so that PIC functions can be selected at run time. The
370 * goal is a single x86 binary that runs on both PC-compatible and non-PC-
371 * compatible platforms, such as x86 MID.
372 */
373
374static void legacy_pic_noop(void) { };
375static void legacy_pic_uint_noop(unsigned int unused) { };
376static void legacy_pic_int_noop(int unused) { };
377
378static struct irq_chip dummy_pic_chip = {
379 .name = "dummy pic",
380 .mask = legacy_pic_uint_noop,
381 .unmask = legacy_pic_uint_noop,
382 .disable = legacy_pic_uint_noop,
383 .mask_ack = legacy_pic_uint_noop,
384};
385static int legacy_pic_irq_pending_noop(unsigned int irq)
386{
387 return 0;
388}
389
390struct legacy_pic null_legacy_pic = {
391 .nr_legacy_irqs = 0,
392 .chip = &dummy_pic_chip,
393 .mask_all = legacy_pic_noop,
394 .restore_mask = legacy_pic_noop,
395 .init = legacy_pic_int_noop,
396 .irq_pending = legacy_pic_irq_pending_noop,
397 .make_irq = legacy_pic_uint_noop,
398};
399
400struct legacy_pic default_legacy_pic = {
401 .nr_legacy_irqs = NR_IRQS_LEGACY,
402 .chip = &i8259A_chip,
403 .mask_all = mask_8259A,
404 .restore_mask = unmask_8259A,
405 .init = init_8259A,
406 .irq_pending = i8259A_irq_pending,
407 .make_irq = make_8259A_irq,
408};
409
410struct legacy_pic *legacy_pic = &default_legacy_pic;
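With the PIC operations behind a pointer, platforms that have no 8259 can select the no-op implementation at boot. A hypothetical sketch of such a platform hook; the actual wiring for x86 MID is not part of this hunk, and the function name below is invented:

	static void __init mid_pic_setup(void)
	{
		/* No PC-style PIC on this SoC: make all legacy-PIC calls no-ops. */
		legacy_pic = &null_legacy_pic;
	}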
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index fce55d53263..ef257fc2921 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -99,9 +99,6 @@ int vector_used_by_percpu_irq(unsigned int vector)
99 return 0; 99 return 0;
100} 100}
101 101
102/* Number of legacy interrupts */
103int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
104
105void __init init_ISA_irqs(void) 102void __init init_ISA_irqs(void)
106{ 103{
107 int i; 104 int i;
@@ -109,12 +106,12 @@ void __init init_ISA_irqs(void)
109#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) 106#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
110 init_bsp_APIC(); 107 init_bsp_APIC();
111#endif 108#endif
112 init_8259A(0); 109 legacy_pic->init(0);
113 110
114 /* 111 /*
115 * 16 old-style INTA-cycle interrupts: 112 * 16 old-style INTA-cycle interrupts:
116 */ 113 */
117 for (i = 0; i < NR_IRQS_LEGACY; i++) { 114 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) {
118 struct irq_desc *desc = irq_to_desc(i); 115 struct irq_desc *desc = irq_to_desc(i);
119 116
120 desc->status = IRQ_DISABLED; 117 desc->status = IRQ_DISABLED;
@@ -138,7 +135,7 @@ void __init init_IRQ(void)
138 * then this vector space can be freed and re-used dynamically as the 135 * then this vector space can be freed and re-used dynamically as the
139 * irq's migrate etc. 136 * irq's migrate etc.
140 */ 137 */
141 for (i = 0; i < nr_legacy_irqs; i++) 138 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
142 per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; 139 per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
143 140
144 x86_init.irqs.intr_init(); 141 x86_init.irqs.intr_init();
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5de9f4a9c3f..b43bbaebe2c 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -49,6 +49,7 @@
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kdebug.h> 50#include <linux/kdebug.h>
51#include <linux/kallsyms.h> 51#include <linux/kallsyms.h>
52#include <linux/ftrace.h>
52 53
53#include <asm/cacheflush.h> 54#include <asm/cacheflush.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
@@ -106,16 +107,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
106}; 107};
107const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 108const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
108 109
109/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 110static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
110static void __kprobes set_jmp_op(void *from, void *to)
111{ 111{
112 struct __arch_jmp_op { 112 struct __arch_relative_insn {
113 char op; 113 u8 op;
114 s32 raddr; 114 s32 raddr;
115 } __attribute__((packed)) * jop; 115 } __attribute__((packed)) *insn;
116 jop = (struct __arch_jmp_op *)from; 116
117 jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); 117 insn = (struct __arch_relative_insn *)from;
118 jop->op = RELATIVEJUMP_INSTRUCTION; 118 insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
119 insn->op = op;
120}
121
122/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
123static void __kprobes synthesize_reljump(void *from, void *to)
124{
125 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
119} 126}
120 127
121/* 128/*
@@ -202,7 +209,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
202 /* 209 /*
203 * Basically, kp->ainsn.insn has an original instruction. 210 * Basically, kp->ainsn.insn has an original instruction.
204 * However, RIP-relative instruction can not do single-stepping 211 * However, RIP-relative instruction can not do single-stepping
205 * at different place, fix_riprel() tweaks the displacement of 212 * at different place, __copy_instruction() tweaks the displacement of
206 * that instruction. In that case, we can't recover the instruction 213 * that instruction. In that case, we can't recover the instruction
207 * from the kp->ainsn.insn. 214 * from the kp->ainsn.insn.
208 * 215 *
@@ -284,21 +291,37 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
284} 291}
285 292
286/* 293/*
287 * Adjust the displacement if the instruction uses the %rip-relative 294 * Copy an instruction and adjust the displacement if the instruction
288 * addressing mode. 295 * uses the %rip-relative addressing mode.
289 * If it does, Return the address of the 32-bit displacement word. 296 * If it does, Return the address of the 32-bit displacement word.
290 * If not, return null. 297 * If not, return null.
291 * Only applicable to 64-bit x86. 298 * Only applicable to 64-bit x86.
292 */ 299 */
293static void __kprobes fix_riprel(struct kprobe *p) 300static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
294{ 301{
295#ifdef CONFIG_X86_64
296 struct insn insn; 302 struct insn insn;
297 kernel_insn_init(&insn, p->ainsn.insn); 303 int ret;
304 kprobe_opcode_t buf[MAX_INSN_SIZE];
298 305
306 kernel_insn_init(&insn, src);
307 if (recover) {
308 insn_get_opcode(&insn);
309 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
310 ret = recover_probed_instruction(buf,
311 (unsigned long)src);
312 if (ret)
313 return 0;
314 kernel_insn_init(&insn, buf);
315 }
316 }
317 insn_get_length(&insn);
318 memcpy(dest, insn.kaddr, insn.length);
319
320#ifdef CONFIG_X86_64
299 if (insn_rip_relative(&insn)) { 321 if (insn_rip_relative(&insn)) {
300 s64 newdisp; 322 s64 newdisp;
301 u8 *disp; 323 u8 *disp;
324 kernel_insn_init(&insn, dest);
302 insn_get_displacement(&insn); 325 insn_get_displacement(&insn);
303 /* 326 /*
304 * The copied instruction uses the %rip-relative addressing 327 * The copied instruction uses the %rip-relative addressing
@@ -312,20 +335,23 @@ static void __kprobes fix_riprel(struct kprobe *p)
312 * extension of the original signed 32-bit displacement would 335 * extension of the original signed 32-bit displacement would
313 * have given. 336 * have given.
314 */ 337 */
315 newdisp = (u8 *) p->addr + (s64) insn.displacement.value - 338 newdisp = (u8 *) src + (s64) insn.displacement.value -
316 (u8 *) p->ainsn.insn; 339 (u8 *) dest;
317 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ 340 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
318 disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); 341 disp = (u8 *) dest + insn_offset_displacement(&insn);
319 *(s32 *) disp = (s32) newdisp; 342 *(s32 *) disp = (s32) newdisp;
320 } 343 }
321#endif 344#endif
345 return insn.length;
322} 346}
323 347
324static void __kprobes arch_copy_kprobe(struct kprobe *p) 348static void __kprobes arch_copy_kprobe(struct kprobe *p)
325{ 349{
326 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 350 /*
327 351 * Copy an instruction without recovering int3, because it will be
328 fix_riprel(p); 352 * put by another subsystem.
353 */
354 __copy_instruction(p->ainsn.insn, p->addr, 0);
329 355
330 if (can_boost(p->addr)) 356 if (can_boost(p->addr))
331 p->ainsn.boostable = 0; 357 p->ainsn.boostable = 0;
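A worked example of the %rip-relative fixup performed by __copy_instruction() above, using made-up addresses: suppose an instruction with a disp32 of 0x1000 sits at src == 0xffffffff81000000 and is copied to the out-of-line slot at dest == 0xffffffffa0000000. Keeping the same effective address requires

	newdisp = src + olddisp - dest
	        = 0xffffffff81000000 + 0x1000 - 0xffffffffa0000000
	        = -0x1efff000

which still fits in a signed 32-bit displacement, so the BUG_ON() sanity check does not trip. (The instruction-length terms cancel because the copy has the same length as the original, which is why the code can use the instruction start addresses directly.)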
@@ -406,18 +432,6 @@ static void __kprobes restore_btf(void)
406 update_debugctlmsr(current->thread.debugctlmsr); 432 update_debugctlmsr(current->thread.debugctlmsr);
407} 433}
408 434
409static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
410{
411 clear_btf();
412 regs->flags |= X86_EFLAGS_TF;
413 regs->flags &= ~X86_EFLAGS_IF;
414 /* single step inline if the instruction is an int3 */
415 if (p->opcode == BREAKPOINT_INSTRUCTION)
416 regs->ip = (unsigned long)p->addr;
417 else
418 regs->ip = (unsigned long)p->ainsn.insn;
419}
420
421void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 435void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
422 struct pt_regs *regs) 436 struct pt_regs *regs)
423{ 437{
@@ -429,20 +443,50 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
429 *sara = (unsigned long) &kretprobe_trampoline; 443 *sara = (unsigned long) &kretprobe_trampoline;
430} 444}
431 445
446#ifdef CONFIG_OPTPROBES
447static int __kprobes setup_detour_execution(struct kprobe *p,
448 struct pt_regs *regs,
449 int reenter);
450#else
451#define setup_detour_execution(p, regs, reenter) (0)
452#endif
453
432static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, 454static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
433 struct kprobe_ctlblk *kcb) 455 struct kprobe_ctlblk *kcb, int reenter)
434{ 456{
457 if (setup_detour_execution(p, regs, reenter))
458 return;
459
435#if !defined(CONFIG_PREEMPT) 460#if !defined(CONFIG_PREEMPT)
436 if (p->ainsn.boostable == 1 && !p->post_handler) { 461 if (p->ainsn.boostable == 1 && !p->post_handler) {
437 /* Boost up -- we can execute copied instructions directly */ 462 /* Boost up -- we can execute copied instructions directly */
438 reset_current_kprobe(); 463 if (!reenter)
464 reset_current_kprobe();
465 /*
466 * Reentering boosted probe doesn't reset current_kprobe,
467 * nor set current_kprobe, because it doesn't use single
468 * stepping.
469 */
439 regs->ip = (unsigned long)p->ainsn.insn; 470 regs->ip = (unsigned long)p->ainsn.insn;
440 preempt_enable_no_resched(); 471 preempt_enable_no_resched();
441 return; 472 return;
442 } 473 }
443#endif 474#endif
444 prepare_singlestep(p, regs); 475 if (reenter) {
445 kcb->kprobe_status = KPROBE_HIT_SS; 476 save_previous_kprobe(kcb);
477 set_current_kprobe(p, regs, kcb);
478 kcb->kprobe_status = KPROBE_REENTER;
479 } else
480 kcb->kprobe_status = KPROBE_HIT_SS;
481 /* Prepare real single stepping */
482 clear_btf();
483 regs->flags |= X86_EFLAGS_TF;
484 regs->flags &= ~X86_EFLAGS_IF;
485 /* single step inline if the instruction is an int3 */
486 if (p->opcode == BREAKPOINT_INSTRUCTION)
487 regs->ip = (unsigned long)p->addr;
488 else
489 regs->ip = (unsigned long)p->ainsn.insn;
446} 490}
447 491
448/* 492/*
@@ -456,11 +500,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
456 switch (kcb->kprobe_status) { 500 switch (kcb->kprobe_status) {
457 case KPROBE_HIT_SSDONE: 501 case KPROBE_HIT_SSDONE:
458 case KPROBE_HIT_ACTIVE: 502 case KPROBE_HIT_ACTIVE:
459 save_previous_kprobe(kcb);
460 set_current_kprobe(p, regs, kcb);
461 kprobes_inc_nmissed_count(p); 503 kprobes_inc_nmissed_count(p);
462 prepare_singlestep(p, regs); 504 setup_singlestep(p, regs, kcb, 1);
463 kcb->kprobe_status = KPROBE_REENTER;
464 break; 505 break;
465 case KPROBE_HIT_SS: 506 case KPROBE_HIT_SS:
466 /* A probe has been hit in the codepath leading up to, or just 507 /* A probe has been hit in the codepath leading up to, or just
@@ -535,13 +576,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
535 * more here. 576 * more here.
536 */ 577 */
537 if (!p->pre_handler || !p->pre_handler(p, regs)) 578 if (!p->pre_handler || !p->pre_handler(p, regs))
538 setup_singlestep(p, regs, kcb); 579 setup_singlestep(p, regs, kcb, 0);
539 return 1; 580 return 1;
540 } 581 }
541 } else if (kprobe_running()) { 582 } else if (kprobe_running()) {
542 p = __get_cpu_var(current_kprobe); 583 p = __get_cpu_var(current_kprobe);
543 if (p->break_handler && p->break_handler(p, regs)) { 584 if (p->break_handler && p->break_handler(p, regs)) {
544 setup_singlestep(p, regs, kcb); 585 setup_singlestep(p, regs, kcb, 0);
545 return 1; 586 return 1;
546 } 587 }
547 } /* else: not a kprobe fault; let the kernel handle it */ 588 } /* else: not a kprobe fault; let the kernel handle it */
@@ -550,6 +591,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
550 return 0; 591 return 0;
551} 592}
552 593
594#ifdef CONFIG_X86_64
595#define SAVE_REGS_STRING \
596 /* Skip cs, ip, orig_ax. */ \
597 " subq $24, %rsp\n" \
598 " pushq %rdi\n" \
599 " pushq %rsi\n" \
600 " pushq %rdx\n" \
601 " pushq %rcx\n" \
602 " pushq %rax\n" \
603 " pushq %r8\n" \
604 " pushq %r9\n" \
605 " pushq %r10\n" \
606 " pushq %r11\n" \
607 " pushq %rbx\n" \
608 " pushq %rbp\n" \
609 " pushq %r12\n" \
610 " pushq %r13\n" \
611 " pushq %r14\n" \
612 " pushq %r15\n"
613#define RESTORE_REGS_STRING \
614 " popq %r15\n" \
615 " popq %r14\n" \
616 " popq %r13\n" \
617 " popq %r12\n" \
618 " popq %rbp\n" \
619 " popq %rbx\n" \
620 " popq %r11\n" \
621 " popq %r10\n" \
622 " popq %r9\n" \
623 " popq %r8\n" \
624 " popq %rax\n" \
625 " popq %rcx\n" \
626 " popq %rdx\n" \
627 " popq %rsi\n" \
628 " popq %rdi\n" \
629 /* Skip orig_ax, ip, cs */ \
630 " addq $24, %rsp\n"
631#else
632#define SAVE_REGS_STRING \
633 /* Skip cs, ip, orig_ax and gs. */ \
634 " subl $16, %esp\n" \
635 " pushl %fs\n" \
636 " pushl %ds\n" \
637 " pushl %es\n" \
638 " pushl %eax\n" \
639 " pushl %ebp\n" \
640 " pushl %edi\n" \
641 " pushl %esi\n" \
642 " pushl %edx\n" \
643 " pushl %ecx\n" \
644 " pushl %ebx\n"
645#define RESTORE_REGS_STRING \
646 " popl %ebx\n" \
647 " popl %ecx\n" \
648 " popl %edx\n" \
649 " popl %esi\n" \
650 " popl %edi\n" \
651 " popl %ebp\n" \
652 " popl %eax\n" \
653 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
654 " addl $24, %esp\n"
655#endif
656
553/* 657/*
554 * When a retprobed function returns, this code saves registers and 658 * When a retprobed function returns, this code saves registers and
555 * calls trampoline_handler() runs, which calls the kretprobe's handler. 659 * calls trampoline_handler() runs, which calls the kretprobe's handler.
@@ -563,65 +667,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
563 /* We don't bother saving the ss register */ 667 /* We don't bother saving the ss register */
564 " pushq %rsp\n" 668 " pushq %rsp\n"
565 " pushfq\n" 669 " pushfq\n"
566 /* 670 SAVE_REGS_STRING
567 * Skip cs, ip, orig_ax.
568 * trampoline_handler() will plug in these values
569 */
570 " subq $24, %rsp\n"
571 " pushq %rdi\n"
572 " pushq %rsi\n"
573 " pushq %rdx\n"
574 " pushq %rcx\n"
575 " pushq %rax\n"
576 " pushq %r8\n"
577 " pushq %r9\n"
578 " pushq %r10\n"
579 " pushq %r11\n"
580 " pushq %rbx\n"
581 " pushq %rbp\n"
582 " pushq %r12\n"
583 " pushq %r13\n"
584 " pushq %r14\n"
585 " pushq %r15\n"
586 " movq %rsp, %rdi\n" 671 " movq %rsp, %rdi\n"
587 " call trampoline_handler\n" 672 " call trampoline_handler\n"
588 /* Replace saved sp with true return address. */ 673 /* Replace saved sp with true return address. */
589 " movq %rax, 152(%rsp)\n" 674 " movq %rax, 152(%rsp)\n"
590 " popq %r15\n" 675 RESTORE_REGS_STRING
591 " popq %r14\n"
592 " popq %r13\n"
593 " popq %r12\n"
594 " popq %rbp\n"
595 " popq %rbx\n"
596 " popq %r11\n"
597 " popq %r10\n"
598 " popq %r9\n"
599 " popq %r8\n"
600 " popq %rax\n"
601 " popq %rcx\n"
602 " popq %rdx\n"
603 " popq %rsi\n"
604 " popq %rdi\n"
605 /* Skip orig_ax, ip, cs */
606 " addq $24, %rsp\n"
607 " popfq\n" 676 " popfq\n"
608#else 677#else
609 " pushf\n" 678 " pushf\n"
610 /* 679 SAVE_REGS_STRING
611 * Skip cs, ip, orig_ax and gs.
612 * trampoline_handler() will plug in these values
613 */
614 " subl $16, %esp\n"
615 " pushl %fs\n"
616 " pushl %es\n"
617 " pushl %ds\n"
618 " pushl %eax\n"
619 " pushl %ebp\n"
620 " pushl %edi\n"
621 " pushl %esi\n"
622 " pushl %edx\n"
623 " pushl %ecx\n"
624 " pushl %ebx\n"
625 " movl %esp, %eax\n" 680 " movl %esp, %eax\n"
626 " call trampoline_handler\n" 681 " call trampoline_handler\n"
627 /* Move flags to cs */ 682 /* Move flags to cs */
@@ -629,15 +684,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
629 " movl %edx, 52(%esp)\n" 684 " movl %edx, 52(%esp)\n"
630 /* Replace saved flags with true return address. */ 685 /* Replace saved flags with true return address. */
631 " movl %eax, 56(%esp)\n" 686 " movl %eax, 56(%esp)\n"
632 " popl %ebx\n" 687 RESTORE_REGS_STRING
633 " popl %ecx\n"
634 " popl %edx\n"
635 " popl %esi\n"
636 " popl %edi\n"
637 " popl %ebp\n"
638 " popl %eax\n"
639 /* Skip ds, es, fs, gs, orig_ax and ip */
640 " addl $24, %esp\n"
641 " popf\n" 688 " popf\n"
642#endif 689#endif
643 " ret\n"); 690 " ret\n");
@@ -805,8 +852,8 @@ static void __kprobes resume_execution(struct kprobe *p,
805 * These instructions can be executed directly if it 852 * These instructions can be executed directly if it
806 * jumps back to correct address. 853 * jumps back to correct address.
807 */ 854 */
808 set_jmp_op((void *)regs->ip, 855 synthesize_reljump((void *)regs->ip,
809 (void *)orig_ip + (regs->ip - copy_ip)); 856 (void *)orig_ip + (regs->ip - copy_ip));
810 p->ainsn.boostable = 1; 857 p->ainsn.boostable = 1;
811 } else { 858 } else {
812 p->ainsn.boostable = -1; 859 p->ainsn.boostable = -1;
@@ -1033,6 +1080,358 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1033 return 0; 1080 return 0;
1034} 1081}
1035 1082
1083
1084#ifdef CONFIG_OPTPROBES
1085
1086/* Insert a call instruction at address 'from', which calls address 'to'.*/
1087static void __kprobes synthesize_relcall(void *from, void *to)
1088{
1089 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
1090}
1091
1092/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
1093static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1094 unsigned long val)
1095{
1096#ifdef CONFIG_X86_64
1097 *addr++ = 0x48;
1098 *addr++ = 0xbf;
1099#else
1100 *addr++ = 0xb8;
1101#endif
1102 *(unsigned long *)addr = val;
1103}
1104
1105void __kprobes kprobes_optinsn_template_holder(void)
1106{
1107 asm volatile (
1108 ".global optprobe_template_entry\n"
1109 "optprobe_template_entry: \n"
1110#ifdef CONFIG_X86_64
1111 /* We don't bother saving the ss register */
1112 " pushq %rsp\n"
1113 " pushfq\n"
1114 SAVE_REGS_STRING
1115 " movq %rsp, %rsi\n"
1116 ".global optprobe_template_val\n"
1117 "optprobe_template_val: \n"
1118 ASM_NOP5
1119 ASM_NOP5
1120 ".global optprobe_template_call\n"
1121 "optprobe_template_call: \n"
1122 ASM_NOP5
1123 /* Move flags to rsp */
1124 " movq 144(%rsp), %rdx\n"
1125 " movq %rdx, 152(%rsp)\n"
1126 RESTORE_REGS_STRING
1127 /* Skip flags entry */
1128 " addq $8, %rsp\n"
1129 " popfq\n"
1130#else /* CONFIG_X86_32 */
1131 " pushf\n"
1132 SAVE_REGS_STRING
1133 " movl %esp, %edx\n"
1134 ".global optprobe_template_val\n"
1135 "optprobe_template_val: \n"
1136 ASM_NOP5
1137 ".global optprobe_template_call\n"
1138 "optprobe_template_call: \n"
1139 ASM_NOP5
1140 RESTORE_REGS_STRING
1141 " addl $4, %esp\n" /* skip cs */
1142 " popf\n"
1143#endif
1144 ".global optprobe_template_end\n"
1145 "optprobe_template_end: \n");
1146}
1147
1148#define TMPL_MOVE_IDX \
1149 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
1150#define TMPL_CALL_IDX \
1151 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
1152#define TMPL_END_IDX \
1153 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
1154
1155#define INT3_SIZE sizeof(kprobe_opcode_t)
1156
1157/* Optimized kprobe call back function: called from optinsn */
1158static void __kprobes optimized_callback(struct optimized_kprobe *op,
1159 struct pt_regs *regs)
1160{
1161 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1162
1163 preempt_disable();
1164 if (kprobe_running()) {
1165 kprobes_inc_nmissed_count(&op->kp);
1166 } else {
1167 /* Save skipped registers */
1168#ifdef CONFIG_X86_64
1169 regs->cs = __KERNEL_CS;
1170#else
1171 regs->cs = __KERNEL_CS | get_kernel_rpl();
1172 regs->gs = 0;
1173#endif
1174 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
1175 regs->orig_ax = ~0UL;
1176
1177 __get_cpu_var(current_kprobe) = &op->kp;
1178 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1179 opt_pre_handler(&op->kp, regs);
1180 __get_cpu_var(current_kprobe) = NULL;
1181 }
1182 preempt_enable_no_resched();
1183}
1184
1185static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1186{
1187 int len = 0, ret;
1188
1189 while (len < RELATIVEJUMP_SIZE) {
1190 ret = __copy_instruction(dest + len, src + len, 1);
1191 if (!ret || !can_boost(dest + len))
1192 return -EINVAL;
1193 len += ret;
1194 }
1195 /* Check whether the address range is reserved */
1196 if (ftrace_text_reserved(src, src + len - 1) ||
1197 alternatives_text_reserved(src, src + len - 1))
1198 return -EBUSY;
1199
1200 return len;
1201}
1202
1203/* Check whether insn is indirect jump */
1204static int __kprobes insn_is_indirect_jump(struct insn *insn)
1205{
1206 return ((insn->opcode.bytes[0] == 0xff &&
1207 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
1208 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
1209}
1210
1211/* Check whether insn jumps into specified address range */
1212static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
1213{
1214 unsigned long target = 0;
1215
1216 switch (insn->opcode.bytes[0]) {
1217 case 0xe0: /* loopne */
1218 case 0xe1: /* loope */
1219 case 0xe2: /* loop */
1220 case 0xe3: /* jcxz */
1221 case 0xe9: /* near relative jump */
1222 case 0xeb: /* short relative jump */
1223 break;
1224 case 0x0f:
1225 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
1226 break;
1227 return 0;
1228 default:
1229 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
1230 break;
1231 return 0;
1232 }
1233 target = (unsigned long)insn->next_byte + insn->immediate.value;
1234
1235 return (start <= target && target <= start + len);
1236}
1237
1238/* Decode the whole function to ensure no instruction jumps into the target */
1239static int __kprobes can_optimize(unsigned long paddr)
1240{
1241 int ret;
1242 unsigned long addr, size = 0, offset = 0;
1243 struct insn insn;
1244 kprobe_opcode_t buf[MAX_INSN_SIZE];
1245 /* Dummy buffers for lookup_symbol_attrs */
1246 static char __dummy_buf[KSYM_NAME_LEN];
1247
1248 /* Lookup symbol including addr */
1249 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
1250 return 0;
1251
1252 /* Check there is enough space for a relative jump. */
1253 if (size - offset < RELATIVEJUMP_SIZE)
1254 return 0;
1255
1256 /* Decode instructions */
1257 addr = paddr - offset;
1258 while (addr < paddr - offset + size) { /* Decode until function end */
1259 if (search_exception_tables(addr))
1260 /*
1261 * Since some fixup code will jump into this function,
1262 * we can't optimize kprobes in this function.
1263 */
1264 return 0;
1265 kernel_insn_init(&insn, (void *)addr);
1266 insn_get_opcode(&insn);
1267 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
1268 ret = recover_probed_instruction(buf, addr);
1269 if (ret)
1270 return 0;
1271 kernel_insn_init(&insn, buf);
1272 }
1273 insn_get_length(&insn);
1274 /* Recover address */
1275 insn.kaddr = (void *)addr;
1276 insn.next_byte = (void *)(addr + insn.length);
1277 /* Check that this instruction doesn't jump into the target range */
1278 if (insn_is_indirect_jump(&insn) ||
1279 insn_jump_into_range(&insn, paddr + INT3_SIZE,
1280 RELATIVE_ADDR_SIZE))
1281 return 0;
1282 addr += insn.length;
1283 }
1284
1285 return 1;
1286}
1287
1288/* Check optimized_kprobe can actually be optimized. */
1289int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
1290{
1291 int i;
1292 struct kprobe *p;
1293
1294 for (i = 1; i < op->optinsn.size; i++) {
1295 p = get_kprobe(op->kp.addr + i);
1296 if (p && !kprobe_disabled(p))
1297 return -EEXIST;
1298 }
1299
1300 return 0;
1301}
1302
1303/* Check the addr is within the optimized instructions. */
1304int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
1305 unsigned long addr)
1306{
1307 return ((unsigned long)op->kp.addr <= addr &&
1308 (unsigned long)op->kp.addr + op->optinsn.size > addr);
1309}
1310
1311/* Free optimized instruction slot */
1312static __kprobes
1313void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
1314{
1315 if (op->optinsn.insn) {
1316 free_optinsn_slot(op->optinsn.insn, dirty);
1317 op->optinsn.insn = NULL;
1318 op->optinsn.size = 0;
1319 }
1320}
1321
1322void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
1323{
1324 __arch_remove_optimized_kprobe(op, 1);
1325}
1326
1327/*
1328 * Copy replacing target instructions
1329 * Target instructions MUST be relocatable (checked inside)
1330 */
1331int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1332{
1333 u8 *buf;
1334 int ret;
1335 long rel;
1336
1337 if (!can_optimize((unsigned long)op->kp.addr))
1338 return -EILSEQ;
1339
1340 op->optinsn.insn = get_optinsn_slot();
1341 if (!op->optinsn.insn)
1342 return -ENOMEM;
1343
1344 /*
1345 * Verify if the address gap is in 2GB range, because this uses
1346 * a relative jump.
1347 */
1348 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
1349 if (abs(rel) > 0x7fffffff)
1350 return -ERANGE;
1351
1352 buf = (u8 *)op->optinsn.insn;
1353
1354 /* Copy instructions into the out-of-line buffer */
1355 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
1356 if (ret < 0) {
1357 __arch_remove_optimized_kprobe(op, 0);
1358 return ret;
1359 }
1360 op->optinsn.size = ret;
1361
1362 /* Copy arch-dep-instance from template */
1363 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
1364
1365 /* Set probe information */
1366 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
1367
1368 /* Set probe function call */
1369 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
1370
1371 /* Set returning jmp instruction at the tail of out-of-line buffer */
1372 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
1373 (u8 *)op->kp.addr + op->optinsn.size);
1374
1375 flush_icache_range((unsigned long) buf,
1376 (unsigned long) buf + TMPL_END_IDX +
1377 op->optinsn.size + RELATIVEJUMP_SIZE);
1378 return 0;
1379}
1380
1381/* Replace a breakpoint (int3) with a relative jump. */
1382int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1383{
1384 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1385 s32 rel = (s32)((long)op->optinsn.insn -
1386 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1387
1388 /* Backup instructions which will be replaced by jump address */
1389 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1390 RELATIVE_ADDR_SIZE);
1391
1392 jmp_code[0] = RELATIVEJUMP_OPCODE;
1393 *(s32 *)(&jmp_code[1]) = rel;
1394
1395 /*
1396 * text_poke_smp doesn't support modifying code that may run in NMI/MCE
1397 * context. However, since kprobes itself also doesn't support probing
1398 * NMI/MCE code, this is not a problem.
1399 */
1400 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
1401 return 0;
1402}
1403
1404/* Replace a relative jump with a breakpoint (int3). */
1405void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
1406{
1407 u8 buf[RELATIVEJUMP_SIZE];
1408
1409 /* Set int3 to first byte for kprobes */
1410 buf[0] = BREAKPOINT_INSTRUCTION;
1411 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1412 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
1413}
1414
1415static int __kprobes setup_detour_execution(struct kprobe *p,
1416 struct pt_regs *regs,
1417 int reenter)
1418{
1419 struct optimized_kprobe *op;
1420
1421 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
1422		/* This kprobe can really run the optimized path. */
1423 op = container_of(p, struct optimized_kprobe, kp);
1424 /* Detour through copied instructions */
1425 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
1426 if (!reenter)
1427 reset_current_kprobe();
1428 preempt_enable_no_resched();
1429 return 1;
1430 }
1431 return 0;
1432}
1433#endif
1434
1036int __init arch_init_kprobes(void) 1435int __init arch_init_kprobes(void)
1037{ 1436{
1038	return 0;	1437	return 0;
1039}	1438}
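
The optprobe changes above depend on the out-of-line detour buffer being reachable from the probed address with a signed 32-bit displacement, which is what the abs(rel) > 0x7fffffff check enforces. As a rough illustration only, here is a standalone sketch of that displacement computation (plain userspace C; the 5-byte jump size is an assumption standing in for RELATIVEJUMP_SIZE, which is defined outside this hunk):

#include <stdint.h>
#include <stdio.h>

#define JMP_REL32_SIZE 5	/* assumed size of an x86 "jmp rel32" */

/*
 * Displacement of a relative jump placed at 'from' and landing at 'to'.
 * Returns 0 and fills *disp on success, or -1 if the target is outside
 * the +/-2GB range a signed 32-bit displacement can reach.
 */
static int jmp_rel32(uint64_t from, uint64_t to, int32_t *disp)
{
	int64_t rel = (int64_t)(to - (from + JMP_REL32_SIZE));

	if (rel > INT32_MAX || rel < INT32_MIN)
		return -1;
	*disp = (int32_t)rel;
	return 0;
}

int main(void)
{
	int32_t disp;

	if (jmp_rel32(0x400000, 0x400123, &disp) == 0)
		printf("disp = %d\n", disp);
	return 0;
}

This mirrors the rel computation in arch_optimize_kprobe() above, where the displacement is written into the four bytes following RELATIVEJUMP_OPCODE.
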
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
index 3b7078abc87..0aad8670858 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/kernel/mrst.c
@@ -10,8 +10,211 @@
10 * of the License. 10 * of the License.
11 */ 11 */
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/sfi.h>
15#include <linux/irq.h>
16#include <linux/module.h>
13 17
14#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/mpspec_def.h>
20#include <asm/hw_irq.h>
21#include <asm/apic.h>
22#include <asm/io_apic.h>
23#include <asm/mrst.h>
24#include <asm/io.h>
25#include <asm/i8259.h>
26#include <asm/apb_timer.h>
27
28static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
29static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
30int sfi_mtimer_num;
31
32struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
33EXPORT_SYMBOL_GPL(sfi_mrtc_array);
34int sfi_mrtc_num;
35
36static inline void assign_to_mp_irq(struct mpc_intsrc *m,
37 struct mpc_intsrc *mp_irq)
38{
39 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
40}
41
42static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq,
43 struct mpc_intsrc *m)
44{
45 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
46}
47
48static void save_mp_irq(struct mpc_intsrc *m)
49{
50 int i;
51
52 for (i = 0; i < mp_irq_entries; i++) {
53 if (!mp_irq_cmp(&mp_irqs[i], m))
54 return;
55 }
56
57 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
58 if (++mp_irq_entries == MAX_IRQ_SOURCES)
59 panic("Max # of irq sources exceeded!!\n");
60}
61
62/* parse all the mtimer info into a static mtimer array */
63static int __init sfi_parse_mtmr(struct sfi_table_header *table)
64{
65 struct sfi_table_simple *sb;
66 struct sfi_timer_table_entry *pentry;
67 struct mpc_intsrc mp_irq;
68 int totallen;
69
70 sb = (struct sfi_table_simple *)table;
71 if (!sfi_mtimer_num) {
72 sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
73 struct sfi_timer_table_entry);
74 pentry = (struct sfi_timer_table_entry *) sb->pentry;
75 totallen = sfi_mtimer_num * sizeof(*pentry);
76 memcpy(sfi_mtimer_array, pentry, totallen);
77 }
78
79 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
80 pentry = sfi_mtimer_array;
81 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
82 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
83 " irq = %d\n", totallen, (u32)pentry->phys_addr,
84 pentry->freq_hz, pentry->irq);
85 if (!pentry->irq)
86 continue;
87 mp_irq.type = MP_IOAPIC;
88 mp_irq.irqtype = mp_INT;
89/* trigger mode: edge (bits 2-3); polarity: active high (bits 0-1) */
90 mp_irq.irqflag = 5;
91 mp_irq.srcbus = 0;
92 mp_irq.srcbusirq = pentry->irq; /* IRQ */
93 mp_irq.dstapic = MP_APIC_ALL;
94 mp_irq.dstirq = pentry->irq;
95 save_mp_irq(&mp_irq);
96 }
97
98 return 0;
99}
100
101struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
102{
103 int i;
104 if (hint < sfi_mtimer_num) {
105 if (!sfi_mtimer_usage[hint]) {
106 pr_debug("hint taken for timer %d irq %d\n",\
107 hint, sfi_mtimer_array[hint].irq);
108 sfi_mtimer_usage[hint] = 1;
109 return &sfi_mtimer_array[hint];
110 }
111 }
112 /* take the first timer available */
113 for (i = 0; i < sfi_mtimer_num;) {
114 if (!sfi_mtimer_usage[i]) {
115 sfi_mtimer_usage[i] = 1;
116 return &sfi_mtimer_array[i];
117 }
118 i++;
119 }
120 return NULL;
121}
122
123void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
124{
125 int i;
126 for (i = 0; i < sfi_mtimer_num;) {
127 if (mtmr->irq == sfi_mtimer_array[i].irq) {
128 sfi_mtimer_usage[i] = 0;
129 return;
130 }
131 i++;
132 }
133}
134
135/* parse all the mrtc info into a global mrtc array */
136int __init sfi_parse_mrtc(struct sfi_table_header *table)
137{
138 struct sfi_table_simple *sb;
139 struct sfi_rtc_table_entry *pentry;
140 struct mpc_intsrc mp_irq;
141
142 int totallen;
143
144 sb = (struct sfi_table_simple *)table;
145 if (!sfi_mrtc_num) {
146 sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
147 struct sfi_rtc_table_entry);
148 pentry = (struct sfi_rtc_table_entry *)sb->pentry;
149 totallen = sfi_mrtc_num * sizeof(*pentry);
150 memcpy(sfi_mrtc_array, pentry, totallen);
151 }
152
153 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
154 pentry = sfi_mrtc_array;
155 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
156 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
157 totallen, (u32)pentry->phys_addr, pentry->irq);
158 mp_irq.type = MP_IOAPIC;
159 mp_irq.irqtype = mp_INT;
160 mp_irq.irqflag = 0;
161 mp_irq.srcbus = 0;
162 mp_irq.srcbusirq = pentry->irq; /* IRQ */
163 mp_irq.dstapic = MP_APIC_ALL;
164 mp_irq.dstirq = pentry->irq;
165 save_mp_irq(&mp_irq);
166 }
167 return 0;
168}
169
170/*
171 * The secondary clock in Moorestown can be either the APBT or the LAPIC
172 * clock; it defaults to APBT, but a cmdline option can override it.
173 */
174static void __cpuinit mrst_setup_secondary_clock(void)
175{
176 /* restore default lapic clock if disabled by cmdline */
177 if (disable_apbt_percpu)
178 return setup_secondary_APIC_clock();
179 apbt_setup_secondary_clock();
180}
181
182static unsigned long __init mrst_calibrate_tsc(void)
183{
184 unsigned long flags, fast_calibrate;
185
186 local_irq_save(flags);
187 fast_calibrate = apbt_quick_calibrate();
188 local_irq_restore(flags);
189
190 if (fast_calibrate)
191 return fast_calibrate;
192
193 return 0;
194}
195
196void __init mrst_time_init(void)
197{
198 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
199 pre_init_apic_IRQ0();
200 apbt_time_init();
201}
202
203void __init mrst_rtc_init(void)
204{
205 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
206}
207
208/*
209 * If we use the per-cpu APB timer, the boot clock is already set up. If we use
210 * the LAPIC timer plus one APBT timer for broadcast, we need to set up the LAPIC boot clock.
211 */
212static void __init mrst_setup_boot_clock(void)
213{
214 pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu);
215 if (disable_apbt_percpu)
216 setup_boot_APIC_clock();
217};
15 218
16/* 219/*
17 * Moorestown specific x86_init function overrides and early setup 220 * Moorestown specific x86_init function overrides and early setup
@@ -21,4 +224,17 @@ void __init x86_mrst_early_setup(void)
21{ 224{
22 x86_init.resources.probe_roms = x86_init_noop; 225 x86_init.resources.probe_roms = x86_init_noop;
23 x86_init.resources.reserve_resources = x86_init_noop; 226 x86_init.resources.reserve_resources = x86_init_noop;
227
228 x86_init.timers.timer_init = mrst_time_init;
229 x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock;
230
231 x86_init.irqs.pre_vector_init = x86_init_noop;
232
233 x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock;
234
235 x86_platform.calibrate_tsc = mrst_calibrate_tsc;
236 x86_init.pci.init = pci_mrst_init;
237 x86_init.pci.fixup_irqs = x86_init_noop;
238
239 legacy_pic = &null_legacy_pic;
24} 240}
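
The Moorestown support added above plugs into the boot path by overwriting selected function pointers in the x86_init, x86_cpuinit and x86_platform tables rather than adding platform checks to generic code. For readers unfamiliar with the pattern, a minimal standalone sketch (hypothetical names, plain C):

#include <stdio.h>

/* A miniature ops table: the defaults are no-ops, a platform overrides them. */
struct timer_ops {
	void (*timer_init)(void);
	void (*setup_percpu_clockev)(void);
};

static void noop(void) { }

static struct timer_ops timer_ops = {
	.timer_init		= noop,
	.setup_percpu_clockev	= noop,
};

static void demo_time_init(void)
{
	puts("platform-specific timer init");
}

/* Analogue of x86_mrst_early_setup(): override only the hooks this platform needs. */
static void demo_early_setup(void)
{
	timer_ops.timer_init = demo_time_init;
}

int main(void)
{
	demo_early_setup();
	timer_ops.timer_init();			/* the platform hook */
	timer_ops.setup_percpu_clockev();	/* still the default no-op */
	return 0;
}

Generic code keeps calling the hooks unconditionally; each platform decides at early-setup time which ones to replace, which is how x86_mrst_early_setup() installs mrst_time_init, mrst_calibrate_tsc and the null_legacy_pic above.
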
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 9d1d263f786..8297160c41b 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -17,7 +17,9 @@
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/io.h> 18#include <linux/io.h>
19#include <linux/string.h> 19#include <linux/string.h>
20
20#include <asm/geode.h> 21#include <asm/geode.h>
22#include <asm/setup.h>
21#include <asm/olpc.h> 23#include <asm/olpc.h>
22 24
23#ifdef CONFIG_OPEN_FIRMWARE 25#ifdef CONFIG_OPEN_FIRMWARE
@@ -243,9 +245,11 @@ static int __init olpc_init(void)
243 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, 245 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
244 (unsigned char *) &olpc_platform_info.ecver, 1); 246 (unsigned char *) &olpc_platform_info.ecver, 1);
245 247
246 /* check to see if the VSA exists */ 248#ifdef CONFIG_PCI_OLPC
247 if (cs5535_has_vsa2()) 249 /* If the VSA exists let it emulate PCI, if not emulate in kernel */
248 olpc_platform_info.flags |= OLPC_F_VSA; 250 if (!cs5535_has_vsa2())
251 x86_init.pci.arch_init = pci_olpc_init;
252#endif
249 253
250 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", 254 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
251 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", 255 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a435c76d714..a02e80c3c54 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -48,6 +48,7 @@
48#include <linux/err.h> 48#include <linux/err.h>
49#include <linux/nmi.h> 49#include <linux/nmi.h>
50#include <linux/tboot.h> 50#include <linux/tboot.h>
51#include <linux/stackprotector.h>
51 52
52#include <asm/acpi.h> 53#include <asm/acpi.h>
53#include <asm/desc.h> 54#include <asm/desc.h>
@@ -67,6 +68,7 @@
67#include <linux/mc146818rtc.h> 68#include <linux/mc146818rtc.h>
68 69
69#include <asm/smpboot_hooks.h> 70#include <asm/smpboot_hooks.h>
71#include <asm/i8259.h>
70 72
71#ifdef CONFIG_X86_32 73#ifdef CONFIG_X86_32
72u8 apicid_2_node[MAX_APICID]; 74u8 apicid_2_node[MAX_APICID];
@@ -291,9 +293,9 @@ notrace static void __cpuinit start_secondary(void *unused)
291 check_tsc_sync_target(); 293 check_tsc_sync_target();
292 294
293 if (nmi_watchdog == NMI_IO_APIC) { 295 if (nmi_watchdog == NMI_IO_APIC) {
294 disable_8259A_irq(0); 296 legacy_pic->chip->mask(0);
295 enable_NMI_through_LVT0(); 297 enable_NMI_through_LVT0();
296 enable_8259A_irq(0); 298 legacy_pic->chip->unmask(0);
297 } 299 }
298 300
299#ifdef CONFIG_X86_32 301#ifdef CONFIG_X86_32
@@ -329,6 +331,9 @@ notrace static void __cpuinit start_secondary(void *unused)
329 /* enable local interrupts */ 331 /* enable local interrupts */
330 local_irq_enable(); 332 local_irq_enable();
331 333
334	/* prevent a spurious stack-protector check failure in clock setup */
335 boot_init_stack_canary();
336
332 x86_cpuinit.setup_percpu_clockev(); 337 x86_cpuinit.setup_percpu_clockev();
333 338
334 wmb(); 339 wmb();
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index ab38ce0984f..e680ea52db9 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -49,11 +49,6 @@ extern int no_broadcast;
49char visws_board_type = -1; 49char visws_board_type = -1;
50char visws_board_rev = -1; 50char visws_board_rev = -1;
51 51
52int is_visws_box(void)
53{
54 return visws_board_type >= 0;
55}
56
57static void __init visws_time_init(void) 52static void __init visws_time_init(void)
58{ 53{
59 printk(KERN_INFO "Starting Cobalt Timer system clock\n"); 54 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
@@ -242,6 +237,8 @@ void __init visws_early_detect(void)
242 x86_init.irqs.pre_vector_init = visws_pre_intr_init; 237 x86_init.irqs.pre_vector_init = visws_pre_intr_init;
243 x86_init.irqs.trap_init = visws_trap_init; 238 x86_init.irqs.trap_init = visws_trap_init;
244 x86_init.timers.timer_init = visws_time_init; 239 x86_init.timers.timer_init = visws_time_init;
240 x86_init.pci.init = pci_visws_init;
241 x86_init.pci.init_irq = x86_init_noop;
245 242
246 /* 243 /*
247 * Install reboot quirks: 244 * Install reboot quirks:
@@ -508,7 +505,7 @@ static struct irq_chip cobalt_irq_type = {
508 */ 505 */
509static unsigned int startup_piix4_master_irq(unsigned int irq) 506static unsigned int startup_piix4_master_irq(unsigned int irq)
510{ 507{
511 init_8259A(0); 508 legacy_pic->init(0);
512 509
513 return startup_cobalt_irq(irq); 510 return startup_cobalt_irq(irq);
514} 511}
@@ -532,9 +529,6 @@ static struct irq_chip piix4_master_irq_type = {
532 529
533static struct irq_chip piix4_virtual_irq_type = { 530static struct irq_chip piix4_virtual_irq_type = {
534 .name = "PIIX4-virtual", 531 .name = "PIIX4-virtual",
535 .shutdown = disable_8259A_irq,
536 .enable = enable_8259A_irq,
537 .disable = disable_8259A_irq,
538}; 532};
539 533
540 534
@@ -609,7 +603,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
609 handle_IRQ_event(realirq, desc->action); 603 handle_IRQ_event(realirq, desc->action);
610 604
611 if (!(desc->status & IRQ_DISABLED)) 605 if (!(desc->status & IRQ_DISABLED))
612 enable_8259A_irq(realirq); 606 legacy_pic->chip->unmask(realirq);
613 607
614 return IRQ_HANDLED; 608 return IRQ_HANDLED;
615 609
@@ -628,6 +622,12 @@ static struct irqaction cascade_action = {
628 .name = "cascade", 622 .name = "cascade",
629}; 623};
630 624
625static inline void set_piix4_virtual_irq_type(void)
626{
627 piix4_virtual_irq_type.shutdown = i8259A_chip.mask;
628 piix4_virtual_irq_type.enable = i8259A_chip.unmask;
629 piix4_virtual_irq_type.disable = i8259A_chip.mask;
630}
631 631
632void init_VISWS_APIC_irqs(void) 632void init_VISWS_APIC_irqs(void)
633{ 633{
@@ -653,6 +653,7 @@ void init_VISWS_APIC_irqs(void)
653 desc->chip = &piix4_master_irq_type; 653 desc->chip = &piix4_master_irq_type;
654 } 654 }
655 else if (i < CO_IRQ_APIC0) { 655 else if (i < CO_IRQ_APIC0) {
656 set_piix4_virtual_irq_type();
656 desc->chip = &piix4_virtual_irq_type; 657 desc->chip = &piix4_virtual_irq_type;
657 } 658 }
658 else if (IS_CO_APIC(i)) { 659 else if (IS_CO_APIC(i)) {
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 9055e5872ff..1c0c6ab9c60 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -301,7 +301,8 @@ static int __init vsyscall_init(void)
301 register_sysctl_table(kernel_root_table2); 301 register_sysctl_table(kernel_root_table2);
302#endif 302#endif
303 on_each_cpu(cpu_vsyscall_init, NULL, 1); 303 on_each_cpu(cpu_vsyscall_init, NULL, 1);
304 hotcpu_notifier(cpu_vsyscall_notifier, 0); 304 /* notifier priority > KVM */
305 hotcpu_notifier(cpu_vsyscall_notifier, 30);
305 return 0; 306 return 0;
306} 307}
307 308
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ee5746c9462..61a1e8c7e19 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -4,9 +4,11 @@
4 * For licencing details see kernel-base/COPYING 4 * For licencing details see kernel-base/COPYING
5 */ 5 */
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/ioport.h>
7 8
8#include <asm/bios_ebda.h> 9#include <asm/bios_ebda.h>
9#include <asm/paravirt.h> 10#include <asm/paravirt.h>
11#include <asm/pci_x86.h>
10#include <asm/mpspec.h> 12#include <asm/mpspec.h>
11#include <asm/setup.h> 13#include <asm/setup.h>
12#include <asm/apic.h> 14#include <asm/apic.h>
@@ -70,6 +72,12 @@ struct x86_init_ops x86_init __initdata = {
70 .iommu = { 72 .iommu = {
71 .iommu_init = iommu_init_noop, 73 .iommu_init = iommu_init_noop,
72 }, 74 },
75
76 .pci = {
77 .init = x86_default_pci_init,
78 .init_irq = x86_default_pci_init_irq,
79 .fixup_irqs = x86_default_pci_fixup_irqs,
80 },
73}; 81};
74 82
75struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 83struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3c4d0109ad2..970bbd47951 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
29 select HAVE_KVM_EVENTFD 29 select HAVE_KVM_EVENTFD
30 select KVM_APIC_ARCHITECTURE 30 select KVM_APIC_ARCHITECTURE
31 select USER_RETURN_NOTIFIER 31 select USER_RETURN_NOTIFIER
32 select KVM_MMIO
32 ---help--- 33 ---help---
33 Support hosting fully virtualized guest machines using hardware 34 Support hosting fully virtualized guest machines using hardware
34 virtualization extensions. You will need a fairly recent 35 virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7e8faea4651..4dade6ac082 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -32,7 +32,7 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <asm/kvm_emulate.h> 33#include <asm/kvm_emulate.h>
34 34
35#include "mmu.h" /* for is_long_mode() */ 35#include "x86.h"
36 36
37/* 37/*
38 * Opcode effective-address decode tables. 38 * Opcode effective-address decode tables.
@@ -76,6 +76,8 @@
76#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 76#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
77#define GroupMask 0xff /* Group number stored in bits 0:7 */ 77#define GroupMask 0xff /* Group number stored in bits 0:7 */
78/* Misc flags */ 78/* Misc flags */
79#define Lock (1<<26) /* lock prefix is allowed for the instruction */
80#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
79#define No64 (1<<28) 81#define No64 (1<<28)
80/* Source 2 operand type */ 82/* Source 2 operand type */
81#define Src2None (0<<29) 83#define Src2None (0<<29)
@@ -88,39 +90,40 @@
88enum { 90enum {
89 Group1_80, Group1_81, Group1_82, Group1_83, 91 Group1_80, Group1_81, Group1_82, Group1_83,
90 Group1A, Group3_Byte, Group3, Group4, Group5, Group7, 92 Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
93 Group8, Group9,
91}; 94};
92 95
93static u32 opcode_table[256] = { 96static u32 opcode_table[256] = {
94 /* 0x00 - 0x07 */ 97 /* 0x00 - 0x07 */
95 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 98 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
96 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 99 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
97 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 100 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
98 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, 101 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
99 /* 0x08 - 0x0F */ 102 /* 0x08 - 0x0F */
100 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 103 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
101 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 104 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
102 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 105 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
103 ImplicitOps | Stack | No64, 0, 106 ImplicitOps | Stack | No64, 0,
104 /* 0x10 - 0x17 */ 107 /* 0x10 - 0x17 */
105 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 108 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
106 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 109 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
107 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 110 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
108 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, 111 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
109 /* 0x18 - 0x1F */ 112 /* 0x18 - 0x1F */
110 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 113 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
111 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 114 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
112 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 115 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
113 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, 116 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
114 /* 0x20 - 0x27 */ 117 /* 0x20 - 0x27 */
115 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 118 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
116 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 119 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
117 DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, 120 DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
118 /* 0x28 - 0x2F */ 121 /* 0x28 - 0x2F */
119 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 122 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
120 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 123 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
121 0, 0, 0, 0, 124 0, 0, 0, 0,
122 /* 0x30 - 0x37 */ 125 /* 0x30 - 0x37 */
123 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 126 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
124 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 127 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
125 0, 0, 0, 0, 128 0, 0, 0, 0,
126 /* 0x38 - 0x3F */ 129 /* 0x38 - 0x3F */
@@ -156,7 +159,7 @@ static u32 opcode_table[256] = {
156 Group | Group1_80, Group | Group1_81, 159 Group | Group1_80, Group | Group1_81,
157 Group | Group1_82, Group | Group1_83, 160 Group | Group1_82, Group | Group1_83,
158 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 161 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
159 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 162 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
160 /* 0x88 - 0x8F */ 163 /* 0x88 - 0x8F */
161 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, 164 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
162 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 165 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -210,7 +213,7 @@ static u32 opcode_table[256] = {
210 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 213 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
211 /* 0xF0 - 0xF7 */ 214 /* 0xF0 - 0xF7 */
212 0, 0, 0, 0, 215 0, 0, 0, 0,
213 ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, 216 ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
214 /* 0xF8 - 0xFF */ 217 /* 0xF8 - 0xFF */
215 ImplicitOps, 0, ImplicitOps, ImplicitOps, 218 ImplicitOps, 0, ImplicitOps, ImplicitOps,
216 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, 219 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
@@ -218,16 +221,20 @@ static u32 opcode_table[256] = {
218 221
219static u32 twobyte_table[256] = { 222static u32 twobyte_table[256] = {
220 /* 0x00 - 0x0F */ 223 /* 0x00 - 0x0F */
221 0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 224 0, Group | GroupDual | Group7, 0, 0,
222 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 225 0, ImplicitOps, ImplicitOps | Priv, 0,
226 ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
227 0, ImplicitOps | ModRM, 0, 0,
223 /* 0x10 - 0x1F */ 228 /* 0x10 - 0x1F */
224 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 229 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
225 /* 0x20 - 0x2F */ 230 /* 0x20 - 0x2F */
226 ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0, 231 ModRM | ImplicitOps | Priv, ModRM | Priv,
232 ModRM | ImplicitOps | Priv, ModRM | Priv,
233 0, 0, 0, 0,
227 0, 0, 0, 0, 0, 0, 0, 0, 234 0, 0, 0, 0, 0, 0, 0, 0,
228 /* 0x30 - 0x3F */ 235 /* 0x30 - 0x3F */
229 ImplicitOps, 0, ImplicitOps, 0, 236 ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
230 ImplicitOps, ImplicitOps, 0, 0, 237 ImplicitOps, ImplicitOps | Priv, 0, 0,
231 0, 0, 0, 0, 0, 0, 0, 0, 238 0, 0, 0, 0, 0, 0, 0, 0,
232 /* 0x40 - 0x47 */ 239 /* 0x40 - 0x47 */
233 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 240 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -257,21 +264,23 @@ static u32 twobyte_table[256] = {
257 DstMem | SrcReg | Src2CL | ModRM, 0, 0, 264 DstMem | SrcReg | Src2CL | ModRM, 0, 0,
258 /* 0xA8 - 0xAF */ 265 /* 0xA8 - 0xAF */
259 ImplicitOps | Stack, ImplicitOps | Stack, 266 ImplicitOps | Stack, ImplicitOps | Stack,
260 0, DstMem | SrcReg | ModRM | BitOp, 267 0, DstMem | SrcReg | ModRM | BitOp | Lock,
261 DstMem | SrcReg | Src2ImmByte | ModRM, 268 DstMem | SrcReg | Src2ImmByte | ModRM,
262 DstMem | SrcReg | Src2CL | ModRM, 269 DstMem | SrcReg | Src2CL | ModRM,
263 ModRM, 0, 270 ModRM, 0,
264 /* 0xB0 - 0xB7 */ 271 /* 0xB0 - 0xB7 */
265 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, 272 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
266 DstMem | SrcReg | ModRM | BitOp, 273 0, DstMem | SrcReg | ModRM | BitOp | Lock,
267 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, 274 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
268 DstReg | SrcMem16 | ModRM | Mov, 275 DstReg | SrcMem16 | ModRM | Mov,
269 /* 0xB8 - 0xBF */ 276 /* 0xB8 - 0xBF */
270 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp, 277 0, 0,
278 Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
271 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, 279 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
272 DstReg | SrcMem16 | ModRM | Mov, 280 DstReg | SrcMem16 | ModRM | Mov,
273 /* 0xC0 - 0xCF */ 281 /* 0xC0 - 0xCF */
274 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM, 282 0, 0, 0, DstMem | SrcReg | ModRM | Mov,
283 0, 0, 0, Group | GroupDual | Group9,
275 0, 0, 0, 0, 0, 0, 0, 0, 284 0, 0, 0, 0, 0, 0, 0, 0,
276 /* 0xD0 - 0xDF */ 285 /* 0xD0 - 0xDF */
277 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 286 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -283,25 +292,41 @@ static u32 twobyte_table[256] = {
283 292
284static u32 group_table[] = { 293static u32 group_table[] = {
285 [Group1_80*8] = 294 [Group1_80*8] =
286 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 295 ByteOp | DstMem | SrcImm | ModRM | Lock,
287 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 296 ByteOp | DstMem | SrcImm | ModRM | Lock,
288 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 297 ByteOp | DstMem | SrcImm | ModRM | Lock,
289 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 298 ByteOp | DstMem | SrcImm | ModRM | Lock,
299 ByteOp | DstMem | SrcImm | ModRM | Lock,
300 ByteOp | DstMem | SrcImm | ModRM | Lock,
301 ByteOp | DstMem | SrcImm | ModRM | Lock,
302 ByteOp | DstMem | SrcImm | ModRM,
290 [Group1_81*8] = 303 [Group1_81*8] =
291 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 304 DstMem | SrcImm | ModRM | Lock,
292 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 305 DstMem | SrcImm | ModRM | Lock,
293 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 306 DstMem | SrcImm | ModRM | Lock,
294 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 307 DstMem | SrcImm | ModRM | Lock,
308 DstMem | SrcImm | ModRM | Lock,
309 DstMem | SrcImm | ModRM | Lock,
310 DstMem | SrcImm | ModRM | Lock,
311 DstMem | SrcImm | ModRM,
295 [Group1_82*8] = 312 [Group1_82*8] =
296 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 313 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
297 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 314 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
298 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 315 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
299 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 316 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
317 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
318 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
319 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
320 ByteOp | DstMem | SrcImm | ModRM | No64,
300 [Group1_83*8] = 321 [Group1_83*8] =
301 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 322 DstMem | SrcImmByte | ModRM | Lock,
302 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 323 DstMem | SrcImmByte | ModRM | Lock,
303 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 324 DstMem | SrcImmByte | ModRM | Lock,
304 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 325 DstMem | SrcImmByte | ModRM | Lock,
326 DstMem | SrcImmByte | ModRM | Lock,
327 DstMem | SrcImmByte | ModRM | Lock,
328 DstMem | SrcImmByte | ModRM | Lock,
329 DstMem | SrcImmByte | ModRM,
305 [Group1A*8] = 330 [Group1A*8] =
306 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, 331 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
307 [Group3_Byte*8] = 332 [Group3_Byte*8] =
@@ -320,24 +345,39 @@ static u32 group_table[] = {
320 SrcMem | ModRM | Stack, 0, 345 SrcMem | ModRM | Stack, 0,
321 SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, 346 SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
322 [Group7*8] = 347 [Group7*8] =
323 0, 0, ModRM | SrcMem, ModRM | SrcMem, 348 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
324 SrcNone | ModRM | DstMem | Mov, 0, 349 SrcNone | ModRM | DstMem | Mov, 0,
325 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, 350 SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
351 [Group8*8] =
352 0, 0, 0, 0,
353 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
354 DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
355 [Group9*8] =
356 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
326}; 357};
327 358
328static u32 group2_table[] = { 359static u32 group2_table[] = {
329 [Group7*8] = 360 [Group7*8] =
330 SrcNone | ModRM, 0, 0, SrcNone | ModRM, 361 SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM,
331 SrcNone | ModRM | DstMem | Mov, 0, 362 SrcNone | ModRM | DstMem | Mov, 0,
332 SrcMem16 | ModRM | Mov, 0, 363 SrcMem16 | ModRM | Mov, 0,
364 [Group9*8] =
365 0, 0, 0, 0, 0, 0, 0, 0,
333}; 366};
334 367
335/* EFLAGS bit definitions. */ 368/* EFLAGS bit definitions. */
369#define EFLG_ID (1<<21)
370#define EFLG_VIP (1<<20)
371#define EFLG_VIF (1<<19)
372#define EFLG_AC (1<<18)
336#define EFLG_VM (1<<17) 373#define EFLG_VM (1<<17)
337#define EFLG_RF (1<<16) 374#define EFLG_RF (1<<16)
375#define EFLG_IOPL (3<<12)
376#define EFLG_NT (1<<14)
338#define EFLG_OF (1<<11) 377#define EFLG_OF (1<<11)
339#define EFLG_DF (1<<10) 378#define EFLG_DF (1<<10)
340#define EFLG_IF (1<<9) 379#define EFLG_IF (1<<9)
380#define EFLG_TF (1<<8)
341#define EFLG_SF (1<<7) 381#define EFLG_SF (1<<7)
342#define EFLG_ZF (1<<6) 382#define EFLG_ZF (1<<6)
343#define EFLG_AF (1<<4) 383#define EFLG_AF (1<<4)
@@ -606,7 +646,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
606 646
607 if (linear < fc->start || linear >= fc->end) { 647 if (linear < fc->start || linear >= fc->end) {
608 size = min(15UL, PAGE_SIZE - offset_in_page(linear)); 648 size = min(15UL, PAGE_SIZE - offset_in_page(linear));
609 rc = ops->read_std(linear, fc->data, size, ctxt->vcpu); 649 rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
610 if (rc) 650 if (rc)
611 return rc; 651 return rc;
612 fc->start = linear; 652 fc->start = linear;
@@ -661,11 +701,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
661 op_bytes = 3; 701 op_bytes = 3;
662 *address = 0; 702 *address = 0;
663 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, 703 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
664 ctxt->vcpu); 704 ctxt->vcpu, NULL);
665 if (rc) 705 if (rc)
666 return rc; 706 return rc;
667 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, 707 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
668 ctxt->vcpu); 708 ctxt->vcpu, NULL);
669 return rc; 709 return rc;
670} 710}
671 711
@@ -889,6 +929,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
889 929
890 switch (mode) { 930 switch (mode) {
891 case X86EMUL_MODE_REAL: 931 case X86EMUL_MODE_REAL:
932 case X86EMUL_MODE_VM86:
892 case X86EMUL_MODE_PROT16: 933 case X86EMUL_MODE_PROT16:
893 def_op_bytes = def_ad_bytes = 2; 934 def_op_bytes = def_ad_bytes = 2;
894 break; 935 break;
@@ -975,7 +1016,7 @@ done_prefixes:
975 } 1016 }
976 1017
977 if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { 1018 if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
978 kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");; 1019 kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
979 return -1; 1020 return -1;
980 } 1021 }
981 1022
@@ -1196,13 +1237,56 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1196 rc = ops->read_emulated(register_address(c, ss_base(ctxt), 1237 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1197 c->regs[VCPU_REGS_RSP]), 1238 c->regs[VCPU_REGS_RSP]),
1198 dest, len, ctxt->vcpu); 1239 dest, len, ctxt->vcpu);
1199 if (rc != 0) 1240 if (rc != X86EMUL_CONTINUE)
1200 return rc; 1241 return rc;
1201 1242
1202 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); 1243 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1203 return rc; 1244 return rc;
1204} 1245}
1205 1246
1247static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1248 struct x86_emulate_ops *ops,
1249 void *dest, int len)
1250{
1251 int rc;
1252 unsigned long val, change_mask;
1253 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1254 int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu);
1255
1256 rc = emulate_pop(ctxt, ops, &val, len);
1257 if (rc != X86EMUL_CONTINUE)
1258 return rc;
1259
1260 change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1261 | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1262
1263 switch(ctxt->mode) {
1264 case X86EMUL_MODE_PROT64:
1265 case X86EMUL_MODE_PROT32:
1266 case X86EMUL_MODE_PROT16:
1267 if (cpl == 0)
1268 change_mask |= EFLG_IOPL;
1269 if (cpl <= iopl)
1270 change_mask |= EFLG_IF;
1271 break;
1272 case X86EMUL_MODE_VM86:
1273 if (iopl < 3) {
1274 kvm_inject_gp(ctxt->vcpu, 0);
1275 return X86EMUL_PROPAGATE_FAULT;
1276 }
1277 change_mask |= EFLG_IF;
1278 break;
1279 default: /* real mode */
1280 change_mask |= (EFLG_IOPL | EFLG_IF);
1281 break;
1282 }
1283
1284 *(unsigned long *)dest =
1285 (ctxt->eflags & ~change_mask) | (val & change_mask);
1286
1287 return rc;
1288}
1289
1206static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) 1290static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
1207{ 1291{
1208 struct decode_cache *c = &ctxt->decode; 1292 struct decode_cache *c = &ctxt->decode;
@@ -1225,7 +1309,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1225 if (rc != 0) 1309 if (rc != 0)
1226 return rc; 1310 return rc;
1227 1311
1228 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg); 1312 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg);
1229 return rc; 1313 return rc;
1230} 1314}
1231 1315
@@ -1370,7 +1454,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1370 int rc; 1454 int rc;
1371 1455
1372 rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); 1456 rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
1373 if (rc != 0) 1457 if (rc != X86EMUL_CONTINUE)
1374 return rc; 1458 return rc;
1375 1459
1376 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || 1460 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
@@ -1385,7 +1469,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1385 (u32) c->regs[VCPU_REGS_RBX]; 1469 (u32) c->regs[VCPU_REGS_RBX];
1386 1470
1387 rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); 1471 rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
1388 if (rc != 0) 1472 if (rc != X86EMUL_CONTINUE)
1389 return rc; 1473 return rc;
1390 ctxt->eflags |= EFLG_ZF; 1474 ctxt->eflags |= EFLG_ZF;
1391 } 1475 }
@@ -1407,7 +1491,7 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1407 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); 1491 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1408 if (rc) 1492 if (rc)
1409 return rc; 1493 return rc;
1410 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS); 1494 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS);
1411 return rc; 1495 return rc;
1412} 1496}
1413 1497
@@ -1451,7 +1535,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
1451 &c->dst.val, 1535 &c->dst.val,
1452 c->dst.bytes, 1536 c->dst.bytes,
1453 ctxt->vcpu); 1537 ctxt->vcpu);
1454 if (rc != 0) 1538 if (rc != X86EMUL_CONTINUE)
1455 return rc; 1539 return rc;
1456 break; 1540 break;
1457 case OP_NONE: 1541 case OP_NONE:
@@ -1514,9 +1598,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
1514 u64 msr_data; 1598 u64 msr_data;
1515 1599
1516 /* syscall is not available in real mode */ 1600 /* syscall is not available in real mode */
1517 if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL 1601 if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86)
1518 || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) 1602 return X86EMUL_UNHANDLEABLE;
1519 return -1;
1520 1603
1521 setup_syscalls_segments(ctxt, &cs, &ss); 1604 setup_syscalls_segments(ctxt, &cs, &ss);
1522 1605
@@ -1553,7 +1636,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
1553 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); 1636 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1554 } 1637 }
1555 1638
1556 return 0; 1639 return X86EMUL_CONTINUE;
1557} 1640}
1558 1641
1559static int 1642static int
@@ -1563,22 +1646,17 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1563 struct kvm_segment cs, ss; 1646 struct kvm_segment cs, ss;
1564 u64 msr_data; 1647 u64 msr_data;
1565 1648
1566 /* inject #UD if LOCK prefix is used */ 1649 /* inject #GP if in real mode */
1567 if (c->lock_prefix) 1650 if (ctxt->mode == X86EMUL_MODE_REAL) {
1568 return -1;
1569
1570 /* inject #GP if in real mode or paging is disabled */
1571 if (ctxt->mode == X86EMUL_MODE_REAL ||
1572 !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
1573 kvm_inject_gp(ctxt->vcpu, 0); 1651 kvm_inject_gp(ctxt->vcpu, 0);
1574 return -1; 1652 return X86EMUL_UNHANDLEABLE;
1575 } 1653 }
1576 1654
1577 /* XXX sysenter/sysexit have not been tested in 64bit mode. 1655 /* XXX sysenter/sysexit have not been tested in 64bit mode.
1578 * Therefore, we inject an #UD. 1656 * Therefore, we inject an #UD.
1579 */ 1657 */
1580 if (ctxt->mode == X86EMUL_MODE_PROT64) 1658 if (ctxt->mode == X86EMUL_MODE_PROT64)
1581 return -1; 1659 return X86EMUL_UNHANDLEABLE;
1582 1660
1583 setup_syscalls_segments(ctxt, &cs, &ss); 1661 setup_syscalls_segments(ctxt, &cs, &ss);
1584 1662
@@ -1587,13 +1665,13 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1587 case X86EMUL_MODE_PROT32: 1665 case X86EMUL_MODE_PROT32:
1588 if ((msr_data & 0xfffc) == 0x0) { 1666 if ((msr_data & 0xfffc) == 0x0) {
1589 kvm_inject_gp(ctxt->vcpu, 0); 1667 kvm_inject_gp(ctxt->vcpu, 0);
1590 return -1; 1668 return X86EMUL_PROPAGATE_FAULT;
1591 } 1669 }
1592 break; 1670 break;
1593 case X86EMUL_MODE_PROT64: 1671 case X86EMUL_MODE_PROT64:
1594 if (msr_data == 0x0) { 1672 if (msr_data == 0x0) {
1595 kvm_inject_gp(ctxt->vcpu, 0); 1673 kvm_inject_gp(ctxt->vcpu, 0);
1596 return -1; 1674 return X86EMUL_PROPAGATE_FAULT;
1597 } 1675 }
1598 break; 1676 break;
1599 } 1677 }
@@ -1618,7 +1696,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1618 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); 1696 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
1619 c->regs[VCPU_REGS_RSP] = msr_data; 1697 c->regs[VCPU_REGS_RSP] = msr_data;
1620 1698
1621 return 0; 1699 return X86EMUL_CONTINUE;
1622} 1700}
1623 1701
1624static int 1702static int
@@ -1629,21 +1707,11 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1629 u64 msr_data; 1707 u64 msr_data;
1630 int usermode; 1708 int usermode;
1631 1709
1632 /* inject #UD if LOCK prefix is used */ 1710 /* inject #GP if in real mode or Virtual 8086 mode */
1633 if (c->lock_prefix) 1711 if (ctxt->mode == X86EMUL_MODE_REAL ||
1634 return -1; 1712 ctxt->mode == X86EMUL_MODE_VM86) {
1635
1636 /* inject #GP if in real mode or paging is disabled */
1637 if (ctxt->mode == X86EMUL_MODE_REAL
1638 || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
1639 kvm_inject_gp(ctxt->vcpu, 0);
1640 return -1;
1641 }
1642
1643 /* sysexit must be called from CPL 0 */
1644 if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
1645 kvm_inject_gp(ctxt->vcpu, 0); 1713 kvm_inject_gp(ctxt->vcpu, 0);
1646 return -1; 1714 return X86EMUL_UNHANDLEABLE;
1647 } 1715 }
1648 1716
1649 setup_syscalls_segments(ctxt, &cs, &ss); 1717 setup_syscalls_segments(ctxt, &cs, &ss);
@@ -1661,7 +1729,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1661 cs.selector = (u16)(msr_data + 16); 1729 cs.selector = (u16)(msr_data + 16);
1662 if ((msr_data & 0xfffc) == 0x0) { 1730 if ((msr_data & 0xfffc) == 0x0) {
1663 kvm_inject_gp(ctxt->vcpu, 0); 1731 kvm_inject_gp(ctxt->vcpu, 0);
1664 return -1; 1732 return X86EMUL_PROPAGATE_FAULT;
1665 } 1733 }
1666 ss.selector = (u16)(msr_data + 24); 1734 ss.selector = (u16)(msr_data + 24);
1667 break; 1735 break;
@@ -1669,7 +1737,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1669 cs.selector = (u16)(msr_data + 32); 1737 cs.selector = (u16)(msr_data + 32);
1670 if (msr_data == 0x0) { 1738 if (msr_data == 0x0) {
1671 kvm_inject_gp(ctxt->vcpu, 0); 1739 kvm_inject_gp(ctxt->vcpu, 0);
1672 return -1; 1740 return X86EMUL_PROPAGATE_FAULT;
1673 } 1741 }
1674 ss.selector = cs.selector + 8; 1742 ss.selector = cs.selector + 8;
1675 cs.db = 0; 1743 cs.db = 0;
@@ -1685,7 +1753,58 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1685 c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; 1753 c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
1686 c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; 1754 c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
1687 1755
1688 return 0; 1756 return X86EMUL_CONTINUE;
1757}
1758
1759static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
1760{
1761 int iopl;
1762 if (ctxt->mode == X86EMUL_MODE_REAL)
1763 return false;
1764 if (ctxt->mode == X86EMUL_MODE_VM86)
1765 return true;
1766 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1767 return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl;
1768}
1769
1770static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
1771 struct x86_emulate_ops *ops,
1772 u16 port, u16 len)
1773{
1774 struct kvm_segment tr_seg;
1775 int r;
1776 u16 io_bitmap_ptr;
1777 u8 perm, bit_idx = port & 0x7;
1778 unsigned mask = (1 << len) - 1;
1779
1780 kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
1781 if (tr_seg.unusable)
1782 return false;
1783 if (tr_seg.limit < 103)
1784 return false;
1785 r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
1786 NULL);
1787 if (r != X86EMUL_CONTINUE)
1788 return false;
1789 if (io_bitmap_ptr + port/8 > tr_seg.limit)
1790 return false;
1791 r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
1792 ctxt->vcpu, NULL);
1793 if (r != X86EMUL_CONTINUE)
1794 return false;
1795 if ((perm >> bit_idx) & mask)
1796 return false;
1797 return true;
1798}
1799
1800static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
1801 struct x86_emulate_ops *ops,
1802 u16 port, u16 len)
1803{
1804 if (emulator_bad_iopl(ctxt))
1805 if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
1806 return false;
1807 return true;
1689} 1808}
1690 1809
1691int 1810int
@@ -1709,6 +1828,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1709 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 1828 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
1710 saved_eip = c->eip; 1829 saved_eip = c->eip;
1711 1830
1831 /* LOCK prefix is allowed only with some instructions */
1832 if (c->lock_prefix && !(c->d & Lock)) {
1833 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1834 goto done;
1835 }
1836
1837	/* Privileged instructions can be executed only at CPL 0 */
1838 if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) {
1839 kvm_inject_gp(ctxt->vcpu, 0);
1840 goto done;
1841 }
1842
1712 if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) 1843 if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
1713 memop = c->modrm_ea; 1844 memop = c->modrm_ea;
1714 1845
@@ -1749,7 +1880,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1749 &c->src.val, 1880 &c->src.val,
1750 c->src.bytes, 1881 c->src.bytes,
1751 ctxt->vcpu); 1882 ctxt->vcpu);
1752 if (rc != 0) 1883 if (rc != X86EMUL_CONTINUE)
1753 goto done; 1884 goto done;
1754 c->src.orig_val = c->src.val; 1885 c->src.orig_val = c->src.val;
1755 } 1886 }
@@ -1768,12 +1899,15 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1768 c->dst.ptr = (void *)c->dst.ptr + 1899 c->dst.ptr = (void *)c->dst.ptr +
1769 (c->src.val & mask) / 8; 1900 (c->src.val & mask) / 8;
1770 } 1901 }
1771 if (!(c->d & Mov) && 1902 if (!(c->d & Mov)) {
1772 /* optimisation - avoid slow emulated read */ 1903 /* optimisation - avoid slow emulated read */
1773 ((rc = ops->read_emulated((unsigned long)c->dst.ptr, 1904 rc = ops->read_emulated((unsigned long)c->dst.ptr,
1774 &c->dst.val, 1905 &c->dst.val,
1775 c->dst.bytes, ctxt->vcpu)) != 0)) 1906 c->dst.bytes,
1776 goto done; 1907 ctxt->vcpu);
1908 if (rc != X86EMUL_CONTINUE)
1909 goto done;
1910 }
1777 } 1911 }
1778 c->dst.orig_val = c->dst.val; 1912 c->dst.orig_val = c->dst.val;
1779 1913
@@ -1876,7 +2010,12 @@ special_insn:
1876 break; 2010 break;
1877 case 0x6c: /* insb */ 2011 case 0x6c: /* insb */
1878 case 0x6d: /* insw/insd */ 2012 case 0x6d: /* insw/insd */
1879 if (kvm_emulate_pio_string(ctxt->vcpu, 2013 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2014 (c->d & ByteOp) ? 1 : c->op_bytes)) {
2015 kvm_inject_gp(ctxt->vcpu, 0);
2016 goto done;
2017 }
2018 if (kvm_emulate_pio_string(ctxt->vcpu,
1880 1, 2019 1,
1881 (c->d & ByteOp) ? 1 : c->op_bytes, 2020 (c->d & ByteOp) ? 1 : c->op_bytes,
1882 c->rep_prefix ? 2021 c->rep_prefix ?
@@ -1892,6 +2031,11 @@ special_insn:
1892 return 0; 2031 return 0;
1893 case 0x6e: /* outsb */ 2032 case 0x6e: /* outsb */
1894 case 0x6f: /* outsw/outsd */ 2033 case 0x6f: /* outsw/outsd */
2034 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2035 (c->d & ByteOp) ? 1 : c->op_bytes)) {
2036 kvm_inject_gp(ctxt->vcpu, 0);
2037 goto done;
2038 }
1895 if (kvm_emulate_pio_string(ctxt->vcpu, 2039 if (kvm_emulate_pio_string(ctxt->vcpu,
1896 0, 2040 0,
1897 (c->d & ByteOp) ? 1 : c->op_bytes, 2041 (c->d & ByteOp) ? 1 : c->op_bytes,
@@ -1978,25 +2122,19 @@ special_insn:
1978 break; 2122 break;
1979 case 0x8e: { /* mov seg, r/m16 */ 2123 case 0x8e: { /* mov seg, r/m16 */
1980 uint16_t sel; 2124 uint16_t sel;
1981 int type_bits;
1982 int err;
1983 2125
1984 sel = c->src.val; 2126 sel = c->src.val;
1985 if (c->modrm_reg == VCPU_SREG_SS)
1986 toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
1987 2127
1988 if (c->modrm_reg <= 5) { 2128 if (c->modrm_reg == VCPU_SREG_CS ||
1989 type_bits = (c->modrm_reg == 1) ? 9 : 1; 2129 c->modrm_reg > VCPU_SREG_GS) {
1990 err = kvm_load_segment_descriptor(ctxt->vcpu, sel, 2130 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1991 type_bits, c->modrm_reg); 2131 goto done;
1992 } else {
1993 printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
1994 c->modrm);
1995 goto cannot_emulate;
1996 } 2132 }
1997 2133
1998 if (err < 0) 2134 if (c->modrm_reg == VCPU_SREG_SS)
1999 goto cannot_emulate; 2135 toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
2136
2137 rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg);
2000 2138
2001 c->dst.type = OP_NONE; /* Disable writeback. */ 2139 c->dst.type = OP_NONE; /* Disable writeback. */
2002 break; 2140 break;
@@ -2025,7 +2163,10 @@ special_insn:
2025 c->dst.type = OP_REG; 2163 c->dst.type = OP_REG;
2026 c->dst.ptr = (unsigned long *) &ctxt->eflags; 2164 c->dst.ptr = (unsigned long *) &ctxt->eflags;
2027 c->dst.bytes = c->op_bytes; 2165 c->dst.bytes = c->op_bytes;
2028 goto pop_instruction; 2166 rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
2167 if (rc != X86EMUL_CONTINUE)
2168 goto done;
2169 break;
2029 case 0xa0 ... 0xa1: /* mov */ 2170 case 0xa0 ... 0xa1: /* mov */
2030 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 2171 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2031 c->dst.val = c->src.val; 2172 c->dst.val = c->src.val;
@@ -2039,11 +2180,12 @@ special_insn:
2039 c->dst.ptr = (unsigned long *)register_address(c, 2180 c->dst.ptr = (unsigned long *)register_address(c,
2040 es_base(ctxt), 2181 es_base(ctxt),
2041 c->regs[VCPU_REGS_RDI]); 2182 c->regs[VCPU_REGS_RDI]);
2042 if ((rc = ops->read_emulated(register_address(c, 2183 rc = ops->read_emulated(register_address(c,
2043 seg_override_base(ctxt, c), 2184 seg_override_base(ctxt, c),
2044 c->regs[VCPU_REGS_RSI]), 2185 c->regs[VCPU_REGS_RSI]),
2045 &c->dst.val, 2186 &c->dst.val,
2046 c->dst.bytes, ctxt->vcpu)) != 0) 2187 c->dst.bytes, ctxt->vcpu);
2188 if (rc != X86EMUL_CONTINUE)
2047 goto done; 2189 goto done;
2048 register_address_increment(c, &c->regs[VCPU_REGS_RSI], 2190 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2049 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 2191 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2058,10 +2200,11 @@ special_insn:
2058 c->src.ptr = (unsigned long *)register_address(c, 2200 c->src.ptr = (unsigned long *)register_address(c,
2059 seg_override_base(ctxt, c), 2201 seg_override_base(ctxt, c),
2060 c->regs[VCPU_REGS_RSI]); 2202 c->regs[VCPU_REGS_RSI]);
2061 if ((rc = ops->read_emulated((unsigned long)c->src.ptr, 2203 rc = ops->read_emulated((unsigned long)c->src.ptr,
2062 &c->src.val, 2204 &c->src.val,
2063 c->src.bytes, 2205 c->src.bytes,
2064 ctxt->vcpu)) != 0) 2206 ctxt->vcpu);
2207 if (rc != X86EMUL_CONTINUE)
2065 goto done; 2208 goto done;
2066 2209
2067 c->dst.type = OP_NONE; /* Disable writeback. */ 2210 c->dst.type = OP_NONE; /* Disable writeback. */
@@ -2069,10 +2212,11 @@ special_insn:
2069 c->dst.ptr = (unsigned long *)register_address(c, 2212 c->dst.ptr = (unsigned long *)register_address(c,
2070 es_base(ctxt), 2213 es_base(ctxt),
2071 c->regs[VCPU_REGS_RDI]); 2214 c->regs[VCPU_REGS_RDI]);
2072 if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, 2215 rc = ops->read_emulated((unsigned long)c->dst.ptr,
2073 &c->dst.val, 2216 &c->dst.val,
2074 c->dst.bytes, 2217 c->dst.bytes,
2075 ctxt->vcpu)) != 0) 2218 ctxt->vcpu);
2219 if (rc != X86EMUL_CONTINUE)
2076 goto done; 2220 goto done;
2077 2221
2078 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); 2222 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
@@ -2102,12 +2246,13 @@ special_insn:
2102 c->dst.type = OP_REG; 2246 c->dst.type = OP_REG;
2103 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 2247 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2104 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 2248 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2105 if ((rc = ops->read_emulated(register_address(c, 2249 rc = ops->read_emulated(register_address(c,
2106 seg_override_base(ctxt, c), 2250 seg_override_base(ctxt, c),
2107 c->regs[VCPU_REGS_RSI]), 2251 c->regs[VCPU_REGS_RSI]),
2108 &c->dst.val, 2252 &c->dst.val,
2109 c->dst.bytes, 2253 c->dst.bytes,
2110 ctxt->vcpu)) != 0) 2254 ctxt->vcpu);
2255 if (rc != X86EMUL_CONTINUE)
2111 goto done; 2256 goto done;
2112 register_address_increment(c, &c->regs[VCPU_REGS_RSI], 2257 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2113 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 2258 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2163,11 +2308,9 @@ special_insn:
2163 case 0xe9: /* jmp rel */ 2308 case 0xe9: /* jmp rel */
2164 goto jmp; 2309 goto jmp;
2165 case 0xea: /* jmp far */ 2310 case 0xea: /* jmp far */
2166 if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9, 2311 if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val,
2167 VCPU_SREG_CS) < 0) { 2312 VCPU_SREG_CS))
2168 DPRINTF("jmp far: Failed to load CS descriptor\n"); 2313 goto done;
2169 goto cannot_emulate;
2170 }
2171 2314
2172 c->eip = c->src.val; 2315 c->eip = c->src.val;
2173 break; 2316 break;
@@ -2185,7 +2328,13 @@ special_insn:
2185 case 0xef: /* out (e/r)ax,dx */ 2328 case 0xef: /* out (e/r)ax,dx */
2186 port = c->regs[VCPU_REGS_RDX]; 2329 port = c->regs[VCPU_REGS_RDX];
2187 io_dir_in = 0; 2330 io_dir_in = 0;
2188 do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, 2331 do_io:
2332 if (!emulator_io_permited(ctxt, ops, port,
2333 (c->d & ByteOp) ? 1 : c->op_bytes)) {
2334 kvm_inject_gp(ctxt->vcpu, 0);
2335 goto done;
2336 }
2337 if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
2189 (c->d & ByteOp) ? 1 : c->op_bytes, 2338 (c->d & ByteOp) ? 1 : c->op_bytes,
2190 port) != 0) { 2339 port) != 0) {
2191 c->eip = saved_eip; 2340 c->eip = saved_eip;
@@ -2210,13 +2359,21 @@ special_insn:
2210 c->dst.type = OP_NONE; /* Disable writeback. */ 2359 c->dst.type = OP_NONE; /* Disable writeback. */
2211 break; 2360 break;
2212 case 0xfa: /* cli */ 2361 case 0xfa: /* cli */
2213 ctxt->eflags &= ~X86_EFLAGS_IF; 2362 if (emulator_bad_iopl(ctxt))
2214 c->dst.type = OP_NONE; /* Disable writeback. */ 2363 kvm_inject_gp(ctxt->vcpu, 0);
2364 else {
2365 ctxt->eflags &= ~X86_EFLAGS_IF;
2366 c->dst.type = OP_NONE; /* Disable writeback. */
2367 }
2215 break; 2368 break;
2216 case 0xfb: /* sti */ 2369 case 0xfb: /* sti */
2217 toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); 2370 if (emulator_bad_iopl(ctxt))
2218 ctxt->eflags |= X86_EFLAGS_IF; 2371 kvm_inject_gp(ctxt->vcpu, 0);
2219 c->dst.type = OP_NONE; /* Disable writeback. */ 2372 else {
2373 toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
2374 ctxt->eflags |= X86_EFLAGS_IF;
2375 c->dst.type = OP_NONE; /* Disable writeback. */
2376 }
2220 break; 2377 break;
2221 case 0xfc: /* cld */ 2378 case 0xfc: /* cld */
2222 ctxt->eflags &= ~EFLG_DF; 2379 ctxt->eflags &= ~EFLG_DF;
@@ -2319,8 +2476,9 @@ twobyte_insn:
2319 } 2476 }
2320 break; 2477 break;
2321 case 0x05: /* syscall */ 2478 case 0x05: /* syscall */
2322 if (emulate_syscall(ctxt) == -1) 2479 rc = emulate_syscall(ctxt);
2323 goto cannot_emulate; 2480 if (rc != X86EMUL_CONTINUE)
2481 goto done;
2324 else 2482 else
2325 goto writeback; 2483 goto writeback;
2326 break; 2484 break;
@@ -2391,14 +2549,16 @@ twobyte_insn:
2391 c->dst.type = OP_NONE; 2549 c->dst.type = OP_NONE;
2392 break; 2550 break;
2393 case 0x34: /* sysenter */ 2551 case 0x34: /* sysenter */
2394 if (emulate_sysenter(ctxt) == -1) 2552 rc = emulate_sysenter(ctxt);
2395 goto cannot_emulate; 2553 if (rc != X86EMUL_CONTINUE)
2554 goto done;
2396 else 2555 else
2397 goto writeback; 2556 goto writeback;
2398 break; 2557 break;
2399 case 0x35: /* sysexit */ 2558 case 0x35: /* sysexit */
2400 if (emulate_sysexit(ctxt) == -1) 2559 rc = emulate_sysexit(ctxt);
2401 goto cannot_emulate; 2560 if (rc != X86EMUL_CONTINUE)
2561 goto done;
2402 else 2562 else
2403 goto writeback; 2563 goto writeback;
2404 break; 2564 break;
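
Several of the emulator changes above (the new CLI/STI handling, emulate_popf() and the IN/OUT port checks) hinge on comparing the guest's current privilege level with the IOPL field of EFLAGS, bits 12-13 per the EFLG_IOPL definition added earlier: when CPL exceeds IOPL, CLI/STI fault and port I/O falls back to the TSS I/O permission bitmap check. A small standalone sketch of the privilege test, mirroring emulator_bad_iopl() (the shift of 12 follows from the (3 << 12) mask; sample values are illustrative only):

#include <stdint.h>
#include <stdio.h>

#define EFLG_IOPL	(3u << 12)	/* I/O privilege level field, bits 12-13 */
#define IOPL_SHIFT	12		/* low bit of the IOPL field */

/* Returns nonzero when the caller's CPL is not privileged enough for IOPL. */
static int bad_iopl(uint32_t eflags, int cpl)
{
	int iopl = (eflags & EFLG_IOPL) >> IOPL_SHIFT;

	return cpl > iopl;
}

int main(void)
{
	uint32_t eflags = 1u << 12;	/* IOPL = 1 */

	printf("CPL 0 -> %s\n", bad_iopl(eflags, 0) ? "not allowed" : "allowed");
	printf("CPL 3 -> %s\n", bad_iopl(eflags, 3) ? "not allowed" : "allowed");
	return 0;
}

The real-mode and VM86 special cases handled by emulator_bad_iopl() in the patch are omitted from this sketch.
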
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 15578f180e5..294698b6daf 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -242,11 +242,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
242{ 242{
243 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, 243 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
244 irq_ack_notifier); 244 irq_ack_notifier);
245 spin_lock(&ps->inject_lock); 245 raw_spin_lock(&ps->inject_lock);
246 if (atomic_dec_return(&ps->pit_timer.pending) < 0) 246 if (atomic_dec_return(&ps->pit_timer.pending) < 0)
247 atomic_inc(&ps->pit_timer.pending); 247 atomic_inc(&ps->pit_timer.pending);
248 ps->irq_ack = 1; 248 ps->irq_ack = 1;
249 spin_unlock(&ps->inject_lock); 249 raw_spin_unlock(&ps->inject_lock);
250} 250}
251 251
252void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) 252void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -605,7 +605,7 @@ static const struct kvm_io_device_ops speaker_dev_ops = {
605 .write = speaker_ioport_write, 605 .write = speaker_ioport_write,
606}; 606};
607 607
608/* Caller must have writers lock on slots_lock */ 608/* Caller must hold slots_lock */
609struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) 609struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
610{ 610{
611 struct kvm_pit *pit; 611 struct kvm_pit *pit;
@@ -624,7 +624,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
624 624
625 mutex_init(&pit->pit_state.lock); 625 mutex_init(&pit->pit_state.lock);
626 mutex_lock(&pit->pit_state.lock); 626 mutex_lock(&pit->pit_state.lock);
627 spin_lock_init(&pit->pit_state.inject_lock); 627 raw_spin_lock_init(&pit->pit_state.inject_lock);
628 628
629 kvm->arch.vpit = pit; 629 kvm->arch.vpit = pit;
630 pit->kvm = kvm; 630 pit->kvm = kvm;
@@ -645,13 +645,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
645 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); 645 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
646 646
647 kvm_iodevice_init(&pit->dev, &pit_dev_ops); 647 kvm_iodevice_init(&pit->dev, &pit_dev_ops);
648 ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); 648 ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
649 if (ret < 0) 649 if (ret < 0)
650 goto fail; 650 goto fail;
651 651
652 if (flags & KVM_PIT_SPEAKER_DUMMY) { 652 if (flags & KVM_PIT_SPEAKER_DUMMY) {
653 kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); 653 kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
654 ret = __kvm_io_bus_register_dev(&kvm->pio_bus, 654 ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
655 &pit->speaker_dev); 655 &pit->speaker_dev);
656 if (ret < 0) 656 if (ret < 0)
657 goto fail_unregister; 657 goto fail_unregister;
@@ -660,11 +660,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
660 return pit; 660 return pit;
661 661
662fail_unregister: 662fail_unregister:
663 __kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev); 663 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
664 664
665fail: 665fail:
666 if (pit->irq_source_id >= 0) 666 kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
667 kvm_free_irq_source_id(kvm, pit->irq_source_id); 667 kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
668 kvm_free_irq_source_id(kvm, pit->irq_source_id);
668 669
669 kfree(pit); 670 kfree(pit);
670 return NULL; 671 return NULL;
@@ -723,12 +724,12 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
723 /* Try to inject pending interrupts when 724 /* Try to inject pending interrupts when
724 * last one has been acked. 725 * last one has been acked.
725 */ 726 */
726 spin_lock(&ps->inject_lock); 727 raw_spin_lock(&ps->inject_lock);
727 if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { 728 if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
728 ps->irq_ack = 0; 729 ps->irq_ack = 0;
729 inject = 1; 730 inject = 1;
730 } 731 }
731 spin_unlock(&ps->inject_lock); 732 raw_spin_unlock(&ps->inject_lock);
732 if (inject) 733 if (inject)
733 __inject_pit_timer_intr(kvm); 734 __inject_pit_timer_intr(kvm);
734 } 735 }
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index d4c1c7ffdc0..900d6b0ba7c 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -27,7 +27,7 @@ struct kvm_kpit_state {
27 u32 speaker_data_on; 27 u32 speaker_data_on;
28 struct mutex lock; 28 struct mutex lock;
29 struct kvm_pit *pit; 29 struct kvm_pit *pit;
30 spinlock_t inject_lock; 30 raw_spinlock_t inject_lock;
31 unsigned long irq_ack; 31 unsigned long irq_ack;
32 struct kvm_irq_ack_notifier irq_ack_notifier; 32 struct kvm_irq_ack_notifier irq_ack_notifier;
33}; 33};
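The i8254 hunks convert the PIT's inject_lock from spinlock_t to raw_spinlock_t so interrupt injection keeps a true spinning lock even on preempt-rt kernels, where plain spinlocks can sleep. A minimal sketch of the pattern, using a made-up state struct rather than struct kvm_kpit_state:

#include <linux/spinlock.h>

struct pit_like_state {
	raw_spinlock_t inject_lock;	/* stays a spinning lock on PREEMPT_RT */
	int irq_ack;
};

static void pit_like_ack(struct pit_like_state *ps)
{
	/* raw_spin_lock_init(&ps->inject_lock) is assumed to have run at setup */
	raw_spin_lock(&ps->inject_lock);	/* short, bounded critical section */
	ps->irq_ack = 1;
	raw_spin_unlock(&ps->inject_lock);
}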
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index d057c0cbd24..07771da85de 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -44,18 +44,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
44 * Other interrupt may be delivered to PIC while lock is dropped but 44 * Other interrupt may be delivered to PIC while lock is dropped but
45 * it should be safe since PIC state is already updated at this stage. 45 * it should be safe since PIC state is already updated at this stage.
46 */ 46 */
47 spin_unlock(&s->pics_state->lock); 47 raw_spin_unlock(&s->pics_state->lock);
48 kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); 48 kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
49 spin_lock(&s->pics_state->lock); 49 raw_spin_lock(&s->pics_state->lock);
50} 50}
51 51
52void kvm_pic_clear_isr_ack(struct kvm *kvm) 52void kvm_pic_clear_isr_ack(struct kvm *kvm)
53{ 53{
54 struct kvm_pic *s = pic_irqchip(kvm); 54 struct kvm_pic *s = pic_irqchip(kvm);
55 spin_lock(&s->lock); 55
56 raw_spin_lock(&s->lock);
56 s->pics[0].isr_ack = 0xff; 57 s->pics[0].isr_ack = 0xff;
57 s->pics[1].isr_ack = 0xff; 58 s->pics[1].isr_ack = 0xff;
58 spin_unlock(&s->lock); 59 raw_spin_unlock(&s->lock);
59} 60}
60 61
61/* 62/*
@@ -156,9 +157,9 @@ static void pic_update_irq(struct kvm_pic *s)
156 157
157void kvm_pic_update_irq(struct kvm_pic *s) 158void kvm_pic_update_irq(struct kvm_pic *s)
158{ 159{
159 spin_lock(&s->lock); 160 raw_spin_lock(&s->lock);
160 pic_update_irq(s); 161 pic_update_irq(s);
161 spin_unlock(&s->lock); 162 raw_spin_unlock(&s->lock);
162} 163}
163 164
164int kvm_pic_set_irq(void *opaque, int irq, int level) 165int kvm_pic_set_irq(void *opaque, int irq, int level)
@@ -166,14 +167,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
166 struct kvm_pic *s = opaque; 167 struct kvm_pic *s = opaque;
167 int ret = -1; 168 int ret = -1;
168 169
169 spin_lock(&s->lock); 170 raw_spin_lock(&s->lock);
170 if (irq >= 0 && irq < PIC_NUM_PINS) { 171 if (irq >= 0 && irq < PIC_NUM_PINS) {
171 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 172 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
172 pic_update_irq(s); 173 pic_update_irq(s);
173 trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, 174 trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
174 s->pics[irq >> 3].imr, ret == 0); 175 s->pics[irq >> 3].imr, ret == 0);
175 } 176 }
176 spin_unlock(&s->lock); 177 raw_spin_unlock(&s->lock);
177 178
178 return ret; 179 return ret;
179} 180}
@@ -203,7 +204,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
203 int irq, irq2, intno; 204 int irq, irq2, intno;
204 struct kvm_pic *s = pic_irqchip(kvm); 205 struct kvm_pic *s = pic_irqchip(kvm);
205 206
206 spin_lock(&s->lock); 207 raw_spin_lock(&s->lock);
207 irq = pic_get_irq(&s->pics[0]); 208 irq = pic_get_irq(&s->pics[0]);
208 if (irq >= 0) { 209 if (irq >= 0) {
209 pic_intack(&s->pics[0], irq); 210 pic_intack(&s->pics[0], irq);
@@ -228,7 +229,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
228 intno = s->pics[0].irq_base + irq; 229 intno = s->pics[0].irq_base + irq;
229 } 230 }
230 pic_update_irq(s); 231 pic_update_irq(s);
231 spin_unlock(&s->lock); 232 raw_spin_unlock(&s->lock);
232 233
233 return intno; 234 return intno;
234} 235}
@@ -442,7 +443,7 @@ static int picdev_write(struct kvm_io_device *this,
442 printk(KERN_ERR "PIC: non byte write\n"); 443 printk(KERN_ERR "PIC: non byte write\n");
443 return 0; 444 return 0;
444 } 445 }
445 spin_lock(&s->lock); 446 raw_spin_lock(&s->lock);
446 switch (addr) { 447 switch (addr) {
447 case 0x20: 448 case 0x20:
448 case 0x21: 449 case 0x21:
@@ -455,7 +456,7 @@ static int picdev_write(struct kvm_io_device *this,
455 elcr_ioport_write(&s->pics[addr & 1], addr, data); 456 elcr_ioport_write(&s->pics[addr & 1], addr, data);
456 break; 457 break;
457 } 458 }
458 spin_unlock(&s->lock); 459 raw_spin_unlock(&s->lock);
459 return 0; 460 return 0;
460} 461}
461 462
@@ -472,7 +473,7 @@ static int picdev_read(struct kvm_io_device *this,
472 printk(KERN_ERR "PIC: non byte read\n"); 473 printk(KERN_ERR "PIC: non byte read\n");
473 return 0; 474 return 0;
474 } 475 }
475 spin_lock(&s->lock); 476 raw_spin_lock(&s->lock);
476 switch (addr) { 477 switch (addr) {
477 case 0x20: 478 case 0x20:
478 case 0x21: 479 case 0x21:
@@ -486,7 +487,7 @@ static int picdev_read(struct kvm_io_device *this,
486 break; 487 break;
487 } 488 }
488 *(unsigned char *)val = data; 489 *(unsigned char *)val = data;
489 spin_unlock(&s->lock); 490 raw_spin_unlock(&s->lock);
490 return 0; 491 return 0;
491} 492}
492 493
@@ -520,7 +521,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
520 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); 521 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
521 if (!s) 522 if (!s)
522 return NULL; 523 return NULL;
523 spin_lock_init(&s->lock); 524 raw_spin_lock_init(&s->lock);
524 s->kvm = kvm; 525 s->kvm = kvm;
525 s->pics[0].elcr_mask = 0xf8; 526 s->pics[0].elcr_mask = 0xf8;
526 s->pics[1].elcr_mask = 0xde; 527 s->pics[1].elcr_mask = 0xde;
@@ -533,7 +534,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
533 * Initialize PIO device 534 * Initialize PIO device
534 */ 535 */
535 kvm_iodevice_init(&s->dev, &picdev_ops); 536 kvm_iodevice_init(&s->dev, &picdev_ops);
536 ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev); 537 mutex_lock(&kvm->slots_lock);
538 ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
539 mutex_unlock(&kvm->slots_lock);
537 if (ret < 0) { 540 if (ret < 0) {
538 kfree(s); 541 kfree(s);
539 return NULL; 542 return NULL;
@@ -541,3 +544,14 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
541 544
542 return s; 545 return s;
543} 546}
547
548void kvm_destroy_pic(struct kvm *kvm)
549{
550 struct kvm_pic *vpic = kvm->arch.vpic;
551
552 if (vpic) {
553 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
554 kvm->arch.vpic = NULL;
555 kfree(vpic);
556 }
557}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index be399e207d5..34b15915754 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -62,7 +62,7 @@ struct kvm_kpic_state {
62}; 62};
63 63
64struct kvm_pic { 64struct kvm_pic {
65 spinlock_t lock; 65 raw_spinlock_t lock;
66 unsigned pending_acks; 66 unsigned pending_acks;
67 struct kvm *kvm; 67 struct kvm *kvm;
68 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 68 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
@@ -75,6 +75,7 @@ struct kvm_pic {
75}; 75};
76 76
77struct kvm_pic *kvm_create_pic(struct kvm *kvm); 77struct kvm_pic *kvm_create_pic(struct kvm *kvm);
78void kvm_destroy_pic(struct kvm *kvm);
78int kvm_pic_read_irq(struct kvm *kvm); 79int kvm_pic_read_irq(struct kvm *kvm);
79void kvm_pic_update_irq(struct kvm_pic *s); 80void kvm_pic_update_irq(struct kvm_pic *s);
80void kvm_pic_clear_isr_ack(struct kvm *kvm); 81void kvm_pic_clear_isr_ack(struct kvm *kvm);
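The new kvm_destroy_pic() simply undoes what kvm_create_pic() set up: unregister the device from the PIO bus, clear the back-pointer, free the allocation. A rough sketch of that create/destroy pairing with a hypothetical device wrapper (only the bus calls shown in the diff are real KVM API):

struct pio_dev {
	struct kvm_io_device dev;
};

static void destroy_pio_dev(struct kvm *kvm, struct pio_dev *d)
{
	if (!d)
		return;
	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &d->dev);	/* undo registration */
	kfree(d);						/* then release memory */
}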
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 7bcc5b6a440..cff851cf532 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -1,6 +1,11 @@
1#ifndef ASM_KVM_CACHE_REGS_H 1#ifndef ASM_KVM_CACHE_REGS_H
2#define ASM_KVM_CACHE_REGS_H 2#define ASM_KVM_CACHE_REGS_H
3 3
4#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
5#define KVM_POSSIBLE_CR4_GUEST_BITS \
6 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
7 | X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
8
4static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, 9static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
5 enum kvm_reg reg) 10 enum kvm_reg reg)
6{ 11{
@@ -38,4 +43,30 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
38 return vcpu->arch.pdptrs[index]; 43 return vcpu->arch.pdptrs[index];
39} 44}
40 45
46static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
47{
48 ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
49 if (tmask & vcpu->arch.cr0_guest_owned_bits)
50 kvm_x86_ops->decache_cr0_guest_bits(vcpu);
51 return vcpu->arch.cr0 & mask;
52}
53
54static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
55{
56 return kvm_read_cr0_bits(vcpu, ~0UL);
57}
58
59static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
60{
61 ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
62 if (tmask & vcpu->arch.cr4_guest_owned_bits)
63 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
64 return vcpu->arch.cr4 & mask;
65}
66
67static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
68{
69 return kvm_read_cr4_bits(vcpu, ~0UL);
70}
71
41#endif 72#endif
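The new kvm_read_cr0_bits()/kvm_read_cr4_bits() helpers call back into vendor code only when a requested bit is currently guest-owned, i.e. possibly stale in the software cache. A simplified sketch of that lazy-decache read, with a hypothetical cache struct standing in for struct kvm_vcpu:

struct cr_cache {
	unsigned long value;		/* last value synced from hardware */
	unsigned long guest_owned;	/* bits the guest can flip without an exit */
};

static unsigned long read_cr_bits(struct cr_cache *c, unsigned long mask,
				  unsigned long possible_guest_bits,
				  void (*decache)(struct cr_cache *))
{
	unsigned long tmask = mask & possible_guest_bits;

	if (tmask & c->guest_owned)	/* refresh only when a stale bit is asked for */
		decache(c);		/* vendor hook re-reads the hardware register */
	return c->value & mask;
}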
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ba8c045da78..4b224f90087 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1246,3 +1246,34 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
1246 1246
1247 return 0; 1247 return 0;
1248} 1248}
1249
1250int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
1251{
1252 struct kvm_lapic *apic = vcpu->arch.apic;
1253
1254 if (!irqchip_in_kernel(vcpu->kvm))
1255 return 1;
1256
1257 /* if this is ICR write vector before command */
1258 if (reg == APIC_ICR)
1259 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
1260 return apic_reg_write(apic, reg, (u32)data);
1261}
1262
1263int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
1264{
1265 struct kvm_lapic *apic = vcpu->arch.apic;
1266 u32 low, high = 0;
1267
1268 if (!irqchip_in_kernel(vcpu->kvm))
1269 return 1;
1270
1271 if (apic_reg_read(apic, reg, 4, &low))
1272 return 1;
1273 if (reg == APIC_ICR)
1274 apic_reg_read(apic, APIC_ICR2, 4, &high);
1275
1276 *data = (((u64)high) << 32) | low;
1277
1278 return 0;
1279}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 40010b09c4a..f5fe32c5eda 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -48,4 +48,12 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
48 48
49int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); 49int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
50int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); 50int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
51
52int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
53int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
54
55static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
56{
57 return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
58}
51#endif 59#endif
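kvm_hv_vapic_msr_write()/read() map one 64-bit synthetic MSR onto two 32-bit APIC registers, writing ICR2 (the high half) before ICR so the vector/destination is in place when the command lands. A tiny sketch of the split and reassembly, independent of the real APIC helpers:

#include <stdint.h>

static void msr_to_halves(uint64_t data, uint32_t *low, uint32_t *high)
{
	*high = (uint32_t)(data >> 32);	/* ICR2: written first */
	*low  = (uint32_t)data;		/* ICR: written second, triggers the IPI */
}

static uint64_t halves_to_msr(uint32_t low, uint32_t high)
{
	return ((uint64_t)high << 32) | low;
}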
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 89a49fb46a2..741373e8ca7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include "mmu.h" 20#include "mmu.h"
21#include "x86.h"
21#include "kvm_cache_regs.h" 22#include "kvm_cache_regs.h"
22 23
23#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
@@ -29,6 +30,7 @@
29#include <linux/swap.h> 30#include <linux/swap.h>
30#include <linux/hugetlb.h> 31#include <linux/hugetlb.h>
31#include <linux/compiler.h> 32#include <linux/compiler.h>
33#include <linux/srcu.h>
32 34
33#include <asm/page.h> 35#include <asm/page.h>
34#include <asm/cmpxchg.h> 36#include <asm/cmpxchg.h>
@@ -136,16 +138,6 @@ module_param(oos_shadow, bool, 0644);
136#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ 138#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
137 | PT64_NX_MASK) 139 | PT64_NX_MASK)
138 140
139#define PFERR_PRESENT_MASK (1U << 0)
140#define PFERR_WRITE_MASK (1U << 1)
141#define PFERR_USER_MASK (1U << 2)
142#define PFERR_RSVD_MASK (1U << 3)
143#define PFERR_FETCH_MASK (1U << 4)
144
145#define PT_PDPE_LEVEL 3
146#define PT_DIRECTORY_LEVEL 2
147#define PT_PAGE_TABLE_LEVEL 1
148
149#define RMAP_EXT 4 141#define RMAP_EXT 4
150 142
151#define ACC_EXEC_MASK 1 143#define ACC_EXEC_MASK 1
@@ -153,6 +145,9 @@ module_param(oos_shadow, bool, 0644);
153#define ACC_USER_MASK PT_USER_MASK 145#define ACC_USER_MASK PT_USER_MASK
154#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) 146#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
155 147
148#include <trace/events/kvm.h>
149
150#undef TRACE_INCLUDE_FILE
156#define CREATE_TRACE_POINTS 151#define CREATE_TRACE_POINTS
157#include "mmutrace.h" 152#include "mmutrace.h"
158 153
@@ -229,7 +224,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
229 224
230static int is_write_protection(struct kvm_vcpu *vcpu) 225static int is_write_protection(struct kvm_vcpu *vcpu)
231{ 226{
232 return vcpu->arch.cr0 & X86_CR0_WP; 227 return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
233} 228}
234 229
235static int is_cpuid_PSE36(void) 230static int is_cpuid_PSE36(void)
@@ -239,7 +234,7 @@ static int is_cpuid_PSE36(void)
239 234
240static int is_nx(struct kvm_vcpu *vcpu) 235static int is_nx(struct kvm_vcpu *vcpu)
241{ 236{
242 return vcpu->arch.shadow_efer & EFER_NX; 237 return vcpu->arch.efer & EFER_NX;
243} 238}
244 239
245static int is_shadow_present_pte(u64 pte) 240static int is_shadow_present_pte(u64 pte)
@@ -253,7 +248,7 @@ static int is_large_pte(u64 pte)
253 return pte & PT_PAGE_SIZE_MASK; 248 return pte & PT_PAGE_SIZE_MASK;
254} 249}
255 250
256static int is_writeble_pte(unsigned long pte) 251static int is_writable_pte(unsigned long pte)
257{ 252{
258 return pte & PT_WRITABLE_MASK; 253 return pte & PT_WRITABLE_MASK;
259} 254}
@@ -470,24 +465,10 @@ static int has_wrprotected_page(struct kvm *kvm,
470 465
471static int host_mapping_level(struct kvm *kvm, gfn_t gfn) 466static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
472{ 467{
473 unsigned long page_size = PAGE_SIZE; 468 unsigned long page_size;
474 struct vm_area_struct *vma;
475 unsigned long addr;
476 int i, ret = 0; 469 int i, ret = 0;
477 470
478 addr = gfn_to_hva(kvm, gfn); 471 page_size = kvm_host_page_size(kvm, gfn);
479 if (kvm_is_error_hva(addr))
480 return PT_PAGE_TABLE_LEVEL;
481
482 down_read(&current->mm->mmap_sem);
483 vma = find_vma(current->mm, addr);
484 if (!vma)
485 goto out;
486
487 page_size = vma_kernel_pagesize(vma);
488
489out:
490 up_read(&current->mm->mmap_sem);
491 472
492 for (i = PT_PAGE_TABLE_LEVEL; 473 for (i = PT_PAGE_TABLE_LEVEL;
493 i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { 474 i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@@ -503,8 +484,7 @@ out:
503static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) 484static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
504{ 485{
505 struct kvm_memory_slot *slot; 486 struct kvm_memory_slot *slot;
506 int host_level; 487 int host_level, level, max_level;
507 int level = PT_PAGE_TABLE_LEVEL;
508 488
509 slot = gfn_to_memslot(vcpu->kvm, large_gfn); 489 slot = gfn_to_memslot(vcpu->kvm, large_gfn);
510 if (slot && slot->dirty_bitmap) 490 if (slot && slot->dirty_bitmap)
@@ -515,7 +495,10 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
515 if (host_level == PT_PAGE_TABLE_LEVEL) 495 if (host_level == PT_PAGE_TABLE_LEVEL)
516 return host_level; 496 return host_level;
517 497
518 for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) 498 max_level = kvm_x86_ops->get_lpage_level() < host_level ?
499 kvm_x86_ops->get_lpage_level() : host_level;
500
501 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
519 if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) 502 if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
520 break; 503 break;
521 504
@@ -633,7 +616,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
633 pfn = spte_to_pfn(*spte); 616 pfn = spte_to_pfn(*spte);
634 if (*spte & shadow_accessed_mask) 617 if (*spte & shadow_accessed_mask)
635 kvm_set_pfn_accessed(pfn); 618 kvm_set_pfn_accessed(pfn);
636 if (is_writeble_pte(*spte)) 619 if (is_writable_pte(*spte))
637 kvm_set_pfn_dirty(pfn); 620 kvm_set_pfn_dirty(pfn);
638 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); 621 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
639 if (!*rmapp) { 622 if (!*rmapp) {
@@ -662,6 +645,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
662 prev_desc = desc; 645 prev_desc = desc;
663 desc = desc->more; 646 desc = desc->more;
664 } 647 }
648 pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
665 BUG(); 649 BUG();
666 } 650 }
667} 651}
@@ -708,7 +692,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
708 BUG_ON(!spte); 692 BUG_ON(!spte);
709 BUG_ON(!(*spte & PT_PRESENT_MASK)); 693 BUG_ON(!(*spte & PT_PRESENT_MASK));
710 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 694 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
711 if (is_writeble_pte(*spte)) { 695 if (is_writable_pte(*spte)) {
712 __set_spte(spte, *spte & ~PT_WRITABLE_MASK); 696 __set_spte(spte, *spte & ~PT_WRITABLE_MASK);
713 write_protected = 1; 697 write_protected = 1;
714 } 698 }
@@ -732,7 +716,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
732 BUG_ON(!(*spte & PT_PRESENT_MASK)); 716 BUG_ON(!(*spte & PT_PRESENT_MASK));
733 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); 717 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
734 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); 718 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
735 if (is_writeble_pte(*spte)) { 719 if (is_writable_pte(*spte)) {
736 rmap_remove(kvm, spte); 720 rmap_remove(kvm, spte);
737 --kvm->stat.lpages; 721 --kvm->stat.lpages;
738 __set_spte(spte, shadow_trap_nonpresent_pte); 722 __set_spte(spte, shadow_trap_nonpresent_pte);
@@ -787,7 +771,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
787 771
788 new_spte &= ~PT_WRITABLE_MASK; 772 new_spte &= ~PT_WRITABLE_MASK;
789 new_spte &= ~SPTE_HOST_WRITEABLE; 773 new_spte &= ~SPTE_HOST_WRITEABLE;
790 if (is_writeble_pte(*spte)) 774 if (is_writable_pte(*spte))
791 kvm_set_pfn_dirty(spte_to_pfn(*spte)); 775 kvm_set_pfn_dirty(spte_to_pfn(*spte));
792 __set_spte(spte, new_spte); 776 __set_spte(spte, new_spte);
793 spte = rmap_next(kvm, rmapp, spte); 777 spte = rmap_next(kvm, rmapp, spte);
@@ -805,35 +789,32 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
805 unsigned long data)) 789 unsigned long data))
806{ 790{
807 int i, j; 791 int i, j;
792 int ret;
808 int retval = 0; 793 int retval = 0;
794 struct kvm_memslots *slots;
809 795
810 /* 796 slots = rcu_dereference(kvm->memslots);
811 * If mmap_sem isn't taken, we can look the memslots with only 797
812 * the mmu_lock by skipping over the slots with userspace_addr == 0. 798 for (i = 0; i < slots->nmemslots; i++) {
813 */ 799 struct kvm_memory_slot *memslot = &slots->memslots[i];
814 for (i = 0; i < kvm->nmemslots; i++) {
815 struct kvm_memory_slot *memslot = &kvm->memslots[i];
816 unsigned long start = memslot->userspace_addr; 800 unsigned long start = memslot->userspace_addr;
817 unsigned long end; 801 unsigned long end;
818 802
819 /* mmu_lock protects userspace_addr */
820 if (!start)
821 continue;
822
823 end = start + (memslot->npages << PAGE_SHIFT); 803 end = start + (memslot->npages << PAGE_SHIFT);
824 if (hva >= start && hva < end) { 804 if (hva >= start && hva < end) {
825 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; 805 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
826 806
827 retval |= handler(kvm, &memslot->rmap[gfn_offset], 807 ret = handler(kvm, &memslot->rmap[gfn_offset], data);
828 data);
829 808
830 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { 809 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
831 int idx = gfn_offset; 810 int idx = gfn_offset;
832 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); 811 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
833 retval |= handler(kvm, 812 ret |= handler(kvm,
834 &memslot->lpage_info[j][idx].rmap_pde, 813 &memslot->lpage_info[j][idx].rmap_pde,
835 data); 814 data);
836 } 815 }
816 trace_kvm_age_page(hva, memslot, ret);
817 retval |= ret;
837 } 818 }
838 } 819 }
839 820
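kvm_handle_hva() now walks the memslot array published through kvm->memslots (RCU-dereferenced) instead of skipping slots by userspace_addr. The per-slot test it performs reduces to a range check plus a gfn offset; a simplified sketch with made-up slot fields and 4K pages assumed:

struct slot {
	unsigned long userspace_addr;	/* host virtual start of the slot */
	unsigned long npages;
};

/* Returns 1 and fills *gfn_offset when hva falls inside the slot. */
static int hva_in_slot(const struct slot *s, unsigned long hva,
		       unsigned long *gfn_offset)
{
	unsigned long start = s->userspace_addr;
	unsigned long end = start + (s->npages << 12);	/* PAGE_SHIFT assumed 12 */

	if (hva < start || hva >= end)
		return 0;
	*gfn_offset = (hva - start) >> 12;
	return 1;
}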
@@ -856,9 +837,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
856 u64 *spte; 837 u64 *spte;
857 int young = 0; 838 int young = 0;
858 839
859 /* always return old for EPT */ 840 /*
841 * Emulate the accessed bit for EPT, by checking if this page has
842 * an EPT mapping, and clearing it if it does. On the next access,
843 * a new EPT mapping will be established.
844 * This has some overhead, but not as much as the cost of swapping
845 * out actively used pages or breaking up actively used hugepages.
846 */
860 if (!shadow_accessed_mask) 847 if (!shadow_accessed_mask)
861 return 0; 848 return kvm_unmap_rmapp(kvm, rmapp, data);
862 849
863 spte = rmap_next(kvm, rmapp, NULL); 850 spte = rmap_next(kvm, rmapp, NULL);
864 while (spte) { 851 while (spte) {
@@ -1615,7 +1602,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1615 1602
1616static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) 1603static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
1617{ 1604{
1618 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); 1605 int slot = memslot_id(kvm, gfn);
1619 struct kvm_mmu_page *sp = page_header(__pa(pte)); 1606 struct kvm_mmu_page *sp = page_header(__pa(pte));
1620 1607
1621 __set_bit(slot, sp->slot_bitmap); 1608 __set_bit(slot, sp->slot_bitmap);
@@ -1639,7 +1626,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
1639{ 1626{
1640 struct page *page; 1627 struct page *page;
1641 1628
1642 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); 1629 gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
1643 1630
1644 if (gpa == UNMAPPED_GVA) 1631 if (gpa == UNMAPPED_GVA)
1645 return NULL; 1632 return NULL;
@@ -1852,7 +1839,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1852 * is responsibility of mmu_get_page / kvm_sync_page. 1839 * is responsibility of mmu_get_page / kvm_sync_page.
1853 * Same reasoning can be applied to dirty page accounting. 1840 * Same reasoning can be applied to dirty page accounting.
1854 */ 1841 */
1855 if (!can_unsync && is_writeble_pte(*sptep)) 1842 if (!can_unsync && is_writable_pte(*sptep))
1856 goto set_pte; 1843 goto set_pte;
1857 1844
1858 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { 1845 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
@@ -1860,7 +1847,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1860 __func__, gfn); 1847 __func__, gfn);
1861 ret = 1; 1848 ret = 1;
1862 pte_access &= ~ACC_WRITE_MASK; 1849 pte_access &= ~ACC_WRITE_MASK;
1863 if (is_writeble_pte(spte)) 1850 if (is_writable_pte(spte))
1864 spte &= ~PT_WRITABLE_MASK; 1851 spte &= ~PT_WRITABLE_MASK;
1865 } 1852 }
1866 } 1853 }
@@ -1881,7 +1868,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1881 bool reset_host_protection) 1868 bool reset_host_protection)
1882{ 1869{
1883 int was_rmapped = 0; 1870 int was_rmapped = 0;
1884 int was_writeble = is_writeble_pte(*sptep); 1871 int was_writable = is_writable_pte(*sptep);
1885 int rmap_count; 1872 int rmap_count;
1886 1873
1887 pgprintk("%s: spte %llx access %x write_fault %d" 1874 pgprintk("%s: spte %llx access %x write_fault %d"
@@ -1932,7 +1919,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1932 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 1919 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1933 rmap_recycle(vcpu, sptep, gfn); 1920 rmap_recycle(vcpu, sptep, gfn);
1934 } else { 1921 } else {
1935 if (was_writeble) 1922 if (was_writable)
1936 kvm_release_pfn_dirty(pfn); 1923 kvm_release_pfn_dirty(pfn);
1937 else 1924 else
1938 kvm_release_pfn_clean(pfn); 1925 kvm_release_pfn_clean(pfn);
@@ -2162,8 +2149,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
2162 spin_unlock(&vcpu->kvm->mmu_lock); 2149 spin_unlock(&vcpu->kvm->mmu_lock);
2163} 2150}
2164 2151
2165static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) 2152static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
2153 u32 access, u32 *error)
2166{ 2154{
2155 if (error)
2156 *error = 0;
2167 return vaddr; 2157 return vaddr;
2168} 2158}
2169 2159
@@ -2747,7 +2737,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
2747 if (tdp_enabled) 2737 if (tdp_enabled)
2748 return 0; 2738 return 0;
2749 2739
2750 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); 2740 gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
2751 2741
2752 spin_lock(&vcpu->kvm->mmu_lock); 2742 spin_lock(&vcpu->kvm->mmu_lock);
2753 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); 2743 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -2847,16 +2837,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
2847 */ 2837 */
2848 page = alloc_page(GFP_KERNEL | __GFP_DMA32); 2838 page = alloc_page(GFP_KERNEL | __GFP_DMA32);
2849 if (!page) 2839 if (!page)
2850 goto error_1; 2840 return -ENOMEM;
2841
2851 vcpu->arch.mmu.pae_root = page_address(page); 2842 vcpu->arch.mmu.pae_root = page_address(page);
2852 for (i = 0; i < 4; ++i) 2843 for (i = 0; i < 4; ++i)
2853 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; 2844 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
2854 2845
2855 return 0; 2846 return 0;
2856
2857error_1:
2858 free_mmu_pages(vcpu);
2859 return -ENOMEM;
2860} 2847}
2861 2848
2862int kvm_mmu_create(struct kvm_vcpu *vcpu) 2849int kvm_mmu_create(struct kvm_vcpu *vcpu)
@@ -2936,10 +2923,9 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
2936 spin_lock(&kvm_lock); 2923 spin_lock(&kvm_lock);
2937 2924
2938 list_for_each_entry(kvm, &vm_list, vm_list) { 2925 list_for_each_entry(kvm, &vm_list, vm_list) {
2939 int npages; 2926 int npages, idx;
2940 2927
2941 if (!down_read_trylock(&kvm->slots_lock)) 2928 idx = srcu_read_lock(&kvm->srcu);
2942 continue;
2943 spin_lock(&kvm->mmu_lock); 2929 spin_lock(&kvm->mmu_lock);
2944 npages = kvm->arch.n_alloc_mmu_pages - 2930 npages = kvm->arch.n_alloc_mmu_pages -
2945 kvm->arch.n_free_mmu_pages; 2931 kvm->arch.n_free_mmu_pages;
@@ -2952,7 +2938,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
2952 nr_to_scan--; 2938 nr_to_scan--;
2953 2939
2954 spin_unlock(&kvm->mmu_lock); 2940 spin_unlock(&kvm->mmu_lock);
2955 up_read(&kvm->slots_lock); 2941 srcu_read_unlock(&kvm->srcu, idx);
2956 } 2942 }
2957 if (kvm_freed) 2943 if (kvm_freed)
2958 list_move_tail(&kvm_freed->vm_list, &vm_list); 2944 list_move_tail(&kvm_freed->vm_list, &vm_list);
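mmu_shrink() drops the down_read_trylock(&kvm->slots_lock) dance in favour of an SRCU read-side section, which never fails or blocks. A minimal sketch of the replacement pattern, assuming the kvm->srcu member this series introduces (the reclaim body is elided):

#include <linux/srcu.h>

static void shrink_one_vm(struct kvm *kvm)
{
	int idx;

	idx = srcu_read_lock(&kvm->srcu);	/* cheap, always succeeds */
	/* ... take kvm->mmu_lock and zap surplus shadow pages here ... */
	srcu_read_unlock(&kvm->srcu, idx);
}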
@@ -3019,9 +3005,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
3019 int i; 3005 int i;
3020 unsigned int nr_mmu_pages; 3006 unsigned int nr_mmu_pages;
3021 unsigned int nr_pages = 0; 3007 unsigned int nr_pages = 0;
3008 struct kvm_memslots *slots;
3022 3009
3023 for (i = 0; i < kvm->nmemslots; i++) 3010 slots = rcu_dereference(kvm->memslots);
3024 nr_pages += kvm->memslots[i].npages; 3011 for (i = 0; i < slots->nmemslots; i++)
3012 nr_pages += slots->memslots[i].npages;
3025 3013
3026 nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; 3014 nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
3027 nr_mmu_pages = max(nr_mmu_pages, 3015 nr_mmu_pages = max(nr_mmu_pages,
@@ -3246,7 +3234,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
3246 if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) 3234 if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
3247 audit_mappings_page(vcpu, ent, va, level - 1); 3235 audit_mappings_page(vcpu, ent, va, level - 1);
3248 else { 3236 else {
3249 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); 3237 gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
3250 gfn_t gfn = gpa >> PAGE_SHIFT; 3238 gfn_t gfn = gpa >> PAGE_SHIFT;
3251 pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); 3239 pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
3252 hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; 3240 hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
@@ -3291,10 +3279,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
3291static int count_rmaps(struct kvm_vcpu *vcpu) 3279static int count_rmaps(struct kvm_vcpu *vcpu)
3292{ 3280{
3293 int nmaps = 0; 3281 int nmaps = 0;
3294 int i, j, k; 3282 int i, j, k, idx;
3295 3283
3284 idx = srcu_read_lock(&kvm->srcu);
3285 slots = rcu_dereference(kvm->memslots);
3296 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 3286 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
3297 struct kvm_memory_slot *m = &vcpu->kvm->memslots[i]; 3287 struct kvm_memory_slot *m = &slots->memslots[i];
3298 struct kvm_rmap_desc *d; 3288 struct kvm_rmap_desc *d;
3299 3289
3300 for (j = 0; j < m->npages; ++j) { 3290 for (j = 0; j < m->npages; ++j) {
@@ -3317,6 +3307,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
3317 } 3307 }
3318 } 3308 }
3319 } 3309 }
3310 srcu_read_unlock(&kvm->srcu, idx);
3320 return nmaps; 3311 return nmaps;
3321} 3312}
3322 3313
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 61a1b3884b4..be66759321a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -2,6 +2,7 @@
2#define __KVM_X86_MMU_H 2#define __KVM_X86_MMU_H
3 3
4#include <linux/kvm_host.h> 4#include <linux/kvm_host.h>
5#include "kvm_cache_regs.h"
5 6
6#define PT64_PT_BITS 9 7#define PT64_PT_BITS 9
7#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) 8#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
@@ -37,6 +38,16 @@
37#define PT32_ROOT_LEVEL 2 38#define PT32_ROOT_LEVEL 2
38#define PT32E_ROOT_LEVEL 3 39#define PT32E_ROOT_LEVEL 3
39 40
41#define PT_PDPE_LEVEL 3
42#define PT_DIRECTORY_LEVEL 2
43#define PT_PAGE_TABLE_LEVEL 1
44
45#define PFERR_PRESENT_MASK (1U << 0)
46#define PFERR_WRITE_MASK (1U << 1)
47#define PFERR_USER_MASK (1U << 2)
48#define PFERR_RSVD_MASK (1U << 3)
49#define PFERR_FETCH_MASK (1U << 4)
50
40int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); 51int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
41 52
42static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 53static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
@@ -53,30 +64,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
53 return kvm_mmu_load(vcpu); 64 return kvm_mmu_load(vcpu);
54} 65}
55 66
56static inline int is_long_mode(struct kvm_vcpu *vcpu)
57{
58#ifdef CONFIG_X86_64
59 return vcpu->arch.shadow_efer & EFER_LMA;
60#else
61 return 0;
62#endif
63}
64
65static inline int is_pae(struct kvm_vcpu *vcpu)
66{
67 return vcpu->arch.cr4 & X86_CR4_PAE;
68}
69
70static inline int is_pse(struct kvm_vcpu *vcpu)
71{
72 return vcpu->arch.cr4 & X86_CR4_PSE;
73}
74
75static inline int is_paging(struct kvm_vcpu *vcpu)
76{
77 return vcpu->arch.cr0 & X86_CR0_PG;
78}
79
80static inline int is_present_gpte(unsigned long pte) 67static inline int is_present_gpte(unsigned long pte)
81{ 68{
82 return pte & PT_PRESENT_MASK; 69 return pte & PT_PRESENT_MASK;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ede2131a922..81eab9a50e6 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -162,7 +162,7 @@ walk:
162 if (rsvd_fault) 162 if (rsvd_fault)
163 goto access_error; 163 goto access_error;
164 164
165 if (write_fault && !is_writeble_pte(pte)) 165 if (write_fault && !is_writable_pte(pte))
166 if (user_fault || is_write_protection(vcpu)) 166 if (user_fault || is_write_protection(vcpu))
167 goto access_error; 167 goto access_error;
168 168
@@ -490,18 +490,23 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
490 spin_unlock(&vcpu->kvm->mmu_lock); 490 spin_unlock(&vcpu->kvm->mmu_lock);
491} 491}
492 492
493static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) 493static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
494 u32 *error)
494{ 495{
495 struct guest_walker walker; 496 struct guest_walker walker;
496 gpa_t gpa = UNMAPPED_GVA; 497 gpa_t gpa = UNMAPPED_GVA;
497 int r; 498 int r;
498 499
499 r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0); 500 r = FNAME(walk_addr)(&walker, vcpu, vaddr,
501 !!(access & PFERR_WRITE_MASK),
502 !!(access & PFERR_USER_MASK),
503 !!(access & PFERR_FETCH_MASK));
500 504
501 if (r) { 505 if (r) {
502 gpa = gfn_to_gpa(walker.gfn); 506 gpa = gfn_to_gpa(walker.gfn);
503 gpa |= vaddr & ~PAGE_MASK; 507 gpa |= vaddr & ~PAGE_MASK;
504 } 508 } else if (error)
509 *error = walker.error_code;
505 510
506 return gpa; 511 return gpa;
507} 512}
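gva_to_gpa() now takes an access mask built from the PFERR_* bits moved into mmu.h above, and the walker decodes it back into write/user/fetch flags with !!. A small sketch of both directions; the helper names are hypothetical:

static u32 build_access(int write, int user, int fetch)
{
	return (write ? PFERR_WRITE_MASK : 0) |
	       (user  ? PFERR_USER_MASK  : 0) |
	       (fetch ? PFERR_FETCH_MASK : 0);
}

static void split_access(u32 access, int *write, int *user, int *fetch)
{
	*write = !!(access & PFERR_WRITE_MASK);
	*user  = !!(access & PFERR_USER_MASK);
	*fetch = !!(access & PFERR_FETCH_MASK);
}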
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1d9b33843c8..52f78dd0301 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -231,7 +231,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
231 efer &= ~EFER_LME; 231 efer &= ~EFER_LME;
232 232
233 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; 233 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
234 vcpu->arch.shadow_efer = efer; 234 vcpu->arch.efer = efer;
235} 235}
236 236
237static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 237static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -540,6 +540,8 @@ static void init_vmcb(struct vcpu_svm *svm)
540 struct vmcb_control_area *control = &svm->vmcb->control; 540 struct vmcb_control_area *control = &svm->vmcb->control;
541 struct vmcb_save_area *save = &svm->vmcb->save; 541 struct vmcb_save_area *save = &svm->vmcb->save;
542 542
543 svm->vcpu.fpu_active = 1;
544
543 control->intercept_cr_read = INTERCEPT_CR0_MASK | 545 control->intercept_cr_read = INTERCEPT_CR0_MASK |
544 INTERCEPT_CR3_MASK | 546 INTERCEPT_CR3_MASK |
545 INTERCEPT_CR4_MASK; 547 INTERCEPT_CR4_MASK;
@@ -552,13 +554,19 @@ static void init_vmcb(struct vcpu_svm *svm)
552 control->intercept_dr_read = INTERCEPT_DR0_MASK | 554 control->intercept_dr_read = INTERCEPT_DR0_MASK |
553 INTERCEPT_DR1_MASK | 555 INTERCEPT_DR1_MASK |
554 INTERCEPT_DR2_MASK | 556 INTERCEPT_DR2_MASK |
555 INTERCEPT_DR3_MASK; 557 INTERCEPT_DR3_MASK |
558 INTERCEPT_DR4_MASK |
559 INTERCEPT_DR5_MASK |
560 INTERCEPT_DR6_MASK |
561 INTERCEPT_DR7_MASK;
556 562
557 control->intercept_dr_write = INTERCEPT_DR0_MASK | 563 control->intercept_dr_write = INTERCEPT_DR0_MASK |
558 INTERCEPT_DR1_MASK | 564 INTERCEPT_DR1_MASK |
559 INTERCEPT_DR2_MASK | 565 INTERCEPT_DR2_MASK |
560 INTERCEPT_DR3_MASK | 566 INTERCEPT_DR3_MASK |
567 INTERCEPT_DR4_MASK |
561 INTERCEPT_DR5_MASK | 568 INTERCEPT_DR5_MASK |
569 INTERCEPT_DR6_MASK |
562 INTERCEPT_DR7_MASK; 570 INTERCEPT_DR7_MASK;
563 571
564 control->intercept_exceptions = (1 << PF_VECTOR) | 572 control->intercept_exceptions = (1 << PF_VECTOR) |
@@ -569,6 +577,7 @@ static void init_vmcb(struct vcpu_svm *svm)
569 control->intercept = (1ULL << INTERCEPT_INTR) | 577 control->intercept = (1ULL << INTERCEPT_INTR) |
570 (1ULL << INTERCEPT_NMI) | 578 (1ULL << INTERCEPT_NMI) |
571 (1ULL << INTERCEPT_SMI) | 579 (1ULL << INTERCEPT_SMI) |
580 (1ULL << INTERCEPT_SELECTIVE_CR0) |
572 (1ULL << INTERCEPT_CPUID) | 581 (1ULL << INTERCEPT_CPUID) |
573 (1ULL << INTERCEPT_INVD) | 582 (1ULL << INTERCEPT_INVD) |
574 (1ULL << INTERCEPT_HLT) | 583 (1ULL << INTERCEPT_HLT) |
@@ -641,10 +650,8 @@ static void init_vmcb(struct vcpu_svm *svm)
641 control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | 650 control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
642 (1ULL << INTERCEPT_INVLPG)); 651 (1ULL << INTERCEPT_INVLPG));
643 control->intercept_exceptions &= ~(1 << PF_VECTOR); 652 control->intercept_exceptions &= ~(1 << PF_VECTOR);
644 control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| 653 control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
645 INTERCEPT_CR3_MASK); 654 control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
646 control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
647 INTERCEPT_CR3_MASK);
648 save->g_pat = 0x0007040600070406ULL; 655 save->g_pat = 0x0007040600070406ULL;
649 save->cr3 = 0; 656 save->cr3 = 0;
650 save->cr4 = 0; 657 save->cr4 = 0;
@@ -730,7 +737,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
730 init_vmcb(svm); 737 init_vmcb(svm);
731 738
732 fx_init(&svm->vcpu); 739 fx_init(&svm->vcpu);
733 svm->vcpu.fpu_active = 1;
734 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 740 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
735 if (kvm_vcpu_is_bsp(&svm->vcpu)) 741 if (kvm_vcpu_is_bsp(&svm->vcpu))
736 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; 742 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
@@ -765,14 +771,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
765 if (unlikely(cpu != vcpu->cpu)) { 771 if (unlikely(cpu != vcpu->cpu)) {
766 u64 delta; 772 u64 delta;
767 773
768 /* 774 if (check_tsc_unstable()) {
769 * Make sure that the guest sees a monotonically 775 /*
770 * increasing TSC. 776 * Make sure that the guest sees a monotonically
771 */ 777 * increasing TSC.
772 delta = vcpu->arch.host_tsc - native_read_tsc(); 778 */
773 svm->vmcb->control.tsc_offset += delta; 779 delta = vcpu->arch.host_tsc - native_read_tsc();
774 if (is_nested(svm)) 780 svm->vmcb->control.tsc_offset += delta;
775 svm->nested.hsave->control.tsc_offset += delta; 781 if (is_nested(svm))
782 svm->nested.hsave->control.tsc_offset += delta;
783 }
776 vcpu->cpu = cpu; 784 vcpu->cpu = cpu;
777 kvm_migrate_timers(vcpu); 785 kvm_migrate_timers(vcpu);
778 svm->asid_generation = 0; 786 svm->asid_generation = 0;
@@ -954,42 +962,59 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
954 svm->vmcb->save.gdtr.base = dt->base ; 962 svm->vmcb->save.gdtr.base = dt->base ;
955} 963}
956 964
965static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
966{
967}
968
957static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 969static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
958{ 970{
959} 971}
960 972
973static void update_cr0_intercept(struct vcpu_svm *svm)
974{
975 ulong gcr0 = svm->vcpu.arch.cr0;
976 u64 *hcr0 = &svm->vmcb->save.cr0;
977
978 if (!svm->vcpu.fpu_active)
979 *hcr0 |= SVM_CR0_SELECTIVE_MASK;
980 else
981 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
982 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
983
984
985 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
986 svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
987 svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
988 } else {
989 svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
990 svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
991 }
992}
993
961static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 994static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
962{ 995{
963 struct vcpu_svm *svm = to_svm(vcpu); 996 struct vcpu_svm *svm = to_svm(vcpu);
964 997
965#ifdef CONFIG_X86_64 998#ifdef CONFIG_X86_64
966 if (vcpu->arch.shadow_efer & EFER_LME) { 999 if (vcpu->arch.efer & EFER_LME) {
967 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 1000 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
968 vcpu->arch.shadow_efer |= EFER_LMA; 1001 vcpu->arch.efer |= EFER_LMA;
969 svm->vmcb->save.efer |= EFER_LMA | EFER_LME; 1002 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
970 } 1003 }
971 1004
972 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { 1005 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
973 vcpu->arch.shadow_efer &= ~EFER_LMA; 1006 vcpu->arch.efer &= ~EFER_LMA;
974 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); 1007 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
975 } 1008 }
976 } 1009 }
977#endif 1010#endif
978 if (npt_enabled) 1011 vcpu->arch.cr0 = cr0;
979 goto set;
980 1012
981 if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { 1013 if (!npt_enabled)
982 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 1014 cr0 |= X86_CR0_PG | X86_CR0_WP;
983 vcpu->fpu_active = 1;
984 }
985 1015
986 vcpu->arch.cr0 = cr0; 1016 if (!vcpu->fpu_active)
987 cr0 |= X86_CR0_PG | X86_CR0_WP;
988 if (!vcpu->fpu_active) {
989 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
990 cr0 |= X86_CR0_TS; 1017 cr0 |= X86_CR0_TS;
991 }
992set:
993 /* 1018 /*
994 * re-enable caching here because the QEMU bios 1019 * re-enable caching here because the QEMU bios
995 * does not do it - this results in some delay at 1020 * does not do it - this results in some delay at
@@ -997,6 +1022,7 @@ set:
997 */ 1022 */
998 cr0 &= ~(X86_CR0_CD | X86_CR0_NW); 1023 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
999 svm->vmcb->save.cr0 = cr0; 1024 svm->vmcb->save.cr0 = cr0;
1025 update_cr0_intercept(svm);
1000} 1026}
1001 1027
1002static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1028static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
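update_cr0_intercept() decides per write whether CR0 accesses still need to trap: hardware CR0 keeps the bits in SVM_CR0_SELECTIVE_MASK under host control while the FPU is lazily deactivated, otherwise it mirrors the guest's selective bits, and the intercept is cleared only when the two values agree and the FPU is active. A condensed sketch of the blend, with simplified parameters:

static unsigned long blend_cr0(unsigned long gcr0, unsigned long hcr0,
			       unsigned long sel_mask, int fpu_active)
{
	if (!fpu_active)
		return hcr0 | sel_mask;			/* keep TS-style bits set behind the guest */
	return (hcr0 & ~sel_mask) | (gcr0 & sel_mask);	/* mirror the guest's selective bits */
}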
@@ -1102,76 +1128,70 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1102 svm->vmcb->control.asid = sd->next_asid++; 1128 svm->vmcb->control.asid = sd->next_asid++;
1103} 1129}
1104 1130
1105static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) 1131static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest)
1106{ 1132{
1107 struct vcpu_svm *svm = to_svm(vcpu); 1133 struct vcpu_svm *svm = to_svm(vcpu);
1108 unsigned long val;
1109 1134
1110 switch (dr) { 1135 switch (dr) {
1111 case 0 ... 3: 1136 case 0 ... 3:
1112 val = vcpu->arch.db[dr]; 1137 *dest = vcpu->arch.db[dr];
1113 break; 1138 break;
1139 case 4:
1140 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1141 return EMULATE_FAIL; /* will re-inject UD */
1142 /* fall through */
1114 case 6: 1143 case 6:
1115 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 1144 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1116 val = vcpu->arch.dr6; 1145 *dest = vcpu->arch.dr6;
1117 else 1146 else
1118 val = svm->vmcb->save.dr6; 1147 *dest = svm->vmcb->save.dr6;
1119 break; 1148 break;
1149 case 5:
1150 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1151 return EMULATE_FAIL; /* will re-inject UD */
1152 /* fall through */
1120 case 7: 1153 case 7:
1121 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 1154 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1122 val = vcpu->arch.dr7; 1155 *dest = vcpu->arch.dr7;
1123 else 1156 else
1124 val = svm->vmcb->save.dr7; 1157 *dest = svm->vmcb->save.dr7;
1125 break; 1158 break;
1126 default:
1127 val = 0;
1128 } 1159 }
1129 1160
1130 return val; 1161 return EMULATE_DONE;
1131} 1162}
1132 1163
1133static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, 1164static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value)
1134 int *exception)
1135{ 1165{
1136 struct vcpu_svm *svm = to_svm(vcpu); 1166 struct vcpu_svm *svm = to_svm(vcpu);
1137 1167
1138 *exception = 0;
1139
1140 switch (dr) { 1168 switch (dr) {
1141 case 0 ... 3: 1169 case 0 ... 3:
1142 vcpu->arch.db[dr] = value; 1170 vcpu->arch.db[dr] = value;
1143 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 1171 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1144 vcpu->arch.eff_db[dr] = value; 1172 vcpu->arch.eff_db[dr] = value;
1145 return; 1173 break;
1146 case 4 ... 5: 1174 case 4:
1147 if (vcpu->arch.cr4 & X86_CR4_DE) 1175 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1148 *exception = UD_VECTOR; 1176 return EMULATE_FAIL; /* will re-inject UD */
1149 return; 1177 /* fall through */
1150 case 6: 1178 case 6:
1151 if (value & 0xffffffff00000000ULL) {
1152 *exception = GP_VECTOR;
1153 return;
1154 }
1155 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; 1179 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
1156 return; 1180 break;
1181 case 5:
1182 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1183 return EMULATE_FAIL; /* will re-inject UD */
1184 /* fall through */
1157 case 7: 1185 case 7:
1158 if (value & 0xffffffff00000000ULL) {
1159 *exception = GP_VECTOR;
1160 return;
1161 }
1162 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; 1186 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
1163 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 1187 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1164 svm->vmcb->save.dr7 = vcpu->arch.dr7; 1188 svm->vmcb->save.dr7 = vcpu->arch.dr7;
1165 vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); 1189 vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
1166 } 1190 }
1167 return; 1191 break;
1168 default:
1169 /* FIXME: Possible case? */
1170 printk(KERN_DEBUG "%s: unexpected dr %u\n",
1171 __func__, dr);
1172 *exception = UD_VECTOR;
1173 return;
1174 } 1192 }
1193
1194 return EMULATE_DONE;
1175} 1195}
1176 1196
1177static int pf_interception(struct vcpu_svm *svm) 1197static int pf_interception(struct vcpu_svm *svm)
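The debug-register accessors now return EMULATE_DONE/EMULATE_FAIL and treat DR4/DR5 as legal aliases of DR6/DR7 only while CR4.DE is clear; with DE set the access must fail so the caller re-injects #UD. The aliasing rule in isolation, as a small sketch:

/* Map a DR number to the register actually accessed, honouring CR4.DE. */
static int canonical_dr(int dr, int cr4_de)
{
	if (dr == 4 || dr == 5) {
		if (cr4_de)
			return -1;	/* caller re-injects #UD */
		dr += 2;		/* DR4 -> DR6, DR5 -> DR7 */
	}
	return dr;
}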
@@ -1239,13 +1259,17 @@ static int ud_interception(struct vcpu_svm *svm)
1239 return 1; 1259 return 1;
1240} 1260}
1241 1261
1242static int nm_interception(struct vcpu_svm *svm) 1262static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1243{ 1263{
1264 struct vcpu_svm *svm = to_svm(vcpu);
1244 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 1265 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
1245 if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
1246 svm->vmcb->save.cr0 &= ~X86_CR0_TS;
1247 svm->vcpu.fpu_active = 1; 1266 svm->vcpu.fpu_active = 1;
1267 update_cr0_intercept(svm);
1268}
1248 1269
1270static int nm_interception(struct vcpu_svm *svm)
1271{
1272 svm_fpu_activate(&svm->vcpu);
1249 return 1; 1273 return 1;
1250} 1274}
1251 1275
@@ -1337,7 +1361,7 @@ static int vmmcall_interception(struct vcpu_svm *svm)
1337 1361
1338static int nested_svm_check_permissions(struct vcpu_svm *svm) 1362static int nested_svm_check_permissions(struct vcpu_svm *svm)
1339{ 1363{
1340 if (!(svm->vcpu.arch.shadow_efer & EFER_SVME) 1364 if (!(svm->vcpu.arch.efer & EFER_SVME)
1341 || !is_paging(&svm->vcpu)) { 1365 || !is_paging(&svm->vcpu)) {
1342 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 1366 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1343 return 1; 1367 return 1;
@@ -1740,8 +1764,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
1740 hsave->save.ds = vmcb->save.ds; 1764 hsave->save.ds = vmcb->save.ds;
1741 hsave->save.gdtr = vmcb->save.gdtr; 1765 hsave->save.gdtr = vmcb->save.gdtr;
1742 hsave->save.idtr = vmcb->save.idtr; 1766 hsave->save.idtr = vmcb->save.idtr;
1743 hsave->save.efer = svm->vcpu.arch.shadow_efer; 1767 hsave->save.efer = svm->vcpu.arch.efer;
1744 hsave->save.cr0 = svm->vcpu.arch.cr0; 1768 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
1745 hsave->save.cr4 = svm->vcpu.arch.cr4; 1769 hsave->save.cr4 = svm->vcpu.arch.cr4;
1746 hsave->save.rflags = vmcb->save.rflags; 1770 hsave->save.rflags = vmcb->save.rflags;
1747 hsave->save.rip = svm->next_rip; 1771 hsave->save.rip = svm->next_rip;
@@ -2153,9 +2177,10 @@ static int rdmsr_interception(struct vcpu_svm *svm)
2153 u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 2177 u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2154 u64 data; 2178 u64 data;
2155 2179
2156 if (svm_get_msr(&svm->vcpu, ecx, &data)) 2180 if (svm_get_msr(&svm->vcpu, ecx, &data)) {
2181 trace_kvm_msr_read_ex(ecx);
2157 kvm_inject_gp(&svm->vcpu, 0); 2182 kvm_inject_gp(&svm->vcpu, 0);
2158 else { 2183 } else {
2159 trace_kvm_msr_read(ecx, data); 2184 trace_kvm_msr_read(ecx, data);
2160 2185
2161 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; 2186 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
@@ -2247,13 +2272,15 @@ static int wrmsr_interception(struct vcpu_svm *svm)
2247 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) 2272 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
2248 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); 2273 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
2249 2274
2250 trace_kvm_msr_write(ecx, data);
2251 2275
2252 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 2276 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2253 if (svm_set_msr(&svm->vcpu, ecx, data)) 2277 if (svm_set_msr(&svm->vcpu, ecx, data)) {
2278 trace_kvm_msr_write_ex(ecx, data);
2254 kvm_inject_gp(&svm->vcpu, 0); 2279 kvm_inject_gp(&svm->vcpu, 0);
2255 else 2280 } else {
2281 trace_kvm_msr_write(ecx, data);
2256 skip_emulated_instruction(&svm->vcpu); 2282 skip_emulated_instruction(&svm->vcpu);
2283 }
2257 return 1; 2284 return 1;
2258} 2285}
2259 2286
@@ -2297,7 +2324,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2297 [SVM_EXIT_READ_CR3] = emulate_on_interception, 2324 [SVM_EXIT_READ_CR3] = emulate_on_interception,
2298 [SVM_EXIT_READ_CR4] = emulate_on_interception, 2325 [SVM_EXIT_READ_CR4] = emulate_on_interception,
2299 [SVM_EXIT_READ_CR8] = emulate_on_interception, 2326 [SVM_EXIT_READ_CR8] = emulate_on_interception,
2300 /* for now: */ 2327 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
2301 [SVM_EXIT_WRITE_CR0] = emulate_on_interception, 2328 [SVM_EXIT_WRITE_CR0] = emulate_on_interception,
2302 [SVM_EXIT_WRITE_CR3] = emulate_on_interception, 2329 [SVM_EXIT_WRITE_CR3] = emulate_on_interception,
2303 [SVM_EXIT_WRITE_CR4] = emulate_on_interception, 2330 [SVM_EXIT_WRITE_CR4] = emulate_on_interception,
@@ -2306,11 +2333,17 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2306 [SVM_EXIT_READ_DR1] = emulate_on_interception, 2333 [SVM_EXIT_READ_DR1] = emulate_on_interception,
2307 [SVM_EXIT_READ_DR2] = emulate_on_interception, 2334 [SVM_EXIT_READ_DR2] = emulate_on_interception,
2308 [SVM_EXIT_READ_DR3] = emulate_on_interception, 2335 [SVM_EXIT_READ_DR3] = emulate_on_interception,
2336 [SVM_EXIT_READ_DR4] = emulate_on_interception,
2337 [SVM_EXIT_READ_DR5] = emulate_on_interception,
2338 [SVM_EXIT_READ_DR6] = emulate_on_interception,
2339 [SVM_EXIT_READ_DR7] = emulate_on_interception,
2309 [SVM_EXIT_WRITE_DR0] = emulate_on_interception, 2340 [SVM_EXIT_WRITE_DR0] = emulate_on_interception,
2310 [SVM_EXIT_WRITE_DR1] = emulate_on_interception, 2341 [SVM_EXIT_WRITE_DR1] = emulate_on_interception,
2311 [SVM_EXIT_WRITE_DR2] = emulate_on_interception, 2342 [SVM_EXIT_WRITE_DR2] = emulate_on_interception,
2312 [SVM_EXIT_WRITE_DR3] = emulate_on_interception, 2343 [SVM_EXIT_WRITE_DR3] = emulate_on_interception,
2344 [SVM_EXIT_WRITE_DR4] = emulate_on_interception,
2313 [SVM_EXIT_WRITE_DR5] = emulate_on_interception, 2345 [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
2346 [SVM_EXIT_WRITE_DR6] = emulate_on_interception,
2314 [SVM_EXIT_WRITE_DR7] = emulate_on_interception, 2347 [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
2315 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, 2348 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
2316 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, 2349 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
@@ -2383,20 +2416,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
2383 2416
2384 svm_complete_interrupts(svm); 2417 svm_complete_interrupts(svm);
2385 2418
2386 if (npt_enabled) { 2419 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
2387 int mmu_reload = 0;
2388 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
2389 svm_set_cr0(vcpu, svm->vmcb->save.cr0);
2390 mmu_reload = 1;
2391 }
2392 vcpu->arch.cr0 = svm->vmcb->save.cr0; 2420 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2421 if (npt_enabled)
2393 vcpu->arch.cr3 = svm->vmcb->save.cr3; 2422 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2394 if (mmu_reload) {
2395 kvm_mmu_reset_context(vcpu);
2396 kvm_mmu_load(vcpu);
2397 }
2398 }
2399
2400 2423
2401 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 2424 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
2402 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2425 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2798,12 +2821,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
2798 2821
2799 svm->vmcb->save.cr3 = root; 2822 svm->vmcb->save.cr3 = root;
2800 force_new_asid(vcpu); 2823 force_new_asid(vcpu);
2801
2802 if (vcpu->fpu_active) {
2803 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
2804 svm->vmcb->save.cr0 |= X86_CR0_TS;
2805 vcpu->fpu_active = 0;
2806 }
2807} 2824}
2808 2825
2809static int is_disabled(void) 2826static int is_disabled(void)
@@ -2852,6 +2869,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
2852 return 0; 2869 return 0;
2853} 2870}
2854 2871
2872static void svm_cpuid_update(struct kvm_vcpu *vcpu)
2873{
2874}
2875
2855static const struct trace_print_flags svm_exit_reasons_str[] = { 2876static const struct trace_print_flags svm_exit_reasons_str[] = {
2856 { SVM_EXIT_READ_CR0, "read_cr0" }, 2877 { SVM_EXIT_READ_CR0, "read_cr0" },
2857 { SVM_EXIT_READ_CR3, "read_cr3" }, 2878 { SVM_EXIT_READ_CR3, "read_cr3" },
@@ -2905,9 +2926,22 @@ static const struct trace_print_flags svm_exit_reasons_str[] = {
2905 { -1, NULL } 2926 { -1, NULL }
2906}; 2927};
2907 2928
2908static bool svm_gb_page_enable(void) 2929static int svm_get_lpage_level(void)
2909{ 2930{
2910 return true; 2931 return PT_PDPE_LEVEL;
2932}
2933
2934static bool svm_rdtscp_supported(void)
2935{
2936 return false;
2937}
2938
2939static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
2940{
2941 struct vcpu_svm *svm = to_svm(vcpu);
2942
2943 update_cr0_intercept(svm);
2944 svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
2911} 2945}
2912 2946
2913static struct kvm_x86_ops svm_x86_ops = { 2947static struct kvm_x86_ops svm_x86_ops = {
@@ -2936,6 +2970,7 @@ static struct kvm_x86_ops svm_x86_ops = {
2936 .set_segment = svm_set_segment, 2970 .set_segment = svm_set_segment,
2937 .get_cpl = svm_get_cpl, 2971 .get_cpl = svm_get_cpl,
2938 .get_cs_db_l_bits = kvm_get_cs_db_l_bits, 2972 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
2973 .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
2939 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, 2974 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
2940 .set_cr0 = svm_set_cr0, 2975 .set_cr0 = svm_set_cr0,
2941 .set_cr3 = svm_set_cr3, 2976 .set_cr3 = svm_set_cr3,
@@ -2950,6 +2985,8 @@ static struct kvm_x86_ops svm_x86_ops = {
2950 .cache_reg = svm_cache_reg, 2985 .cache_reg = svm_cache_reg,
2951 .get_rflags = svm_get_rflags, 2986 .get_rflags = svm_get_rflags,
2952 .set_rflags = svm_set_rflags, 2987 .set_rflags = svm_set_rflags,
2988 .fpu_activate = svm_fpu_activate,
2989 .fpu_deactivate = svm_fpu_deactivate,
2953 2990
2954 .tlb_flush = svm_flush_tlb, 2991 .tlb_flush = svm_flush_tlb,
2955 2992
@@ -2975,7 +3012,11 @@ static struct kvm_x86_ops svm_x86_ops = {
2975 .get_mt_mask = svm_get_mt_mask, 3012 .get_mt_mask = svm_get_mt_mask,
2976 3013
2977 .exit_reasons_str = svm_exit_reasons_str, 3014 .exit_reasons_str = svm_exit_reasons_str,
2978 .gb_page_enable = svm_gb_page_enable, 3015 .get_lpage_level = svm_get_lpage_level,
3016
3017 .cpuid_update = svm_cpuid_update,
3018
3019 .rdtscp_supported = svm_rdtscp_supported,
2979}; 3020};
2980 3021
2981static int __init svm_init(void) 3022static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 816e0449db0..6ad30a29f04 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -56,6 +56,38 @@ TRACE_EVENT(kvm_hypercall,
56); 56);
57 57
58/* 58/*
59 * Tracepoint for hypercall.
60 */
61TRACE_EVENT(kvm_hv_hypercall,
62 TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx,
63 __u64 ingpa, __u64 outgpa),
64 TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
65
66 TP_STRUCT__entry(
67 __field( __u16, code )
68 __field( bool, fast )
69 __field( __u16, rep_cnt )
70 __field( __u16, rep_idx )
71 __field( __u64, ingpa )
72 __field( __u64, outgpa )
73 ),
74
75 TP_fast_assign(
76 __entry->code = code;
77 __entry->fast = fast;
78 __entry->rep_cnt = rep_cnt;
79 __entry->rep_idx = rep_idx;
80 __entry->ingpa = ingpa;
81 __entry->outgpa = outgpa;
82 ),
83
84 TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
85 __entry->code, __entry->fast ? "fast" : "slow",
86 __entry->rep_cnt, __entry->rep_idx, __entry->ingpa,
87 __entry->outgpa)
88);
89
90/*
59 * Tracepoint for PIO. 91 * Tracepoint for PIO.
60 */ 92 */
61TRACE_EVENT(kvm_pio, 93TRACE_EVENT(kvm_pio,
@@ -214,28 +246,33 @@ TRACE_EVENT(kvm_page_fault,
214 * Tracepoint for guest MSR access. 246 * Tracepoint for guest MSR access.
215 */ 247 */
216TRACE_EVENT(kvm_msr, 248TRACE_EVENT(kvm_msr,
217 TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data), 249 TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception),
218 TP_ARGS(rw, ecx, data), 250 TP_ARGS(write, ecx, data, exception),
219 251
220 TP_STRUCT__entry( 252 TP_STRUCT__entry(
221 __field( unsigned int, rw ) 253 __field( unsigned, write )
222 __field( unsigned int, ecx ) 254 __field( u32, ecx )
223 __field( unsigned long, data ) 255 __field( u64, data )
256 __field( u8, exception )
224 ), 257 ),
225 258
226 TP_fast_assign( 259 TP_fast_assign(
227 __entry->rw = rw; 260 __entry->write = write;
228 __entry->ecx = ecx; 261 __entry->ecx = ecx;
229 __entry->data = data; 262 __entry->data = data;
263 __entry->exception = exception;
230 ), 264 ),
231 265
232 TP_printk("msr_%s %x = 0x%lx", 266 TP_printk("msr_%s %x = 0x%llx%s",
233 __entry->rw ? "write" : "read", 267 __entry->write ? "write" : "read",
234 __entry->ecx, __entry->data) 268 __entry->ecx, __entry->data,
269 __entry->exception ? " (#GP)" : "")
235); 270);
236 271
237#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data) 272#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false)
238#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data) 273#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false)
274#define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true)
275#define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true)
239 276
240/* 277/*
241 * Tracepoint for guest CR access. 278 * Tracepoint for guest CR access.
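The kvm_msr tracepoint rework above adds an exception flag and moves tracing of writes until after the access is known to have succeeded; failed accesses go through the new *_ex macro variants instead. A small stand-alone sketch of that wrapper-macro idea, with printf standing in for the real tracepoint and a made-up MSR store (none of these names are the kernel's):

#include <stdio.h>
#include <stdint.h>

/* Mock tracepoint: in the kernel this is TRACE_EVENT(kvm_msr); here just printf. */
static void trace_msr(unsigned write, uint32_t ecx, uint64_t data, int exception)
{
	printf("msr_%s %x = 0x%llx%s\n", write ? "write" : "read",
	       ecx, (unsigned long long)data, exception ? " (#GP)" : "");
}

/* Wrappers mirroring the four cases introduced in the hunk above. */
#define trace_msr_read(ecx, data)      trace_msr(0, ecx, data, 0)
#define trace_msr_write(ecx, data)     trace_msr(1, ecx, data, 0)
#define trace_msr_read_ex(ecx)         trace_msr(0, ecx, 0, 1)
#define trace_msr_write_ex(ecx, data)  trace_msr(1, ecx, data, 1)

/* Pretend MSR store: returns 0 on success, nonzero for an unknown index. */
static int demo_get_msr(uint32_t ecx, uint64_t *data)
{
	if (ecx != 0x10)
		return 1;            /* only one fake MSR exists here */
	*data = 0x123456789abcULL;
	return 0;
}

int main(void)
{
	uint64_t data;

	/* Known MSR: the read succeeds, so the value itself is traced. */
	if (demo_get_msr(0x10, &data))
		trace_msr_read_ex(0x10);
	else
		trace_msr_read(0x10, data);

	/* Unknown MSR: the read fails, so only the faulting index is traced. */
	if (demo_get_msr(0xdead, &data))
		trace_msr_read_ex(0xdead);
	else
		trace_msr_read(0xdead, data);
	return 0;
}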
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d4918d6fc92..14873b9f843 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -61,6 +61,21 @@ module_param_named(unrestricted_guest,
61static int __read_mostly emulate_invalid_guest_state = 0; 61static int __read_mostly emulate_invalid_guest_state = 0;
62module_param(emulate_invalid_guest_state, bool, S_IRUGO); 62module_param(emulate_invalid_guest_state, bool, S_IRUGO);
63 63
64#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
65 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
66#define KVM_GUEST_CR0_MASK \
67 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
68#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
69 (X86_CR0_WP | X86_CR0_NE)
70#define KVM_VM_CR0_ALWAYS_ON \
71 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
72#define KVM_CR4_GUEST_OWNED_BITS \
73 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
74 | X86_CR4_OSXMMEXCPT)
75
76#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
77#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
78
64/* 79/*
65 * These 2 parameters are used to config the controls for Pause-Loop Exiting: 80 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
66 * ple_gap: upper bound on the amount of time between two successive 81 * ple_gap: upper bound on the amount of time between two successive
@@ -136,6 +151,8 @@ struct vcpu_vmx {
136 ktime_t entry_time; 151 ktime_t entry_time;
137 s64 vnmi_blocked_time; 152 s64 vnmi_blocked_time;
138 u32 exit_reason; 153 u32 exit_reason;
154
155 bool rdtscp_enabled;
139}; 156};
140 157
141static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 158static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -210,7 +227,7 @@ static const u32 vmx_msr_index[] = {
210#ifdef CONFIG_X86_64 227#ifdef CONFIG_X86_64
211 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, 228 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
212#endif 229#endif
213 MSR_EFER, MSR_K6_STAR, 230 MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR,
214}; 231};
215#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) 232#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
216 233
@@ -301,6 +318,11 @@ static inline bool cpu_has_vmx_ept_2m_page(void)
301 return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); 318 return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
302} 319}
303 320
321static inline bool cpu_has_vmx_ept_1g_page(void)
322{
323 return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT);
324}
325
304static inline int cpu_has_vmx_invept_individual_addr(void) 326static inline int cpu_has_vmx_invept_individual_addr(void)
305{ 327{
306 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); 328 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@@ -336,9 +358,7 @@ static inline int cpu_has_vmx_ple(void)
336 358
337static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 359static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
338{ 360{
339 return flexpriority_enabled && 361 return flexpriority_enabled && irqchip_in_kernel(kvm);
340 (cpu_has_vmx_virtualize_apic_accesses()) &&
341 (irqchip_in_kernel(kvm));
342} 362}
343 363
344static inline int cpu_has_vmx_vpid(void) 364static inline int cpu_has_vmx_vpid(void)
@@ -347,6 +367,12 @@ static inline int cpu_has_vmx_vpid(void)
347 SECONDARY_EXEC_ENABLE_VPID; 367 SECONDARY_EXEC_ENABLE_VPID;
348} 368}
349 369
370static inline int cpu_has_vmx_rdtscp(void)
371{
372 return vmcs_config.cpu_based_2nd_exec_ctrl &
373 SECONDARY_EXEC_RDTSCP;
374}
375
350static inline int cpu_has_virtual_nmis(void) 376static inline int cpu_has_virtual_nmis(void)
351{ 377{
352 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; 378 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
@@ -551,22 +577,18 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
551{ 577{
552 u32 eb; 578 u32 eb;
553 579
554 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); 580 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
555 if (!vcpu->fpu_active) 581 (1u << NM_VECTOR) | (1u << DB_VECTOR);
556 eb |= 1u << NM_VECTOR; 582 if ((vcpu->guest_debug &
557 /* 583 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
558 * Unconditionally intercept #DB so we can maintain dr6 without 584 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
559 * reading it every exit. 585 eb |= 1u << BP_VECTOR;
560 */
561 eb |= 1u << DB_VECTOR;
562 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
563 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
564 eb |= 1u << BP_VECTOR;
565 }
566 if (to_vmx(vcpu)->rmode.vm86_active) 586 if (to_vmx(vcpu)->rmode.vm86_active)
567 eb = ~0; 587 eb = ~0;
568 if (enable_ept) 588 if (enable_ept)
569 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ 589 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
590 if (vcpu->fpu_active)
591 eb &= ~(1u << NM_VECTOR);
570 vmcs_write32(EXCEPTION_BITMAP, eb); 592 vmcs_write32(EXCEPTION_BITMAP, eb);
571} 593}
572 594
@@ -589,7 +611,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
589 u64 guest_efer; 611 u64 guest_efer;
590 u64 ignore_bits; 612 u64 ignore_bits;
591 613
592 guest_efer = vmx->vcpu.arch.shadow_efer; 614 guest_efer = vmx->vcpu.arch.efer;
593 615
594 /* 616 /*
595 * NX is emulated; LMA and LME handled by hardware; SCE meaningless 617
@@ -767,22 +789,30 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
767 789
768static void vmx_fpu_activate(struct kvm_vcpu *vcpu) 790static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
769{ 791{
792 ulong cr0;
793
770 if (vcpu->fpu_active) 794 if (vcpu->fpu_active)
771 return; 795 return;
772 vcpu->fpu_active = 1; 796 vcpu->fpu_active = 1;
773 vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); 797 cr0 = vmcs_readl(GUEST_CR0);
774 if (vcpu->arch.cr0 & X86_CR0_TS) 798 cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
775 vmcs_set_bits(GUEST_CR0, X86_CR0_TS); 799 cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
800 vmcs_writel(GUEST_CR0, cr0);
776 update_exception_bitmap(vcpu); 801 update_exception_bitmap(vcpu);
802 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
803 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
777} 804}
778 805
806static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
807
779static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) 808static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
780{ 809{
781 if (!vcpu->fpu_active) 810 vmx_decache_cr0_guest_bits(vcpu);
782 return; 811 vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
783 vcpu->fpu_active = 0;
784 vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
785 update_exception_bitmap(vcpu); 812 update_exception_bitmap(vcpu);
813 vcpu->arch.cr0_guest_owned_bits = 0;
814 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
815 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
786} 816}
787 817
788static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 818static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
@@ -878,6 +908,11 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
878 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); 908 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
879} 909}
880 910
911static bool vmx_rdtscp_supported(void)
912{
913 return cpu_has_vmx_rdtscp();
914}
915
881/* 916/*
882 * Swap MSR entry in host/guest MSR entry array. 917 * Swap MSR entry in host/guest MSR entry array.
883 */ 918 */
@@ -913,12 +948,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
913 index = __find_msr_index(vmx, MSR_CSTAR); 948 index = __find_msr_index(vmx, MSR_CSTAR);
914 if (index >= 0) 949 if (index >= 0)
915 move_msr_up(vmx, index, save_nmsrs++); 950 move_msr_up(vmx, index, save_nmsrs++);
951 index = __find_msr_index(vmx, MSR_TSC_AUX);
952 if (index >= 0 && vmx->rdtscp_enabled)
953 move_msr_up(vmx, index, save_nmsrs++);
916 /* 954 /*
917 * MSR_K6_STAR is only needed on long mode guests, and only 955 * MSR_K6_STAR is only needed on long mode guests, and only
918 * if efer.sce is enabled. 956 * if efer.sce is enabled.
919 */ 957 */
920 index = __find_msr_index(vmx, MSR_K6_STAR); 958 index = __find_msr_index(vmx, MSR_K6_STAR);
921 if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE)) 959 if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
922 move_msr_up(vmx, index, save_nmsrs++); 960 move_msr_up(vmx, index, save_nmsrs++);
923 } 961 }
924#endif 962#endif
@@ -1002,6 +1040,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1002 case MSR_IA32_SYSENTER_ESP: 1040 case MSR_IA32_SYSENTER_ESP:
1003 data = vmcs_readl(GUEST_SYSENTER_ESP); 1041 data = vmcs_readl(GUEST_SYSENTER_ESP);
1004 break; 1042 break;
1043 case MSR_TSC_AUX:
1044 if (!to_vmx(vcpu)->rdtscp_enabled)
1045 return 1;
1046 /* Otherwise falls through */
1005 default: 1047 default:
1006 vmx_load_host_state(to_vmx(vcpu)); 1048 vmx_load_host_state(to_vmx(vcpu));
1007 msr = find_msr_entry(to_vmx(vcpu), msr_index); 1049 msr = find_msr_entry(to_vmx(vcpu), msr_index);
@@ -1065,7 +1107,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1065 vcpu->arch.pat = data; 1107 vcpu->arch.pat = data;
1066 break; 1108 break;
1067 } 1109 }
1068 /* Otherwise falls through to kvm_set_msr_common */ 1110 ret = kvm_set_msr_common(vcpu, msr_index, data);
1111 break;
1112 case MSR_TSC_AUX:
1113 if (!vmx->rdtscp_enabled)
1114 return 1;
1115 /* Check reserved bit, higher 32 bits should be zero */
1116 if ((data >> 32) != 0)
1117 return 1;
1118 /* Otherwise falls through */
1069 default: 1119 default:
1070 msr = find_msr_entry(vmx, msr_index); 1120 msr = find_msr_entry(vmx, msr_index);
1071 if (msr) { 1121 if (msr) {
@@ -1224,6 +1274,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1224 CPU_BASED_USE_IO_BITMAPS | 1274 CPU_BASED_USE_IO_BITMAPS |
1225 CPU_BASED_MOV_DR_EXITING | 1275 CPU_BASED_MOV_DR_EXITING |
1226 CPU_BASED_USE_TSC_OFFSETING | 1276 CPU_BASED_USE_TSC_OFFSETING |
1277 CPU_BASED_MWAIT_EXITING |
1278 CPU_BASED_MONITOR_EXITING |
1227 CPU_BASED_INVLPG_EXITING; 1279 CPU_BASED_INVLPG_EXITING;
1228 opt = CPU_BASED_TPR_SHADOW | 1280 opt = CPU_BASED_TPR_SHADOW |
1229 CPU_BASED_USE_MSR_BITMAPS | 1281 CPU_BASED_USE_MSR_BITMAPS |
@@ -1243,7 +1295,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1243 SECONDARY_EXEC_ENABLE_VPID | 1295 SECONDARY_EXEC_ENABLE_VPID |
1244 SECONDARY_EXEC_ENABLE_EPT | 1296 SECONDARY_EXEC_ENABLE_EPT |
1245 SECONDARY_EXEC_UNRESTRICTED_GUEST | 1297 SECONDARY_EXEC_UNRESTRICTED_GUEST |
1246 SECONDARY_EXEC_PAUSE_LOOP_EXITING; 1298 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
1299 SECONDARY_EXEC_RDTSCP;
1247 if (adjust_vmx_controls(min2, opt2, 1300 if (adjust_vmx_controls(min2, opt2,
1248 MSR_IA32_VMX_PROCBASED_CTLS2, 1301 MSR_IA32_VMX_PROCBASED_CTLS2,
1249 &_cpu_based_2nd_exec_control) < 0) 1302 &_cpu_based_2nd_exec_control) < 0)
@@ -1457,8 +1510,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1457static gva_t rmode_tss_base(struct kvm *kvm) 1510static gva_t rmode_tss_base(struct kvm *kvm)
1458{ 1511{
1459 if (!kvm->arch.tss_addr) { 1512 if (!kvm->arch.tss_addr) {
1460 gfn_t base_gfn = kvm->memslots[0].base_gfn + 1513 struct kvm_memslots *slots;
1461 kvm->memslots[0].npages - 3; 1514 gfn_t base_gfn;
1515
1516 slots = rcu_dereference(kvm->memslots);
1517 base_gfn = kvm->memslots->memslots[0].base_gfn +
1518 kvm->memslots->memslots[0].npages - 3;
1462 return base_gfn << PAGE_SHIFT; 1519 return base_gfn << PAGE_SHIFT;
1463 } 1520 }
1464 return kvm->arch.tss_addr; 1521 return kvm->arch.tss_addr;
@@ -1544,9 +1601,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1544 * of this msr depends on is_long_mode(). 1601 * of this msr depends on is_long_mode().
1545 */ 1602 */
1546 vmx_load_host_state(to_vmx(vcpu)); 1603 vmx_load_host_state(to_vmx(vcpu));
1547 vcpu->arch.shadow_efer = efer; 1604 vcpu->arch.efer = efer;
1548 if (!msr)
1549 return;
1550 if (efer & EFER_LMA) { 1605 if (efer & EFER_LMA) {
1551 vmcs_write32(VM_ENTRY_CONTROLS, 1606 vmcs_write32(VM_ENTRY_CONTROLS,
1552 vmcs_read32(VM_ENTRY_CONTROLS) | 1607 vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1576,13 +1631,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
1576 (guest_tr_ar & ~AR_TYPE_MASK) 1631 (guest_tr_ar & ~AR_TYPE_MASK)
1577 | AR_TYPE_BUSY_64_TSS); 1632 | AR_TYPE_BUSY_64_TSS);
1578 } 1633 }
1579 vcpu->arch.shadow_efer |= EFER_LMA; 1634 vcpu->arch.efer |= EFER_LMA;
1580 vmx_set_efer(vcpu, vcpu->arch.shadow_efer); 1635 vmx_set_efer(vcpu, vcpu->arch.efer);
1581} 1636}
1582 1637
1583static void exit_lmode(struct kvm_vcpu *vcpu) 1638static void exit_lmode(struct kvm_vcpu *vcpu)
1584{ 1639{
1585 vcpu->arch.shadow_efer &= ~EFER_LMA; 1640 vcpu->arch.efer &= ~EFER_LMA;
1586 1641
1587 vmcs_write32(VM_ENTRY_CONTROLS, 1642 vmcs_write32(VM_ENTRY_CONTROLS,
1588 vmcs_read32(VM_ENTRY_CONTROLS) 1643 vmcs_read32(VM_ENTRY_CONTROLS)
@@ -1598,10 +1653,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1598 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); 1653 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
1599} 1654}
1600 1655
1656static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1657{
1658 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
1659
1660 vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
1661 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
1662}
1663
1601static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1664static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1602{ 1665{
1603 vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; 1666 ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
1604 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; 1667
1668 vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
1669 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
1605} 1670}
1606 1671
1607static void ept_load_pdptrs(struct kvm_vcpu *vcpu) 1672static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -1646,7 +1711,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1646 (CPU_BASED_CR3_LOAD_EXITING | 1711 (CPU_BASED_CR3_LOAD_EXITING |
1647 CPU_BASED_CR3_STORE_EXITING)); 1712 CPU_BASED_CR3_STORE_EXITING));
1648 vcpu->arch.cr0 = cr0; 1713 vcpu->arch.cr0 = cr0;
1649 vmx_set_cr4(vcpu, vcpu->arch.cr4); 1714 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
1650 } else if (!is_paging(vcpu)) { 1715 } else if (!is_paging(vcpu)) {
1651 /* From nonpaging to paging */ 1716 /* From nonpaging to paging */
1652 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1717 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1654,23 +1719,13 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1654 ~(CPU_BASED_CR3_LOAD_EXITING | 1719 ~(CPU_BASED_CR3_LOAD_EXITING |
1655 CPU_BASED_CR3_STORE_EXITING)); 1720 CPU_BASED_CR3_STORE_EXITING));
1656 vcpu->arch.cr0 = cr0; 1721 vcpu->arch.cr0 = cr0;
1657 vmx_set_cr4(vcpu, vcpu->arch.cr4); 1722 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
1658 } 1723 }
1659 1724
1660 if (!(cr0 & X86_CR0_WP)) 1725 if (!(cr0 & X86_CR0_WP))
1661 *hw_cr0 &= ~X86_CR0_WP; 1726 *hw_cr0 &= ~X86_CR0_WP;
1662} 1727}
1663 1728
1664static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
1665 struct kvm_vcpu *vcpu)
1666{
1667 if (!is_paging(vcpu)) {
1668 *hw_cr4 &= ~X86_CR4_PAE;
1669 *hw_cr4 |= X86_CR4_PSE;
1670 } else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
1671 *hw_cr4 &= ~X86_CR4_PAE;
1672}
1673
1674static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1729static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1675{ 1730{
1676 struct vcpu_vmx *vmx = to_vmx(vcpu); 1731 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1682,8 +1737,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1682 else 1737 else
1683 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; 1738 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
1684 1739
1685 vmx_fpu_deactivate(vcpu);
1686
1687 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) 1740 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
1688 enter_pmode(vcpu); 1741 enter_pmode(vcpu);
1689 1742
@@ -1691,7 +1744,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1691 enter_rmode(vcpu); 1744 enter_rmode(vcpu);
1692 1745
1693#ifdef CONFIG_X86_64 1746#ifdef CONFIG_X86_64
1694 if (vcpu->arch.shadow_efer & EFER_LME) { 1747 if (vcpu->arch.efer & EFER_LME) {
1695 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) 1748 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
1696 enter_lmode(vcpu); 1749 enter_lmode(vcpu);
1697 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) 1750 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
@@ -1702,12 +1755,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1702 if (enable_ept) 1755 if (enable_ept)
1703 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); 1756 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
1704 1757
1758 if (!vcpu->fpu_active)
1759 hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
1760
1705 vmcs_writel(CR0_READ_SHADOW, cr0); 1761 vmcs_writel(CR0_READ_SHADOW, cr0);
1706 vmcs_writel(GUEST_CR0, hw_cr0); 1762 vmcs_writel(GUEST_CR0, hw_cr0);
1707 vcpu->arch.cr0 = cr0; 1763 vcpu->arch.cr0 = cr0;
1708
1709 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
1710 vmx_fpu_activate(vcpu);
1711} 1764}
1712 1765
1713static u64 construct_eptp(unsigned long root_hpa) 1766static u64 construct_eptp(unsigned long root_hpa)
@@ -1738,8 +1791,6 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1738 1791
1739 vmx_flush_tlb(vcpu); 1792 vmx_flush_tlb(vcpu);
1740 vmcs_writel(GUEST_CR3, guest_cr3); 1793 vmcs_writel(GUEST_CR3, guest_cr3);
1741 if (vcpu->arch.cr0 & X86_CR0_PE)
1742 vmx_fpu_deactivate(vcpu);
1743} 1794}
1744 1795
1745static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1796static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1748,8 +1799,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1748 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); 1799 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
1749 1800
1750 vcpu->arch.cr4 = cr4; 1801 vcpu->arch.cr4 = cr4;
1751 if (enable_ept) 1802 if (enable_ept) {
1752 ept_update_paging_mode_cr4(&hw_cr4, vcpu); 1803 if (!is_paging(vcpu)) {
1804 hw_cr4 &= ~X86_CR4_PAE;
1805 hw_cr4 |= X86_CR4_PSE;
1806 } else if (!(cr4 & X86_CR4_PAE)) {
1807 hw_cr4 &= ~X86_CR4_PAE;
1808 }
1809 }
1753 1810
1754 vmcs_writel(CR4_READ_SHADOW, cr4); 1811 vmcs_writel(CR4_READ_SHADOW, cr4);
1755 vmcs_writel(GUEST_CR4, hw_cr4); 1812 vmcs_writel(GUEST_CR4, hw_cr4);
@@ -1787,7 +1844,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
1787 1844
1788static int vmx_get_cpl(struct kvm_vcpu *vcpu) 1845static int vmx_get_cpl(struct kvm_vcpu *vcpu)
1789{ 1846{
1790 if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ 1847 if (!is_protmode(vcpu))
1791 return 0; 1848 return 0;
1792 1849
1793 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ 1850 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
@@ -2042,7 +2099,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
2042static bool guest_state_valid(struct kvm_vcpu *vcpu) 2099static bool guest_state_valid(struct kvm_vcpu *vcpu)
2043{ 2100{
2044 /* real mode guest state checks */ 2101 /* real mode guest state checks */
2045 if (!(vcpu->arch.cr0 & X86_CR0_PE)) { 2102 if (!is_protmode(vcpu)) {
2046 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) 2103 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
2047 return false; 2104 return false;
2048 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) 2105 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -2175,7 +2232,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
2175 struct kvm_userspace_memory_region kvm_userspace_mem; 2232 struct kvm_userspace_memory_region kvm_userspace_mem;
2176 int r = 0; 2233 int r = 0;
2177 2234
2178 down_write(&kvm->slots_lock); 2235 mutex_lock(&kvm->slots_lock);
2179 if (kvm->arch.apic_access_page) 2236 if (kvm->arch.apic_access_page)
2180 goto out; 2237 goto out;
2181 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; 2238 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -2188,7 +2245,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
2188 2245
2189 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); 2246 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
2190out: 2247out:
2191 up_write(&kvm->slots_lock); 2248 mutex_unlock(&kvm->slots_lock);
2192 return r; 2249 return r;
2193} 2250}
2194 2251
@@ -2197,7 +2254,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
2197 struct kvm_userspace_memory_region kvm_userspace_mem; 2254 struct kvm_userspace_memory_region kvm_userspace_mem;
2198 int r = 0; 2255 int r = 0;
2199 2256
2200 down_write(&kvm->slots_lock); 2257 mutex_lock(&kvm->slots_lock);
2201 if (kvm->arch.ept_identity_pagetable) 2258 if (kvm->arch.ept_identity_pagetable)
2202 goto out; 2259 goto out;
2203 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; 2260 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
@@ -2212,7 +2269,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
2212 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, 2269 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
2213 kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); 2270 kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
2214out: 2271out:
2215 up_write(&kvm->slots_lock); 2272 mutex_unlock(&kvm->slots_lock);
2216 return r; 2273 return r;
2217} 2274}
2218 2275
@@ -2384,14 +2441,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2384 for (i = 0; i < NR_VMX_MSR; ++i) { 2441 for (i = 0; i < NR_VMX_MSR; ++i) {
2385 u32 index = vmx_msr_index[i]; 2442 u32 index = vmx_msr_index[i];
2386 u32 data_low, data_high; 2443 u32 data_low, data_high;
2387 u64 data;
2388 int j = vmx->nmsrs; 2444 int j = vmx->nmsrs;
2389 2445
2390 if (rdmsr_safe(index, &data_low, &data_high) < 0) 2446 if (rdmsr_safe(index, &data_low, &data_high) < 0)
2391 continue; 2447 continue;
2392 if (wrmsr_safe(index, data_low, data_high) < 0) 2448 if (wrmsr_safe(index, data_low, data_high) < 0)
2393 continue; 2449 continue;
2394 data = data_low | ((u64)data_high << 32);
2395 vmx->guest_msrs[j].index = i; 2450 vmx->guest_msrs[j].index = i;
2396 vmx->guest_msrs[j].data = 0; 2451 vmx->guest_msrs[j].data = 0;
2397 vmx->guest_msrs[j].mask = -1ull; 2452 vmx->guest_msrs[j].mask = -1ull;
@@ -2404,7 +2459,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2404 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); 2459 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
2405 2460
2406 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 2461 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
2407 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); 2462 vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
2463 if (enable_ept)
2464 vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
2465 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
2408 2466
2409 tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; 2467 tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
2410 rdtscll(tsc_this); 2468 rdtscll(tsc_this);
@@ -2429,10 +2487,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2429{ 2487{
2430 struct vcpu_vmx *vmx = to_vmx(vcpu); 2488 struct vcpu_vmx *vmx = to_vmx(vcpu);
2431 u64 msr; 2489 u64 msr;
2432 int ret; 2490 int ret, idx;
2433 2491
2434 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); 2492 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
2435 down_read(&vcpu->kvm->slots_lock); 2493 idx = srcu_read_lock(&vcpu->kvm->srcu);
2436 if (!init_rmode(vmx->vcpu.kvm)) { 2494 if (!init_rmode(vmx->vcpu.kvm)) {
2437 ret = -ENOMEM; 2495 ret = -ENOMEM;
2438 goto out; 2496 goto out;
@@ -2526,7 +2584,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2526 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 2584 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
2527 2585
2528 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 2586 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
2529 vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ 2587 vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
2530 vmx_set_cr4(&vmx->vcpu, 0); 2588 vmx_set_cr4(&vmx->vcpu, 0);
2531 vmx_set_efer(&vmx->vcpu, 0); 2589 vmx_set_efer(&vmx->vcpu, 0);
2532 vmx_fpu_activate(&vmx->vcpu); 2590 vmx_fpu_activate(&vmx->vcpu);
@@ -2540,7 +2598,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2540 vmx->emulation_required = 0; 2598 vmx->emulation_required = 0;
2541 2599
2542out: 2600out:
2543 up_read(&vcpu->kvm->slots_lock); 2601 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2544 return ret; 2602 return ret;
2545} 2603}
2546 2604
@@ -2717,6 +2775,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2717 kvm_queue_exception(vcpu, vec); 2775 kvm_queue_exception(vcpu, vec);
2718 return 1; 2776 return 1;
2719 case BP_VECTOR: 2777 case BP_VECTOR:
2778 /*
2779 * Update instruction length as we may reinject the exception
2780 * from user space while in guest debugging mode.
2781 */
2782 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
2783 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2720 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 2784 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
2721 return 0; 2785 return 0;
2722 /* fall through */ 2786 /* fall through */
@@ -2839,6 +2903,13 @@ static int handle_exception(struct kvm_vcpu *vcpu)
2839 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); 2903 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
2840 /* fall through */ 2904 /* fall through */
2841 case BP_VECTOR: 2905 case BP_VECTOR:
2906 /*
2907 * Update instruction length as we may reinject #BP from
2908 * user space while in guest debugging mode. Reading it for
2909 * #DB as well causes no harm, it is not used in that case.
2910 */
2911 vmx->vcpu.arch.event_exit_inst_len =
2912 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2842 kvm_run->exit_reason = KVM_EXIT_DEBUG; 2913 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2843 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; 2914 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
2844 kvm_run->debug.arch.exception = ex_no; 2915 kvm_run->debug.arch.exception = ex_no;
@@ -2940,11 +3011,10 @@ static int handle_cr(struct kvm_vcpu *vcpu)
2940 }; 3011 };
2941 break; 3012 break;
2942 case 2: /* clts */ 3013 case 2: /* clts */
2943 vmx_fpu_deactivate(vcpu); 3014 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
2944 vcpu->arch.cr0 &= ~X86_CR0_TS; 3015 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
2945 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
2946 vmx_fpu_activate(vcpu);
2947 skip_emulated_instruction(vcpu); 3016 skip_emulated_instruction(vcpu);
3017 vmx_fpu_activate(vcpu);
2948 return 1; 3018 return 1;
2949 case 1: /*mov from cr*/ 3019 case 1: /*mov from cr*/
2950 switch (cr) { 3020 switch (cr) {
@@ -2962,7 +3032,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
2962 } 3032 }
2963 break; 3033 break;
2964 case 3: /* lmsw */ 3034 case 3: /* lmsw */
2965 kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); 3035 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
3036 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
3037 kvm_lmsw(vcpu, val);
2966 3038
2967 skip_emulated_instruction(vcpu); 3039 skip_emulated_instruction(vcpu);
2968 return 1; 3040 return 1;
@@ -2975,12 +3047,22 @@ static int handle_cr(struct kvm_vcpu *vcpu)
2975 return 0; 3047 return 0;
2976} 3048}
2977 3049
3050static int check_dr_alias(struct kvm_vcpu *vcpu)
3051{
3052 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
3053 kvm_queue_exception(vcpu, UD_VECTOR);
3054 return -1;
3055 }
3056 return 0;
3057}
3058
2978static int handle_dr(struct kvm_vcpu *vcpu) 3059static int handle_dr(struct kvm_vcpu *vcpu)
2979{ 3060{
2980 unsigned long exit_qualification; 3061 unsigned long exit_qualification;
2981 unsigned long val; 3062 unsigned long val;
2982 int dr, reg; 3063 int dr, reg;
2983 3064
3065 /* Do not handle if the CPL > 0, will trigger GP on re-entry */
2984 if (!kvm_require_cpl(vcpu, 0)) 3066 if (!kvm_require_cpl(vcpu, 0))
2985 return 1; 3067 return 1;
2986 dr = vmcs_readl(GUEST_DR7); 3068 dr = vmcs_readl(GUEST_DR7);
@@ -3016,14 +3098,20 @@ static int handle_dr(struct kvm_vcpu *vcpu)
3016 case 0 ... 3: 3098 case 0 ... 3:
3017 val = vcpu->arch.db[dr]; 3099 val = vcpu->arch.db[dr];
3018 break; 3100 break;
3101 case 4:
3102 if (check_dr_alias(vcpu) < 0)
3103 return 1;
3104 /* fall through */
3019 case 6: 3105 case 6:
3020 val = vcpu->arch.dr6; 3106 val = vcpu->arch.dr6;
3021 break; 3107 break;
3022 case 7: 3108 case 5:
3109 if (check_dr_alias(vcpu) < 0)
3110 return 1;
3111 /* fall through */
3112 default: /* 7 */
3023 val = vcpu->arch.dr7; 3113 val = vcpu->arch.dr7;
3024 break; 3114 break;
3025 default:
3026 val = 0;
3027 } 3115 }
3028 kvm_register_write(vcpu, reg, val); 3116 kvm_register_write(vcpu, reg, val);
3029 } else { 3117 } else {
@@ -3034,21 +3122,25 @@ static int handle_dr(struct kvm_vcpu *vcpu)
3034 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 3122 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
3035 vcpu->arch.eff_db[dr] = val; 3123 vcpu->arch.eff_db[dr] = val;
3036 break; 3124 break;
3037 case 4 ... 5: 3125 case 4:
3038 if (vcpu->arch.cr4 & X86_CR4_DE) 3126 if (check_dr_alias(vcpu) < 0)
3039 kvm_queue_exception(vcpu, UD_VECTOR); 3127 return 1;
3040 break; 3128 /* fall through */
3041 case 6: 3129 case 6:
3042 if (val & 0xffffffff00000000ULL) { 3130 if (val & 0xffffffff00000000ULL) {
3043 kvm_queue_exception(vcpu, GP_VECTOR); 3131 kvm_inject_gp(vcpu, 0);
3044 break; 3132 return 1;
3045 } 3133 }
3046 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 3134 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
3047 break; 3135 break;
3048 case 7: 3136 case 5:
3137 if (check_dr_alias(vcpu) < 0)
3138 return 1;
3139 /* fall through */
3140 default: /* 7 */
3049 if (val & 0xffffffff00000000ULL) { 3141 if (val & 0xffffffff00000000ULL) {
3050 kvm_queue_exception(vcpu, GP_VECTOR); 3142 kvm_inject_gp(vcpu, 0);
3051 break; 3143 return 1;
3052 } 3144 }
3053 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 3145 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
3054 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 3146 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
@@ -3075,6 +3167,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
3075 u64 data; 3167 u64 data;
3076 3168
3077 if (vmx_get_msr(vcpu, ecx, &data)) { 3169 if (vmx_get_msr(vcpu, ecx, &data)) {
3170 trace_kvm_msr_read_ex(ecx);
3078 kvm_inject_gp(vcpu, 0); 3171 kvm_inject_gp(vcpu, 0);
3079 return 1; 3172 return 1;
3080 } 3173 }
@@ -3094,13 +3187,13 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
3094 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) 3187 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
3095 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); 3188 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3096 3189
3097 trace_kvm_msr_write(ecx, data);
3098
3099 if (vmx_set_msr(vcpu, ecx, data) != 0) { 3190 if (vmx_set_msr(vcpu, ecx, data) != 0) {
3191 trace_kvm_msr_write_ex(ecx, data);
3100 kvm_inject_gp(vcpu, 0); 3192 kvm_inject_gp(vcpu, 0);
3101 return 1; 3193 return 1;
3102 } 3194 }
3103 3195
3196 trace_kvm_msr_write(ecx, data);
3104 skip_emulated_instruction(vcpu); 3197 skip_emulated_instruction(vcpu);
3105 return 1; 3198 return 1;
3106} 3199}
@@ -3385,7 +3478,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
3385 } 3478 }
3386 3479
3387 if (err != EMULATE_DONE) { 3480 if (err != EMULATE_DONE) {
3388 kvm_report_emulation_failure(vcpu, "emulation failure");
3389 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 3481 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3390 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 3482 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3391 vcpu->run->internal.ndata = 0; 3483 vcpu->run->internal.ndata = 0;
@@ -3416,6 +3508,12 @@ static int handle_pause(struct kvm_vcpu *vcpu)
3416 return 1; 3508 return 1;
3417} 3509}
3418 3510
3511static int handle_invalid_op(struct kvm_vcpu *vcpu)
3512{
3513 kvm_queue_exception(vcpu, UD_VECTOR);
3514 return 1;
3515}
3516
3419/* 3517/*
3420 * The exit handlers return 1 if the exit was handled fully and guest execution 3518 * The exit handlers return 1 if the exit was handled fully and guest execution
3421 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 3519 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -3453,6 +3551,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
3453 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, 3551 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
3454 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, 3552 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
3455 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, 3553 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
3554 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
3555 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
3456}; 3556};
3457 3557
3458static const int kvm_vmx_max_exit_handlers = 3558static const int kvm_vmx_max_exit_handlers =
@@ -3686,9 +3786,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
3686 */ 3786 */
3687 vmcs_writel(HOST_CR0, read_cr0()); 3787 vmcs_writel(HOST_CR0, read_cr0());
3688 3788
3689 if (vcpu->arch.switch_db_regs)
3690 set_debugreg(vcpu->arch.dr6, 6);
3691
3692 asm( 3789 asm(
3693 /* Store host registers */ 3790 /* Store host registers */
3694 "push %%"R"dx; push %%"R"bp;" 3791 "push %%"R"dx; push %%"R"bp;"
@@ -3789,9 +3886,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
3789 | (1 << VCPU_EXREG_PDPTR)); 3886 | (1 << VCPU_EXREG_PDPTR));
3790 vcpu->arch.regs_dirty = 0; 3887 vcpu->arch.regs_dirty = 0;
3791 3888
3792 if (vcpu->arch.switch_db_regs)
3793 get_debugreg(vcpu->arch.dr6, 6);
3794
3795 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 3889 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
3796 if (vmx->rmode.irq.pending) 3890 if (vmx->rmode.irq.pending)
3797 fixup_rmode_irq(vmx); 3891 fixup_rmode_irq(vmx);
@@ -3920,7 +4014,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3920 * b. VT-d with snooping control feature: snooping control feature of 4014 * b. VT-d with snooping control feature: snooping control feature of
3921 * VT-d engine can guarantee the cache correctness. Just set it 4015 * VT-d engine can guarantee the cache correctness. Just set it
3922 * to WB to keep consistent with host. So the same as item 3. 4016 * to WB to keep consistent with host. So the same as item 3.
3923 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep 4017 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
3924 * consistent with host MTRR 4018 * consistent with host MTRR
3925 */ 4019 */
3926 if (is_mmio) 4020 if (is_mmio)
@@ -3931,37 +4025,88 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3931 VMX_EPT_MT_EPTE_SHIFT; 4025 VMX_EPT_MT_EPTE_SHIFT;
3932 else 4026 else
3933 ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) 4027 ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
3934 | VMX_EPT_IGMT_BIT; 4028 | VMX_EPT_IPAT_BIT;
3935 4029
3936 return ret; 4030 return ret;
3937} 4031}
3938 4032
4033#define _ER(x) { EXIT_REASON_##x, #x }
4034
3939static const struct trace_print_flags vmx_exit_reasons_str[] = { 4035static const struct trace_print_flags vmx_exit_reasons_str[] = {
3940 { EXIT_REASON_EXCEPTION_NMI, "exception" }, 4036 _ER(EXCEPTION_NMI),
3941 { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" }, 4037 _ER(EXTERNAL_INTERRUPT),
3942 { EXIT_REASON_TRIPLE_FAULT, "triple_fault" }, 4038 _ER(TRIPLE_FAULT),
3943 { EXIT_REASON_NMI_WINDOW, "nmi_window" }, 4039 _ER(PENDING_INTERRUPT),
3944 { EXIT_REASON_IO_INSTRUCTION, "io_instruction" }, 4040 _ER(NMI_WINDOW),
3945 { EXIT_REASON_CR_ACCESS, "cr_access" }, 4041 _ER(TASK_SWITCH),
3946 { EXIT_REASON_DR_ACCESS, "dr_access" }, 4042 _ER(CPUID),
3947 { EXIT_REASON_CPUID, "cpuid" }, 4043 _ER(HLT),
3948 { EXIT_REASON_MSR_READ, "rdmsr" }, 4044 _ER(INVLPG),
3949 { EXIT_REASON_MSR_WRITE, "wrmsr" }, 4045 _ER(RDPMC),
3950 { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" }, 4046 _ER(RDTSC),
3951 { EXIT_REASON_HLT, "halt" }, 4047 _ER(VMCALL),
3952 { EXIT_REASON_INVLPG, "invlpg" }, 4048 _ER(VMCLEAR),
3953 { EXIT_REASON_VMCALL, "hypercall" }, 4049 _ER(VMLAUNCH),
3954 { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" }, 4050 _ER(VMPTRLD),
3955 { EXIT_REASON_APIC_ACCESS, "apic_access" }, 4051 _ER(VMPTRST),
3956 { EXIT_REASON_WBINVD, "wbinvd" }, 4052 _ER(VMREAD),
3957 { EXIT_REASON_TASK_SWITCH, "task_switch" }, 4053 _ER(VMRESUME),
3958 { EXIT_REASON_EPT_VIOLATION, "ept_violation" }, 4054 _ER(VMWRITE),
4055 _ER(VMOFF),
4056 _ER(VMON),
4057 _ER(CR_ACCESS),
4058 _ER(DR_ACCESS),
4059 _ER(IO_INSTRUCTION),
4060 _ER(MSR_READ),
4061 _ER(MSR_WRITE),
4062 _ER(MWAIT_INSTRUCTION),
4063 _ER(MONITOR_INSTRUCTION),
4064 _ER(PAUSE_INSTRUCTION),
4065 _ER(MCE_DURING_VMENTRY),
4066 _ER(TPR_BELOW_THRESHOLD),
4067 _ER(APIC_ACCESS),
4068 _ER(EPT_VIOLATION),
4069 _ER(EPT_MISCONFIG),
4070 _ER(WBINVD),
3959 { -1, NULL } 4071 { -1, NULL }
3960}; 4072};
3961 4073
3962static bool vmx_gb_page_enable(void) 4074#undef _ER
4075
4076static int vmx_get_lpage_level(void)
4077{
4078 if (enable_ept && !cpu_has_vmx_ept_1g_page())
4079 return PT_DIRECTORY_LEVEL;
4080 else
4081 /* shadow paging, or EPT with 1GB-page support, can map 1GB pages */
4082 return PT_PDPE_LEVEL;
4083}
4084
4085static inline u32 bit(int bitno)
4086{
4087 return 1 << (bitno & 31);
4088}
4089
4090static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
3963{ 4091{
3964 return false; 4092 struct kvm_cpuid_entry2 *best;
4093 struct vcpu_vmx *vmx = to_vmx(vcpu);
4094 u32 exec_control;
4095
4096 vmx->rdtscp_enabled = false;
4097 if (vmx_rdtscp_supported()) {
4098 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
4099 if (exec_control & SECONDARY_EXEC_RDTSCP) {
4100 best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
4101 if (best && (best->edx & bit(X86_FEATURE_RDTSCP)))
4102 vmx->rdtscp_enabled = true;
4103 else {
4104 exec_control &= ~SECONDARY_EXEC_RDTSCP;
4105 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
4106 exec_control);
4107 }
4108 }
4109 }
3965} 4110}
3966 4111
3967static struct kvm_x86_ops vmx_x86_ops = { 4112static struct kvm_x86_ops vmx_x86_ops = {
@@ -3990,6 +4135,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
3990 .set_segment = vmx_set_segment, 4135 .set_segment = vmx_set_segment,
3991 .get_cpl = vmx_get_cpl, 4136 .get_cpl = vmx_get_cpl,
3992 .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 4137 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
4138 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
3993 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, 4139 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
3994 .set_cr0 = vmx_set_cr0, 4140 .set_cr0 = vmx_set_cr0,
3995 .set_cr3 = vmx_set_cr3, 4141 .set_cr3 = vmx_set_cr3,
@@ -4002,6 +4148,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
4002 .cache_reg = vmx_cache_reg, 4148 .cache_reg = vmx_cache_reg,
4003 .get_rflags = vmx_get_rflags, 4149 .get_rflags = vmx_get_rflags,
4004 .set_rflags = vmx_set_rflags, 4150 .set_rflags = vmx_set_rflags,
4151 .fpu_activate = vmx_fpu_activate,
4152 .fpu_deactivate = vmx_fpu_deactivate,
4005 4153
4006 .tlb_flush = vmx_flush_tlb, 4154 .tlb_flush = vmx_flush_tlb,
4007 4155
@@ -4027,7 +4175,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
4027 .get_mt_mask = vmx_get_mt_mask, 4175 .get_mt_mask = vmx_get_mt_mask,
4028 4176
4029 .exit_reasons_str = vmx_exit_reasons_str, 4177 .exit_reasons_str = vmx_exit_reasons_str,
4030 .gb_page_enable = vmx_gb_page_enable, 4178 .get_lpage_level = vmx_get_lpage_level,
4179
4180 .cpuid_update = vmx_cpuid_update,
4181
4182 .rdtscp_supported = vmx_rdtscp_supported,
4031}; 4183};
4032 4184
4033static int __init vmx_init(void) 4185static int __init vmx_init(void)
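The vmx_exit_reasons_str rework above replaces the hand-written string table with an _ER() macro that pairs each EXIT_REASON_* constant with its stringified name. For readers unfamiliar with the token-pasting/stringify trick, a stand-alone sketch follows; the constants and values here are made up purely for illustration:

#include <stdio.h>

enum { EXIT_REASON_CPUID = 10, EXIT_REASON_HLT = 12, EXIT_REASON_MSR_READ = 31 };

struct print_flag { int code; const char *name; };

#define _ER(x) { EXIT_REASON_##x, #x }   /* paste the constant, stringify its name */

static const struct print_flag exit_reasons[] = {
	_ER(CPUID),
	_ER(HLT),
	_ER(MSR_READ),
	{ -1, NULL }
};

#undef _ER

static const char *exit_reason_name(int code)
{
	for (const struct print_flag *p = exit_reasons; p->name; p++)
		if (p->code == code)
			return p->name;
	return "unknown";
}

int main(void)
{
	printf("12 -> %s, 31 -> %s, 99 -> %s\n",
	       exit_reason_name(12), exit_reason_name(31), exit_reason_name(99));
	return 0;
}

Adding a new entry then touches exactly one line, and the string can never drift out of sync with the constant's name.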
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a1e1bc9d412..e46282a5656 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -38,6 +38,7 @@
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <linux/cpufreq.h> 39#include <linux/cpufreq.h>
40#include <linux/user-return-notifier.h> 40#include <linux/user-return-notifier.h>
41#include <linux/srcu.h>
41#include <trace/events/kvm.h> 42#include <trace/events/kvm.h>
42#undef TRACE_INCLUDE_FILE 43#undef TRACE_INCLUDE_FILE
43#define CREATE_TRACE_POINTS 44#define CREATE_TRACE_POINTS
@@ -93,16 +94,16 @@ module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
93 94
94struct kvm_shared_msrs_global { 95struct kvm_shared_msrs_global {
95 int nr; 96 int nr;
96 struct kvm_shared_msr { 97 u32 msrs[KVM_NR_SHARED_MSRS];
97 u32 msr;
98 u64 value;
99 } msrs[KVM_NR_SHARED_MSRS];
100}; 98};
101 99
102struct kvm_shared_msrs { 100struct kvm_shared_msrs {
103 struct user_return_notifier urn; 101 struct user_return_notifier urn;
104 bool registered; 102 bool registered;
105 u64 current_value[KVM_NR_SHARED_MSRS]; 103 struct kvm_shared_msr_values {
104 u64 host;
105 u64 curr;
106 } values[KVM_NR_SHARED_MSRS];
106}; 107};
107 108
108static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; 109static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
@@ -147,53 +148,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
147static void kvm_on_user_return(struct user_return_notifier *urn) 148static void kvm_on_user_return(struct user_return_notifier *urn)
148{ 149{
149 unsigned slot; 150 unsigned slot;
150 struct kvm_shared_msr *global;
151 struct kvm_shared_msrs *locals 151 struct kvm_shared_msrs *locals
152 = container_of(urn, struct kvm_shared_msrs, urn); 152 = container_of(urn, struct kvm_shared_msrs, urn);
153 struct kvm_shared_msr_values *values;
153 154
154 for (slot = 0; slot < shared_msrs_global.nr; ++slot) { 155 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
155 global = &shared_msrs_global.msrs[slot]; 156 values = &locals->values[slot];
156 if (global->value != locals->current_value[slot]) { 157 if (values->host != values->curr) {
157 wrmsrl(global->msr, global->value); 158 wrmsrl(shared_msrs_global.msrs[slot], values->host);
158 locals->current_value[slot] = global->value; 159 values->curr = values->host;
159 } 160 }
160 } 161 }
161 locals->registered = false; 162 locals->registered = false;
162 user_return_notifier_unregister(urn); 163 user_return_notifier_unregister(urn);
163} 164}
164 165
165void kvm_define_shared_msr(unsigned slot, u32 msr) 166static void shared_msr_update(unsigned slot, u32 msr)
166{ 167{
167 int cpu; 168 struct kvm_shared_msrs *smsr;
168 u64 value; 169 u64 value;
169 170
171 smsr = &__get_cpu_var(shared_msrs);
172 /* only read, and nobody should modify it at this time,
173 * so don't need lock */
174 if (slot >= shared_msrs_global.nr) {
175 printk(KERN_ERR "kvm: invalid MSR slot!");
176 return;
177 }
178 rdmsrl_safe(msr, &value);
179 smsr->values[slot].host = value;
180 smsr->values[slot].curr = value;
181}
182
183void kvm_define_shared_msr(unsigned slot, u32 msr)
184{
170 if (slot >= shared_msrs_global.nr) 185 if (slot >= shared_msrs_global.nr)
171 shared_msrs_global.nr = slot + 1; 186 shared_msrs_global.nr = slot + 1;
172 shared_msrs_global.msrs[slot].msr = msr; 187 shared_msrs_global.msrs[slot] = msr;
173 rdmsrl_safe(msr, &value); 188 /* we need ensured the shared_msr_global have been updated */
174 shared_msrs_global.msrs[slot].value = value; 189 smp_wmb();
175 for_each_online_cpu(cpu)
176 per_cpu(shared_msrs, cpu).current_value[slot] = value;
177} 190}
178EXPORT_SYMBOL_GPL(kvm_define_shared_msr); 191EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
179 192
180static void kvm_shared_msr_cpu_online(void) 193static void kvm_shared_msr_cpu_online(void)
181{ 194{
182 unsigned i; 195 unsigned i;
183 struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
184 196
185 for (i = 0; i < shared_msrs_global.nr; ++i) 197 for (i = 0; i < shared_msrs_global.nr; ++i)
186 locals->current_value[i] = shared_msrs_global.msrs[i].value; 198 shared_msr_update(i, shared_msrs_global.msrs[i]);
187} 199}
188 200
189void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 201void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
190{ 202{
191 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 203 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
192 204
193 if (((value ^ smsr->current_value[slot]) & mask) == 0) 205 if (((value ^ smsr->values[slot].curr) & mask) == 0)
194 return; 206 return;
195 smsr->current_value[slot] = value; 207 smsr->values[slot].curr = value;
196 wrmsrl(shared_msrs_global.msrs[slot].msr, value); 208 wrmsrl(shared_msrs_global.msrs[slot], value);
197 if (!smsr->registered) { 209 if (!smsr->registered) {
198 smsr->urn.on_user_return = kvm_on_user_return; 210 smsr->urn.on_user_return = kvm_on_user_return;
199 user_return_notifier_register(&smsr->urn); 211 user_return_notifier_register(&smsr->urn);
@@ -257,12 +269,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
257} 269}
258EXPORT_SYMBOL_GPL(kvm_set_apic_base); 270EXPORT_SYMBOL_GPL(kvm_set_apic_base);
259 271
272#define EXCPT_BENIGN 0
273#define EXCPT_CONTRIBUTORY 1
274#define EXCPT_PF 2
275
276static int exception_class(int vector)
277{
278 switch (vector) {
279 case PF_VECTOR:
280 return EXCPT_PF;
281 case DE_VECTOR:
282 case TS_VECTOR:
283 case NP_VECTOR:
284 case SS_VECTOR:
285 case GP_VECTOR:
286 return EXCPT_CONTRIBUTORY;
287 default:
288 break;
289 }
290 return EXCPT_BENIGN;
291}
292
293static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
294 unsigned nr, bool has_error, u32 error_code)
295{
296 u32 prev_nr;
297 int class1, class2;
298
299 if (!vcpu->arch.exception.pending) {
300 queue:
301 vcpu->arch.exception.pending = true;
302 vcpu->arch.exception.has_error_code = has_error;
303 vcpu->arch.exception.nr = nr;
304 vcpu->arch.exception.error_code = error_code;
305 return;
306 }
307
308 /* to check exception */
309 prev_nr = vcpu->arch.exception.nr;
310 if (prev_nr == DF_VECTOR) {
311 /* triple fault -> shutdown */
312 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
313 return;
314 }
315 class1 = exception_class(prev_nr);
316 class2 = exception_class(nr);
317 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
318 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
319 /* generate double fault per SDM Table 5-5 */
320 vcpu->arch.exception.pending = true;
321 vcpu->arch.exception.has_error_code = true;
322 vcpu->arch.exception.nr = DF_VECTOR;
323 vcpu->arch.exception.error_code = 0;
324 } else
325 /* replace the previous exception with the new one, in the hope
326 that re-executing the instruction will regenerate the lost
327 exception */
328 goto queue;
329}
330
260void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 331void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
261{ 332{
262 WARN_ON(vcpu->arch.exception.pending); 333 kvm_multiple_exception(vcpu, nr, false, 0);
263 vcpu->arch.exception.pending = true;
264 vcpu->arch.exception.has_error_code = false;
265 vcpu->arch.exception.nr = nr;
266} 334}
267EXPORT_SYMBOL_GPL(kvm_queue_exception); 335EXPORT_SYMBOL_GPL(kvm_queue_exception);
268 336
@@ -270,25 +338,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
270 u32 error_code) 338 u32 error_code)
271{ 339{
272 ++vcpu->stat.pf_guest; 340 ++vcpu->stat.pf_guest;
273
274 if (vcpu->arch.exception.pending) {
275 switch(vcpu->arch.exception.nr) {
276 case DF_VECTOR:
277 /* triple fault -> shutdown */
278 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
279 return;
280 case PF_VECTOR:
281 vcpu->arch.exception.nr = DF_VECTOR;
282 vcpu->arch.exception.error_code = 0;
283 return;
284 default:
285 /* replace previous exception with a new one in a hope
286 that instruction re-execution will regenerate lost
287 exception */
288 vcpu->arch.exception.pending = false;
289 break;
290 }
291 }
292 vcpu->arch.cr2 = addr; 341 vcpu->arch.cr2 = addr;
293 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 342 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
294} 343}
@@ -301,11 +350,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
301 350
302void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 351void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
303{ 352{
304 WARN_ON(vcpu->arch.exception.pending); 353 kvm_multiple_exception(vcpu, nr, true, error_code);
305 vcpu->arch.exception.pending = true;
306 vcpu->arch.exception.has_error_code = true;
307 vcpu->arch.exception.nr = nr;
308 vcpu->arch.exception.error_code = error_code;
309} 354}
310EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 355EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
311 356
@@ -383,12 +428,18 @@ out:
383 428
384void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 429void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
385{ 430{
386 if (cr0 & CR0_RESERVED_BITS) { 431 cr0 |= X86_CR0_ET;
432
433#ifdef CONFIG_X86_64
434 if (cr0 & 0xffffffff00000000UL) {
387 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", 435 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
388 cr0, vcpu->arch.cr0); 436 cr0, kvm_read_cr0(vcpu));
389 kvm_inject_gp(vcpu, 0); 437 kvm_inject_gp(vcpu, 0);
390 return; 438 return;
391 } 439 }
440#endif
441
442 cr0 &= ~CR0_RESERVED_BITS;
392 443
393 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 444 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
394 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); 445 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
@@ -405,7 +456,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
405 456
406 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 457 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
407#ifdef CONFIG_X86_64 458#ifdef CONFIG_X86_64
408 if ((vcpu->arch.shadow_efer & EFER_LME)) { 459 if ((vcpu->arch.efer & EFER_LME)) {
409 int cs_db, cs_l; 460 int cs_db, cs_l;
410 461
411 if (!is_pae(vcpu)) { 462 if (!is_pae(vcpu)) {
@@ -443,13 +494,13 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
443 494
444void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 495void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
445{ 496{
446 kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); 497 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f));
447} 498}
448EXPORT_SYMBOL_GPL(kvm_lmsw); 499EXPORT_SYMBOL_GPL(kvm_lmsw);
449 500
450void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 501void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
451{ 502{
452 unsigned long old_cr4 = vcpu->arch.cr4; 503 unsigned long old_cr4 = kvm_read_cr4(vcpu);
453 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 504 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
454 505
455 if (cr4 & CR4_RESERVED_BITS) { 506 if (cr4 & CR4_RESERVED_BITS) {
@@ -575,9 +626,11 @@ static inline u32 bit(int bitno)
575 * kvm-specific. Those are put in the beginning of the list. 626 * kvm-specific. Those are put in the beginning of the list.
576 */ 627 */
577 628
578#define KVM_SAVE_MSRS_BEGIN 2 629#define KVM_SAVE_MSRS_BEGIN 5
579static u32 msrs_to_save[] = { 630static u32 msrs_to_save[] = {
580 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 631 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
632 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
633 HV_X64_MSR_APIC_ASSIST_PAGE,
581 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 634 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
582 MSR_K6_STAR, 635 MSR_K6_STAR,
583#ifdef CONFIG_X86_64 636#ifdef CONFIG_X86_64
@@ -602,7 +655,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
602 } 655 }
603 656
604 if (is_paging(vcpu) 657 if (is_paging(vcpu)
605 && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { 658 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
606 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); 659 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
607 kvm_inject_gp(vcpu, 0); 660 kvm_inject_gp(vcpu, 0);
608 return; 661 return;
@@ -633,9 +686,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
633 kvm_x86_ops->set_efer(vcpu, efer); 686 kvm_x86_ops->set_efer(vcpu, efer);
634 687
635 efer &= ~EFER_LMA; 688 efer &= ~EFER_LMA;
636 efer |= vcpu->arch.shadow_efer & EFER_LMA; 689 efer |= vcpu->arch.efer & EFER_LMA;
637 690
638 vcpu->arch.shadow_efer = efer; 691 vcpu->arch.efer = efer;
639 692
640 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 693 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
641 kvm_mmu_reset_context(vcpu); 694 kvm_mmu_reset_context(vcpu);
@@ -957,6 +1010,100 @@ out:
957 return r; 1010 return r;
958} 1011}
959 1012
1013static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1014{
1015 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1016}
1017
1018static bool kvm_hv_msr_partition_wide(u32 msr)
1019{
1020 bool r = false;
1021 switch (msr) {
1022 case HV_X64_MSR_GUEST_OS_ID:
1023 case HV_X64_MSR_HYPERCALL:
1024 r = true;
1025 break;
1026 }
1027
1028 return r;
1029}
1030
1031static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1032{
1033 struct kvm *kvm = vcpu->kvm;
1034
1035 switch (msr) {
1036 case HV_X64_MSR_GUEST_OS_ID:
1037 kvm->arch.hv_guest_os_id = data;
1038 /* setting guest os id to zero disables hypercall page */
1039 if (!kvm->arch.hv_guest_os_id)
1040 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1041 break;
1042 case HV_X64_MSR_HYPERCALL: {
1043 u64 gfn;
1044 unsigned long addr;
1045 u8 instructions[4];
1046
1047 /* if guest os id is not set hypercall should remain disabled */
1048 if (!kvm->arch.hv_guest_os_id)
1049 break;
1050 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1051 kvm->arch.hv_hypercall = data;
1052 break;
1053 }
1054 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1055 addr = gfn_to_hva(kvm, gfn);
1056 if (kvm_is_error_hva(addr))
1057 return 1;
1058 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1059 ((unsigned char *)instructions)[3] = 0xc3; /* ret */
1060 if (copy_to_user((void __user *)addr, instructions, 4))
1061 return 1;
1062 kvm->arch.hv_hypercall = data;
1063 break;
1064 }
1065 default:
1066 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1067 "data 0x%llx\n", msr, data);
1068 return 1;
1069 }
1070 return 0;
1071}
1072
1073static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1074{
1075 switch (msr) {
1076 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1077 unsigned long addr;
1078
1079 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1080 vcpu->arch.hv_vapic = data;
1081 break;
1082 }
1083 addr = gfn_to_hva(vcpu->kvm, data >>
1084 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1085 if (kvm_is_error_hva(addr))
1086 return 1;
1087 if (clear_user((void __user *)addr, PAGE_SIZE))
1088 return 1;
1089 vcpu->arch.hv_vapic = data;
1090 break;
1091 }
1092 case HV_X64_MSR_EOI:
1093 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1094 case HV_X64_MSR_ICR:
1095 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1096 case HV_X64_MSR_TPR:
1097 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1098 default:
1099 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1100 "data 0x%llx\n", msr, data);
1101 return 1;
1102 }
1103
1104 return 0;
1105}
1106
960int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1107int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
961{ 1108{
962 switch (msr) { 1109 switch (msr) {
@@ -1071,6 +1218,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1071 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1218 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1072 "0x%x data 0x%llx\n", msr, data); 1219 "0x%x data 0x%llx\n", msr, data);
1073 break; 1220 break;
1221 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1222 if (kvm_hv_msr_partition_wide(msr)) {
1223 int r;
1224 mutex_lock(&vcpu->kvm->lock);
1225 r = set_msr_hyperv_pw(vcpu, msr, data);
1226 mutex_unlock(&vcpu->kvm->lock);
1227 return r;
1228 } else
1229 return set_msr_hyperv(vcpu, msr, data);
1230 break;
1074 default: 1231 default:
1075 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) 1232 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1076 return xen_hvm_config(vcpu, data); 1233 return xen_hvm_config(vcpu, data);
@@ -1170,6 +1327,54 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1170 return 0; 1327 return 0;
1171} 1328}
1172 1329
1330static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1331{
1332 u64 data = 0;
1333 struct kvm *kvm = vcpu->kvm;
1334
1335 switch (msr) {
1336 case HV_X64_MSR_GUEST_OS_ID:
1337 data = kvm->arch.hv_guest_os_id;
1338 break;
1339 case HV_X64_MSR_HYPERCALL:
1340 data = kvm->arch.hv_hypercall;
1341 break;
1342 default:
1343 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1344 return 1;
1345 }
1346
1347 *pdata = data;
1348 return 0;
1349}
1350
1351static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1352{
1353 u64 data = 0;
1354
1355 switch (msr) {
1356 case HV_X64_MSR_VP_INDEX: {
1357 int r;
1358 struct kvm_vcpu *v;
1359 kvm_for_each_vcpu(r, v, vcpu->kvm)
1360 if (v == vcpu)
1361 data = r;
1362 break;
1363 }
1364 case HV_X64_MSR_EOI:
1365 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1366 case HV_X64_MSR_ICR:
1367 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1368 case HV_X64_MSR_TPR:
1369 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1370 default:
1371 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1372 return 1;
1373 }
1374 *pdata = data;
1375 return 0;
1376}
1377
1173int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1378int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1174{ 1379{
1175 u64 data; 1380 u64 data;
@@ -1221,7 +1426,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1221 data |= (((uint64_t)4ULL) << 40); 1426 data |= (((uint64_t)4ULL) << 40);
1222 break; 1427 break;
1223 case MSR_EFER: 1428 case MSR_EFER:
1224 data = vcpu->arch.shadow_efer; 1429 data = vcpu->arch.efer;
1225 break; 1430 break;
1226 case MSR_KVM_WALL_CLOCK: 1431 case MSR_KVM_WALL_CLOCK:
1227 data = vcpu->kvm->arch.wall_clock; 1432 data = vcpu->kvm->arch.wall_clock;
@@ -1236,6 +1441,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1236 case MSR_IA32_MCG_STATUS: 1441 case MSR_IA32_MCG_STATUS:
1237 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 1442 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1238 return get_msr_mce(vcpu, msr, pdata); 1443 return get_msr_mce(vcpu, msr, pdata);
1444 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1445 if (kvm_hv_msr_partition_wide(msr)) {
1446 int r;
1447 mutex_lock(&vcpu->kvm->lock);
1448 r = get_msr_hyperv_pw(vcpu, msr, pdata);
1449 mutex_unlock(&vcpu->kvm->lock);
1450 return r;
1451 } else
1452 return get_msr_hyperv(vcpu, msr, pdata);
1453 break;
1239 default: 1454 default:
1240 if (!ignore_msrs) { 1455 if (!ignore_msrs) {
1241 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 1456 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -1261,15 +1476,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
1261 int (*do_msr)(struct kvm_vcpu *vcpu, 1476 int (*do_msr)(struct kvm_vcpu *vcpu,
1262 unsigned index, u64 *data)) 1477 unsigned index, u64 *data))
1263{ 1478{
1264 int i; 1479 int i, idx;
1265 1480
1266 vcpu_load(vcpu); 1481 vcpu_load(vcpu);
1267 1482
1268 down_read(&vcpu->kvm->slots_lock); 1483 idx = srcu_read_lock(&vcpu->kvm->srcu);
1269 for (i = 0; i < msrs->nmsrs; ++i) 1484 for (i = 0; i < msrs->nmsrs; ++i)
1270 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1485 if (do_msr(vcpu, entries[i].index, &entries[i].data))
1271 break; 1486 break;
1272 up_read(&vcpu->kvm->slots_lock); 1487 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1273 1488
1274 vcpu_put(vcpu); 1489 vcpu_put(vcpu);
1275 1490
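The slots_lock to SRCU conversion seen in this hunk repeats throughout the rest of the patch; the read-side pattern is simply (a sketch of the pattern, not additional code from the patch):

	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	/* ... read kvm->memslots / kvm->arch.aliases via rcu_dereference() ... */
	srcu_read_unlock(&kvm->srcu, idx);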
@@ -1351,6 +1566,11 @@ int kvm_dev_ioctl_check_extension(long ext)
1351 case KVM_CAP_XEN_HVM: 1566 case KVM_CAP_XEN_HVM:
1352 case KVM_CAP_ADJUST_CLOCK: 1567 case KVM_CAP_ADJUST_CLOCK:
1353 case KVM_CAP_VCPU_EVENTS: 1568 case KVM_CAP_VCPU_EVENTS:
1569 case KVM_CAP_HYPERV:
1570 case KVM_CAP_HYPERV_VAPIC:
1571 case KVM_CAP_HYPERV_SPIN:
1572 case KVM_CAP_PCI_SEGMENT:
1573 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1354 r = 1; 1574 r = 1;
1355 break; 1575 break;
1356 case KVM_CAP_COALESCED_MMIO: 1576 case KVM_CAP_COALESCED_MMIO:
@@ -1464,8 +1684,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1464 1684
1465void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1685void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1466{ 1686{
1467 kvm_x86_ops->vcpu_put(vcpu);
1468 kvm_put_guest_fpu(vcpu); 1687 kvm_put_guest_fpu(vcpu);
1688 kvm_x86_ops->vcpu_put(vcpu);
1469} 1689}
1470 1690
1471static int is_efer_nx(void) 1691static int is_efer_nx(void)
@@ -1530,6 +1750,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1530 cpuid_fix_nx_cap(vcpu); 1750 cpuid_fix_nx_cap(vcpu);
1531 r = 0; 1751 r = 0;
1532 kvm_apic_set_version(vcpu); 1752 kvm_apic_set_version(vcpu);
1753 kvm_x86_ops->cpuid_update(vcpu);
1533 1754
1534out_free: 1755out_free:
1535 vfree(cpuid_entries); 1756 vfree(cpuid_entries);
@@ -1552,6 +1773,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1552 goto out; 1773 goto out;
1553 vcpu->arch.cpuid_nent = cpuid->nent; 1774 vcpu->arch.cpuid_nent = cpuid->nent;
1554 kvm_apic_set_version(vcpu); 1775 kvm_apic_set_version(vcpu);
1776 kvm_x86_ops->cpuid_update(vcpu);
1555 return 0; 1777 return 0;
1556 1778
1557out: 1779out:
@@ -1594,12 +1816,15 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1594 u32 index, int *nent, int maxnent) 1816 u32 index, int *nent, int maxnent)
1595{ 1817{
1596 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 1818 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
1597 unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
1598#ifdef CONFIG_X86_64 1819#ifdef CONFIG_X86_64
1820 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
1821 ? F(GBPAGES) : 0;
1599 unsigned f_lm = F(LM); 1822 unsigned f_lm = F(LM);
1600#else 1823#else
1824 unsigned f_gbpages = 0;
1601 unsigned f_lm = 0; 1825 unsigned f_lm = 0;
1602#endif 1826#endif
1827 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
1603 1828
1604 /* cpuid 1.edx */ 1829 /* cpuid 1.edx */
1605 const u32 kvm_supported_word0_x86_features = 1830 const u32 kvm_supported_word0_x86_features =
@@ -1619,7 +1844,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1619 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1844 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1620 F(PAT) | F(PSE36) | 0 /* Reserved */ | 1845 F(PAT) | F(PSE36) | 0 /* Reserved */ |
1621 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | 1846 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
1622 F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ | 1847 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
1623 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1848 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
1624 /* cpuid 1.ecx */ 1849 /* cpuid 1.ecx */
1625 const u32 kvm_supported_word4_x86_features = 1850 const u32 kvm_supported_word4_x86_features =
@@ -1866,7 +2091,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
1866 return 0; 2091 return 0;
1867 if (mce->status & MCI_STATUS_UC) { 2092 if (mce->status & MCI_STATUS_UC) {
1868 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || 2093 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
1869 !(vcpu->arch.cr4 & X86_CR4_MCE)) { 2094 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
1870 printk(KERN_DEBUG "kvm: set_mce: " 2095 printk(KERN_DEBUG "kvm: set_mce: "
1871 "injects mce exception while " 2096 "injects mce exception while "
1872 "previous one is in progress!\n"); 2097 "previous one is in progress!\n");
@@ -2160,14 +2385,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
2160 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) 2385 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
2161 return -EINVAL; 2386 return -EINVAL;
2162 2387
2163 down_write(&kvm->slots_lock); 2388 mutex_lock(&kvm->slots_lock);
2164 spin_lock(&kvm->mmu_lock); 2389 spin_lock(&kvm->mmu_lock);
2165 2390
2166 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 2391 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
2167 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 2392 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
2168 2393
2169 spin_unlock(&kvm->mmu_lock); 2394 spin_unlock(&kvm->mmu_lock);
2170 up_write(&kvm->slots_lock); 2395 mutex_unlock(&kvm->slots_lock);
2171 return 0; 2396 return 0;
2172} 2397}
2173 2398
@@ -2176,13 +2401,35 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
2176 return kvm->arch.n_alloc_mmu_pages; 2401 return kvm->arch.n_alloc_mmu_pages;
2177} 2402}
2178 2403
2404gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2405{
2406 int i;
2407 struct kvm_mem_alias *alias;
2408 struct kvm_mem_aliases *aliases;
2409
2410 aliases = rcu_dereference(kvm->arch.aliases);
2411
2412 for (i = 0; i < aliases->naliases; ++i) {
2413 alias = &aliases->aliases[i];
2414 if (alias->flags & KVM_ALIAS_INVALID)
2415 continue;
2416 if (gfn >= alias->base_gfn
2417 && gfn < alias->base_gfn + alias->npages)
2418 return alias->target_gfn + gfn - alias->base_gfn;
2419 }
2420 return gfn;
2421}
2422
2179gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 2423gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
2180{ 2424{
2181 int i; 2425 int i;
2182 struct kvm_mem_alias *alias; 2426 struct kvm_mem_alias *alias;
2427 struct kvm_mem_aliases *aliases;
2183 2428
2184 for (i = 0; i < kvm->arch.naliases; ++i) { 2429 aliases = rcu_dereference(kvm->arch.aliases);
2185 alias = &kvm->arch.aliases[i]; 2430
2431 for (i = 0; i < aliases->naliases; ++i) {
2432 alias = &aliases->aliases[i];
2186 if (gfn >= alias->base_gfn 2433 if (gfn >= alias->base_gfn
2187 && gfn < alias->base_gfn + alias->npages) 2434 && gfn < alias->base_gfn + alias->npages)
2188 return alias->target_gfn + gfn - alias->base_gfn; 2435 return alias->target_gfn + gfn - alias->base_gfn;
@@ -2200,6 +2447,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
2200{ 2447{
2201 int r, n; 2448 int r, n;
2202 struct kvm_mem_alias *p; 2449 struct kvm_mem_alias *p;
2450 struct kvm_mem_aliases *aliases, *old_aliases;
2203 2451
2204 r = -EINVAL; 2452 r = -EINVAL;
2205 /* General sanity checks */ 2453 /* General sanity checks */
@@ -2216,26 +2464,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
2216 < alias->target_phys_addr) 2464 < alias->target_phys_addr)
2217 goto out; 2465 goto out;
2218 2466
2219 down_write(&kvm->slots_lock); 2467 r = -ENOMEM;
2220 spin_lock(&kvm->mmu_lock); 2468 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2469 if (!aliases)
2470 goto out;
2471
2472 mutex_lock(&kvm->slots_lock);
2221 2473
2222 p = &kvm->arch.aliases[alias->slot]; 2474 /* invalidate any gfn reference in case of deletion/shrinking */
2475 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2476 aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
2477 old_aliases = kvm->arch.aliases;
2478 rcu_assign_pointer(kvm->arch.aliases, aliases);
2479 synchronize_srcu_expedited(&kvm->srcu);
2480 kvm_mmu_zap_all(kvm);
2481 kfree(old_aliases);
2482
2483 r = -ENOMEM;
2484 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2485 if (!aliases)
2486 goto out_unlock;
2487
2488 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2489
2490 p = &aliases->aliases[alias->slot];
2223 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 2491 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
2224 p->npages = alias->memory_size >> PAGE_SHIFT; 2492 p->npages = alias->memory_size >> PAGE_SHIFT;
2225 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; 2493 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
2494 p->flags &= ~(KVM_ALIAS_INVALID);
2226 2495
2227 for (n = KVM_ALIAS_SLOTS; n > 0; --n) 2496 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
2228 if (kvm->arch.aliases[n - 1].npages) 2497 if (aliases->aliases[n - 1].npages)
2229 break; 2498 break;
2230 kvm->arch.naliases = n; 2499 aliases->naliases = n;
2231 2500
2232 spin_unlock(&kvm->mmu_lock); 2501 old_aliases = kvm->arch.aliases;
2233 kvm_mmu_zap_all(kvm); 2502 rcu_assign_pointer(kvm->arch.aliases, aliases);
2234 2503 synchronize_srcu_expedited(&kvm->srcu);
2235 up_write(&kvm->slots_lock); 2504 kfree(old_aliases);
2236 2505 r = 0;
2237 return 0;
2238 2506
2507out_unlock:
2508 mutex_unlock(&kvm->slots_lock);
2239out: 2509out:
2240 return r; 2510 return r;
2241} 2511}
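Condensed, the alias update above is the usual copy/publish/wait/reclaim RCU sequence; a stripped-down sketch (struct and field names as in the hunk; error handling and the preliminary invalidate-and-zap round are omitted):

	struct kvm_mem_aliases *new, *old;

	new = kzalloc(sizeof(*new), GFP_KERNEL);		/* copy */
	memcpy(new, kvm->arch.aliases, sizeof(*new));
	/* ... modify 'new' ... */
	old = kvm->arch.aliases;
	rcu_assign_pointer(kvm->arch.aliases, new);		/* publish */
	synchronize_srcu_expedited(&kvm->srcu);			/* wait for readers */
	kfree(old);						/* reclaim */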
@@ -2273,18 +2543,18 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2273 r = 0; 2543 r = 0;
2274 switch (chip->chip_id) { 2544 switch (chip->chip_id) {
2275 case KVM_IRQCHIP_PIC_MASTER: 2545 case KVM_IRQCHIP_PIC_MASTER:
2276 spin_lock(&pic_irqchip(kvm)->lock); 2546 raw_spin_lock(&pic_irqchip(kvm)->lock);
2277 memcpy(&pic_irqchip(kvm)->pics[0], 2547 memcpy(&pic_irqchip(kvm)->pics[0],
2278 &chip->chip.pic, 2548 &chip->chip.pic,
2279 sizeof(struct kvm_pic_state)); 2549 sizeof(struct kvm_pic_state));
2280 spin_unlock(&pic_irqchip(kvm)->lock); 2550 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2281 break; 2551 break;
2282 case KVM_IRQCHIP_PIC_SLAVE: 2552 case KVM_IRQCHIP_PIC_SLAVE:
2283 spin_lock(&pic_irqchip(kvm)->lock); 2553 raw_spin_lock(&pic_irqchip(kvm)->lock);
2284 memcpy(&pic_irqchip(kvm)->pics[1], 2554 memcpy(&pic_irqchip(kvm)->pics[1],
2285 &chip->chip.pic, 2555 &chip->chip.pic,
2286 sizeof(struct kvm_pic_state)); 2556 sizeof(struct kvm_pic_state));
2287 spin_unlock(&pic_irqchip(kvm)->lock); 2557 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2288 break; 2558 break;
2289 case KVM_IRQCHIP_IOAPIC: 2559 case KVM_IRQCHIP_IOAPIC:
2290 r = kvm_set_ioapic(kvm, &chip->chip.ioapic); 2560 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
@@ -2364,29 +2634,62 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2364int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2634int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2365 struct kvm_dirty_log *log) 2635 struct kvm_dirty_log *log)
2366{ 2636{
2367 int r; 2637 int r, n, i;
2368 int n;
2369 struct kvm_memory_slot *memslot; 2638 struct kvm_memory_slot *memslot;
2370 int is_dirty = 0; 2639 unsigned long is_dirty = 0;
2640 unsigned long *dirty_bitmap = NULL;
2371 2641
2372 down_write(&kvm->slots_lock); 2642 mutex_lock(&kvm->slots_lock);
2373 2643
2374 r = kvm_get_dirty_log(kvm, log, &is_dirty); 2644 r = -EINVAL;
2375 if (r) 2645 if (log->slot >= KVM_MEMORY_SLOTS)
2646 goto out;
2647
2648 memslot = &kvm->memslots->memslots[log->slot];
2649 r = -ENOENT;
2650 if (!memslot->dirty_bitmap)
2651 goto out;
2652
2653 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
2654
2655 r = -ENOMEM;
2656 dirty_bitmap = vmalloc(n);
2657 if (!dirty_bitmap)
2376 goto out; 2658 goto out;
2659 memset(dirty_bitmap, 0, n);
2660
2661 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
2662 is_dirty = memslot->dirty_bitmap[i];
2377 2663
2378 /* If nothing is dirty, don't bother messing with page tables. */ 2664 /* If nothing is dirty, don't bother messing with page tables. */
2379 if (is_dirty) { 2665 if (is_dirty) {
2666 struct kvm_memslots *slots, *old_slots;
2667
2380 spin_lock(&kvm->mmu_lock); 2668 spin_lock(&kvm->mmu_lock);
2381 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2669 kvm_mmu_slot_remove_write_access(kvm, log->slot);
2382 spin_unlock(&kvm->mmu_lock); 2670 spin_unlock(&kvm->mmu_lock);
2383 memslot = &kvm->memslots[log->slot]; 2671
2384 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2672 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
2385 memset(memslot->dirty_bitmap, 0, n); 2673 if (!slots)
2674 goto out_free;
2675
2676 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
2677 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
2678
2679 old_slots = kvm->memslots;
2680 rcu_assign_pointer(kvm->memslots, slots);
2681 synchronize_srcu_expedited(&kvm->srcu);
2682 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
2683 kfree(old_slots);
2386 } 2684 }
2685
2387 r = 0; 2686 r = 0;
2687 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
2688 r = -EFAULT;
2689out_free:
2690 vfree(dirty_bitmap);
2388out: 2691out:
2389 up_write(&kvm->slots_lock); 2692 mutex_unlock(&kvm->slots_lock);
2390 return r; 2693 return r;
2391} 2694}
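For context, the userspace caller this ioctl serves looks roughly like the following sketch (slot_id, vm_fd and the bitmap sizing are the caller's responsibility; the size matches the ALIGN(npages, BITS_PER_LONG) / 8 computation above):

	struct kvm_dirty_log log = {
		.slot = slot_id,
		.dirty_bitmap = bitmap,	/* userspace buffer of bitmap_size bytes */
	};

	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
		perror("KVM_GET_DIRTY_LOG");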
2392 2695
@@ -2469,6 +2772,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
2469 if (vpic) { 2772 if (vpic) {
2470 r = kvm_ioapic_init(kvm); 2773 r = kvm_ioapic_init(kvm);
2471 if (r) { 2774 if (r) {
2775 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
2776 &vpic->dev);
2472 kfree(vpic); 2777 kfree(vpic);
2473 goto create_irqchip_unlock; 2778 goto create_irqchip_unlock;
2474 } 2779 }
@@ -2480,10 +2785,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
2480 r = kvm_setup_default_irq_routing(kvm); 2785 r = kvm_setup_default_irq_routing(kvm);
2481 if (r) { 2786 if (r) {
2482 mutex_lock(&kvm->irq_lock); 2787 mutex_lock(&kvm->irq_lock);
2483 kfree(kvm->arch.vpic); 2788 kvm_ioapic_destroy(kvm);
2484 kfree(kvm->arch.vioapic); 2789 kvm_destroy_pic(kvm);
2485 kvm->arch.vpic = NULL;
2486 kvm->arch.vioapic = NULL;
2487 mutex_unlock(&kvm->irq_lock); 2790 mutex_unlock(&kvm->irq_lock);
2488 } 2791 }
2489 create_irqchip_unlock: 2792 create_irqchip_unlock:
@@ -2499,7 +2802,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2499 sizeof(struct kvm_pit_config))) 2802 sizeof(struct kvm_pit_config)))
2500 goto out; 2803 goto out;
2501 create_pit: 2804 create_pit:
2502 down_write(&kvm->slots_lock); 2805 mutex_lock(&kvm->slots_lock);
2503 r = -EEXIST; 2806 r = -EEXIST;
2504 if (kvm->arch.vpit) 2807 if (kvm->arch.vpit)
2505 goto create_pit_unlock; 2808 goto create_pit_unlock;
@@ -2508,7 +2811,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2508 if (kvm->arch.vpit) 2811 if (kvm->arch.vpit)
2509 r = 0; 2812 r = 0;
2510 create_pit_unlock: 2813 create_pit_unlock:
2511 up_write(&kvm->slots_lock); 2814 mutex_unlock(&kvm->slots_lock);
2512 break; 2815 break;
2513 case KVM_IRQ_LINE_STATUS: 2816 case KVM_IRQ_LINE_STATUS:
2514 case KVM_IRQ_LINE: { 2817 case KVM_IRQ_LINE: {
@@ -2725,7 +3028,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
2725 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) 3028 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
2726 return 0; 3029 return 0;
2727 3030
2728 return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); 3031 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
2729} 3032}
2730 3033
2731static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) 3034static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -2734,17 +3037,44 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
2734 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) 3037 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
2735 return 0; 3038 return 0;
2736 3039
2737 return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); 3040 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
2738} 3041}
2739 3042
2740static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 3043gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
2741 struct kvm_vcpu *vcpu) 3044{
3045 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3046 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3047}
3048
3049gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3050{
3051 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3052 access |= PFERR_FETCH_MASK;
3053 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3054}
3055
3056gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3057{
3058 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3059 access |= PFERR_WRITE_MASK;
3060 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3061}
3062
3063/* used to access any guest's mapped memory without checking CPL */
3064gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3065{
3066 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
3067}
3068
3069static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3070 struct kvm_vcpu *vcpu, u32 access,
3071 u32 *error)
2742{ 3072{
2743 void *data = val; 3073 void *data = val;
2744 int r = X86EMUL_CONTINUE; 3074 int r = X86EMUL_CONTINUE;
2745 3075
2746 while (bytes) { 3076 while (bytes) {
2747 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3077 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
2748 unsigned offset = addr & (PAGE_SIZE-1); 3078 unsigned offset = addr & (PAGE_SIZE-1);
2749 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); 3079 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
2750 int ret; 3080 int ret;
@@ -2767,14 +3097,37 @@ out:
2767 return r; 3097 return r;
2768} 3098}
2769 3099
3100/* used for instruction fetching */
3101static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
3102 struct kvm_vcpu *vcpu, u32 *error)
3103{
3104 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3105 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3106 access | PFERR_FETCH_MASK, error);
3107}
3108
3109static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
3110 struct kvm_vcpu *vcpu, u32 *error)
3111{
3112 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3113 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3114 error);
3115}
3116
3117static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3118 struct kvm_vcpu *vcpu, u32 *error)
3119{
3120 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
3121}
3122
2770static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3123static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
2771 struct kvm_vcpu *vcpu) 3124 struct kvm_vcpu *vcpu, u32 *error)
2772{ 3125{
2773 void *data = val; 3126 void *data = val;
2774 int r = X86EMUL_CONTINUE; 3127 int r = X86EMUL_CONTINUE;
2775 3128
2776 while (bytes) { 3129 while (bytes) {
2777 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3130 gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error);
2778 unsigned offset = addr & (PAGE_SIZE-1); 3131 unsigned offset = addr & (PAGE_SIZE-1);
2779 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3132 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
2780 int ret; 3133 int ret;
@@ -2804,6 +3157,7 @@ static int emulator_read_emulated(unsigned long addr,
2804 struct kvm_vcpu *vcpu) 3157 struct kvm_vcpu *vcpu)
2805{ 3158{
2806 gpa_t gpa; 3159 gpa_t gpa;
3160 u32 error_code;
2807 3161
2808 if (vcpu->mmio_read_completed) { 3162 if (vcpu->mmio_read_completed) {
2809 memcpy(val, vcpu->mmio_data, bytes); 3163 memcpy(val, vcpu->mmio_data, bytes);
@@ -2813,17 +3167,20 @@ static int emulator_read_emulated(unsigned long addr,
2813 return X86EMUL_CONTINUE; 3167 return X86EMUL_CONTINUE;
2814 } 3168 }
2815 3169
2816 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3170 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
3171
3172 if (gpa == UNMAPPED_GVA) {
3173 kvm_inject_page_fault(vcpu, addr, error_code);
3174 return X86EMUL_PROPAGATE_FAULT;
3175 }
2817 3176
2818 /* For APIC access vmexit */ 3177 /* For APIC access vmexit */
2819 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3178 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
2820 goto mmio; 3179 goto mmio;
2821 3180
2822 if (kvm_read_guest_virt(addr, val, bytes, vcpu) 3181 if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
2823 == X86EMUL_CONTINUE) 3182 == X86EMUL_CONTINUE)
2824 return X86EMUL_CONTINUE; 3183 return X86EMUL_CONTINUE;
2825 if (gpa == UNMAPPED_GVA)
2826 return X86EMUL_PROPAGATE_FAULT;
2827 3184
2828mmio: 3185mmio:
2829 /* 3186 /*
@@ -2862,11 +3219,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
2862 struct kvm_vcpu *vcpu) 3219 struct kvm_vcpu *vcpu)
2863{ 3220{
2864 gpa_t gpa; 3221 gpa_t gpa;
3222 u32 error_code;
2865 3223
2866 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3224 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
2867 3225
2868 if (gpa == UNMAPPED_GVA) { 3226 if (gpa == UNMAPPED_GVA) {
2869 kvm_inject_page_fault(vcpu, addr, 2); 3227 kvm_inject_page_fault(vcpu, addr, error_code);
2870 return X86EMUL_PROPAGATE_FAULT; 3228 return X86EMUL_PROPAGATE_FAULT;
2871 } 3229 }
2872 3230
@@ -2930,7 +3288,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
2930 char *kaddr; 3288 char *kaddr;
2931 u64 val; 3289 u64 val;
2932 3290
2933 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3291 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
2934 3292
2935 if (gpa == UNMAPPED_GVA || 3293 if (gpa == UNMAPPED_GVA ||
2936 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3294 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -2967,35 +3325,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
2967 3325
2968int emulate_clts(struct kvm_vcpu *vcpu) 3326int emulate_clts(struct kvm_vcpu *vcpu)
2969{ 3327{
2970 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); 3328 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
3329 kvm_x86_ops->fpu_activate(vcpu);
2971 return X86EMUL_CONTINUE; 3330 return X86EMUL_CONTINUE;
2972} 3331}
2973 3332
2974int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3333int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
2975{ 3334{
2976 struct kvm_vcpu *vcpu = ctxt->vcpu; 3335 return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest);
2977
2978 switch (dr) {
2979 case 0 ... 3:
2980 *dest = kvm_x86_ops->get_dr(vcpu, dr);
2981 return X86EMUL_CONTINUE;
2982 default:
2983 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
2984 return X86EMUL_UNHANDLEABLE;
2985 }
2986} 3336}
2987 3337
2988int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3338int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
2989{ 3339{
2990 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 3340 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
2991 int exception;
2992 3341
2993 kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); 3342 return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask);
2994 if (exception) {
2995 /* FIXME: better handling */
2996 return X86EMUL_UNHANDLEABLE;
2997 }
2998 return X86EMUL_CONTINUE;
2999} 3343}
3000 3344
3001void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3345void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
@@ -3009,7 +3353,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3009 3353
3010 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 3354 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
3011 3355
3012 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); 3356 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
3013 3357
3014 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 3358 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
3015 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 3359 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -3017,7 +3361,8 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3017EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3361EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
3018 3362
3019static struct x86_emulate_ops emulate_ops = { 3363static struct x86_emulate_ops emulate_ops = {
3020 .read_std = kvm_read_guest_virt, 3364 .read_std = kvm_read_guest_virt_system,
3365 .fetch = kvm_fetch_guest_virt,
3021 .read_emulated = emulator_read_emulated, 3366 .read_emulated = emulator_read_emulated,
3022 .write_emulated = emulator_write_emulated, 3367 .write_emulated = emulator_write_emulated,
3023 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3368 .cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -3060,8 +3405,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3060 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3405 vcpu->arch.emulate_ctxt.vcpu = vcpu;
3061 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 3406 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
3062 vcpu->arch.emulate_ctxt.mode = 3407 vcpu->arch.emulate_ctxt.mode =
3408 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
3063 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3409 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
3064 ? X86EMUL_MODE_REAL : cs_l 3410 ? X86EMUL_MODE_VM86 : cs_l
3065 ? X86EMUL_MODE_PROT64 : cs_db 3411 ? X86EMUL_MODE_PROT64 : cs_db
3066 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3412 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
3067 3413
@@ -3153,12 +3499,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
3153 gva_t q = vcpu->arch.pio.guest_gva; 3499 gva_t q = vcpu->arch.pio.guest_gva;
3154 unsigned bytes; 3500 unsigned bytes;
3155 int ret; 3501 int ret;
3502 u32 error_code;
3156 3503
3157 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; 3504 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
3158 if (vcpu->arch.pio.in) 3505 if (vcpu->arch.pio.in)
3159 ret = kvm_write_guest_virt(q, p, bytes, vcpu); 3506 ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
3160 else 3507 else
3161 ret = kvm_read_guest_virt(q, p, bytes, vcpu); 3508 ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
3509
3510 if (ret == X86EMUL_PROPAGATE_FAULT)
3511 kvm_inject_page_fault(vcpu, q, error_code);
3512
3162 return ret; 3513 return ret;
3163} 3514}
3164 3515
@@ -3179,7 +3530,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
3179 if (io->in) { 3530 if (io->in) {
3180 r = pio_copy_data(vcpu); 3531 r = pio_copy_data(vcpu);
3181 if (r) 3532 if (r)
3182 return r; 3533 goto out;
3183 } 3534 }
3184 3535
3185 delta = 1; 3536 delta = 1;
@@ -3206,7 +3557,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
3206 kvm_register_write(vcpu, VCPU_REGS_RSI, val); 3557 kvm_register_write(vcpu, VCPU_REGS_RSI, val);
3207 } 3558 }
3208 } 3559 }
3209 3560out:
3210 io->count -= io->cur_count; 3561 io->count -= io->cur_count;
3211 io->cur_count = 0; 3562 io->cur_count = 0;
3212 3563
@@ -3219,11 +3570,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3219 int r; 3570 int r;
3220 3571
3221 if (vcpu->arch.pio.in) 3572 if (vcpu->arch.pio.in)
3222 r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3573 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3223 vcpu->arch.pio.size, pd); 3574 vcpu->arch.pio.size, pd);
3224 else 3575 else
3225 r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3576 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3226 vcpu->arch.pio.size, pd); 3577 vcpu->arch.pio.port, vcpu->arch.pio.size,
3578 pd);
3227 return r; 3579 return r;
3228} 3580}
3229 3581
@@ -3234,7 +3586,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
3234 int i, r = 0; 3586 int i, r = 0;
3235 3587
3236 for (i = 0; i < io->cur_count; i++) { 3588 for (i = 0; i < io->cur_count; i++) {
3237 if (kvm_io_bus_write(&vcpu->kvm->pio_bus, 3589 if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3238 io->port, io->size, pd)) { 3590 io->port, io->size, pd)) {
3239 r = -EOPNOTSUPP; 3591 r = -EOPNOTSUPP;
3240 break; 3592 break;
@@ -3248,6 +3600,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3248{ 3600{
3249 unsigned long val; 3601 unsigned long val;
3250 3602
3603 trace_kvm_pio(!in, port, size, 1);
3604
3251 vcpu->run->exit_reason = KVM_EXIT_IO; 3605 vcpu->run->exit_reason = KVM_EXIT_IO;
3252 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3606 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3253 vcpu->run->io.size = vcpu->arch.pio.size = size; 3607 vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3259,11 +3613,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3259 vcpu->arch.pio.down = 0; 3613 vcpu->arch.pio.down = 0;
3260 vcpu->arch.pio.rep = 0; 3614 vcpu->arch.pio.rep = 0;
3261 3615
3262 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, 3616 if (!vcpu->arch.pio.in) {
3263 size, 1); 3617 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3264 3618 memcpy(vcpu->arch.pio_data, &val, 4);
3265 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3619 }
3266 memcpy(vcpu->arch.pio_data, &val, 4);
3267 3620
3268 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3621 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3269 complete_pio(vcpu); 3622 complete_pio(vcpu);
@@ -3280,6 +3633,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3280 unsigned now, in_page; 3633 unsigned now, in_page;
3281 int ret = 0; 3634 int ret = 0;
3282 3635
3636 trace_kvm_pio(!in, port, size, count);
3637
3283 vcpu->run->exit_reason = KVM_EXIT_IO; 3638 vcpu->run->exit_reason = KVM_EXIT_IO;
3284 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3639 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3285 vcpu->run->io.size = vcpu->arch.pio.size = size; 3640 vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3291,9 +3646,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3291 vcpu->arch.pio.down = down; 3646 vcpu->arch.pio.down = down;
3292 vcpu->arch.pio.rep = rep; 3647 vcpu->arch.pio.rep = rep;
3293 3648
3294 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
3295 size, count);
3296
3297 if (!count) { 3649 if (!count) {
3298 kvm_x86_ops->skip_emulated_instruction(vcpu); 3650 kvm_x86_ops->skip_emulated_instruction(vcpu);
3299 return 1; 3651 return 1;
@@ -3325,10 +3677,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3325 if (!vcpu->arch.pio.in) { 3677 if (!vcpu->arch.pio.in) {
3326 /* string PIO write */ 3678 /* string PIO write */
3327 ret = pio_copy_data(vcpu); 3679 ret = pio_copy_data(vcpu);
3328 if (ret == X86EMUL_PROPAGATE_FAULT) { 3680 if (ret == X86EMUL_PROPAGATE_FAULT)
3329 kvm_inject_gp(vcpu, 0);
3330 return 1; 3681 return 1;
3331 }
3332 if (ret == 0 && !pio_string_write(vcpu)) { 3682 if (ret == 0 && !pio_string_write(vcpu)) {
3333 complete_pio(vcpu); 3683 complete_pio(vcpu);
3334 if (vcpu->arch.pio.count == 0) 3684 if (vcpu->arch.pio.count == 0)
@@ -3487,11 +3837,76 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
3487 return a0 | ((gpa_t)a1 << 32); 3837 return a0 | ((gpa_t)a1 << 32);
3488} 3838}
3489 3839
3840int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
3841{
3842 u64 param, ingpa, outgpa, ret;
3843 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
3844 bool fast, longmode;
3845 int cs_db, cs_l;
3846
3847 /*
3848 * A hypercall generates #UD from non-zero CPL or real mode,
3849 * per the Hyper-V spec.
3850 */
3851 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
3852 kvm_queue_exception(vcpu, UD_VECTOR);
3853 return 0;
3854 }
3855
3856 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3857 longmode = is_long_mode(vcpu) && cs_l == 1;
3858
3859 if (!longmode) {
3860 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
3861 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
3862 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
3863 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
3864 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
3865 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
3866 }
3867#ifdef CONFIG_X86_64
3868 else {
3869 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
3870 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
3871 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
3872 }
3873#endif
3874
3875 code = param & 0xffff;
3876 fast = (param >> 16) & 0x1;
3877 rep_cnt = (param >> 32) & 0xfff;
3878 rep_idx = (param >> 48) & 0xfff;
3879
3880 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
3881
3882 switch (code) {
3883 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
3884 kvm_vcpu_on_spin(vcpu);
3885 break;
3886 default:
3887 res = HV_STATUS_INVALID_HYPERCALL_CODE;
3888 break;
3889 }
3890
3891 ret = res | (((u64)rep_done & 0xfff) << 32);
3892 if (longmode) {
3893 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
3894 } else {
3895 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
3896 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
3897 }
3898
3899 return 1;
3900}
3901
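A stand-alone worked example of the input-value decoding done in kvm_hv_hypercall() above (same shifts and masks; the example value is arbitrary):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t param   = 0x0002000500010003ULL;	/* arbitrary example input */
	uint16_t code    = param & 0xffff;		/* 0x0003 */
	int      fast    = (param >> 16) & 0x1;		/* 1     */
	uint16_t rep_cnt = (param >> 32) & 0xfff;	/* 0x005 */
	uint16_t rep_idx = (param >> 48) & 0xfff;	/* 0x002 */

	printf("code=%#x fast=%d rep_cnt=%#x rep_idx=%#x\n",
	       code, fast, rep_cnt, rep_idx);
	return 0;
}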
3490int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 3902int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
3491{ 3903{
3492 unsigned long nr, a0, a1, a2, a3, ret; 3904 unsigned long nr, a0, a1, a2, a3, ret;
3493 int r = 1; 3905 int r = 1;
3494 3906
3907 if (kvm_hv_hypercall_enabled(vcpu->kvm))
3908 return kvm_hv_hypercall(vcpu);
3909
3495 nr = kvm_register_read(vcpu, VCPU_REGS_RAX); 3910 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
3496 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); 3911 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
3497 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); 3912 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -3534,10 +3949,8 @@ EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
3534int kvm_fix_hypercall(struct kvm_vcpu *vcpu) 3949int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3535{ 3950{
3536 char instruction[3]; 3951 char instruction[3];
3537 int ret = 0;
3538 unsigned long rip = kvm_rip_read(vcpu); 3952 unsigned long rip = kvm_rip_read(vcpu);
3539 3953
3540
3541 /* 3954 /*
3542 * Blow out the MMU to ensure that no other VCPU has an active mapping 3955 * Blow out the MMU to ensure that no other VCPU has an active mapping
3543 * to ensure that the updated hypercall appears atomically across all 3956 * to ensure that the updated hypercall appears atomically across all
@@ -3546,11 +3959,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3546 kvm_mmu_zap_all(vcpu->kvm); 3959 kvm_mmu_zap_all(vcpu->kvm);
3547 3960
3548 kvm_x86_ops->patch_hypercall(vcpu, instruction); 3961 kvm_x86_ops->patch_hypercall(vcpu, instruction);
3549 if (emulator_write_emulated(rip, instruction, 3, vcpu)
3550 != X86EMUL_CONTINUE)
3551 ret = -EFAULT;
3552 3962
3553 return ret; 3963 return emulator_write_emulated(rip, instruction, 3, vcpu);
3554} 3964}
3555 3965
3556static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3966static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -3583,10 +3993,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
3583{ 3993{
3584 unsigned long value; 3994 unsigned long value;
3585 3995
3586 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
3587 switch (cr) { 3996 switch (cr) {
3588 case 0: 3997 case 0:
3589 value = vcpu->arch.cr0; 3998 value = kvm_read_cr0(vcpu);
3590 break; 3999 break;
3591 case 2: 4000 case 2:
3592 value = vcpu->arch.cr2; 4001 value = vcpu->arch.cr2;
@@ -3595,7 +4004,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
3595 value = vcpu->arch.cr3; 4004 value = vcpu->arch.cr3;
3596 break; 4005 break;
3597 case 4: 4006 case 4:
3598 value = vcpu->arch.cr4; 4007 value = kvm_read_cr4(vcpu);
3599 break; 4008 break;
3600 case 8: 4009 case 8:
3601 value = kvm_get_cr8(vcpu); 4010 value = kvm_get_cr8(vcpu);
@@ -3613,7 +4022,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3613{ 4022{
3614 switch (cr) { 4023 switch (cr) {
3615 case 0: 4024 case 0:
3616 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 4025 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
3617 *rflags = kvm_get_rflags(vcpu); 4026 *rflags = kvm_get_rflags(vcpu);
3618 break; 4027 break;
3619 case 2: 4028 case 2:
@@ -3623,7 +4032,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3623 kvm_set_cr3(vcpu, val); 4032 kvm_set_cr3(vcpu, val);
3624 break; 4033 break;
3625 case 4: 4034 case 4:
3626 kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); 4035 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
3627 break; 4036 break;
3628 case 8: 4037 case 8:
3629 kvm_set_cr8(vcpu, val & 0xfUL); 4038 kvm_set_cr8(vcpu, val & 0xfUL);
@@ -3690,6 +4099,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
3690 } 4099 }
3691 return best; 4100 return best;
3692} 4101}
4102EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
3693 4103
3694int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) 4104int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
3695{ 4105{
@@ -3773,14 +4183,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
3773static void vapic_exit(struct kvm_vcpu *vcpu) 4183static void vapic_exit(struct kvm_vcpu *vcpu)
3774{ 4184{
3775 struct kvm_lapic *apic = vcpu->arch.apic; 4185 struct kvm_lapic *apic = vcpu->arch.apic;
4186 int idx;
3776 4187
3777 if (!apic || !apic->vapic_addr) 4188 if (!apic || !apic->vapic_addr)
3778 return; 4189 return;
3779 4190
3780 down_read(&vcpu->kvm->slots_lock); 4191 idx = srcu_read_lock(&vcpu->kvm->srcu);
3781 kvm_release_page_dirty(apic->vapic_page); 4192 kvm_release_page_dirty(apic->vapic_page);
3782 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 4193 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
3783 up_read(&vcpu->kvm->slots_lock); 4194 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3784} 4195}
3785 4196
3786static void update_cr8_intercept(struct kvm_vcpu *vcpu) 4197static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -3876,12 +4287,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3876 r = 0; 4287 r = 0;
3877 goto out; 4288 goto out;
3878 } 4289 }
4290 if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
4291 vcpu->fpu_active = 0;
4292 kvm_x86_ops->fpu_deactivate(vcpu);
4293 }
3879 } 4294 }
3880 4295
3881 preempt_disable(); 4296 preempt_disable();
3882 4297
3883 kvm_x86_ops->prepare_guest_switch(vcpu); 4298 kvm_x86_ops->prepare_guest_switch(vcpu);
3884 kvm_load_guest_fpu(vcpu); 4299 if (vcpu->fpu_active)
4300 kvm_load_guest_fpu(vcpu);
3885 4301
3886 local_irq_disable(); 4302 local_irq_disable();
3887 4303
@@ -3909,7 +4325,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3909 kvm_lapic_sync_to_vapic(vcpu); 4325 kvm_lapic_sync_to_vapic(vcpu);
3910 } 4326 }
3911 4327
3912 up_read(&vcpu->kvm->slots_lock); 4328 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3913 4329
3914 kvm_guest_enter(); 4330 kvm_guest_enter();
3915 4331
@@ -3951,7 +4367,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3951 4367
3952 preempt_enable(); 4368 preempt_enable();
3953 4369
3954 down_read(&vcpu->kvm->slots_lock); 4370 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3955 4371
3956 /* 4372 /*
3957 * Profile KVM exit RIPs: 4373 * Profile KVM exit RIPs:
@@ -3973,6 +4389,7 @@ out:
3973static int __vcpu_run(struct kvm_vcpu *vcpu) 4389static int __vcpu_run(struct kvm_vcpu *vcpu)
3974{ 4390{
3975 int r; 4391 int r;
4392 struct kvm *kvm = vcpu->kvm;
3976 4393
3977 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { 4394 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
3978 pr_debug("vcpu %d received sipi with vector # %x\n", 4395 pr_debug("vcpu %d received sipi with vector # %x\n",
@@ -3984,7 +4401,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
3984 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4401 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3985 } 4402 }
3986 4403
3987 down_read(&vcpu->kvm->slots_lock); 4404 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
3988 vapic_enter(vcpu); 4405 vapic_enter(vcpu);
3989 4406
3990 r = 1; 4407 r = 1;
@@ -3992,9 +4409,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
3992 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 4409 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
3993 r = vcpu_enter_guest(vcpu); 4410 r = vcpu_enter_guest(vcpu);
3994 else { 4411 else {
3995 up_read(&vcpu->kvm->slots_lock); 4412 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
3996 kvm_vcpu_block(vcpu); 4413 kvm_vcpu_block(vcpu);
3997 down_read(&vcpu->kvm->slots_lock); 4414 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
3998 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 4415 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
3999 { 4416 {
4000 switch(vcpu->arch.mp_state) { 4417 switch(vcpu->arch.mp_state) {
@@ -4029,13 +4446,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
4029 ++vcpu->stat.signal_exits; 4446 ++vcpu->stat.signal_exits;
4030 } 4447 }
4031 if (need_resched()) { 4448 if (need_resched()) {
4032 up_read(&vcpu->kvm->slots_lock); 4449 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4033 kvm_resched(vcpu); 4450 kvm_resched(vcpu);
4034 down_read(&vcpu->kvm->slots_lock); 4451 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4035 } 4452 }
4036 } 4453 }
4037 4454
4038 up_read(&vcpu->kvm->slots_lock); 4455 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4039 post_kvm_run_save(vcpu); 4456 post_kvm_run_save(vcpu);
4040 4457
4041 vapic_exit(vcpu); 4458 vapic_exit(vcpu);
@@ -4074,10 +4491,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4074 vcpu->mmio_read_completed = 1; 4491 vcpu->mmio_read_completed = 1;
4075 vcpu->mmio_needed = 0; 4492 vcpu->mmio_needed = 0;
4076 4493
4077 down_read(&vcpu->kvm->slots_lock); 4494 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4078 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, 4495 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
4079 EMULTYPE_NO_DECODE); 4496 EMULTYPE_NO_DECODE);
4080 up_read(&vcpu->kvm->slots_lock); 4497 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4081 if (r == EMULATE_DO_MMIO) { 4498 if (r == EMULATE_DO_MMIO) {
4082 /* 4499 /*
4083 * Read-modify-write. Back to userspace. 4500 * Read-modify-write. Back to userspace.
@@ -4204,13 +4621,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4204 sregs->gdt.limit = dt.limit; 4621 sregs->gdt.limit = dt.limit;
4205 sregs->gdt.base = dt.base; 4622 sregs->gdt.base = dt.base;
4206 4623
4207 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 4624 sregs->cr0 = kvm_read_cr0(vcpu);
4208 sregs->cr0 = vcpu->arch.cr0;
4209 sregs->cr2 = vcpu->arch.cr2; 4625 sregs->cr2 = vcpu->arch.cr2;
4210 sregs->cr3 = vcpu->arch.cr3; 4626 sregs->cr3 = vcpu->arch.cr3;
4211 sregs->cr4 = vcpu->arch.cr4; 4627 sregs->cr4 = kvm_read_cr4(vcpu);
4212 sregs->cr8 = kvm_get_cr8(vcpu); 4628 sregs->cr8 = kvm_get_cr8(vcpu);
4213 sregs->efer = vcpu->arch.shadow_efer; 4629 sregs->efer = vcpu->arch.efer;
4214 sregs->apic_base = kvm_get_apic_base(vcpu); 4630 sregs->apic_base = kvm_get_apic_base(vcpu);
4215 4631
4216 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); 4632 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
@@ -4298,14 +4714,23 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4298{ 4714{
4299 struct descriptor_table dtable; 4715 struct descriptor_table dtable;
4300 u16 index = selector >> 3; 4716 u16 index = selector >> 3;
4717 int ret;
4718 u32 err;
4719 gva_t addr;
4301 4720
4302 get_segment_descriptor_dtable(vcpu, selector, &dtable); 4721 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4303 4722
4304 if (dtable.limit < index * 8 + 7) { 4723 if (dtable.limit < index * 8 + 7) {
4305 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 4724 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
4306 return 1; 4725 return X86EMUL_PROPAGATE_FAULT;
4307 } 4726 }
4308 return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4727 addr = dtable.base + index * 8;
4728 ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
4729 vcpu, &err);
4730 if (ret == X86EMUL_PROPAGATE_FAULT)
4731 kvm_inject_page_fault(vcpu, addr, err);
4732
4733 return ret;
4309} 4734}
4310 4735
4311/* allowed just for 8 bytes segments */ 4736/* allowed just for 8 bytes segments */
@@ -4319,15 +4744,23 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4319 4744
4320 if (dtable.limit < index * 8 + 7) 4745 if (dtable.limit < index * 8 + 7)
4321 return 1; 4746 return 1;
4322 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4747 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
4748}
4749
4750static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
4751 struct desc_struct *seg_desc)
4752{
4753 u32 base_addr = get_desc_base(seg_desc);
4754
4755 return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
4323} 4756}
4324 4757
4325static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, 4758static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
4326 struct desc_struct *seg_desc) 4759 struct desc_struct *seg_desc)
4327{ 4760{
4328 u32 base_addr = get_desc_base(seg_desc); 4761 u32 base_addr = get_desc_base(seg_desc);
4329 4762
4330 return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); 4763 return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
4331} 4764}
4332 4765
4333static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) 4766static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
@@ -4338,18 +4771,6 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
4338 return kvm_seg.selector; 4771 return kvm_seg.selector;
4339} 4772}
4340 4773
4341static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
4342 u16 selector,
4343 struct kvm_segment *kvm_seg)
4344{
4345 struct desc_struct seg_desc;
4346
4347 if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
4348 return 1;
4349 seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
4350 return 0;
4351}
4352
4353static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) 4774static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
4354{ 4775{
4355 struct kvm_segment segvar = { 4776 struct kvm_segment segvar = {
@@ -4367,7 +4788,7 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
4367 .unusable = 0, 4788 .unusable = 0,
4368 }; 4789 };
4369 kvm_x86_ops->set_segment(vcpu, &segvar, seg); 4790 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
4370 return 0; 4791 return X86EMUL_CONTINUE;
4371} 4792}
4372 4793
4373static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) 4794static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
@@ -4377,24 +4798,112 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4377 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); 4798 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
4378} 4799}
4379 4800
4380int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4801int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
4381 int type_bits, int seg)
4382{ 4802{
4383 struct kvm_segment kvm_seg; 4803 struct kvm_segment kvm_seg;
4804 struct desc_struct seg_desc;
4805 u8 dpl, rpl, cpl;
4806 unsigned err_vec = GP_VECTOR;
4807 u32 err_code = 0;
4808 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
4809 int ret;
4384 4810
4385 if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE)) 4811 if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
4386 return kvm_load_realmode_segment(vcpu, selector, seg); 4812 return kvm_load_realmode_segment(vcpu, selector, seg);
4387 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
4388 return 1;
4389 kvm_seg.type |= type_bits;
4390 4813
4391 if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && 4814 /* NULL selector is not valid for TR, CS and SS */
4392 seg != VCPU_SREG_LDTR) 4815 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
4393 if (!kvm_seg.s) 4816 && null_selector)
4394 kvm_seg.unusable = 1; 4817 goto exception;
4818
4819 /* TR should be in GDT only */
4820 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
4821 goto exception;
4822
4823 ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
4824 if (ret)
4825 return ret;
4826
4827 seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
4828
4829 if (null_selector) { /* for NULL selector skip all following checks */
4830 kvm_seg.unusable = 1;
4831 goto load;
4832 }
4833
4834 err_code = selector & 0xfffc;
4835 err_vec = GP_VECTOR;
4395 4836
4837 /* can't load a system descriptor into a segment selector */
4838 if (seg <= VCPU_SREG_GS && !kvm_seg.s)
4839 goto exception;
4840
4841 if (!kvm_seg.present) {
4842 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
4843 goto exception;
4844 }
4845
4846 rpl = selector & 3;
4847 dpl = kvm_seg.dpl;
4848 cpl = kvm_x86_ops->get_cpl(vcpu);
4849
4850 switch (seg) {
4851 case VCPU_SREG_SS:
4852 /*
4853 * segment is not a writable data segment, or the segment
4854 * selector's RPL != CPL, or the descriptor's DPL != CPL
4855 */
4856 if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
4857 goto exception;
4858 break;
4859 case VCPU_SREG_CS:
4860 if (!(kvm_seg.type & 8))
4861 goto exception;
4862
4863 if (kvm_seg.type & 4) {
4864 /* conforming */
4865 if (dpl > cpl)
4866 goto exception;
4867 } else {
4868 /* nonconforming */
4869 if (rpl > cpl || dpl != cpl)
4870 goto exception;
4871 }
4872 /* CS(RPL) <- CPL */
4873 selector = (selector & 0xfffc) | cpl;
4874 break;
4875 case VCPU_SREG_TR:
4876 if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
4877 goto exception;
4878 break;
4879 case VCPU_SREG_LDTR:
4880 if (kvm_seg.s || kvm_seg.type != 2)
4881 goto exception;
4882 break;
4883 default: /* DS, ES, FS, or GS */
4884 /*
4885 * segment is not a data or readable code segment or
4886 * ((segment is a data or nonconforming code segment)
4887 * and (both RPL and CPL > DPL))
4888 */
4889 if ((kvm_seg.type & 0xa) == 0x8 ||
4890 (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
4891 goto exception;
4892 break;
4893 }
4894
4895 if (!kvm_seg.unusable && kvm_seg.s) {
4896 /* mark segment as accessed */
4897 kvm_seg.type |= 1;
4898 seg_desc.type |= 1;
4899 save_guest_segment_descriptor(vcpu, selector, &seg_desc);
4900 }
4901load:
4396 kvm_set_segment(vcpu, &kvm_seg, seg); 4902 kvm_set_segment(vcpu, &kvm_seg, seg);
4397 return 0; 4903 return X86EMUL_CONTINUE;
4904exception:
4905 kvm_queue_exception_e(vcpu, err_vec, err_code);
4906 return X86EMUL_PROPAGATE_FAULT;
4398} 4907}
4399 4908
4400static void save_state_to_tss32(struct kvm_vcpu *vcpu, 4909static void save_state_to_tss32(struct kvm_vcpu *vcpu,
@@ -4420,6 +4929,14 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4420 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); 4929 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4421} 4930}
4422 4931
4932static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
4933{
4934 struct kvm_segment kvm_seg;
4935 kvm_get_segment(vcpu, &kvm_seg, seg);
4936 kvm_seg.selector = sel;
4937 kvm_set_segment(vcpu, &kvm_seg, seg);
4938}
4939
4423static int load_state_from_tss32(struct kvm_vcpu *vcpu, 4940static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4424 struct tss_segment_32 *tss) 4941 struct tss_segment_32 *tss)
4425{ 4942{
@@ -4437,25 +4954,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4437 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); 4954 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
4438 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); 4955 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
4439 4956
4440 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) 4957 /*
4958 * SDM says that segment selectors are loaded before segment
4959 * descriptors
4960 */
4961 kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
4962 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
4963 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
4964 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
4965 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
4966 kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
4967 kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
4968
4969 /*
 4970	 * Now load the segment descriptors. If a fault happens at this stage,
 4971	 * it is handled in the context of the new task.
4972 */
4973 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
4441 return 1; 4974 return 1;
4442 4975
4443 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 4976 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
4444 return 1; 4977 return 1;
4445 4978
4446 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 4979 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
4447 return 1; 4980 return 1;
4448 4981
4449 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 4982 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
4450 return 1; 4983 return 1;
4451 4984
4452 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 4985 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
4453 return 1; 4986 return 1;
4454 4987
4455 if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) 4988 if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
4456 return 1; 4989 return 1;
4457 4990
4458 if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) 4991 if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
4459 return 1; 4992 return 1;
4460 return 0; 4993 return 0;
4461} 4994}
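
The point of the reordering in this hunk is the two-pass shape: every selector is written first, and only then are the descriptors loaded, so any fault in the second pass is already taken in the new task's context. A rough standalone sketch of that shape follows; the types and stub functions are invented for illustration and are not the KVM API.

    #include <stdio.h>

    enum seg_reg { SEG_LDTR, SEG_ES, SEG_CS, SEG_SS, SEG_DS, SEG_FS, SEG_GS, SEG_MAX };

    struct tss_image { unsigned short sel[SEG_MAX]; };  /* stand-in for tss_segment_32 */

    static void set_selector(int seg, unsigned short sel)
    {
        printf("selector[%d] <- %#x\n", seg, sel);
    }

    static int load_descriptor(int seg, unsigned short sel)
    {
        /* stub: pretend every GDT/LDT lookup succeeds */
        printf("descriptor[%d] loaded for %#x\n", seg, sel);
        return 0;
    }

    static int load_task_segments(const struct tss_image *tss)
    {
        int seg;

        /* pass 1: selectors only, in the order the SDM prescribes */
        for (seg = 0; seg < SEG_MAX; seg++)
            set_selector(seg, tss->sel[seg]);

        /* pass 2: descriptors; a failure here faults in the new task */
        for (seg = 0; seg < SEG_MAX; seg++)
            if (load_descriptor(seg, tss->sel[seg]))
                return 1;
        return 0;
    }

    int main(void)
    {
        struct tss_image tss = { .sel = { 0x28, 0x10, 0x08, 0x10, 0x10, 0x10, 0x10 } };
        return load_task_segments(&tss);
    }
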
@@ -4495,19 +5028,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
4495 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); 5028 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
4496 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); 5029 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
4497 5030
4498 if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) 5031 /*
5032 * SDM says that segment selectors are loaded before segment
5033 * descriptors
5034 */
5035 kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
5036 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
5037 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
5038 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5039 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5040
5041 /*
 5042	 * Now load the segment descriptors. If a fault happens at this stage,
 5043	 * it is handled in the context of the new task.
5044 */
5045 if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
4499 return 1; 5046 return 1;
4500 5047
4501 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 5048 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
4502 return 1; 5049 return 1;
4503 5050
4504 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 5051 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
4505 return 1; 5052 return 1;
4506 5053
4507 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 5054 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
4508 return 1; 5055 return 1;
4509 5056
4510 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 5057 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
4511 return 1; 5058 return 1;
4512 return 0; 5059 return 0;
4513} 5060}
@@ -4529,7 +5076,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
4529 sizeof tss_segment_16)) 5076 sizeof tss_segment_16))
4530 goto out; 5077 goto out;
4531 5078
4532 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 5079 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
4533 &tss_segment_16, sizeof tss_segment_16)) 5080 &tss_segment_16, sizeof tss_segment_16))
4534 goto out; 5081 goto out;
4535 5082
@@ -4537,7 +5084,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
4537 tss_segment_16.prev_task_link = old_tss_sel; 5084 tss_segment_16.prev_task_link = old_tss_sel;
4538 5085
4539 if (kvm_write_guest(vcpu->kvm, 5086 if (kvm_write_guest(vcpu->kvm,
4540 get_tss_base_addr(vcpu, nseg_desc), 5087 get_tss_base_addr_write(vcpu, nseg_desc),
4541 &tss_segment_16.prev_task_link, 5088 &tss_segment_16.prev_task_link,
4542 sizeof tss_segment_16.prev_task_link)) 5089 sizeof tss_segment_16.prev_task_link))
4543 goto out; 5090 goto out;
@@ -4568,7 +5115,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
4568 sizeof tss_segment_32)) 5115 sizeof tss_segment_32))
4569 goto out; 5116 goto out;
4570 5117
4571 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 5118 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
4572 &tss_segment_32, sizeof tss_segment_32)) 5119 &tss_segment_32, sizeof tss_segment_32))
4573 goto out; 5120 goto out;
4574 5121
@@ -4576,7 +5123,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
4576 tss_segment_32.prev_task_link = old_tss_sel; 5123 tss_segment_32.prev_task_link = old_tss_sel;
4577 5124
4578 if (kvm_write_guest(vcpu->kvm, 5125 if (kvm_write_guest(vcpu->kvm,
4579 get_tss_base_addr(vcpu, nseg_desc), 5126 get_tss_base_addr_write(vcpu, nseg_desc),
4580 &tss_segment_32.prev_task_link, 5127 &tss_segment_32.prev_task_link,
4581 sizeof tss_segment_32.prev_task_link)) 5128 sizeof tss_segment_32.prev_task_link))
4582 goto out; 5129 goto out;
@@ -4599,7 +5146,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4599 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); 5146 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
4600 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5147 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
4601 5148
4602 old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); 5149 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
4603 5150
4604 /* FIXME: Handle errors. Failure to read either TSS or their 5151 /* FIXME: Handle errors. Failure to read either TSS or their
4605 * descriptors should generate a pagefault. 5152 * descriptors should generate a pagefault.
@@ -4658,7 +5205,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4658 &nseg_desc); 5205 &nseg_desc);
4659 } 5206 }
4660 5207
4661 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); 5208 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
4662 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); 5209 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
4663 tr_seg.type = 11; 5210 tr_seg.type = 11;
4664 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); 5211 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
@@ -4689,17 +5236,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4689 5236
4690 kvm_set_cr8(vcpu, sregs->cr8); 5237 kvm_set_cr8(vcpu, sregs->cr8);
4691 5238
4692 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; 5239 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
4693 kvm_x86_ops->set_efer(vcpu, sregs->efer); 5240 kvm_x86_ops->set_efer(vcpu, sregs->efer);
4694 kvm_set_apic_base(vcpu, sregs->apic_base); 5241 kvm_set_apic_base(vcpu, sregs->apic_base);
4695 5242
4696 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 5243 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
4697
4698 mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
4699 kvm_x86_ops->set_cr0(vcpu, sregs->cr0); 5244 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
4700 vcpu->arch.cr0 = sregs->cr0; 5245 vcpu->arch.cr0 = sregs->cr0;
4701 5246
4702 mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; 5247 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
4703 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5248 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
4704 if (!is_long_mode(vcpu) && is_pae(vcpu)) { 5249 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
4705 load_pdptrs(vcpu, vcpu->arch.cr3); 5250 load_pdptrs(vcpu, vcpu->arch.cr3);
@@ -4734,7 +5279,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4734 /* Older userspace won't unhalt the vcpu on reset. */ 5279 /* Older userspace won't unhalt the vcpu on reset. */
4735 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && 5280 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
4736 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && 5281 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
4737 !(vcpu->arch.cr0 & X86_CR0_PE)) 5282 !is_protmode(vcpu))
4738 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5283 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4739 5284
4740 vcpu_put(vcpu); 5285 vcpu_put(vcpu);
@@ -4832,11 +5377,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4832{ 5377{
4833 unsigned long vaddr = tr->linear_address; 5378 unsigned long vaddr = tr->linear_address;
4834 gpa_t gpa; 5379 gpa_t gpa;
5380 int idx;
4835 5381
4836 vcpu_load(vcpu); 5382 vcpu_load(vcpu);
4837 down_read(&vcpu->kvm->slots_lock); 5383 idx = srcu_read_lock(&vcpu->kvm->srcu);
4838 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); 5384 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
4839 up_read(&vcpu->kvm->slots_lock); 5385 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4840 tr->physical_address = gpa; 5386 tr->physical_address = gpa;
4841 tr->valid = gpa != UNMAPPED_GVA; 5387 tr->valid = gpa != UNMAPPED_GVA;
4842 tr->writeable = 1; 5388 tr->writeable = 1;
@@ -4917,14 +5463,14 @@ EXPORT_SYMBOL_GPL(fx_init);
4917 5463
4918void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5464void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
4919{ 5465{
4920 if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) 5466 if (vcpu->guest_fpu_loaded)
4921 return; 5467 return;
4922 5468
4923 vcpu->guest_fpu_loaded = 1; 5469 vcpu->guest_fpu_loaded = 1;
4924 kvm_fx_save(&vcpu->arch.host_fx_image); 5470 kvm_fx_save(&vcpu->arch.host_fx_image);
4925 kvm_fx_restore(&vcpu->arch.guest_fx_image); 5471 kvm_fx_restore(&vcpu->arch.guest_fx_image);
5472 trace_kvm_fpu(1);
4926} 5473}
4927EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
4928 5474
4929void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5475void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
4930{ 5476{
@@ -4935,8 +5481,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
4935 kvm_fx_save(&vcpu->arch.guest_fx_image); 5481 kvm_fx_save(&vcpu->arch.guest_fx_image);
4936 kvm_fx_restore(&vcpu->arch.host_fx_image); 5482 kvm_fx_restore(&vcpu->arch.host_fx_image);
4937 ++vcpu->stat.fpu_reload; 5483 ++vcpu->stat.fpu_reload;
5484 set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
5485 trace_kvm_fpu(0);
4938} 5486}
4939EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
4940 5487
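The new KVM_REQ_DEACTIVATE_FPU bit defers the actual deactivation to the next guest entry instead of doing it in kvm_put_guest_fpu itself. The deferred-request idiom, reduced to a tiny standalone illustration (the flag name and functions below are invented for the example):

    #include <stdio.h>

    #define REQ_DEACTIVATE_FPU  (1UL << 0)

    static unsigned long requests;  /* stand-in for vcpu->requests */

    static void put_guest_fpu(void)
    {
        /* just record the request; the real work happens later */
        requests |= REQ_DEACTIVATE_FPU;
    }

    static void enter_guest(void)
    {
        if (requests & REQ_DEACTIVATE_FPU) {
            requests &= ~REQ_DEACTIVATE_FPU;
            printf("deactivating guest FPU before entry\n");
        }
        printf("entering guest\n");
    }

    int main(void)
    {
        put_guest_fpu();
        enter_guest();
        return 0;
    }
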
4941void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 5488void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
4942{ 5489{
@@ -5088,11 +5635,13 @@ fail:
5088 5635
5089void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 5636void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
5090{ 5637{
5638 int idx;
5639
5091 kfree(vcpu->arch.mce_banks); 5640 kfree(vcpu->arch.mce_banks);
5092 kvm_free_lapic(vcpu); 5641 kvm_free_lapic(vcpu);
5093 down_read(&vcpu->kvm->slots_lock); 5642 idx = srcu_read_lock(&vcpu->kvm->srcu);
5094 kvm_mmu_destroy(vcpu); 5643 kvm_mmu_destroy(vcpu);
5095 up_read(&vcpu->kvm->slots_lock); 5644 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5096 free_page((unsigned long)vcpu->arch.pio_data); 5645 free_page((unsigned long)vcpu->arch.pio_data);
5097} 5646}
5098 5647
@@ -5103,6 +5652,12 @@ struct kvm *kvm_arch_create_vm(void)
5103 if (!kvm) 5652 if (!kvm)
5104 return ERR_PTR(-ENOMEM); 5653 return ERR_PTR(-ENOMEM);
5105 5654
5655 kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
5656 if (!kvm->arch.aliases) {
5657 kfree(kvm);
5658 return ERR_PTR(-ENOMEM);
5659 }
5660
5106 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5661 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
5107 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 5662 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
5108 5663
@@ -5159,16 +5714,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
5159 put_page(kvm->arch.apic_access_page); 5714 put_page(kvm->arch.apic_access_page);
5160 if (kvm->arch.ept_identity_pagetable) 5715 if (kvm->arch.ept_identity_pagetable)
5161 put_page(kvm->arch.ept_identity_pagetable); 5716 put_page(kvm->arch.ept_identity_pagetable);
5717 cleanup_srcu_struct(&kvm->srcu);
5718 kfree(kvm->arch.aliases);
5162 kfree(kvm); 5719 kfree(kvm);
5163} 5720}
5164 5721
5165int kvm_arch_set_memory_region(struct kvm *kvm, 5722int kvm_arch_prepare_memory_region(struct kvm *kvm,
5166 struct kvm_userspace_memory_region *mem, 5723 struct kvm_memory_slot *memslot,
5167 struct kvm_memory_slot old, 5724 struct kvm_memory_slot old,
5725 struct kvm_userspace_memory_region *mem,
5168 int user_alloc) 5726 int user_alloc)
5169{ 5727{
5170 int npages = mem->memory_size >> PAGE_SHIFT; 5728 int npages = memslot->npages;
5171 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
5172 5729
5173 /*To keep backward compatibility with older userspace, 5730 /*To keep backward compatibility with older userspace,
 5174	 *x86 needs to handle the !user_alloc case.				 5731
@@ -5188,26 +5745,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
5188 if (IS_ERR((void *)userspace_addr)) 5745 if (IS_ERR((void *)userspace_addr))
5189 return PTR_ERR((void *)userspace_addr); 5746 return PTR_ERR((void *)userspace_addr);
5190 5747
5191 /* set userspace_addr atomically for kvm_hva_to_rmapp */
5192 spin_lock(&kvm->mmu_lock);
5193 memslot->userspace_addr = userspace_addr; 5748 memslot->userspace_addr = userspace_addr;
5194 spin_unlock(&kvm->mmu_lock);
5195 } else {
5196 if (!old.user_alloc && old.rmap) {
5197 int ret;
5198
5199 down_write(&current->mm->mmap_sem);
5200 ret = do_munmap(current->mm, old.userspace_addr,
5201 old.npages * PAGE_SIZE);
5202 up_write(&current->mm->mmap_sem);
5203 if (ret < 0)
5204 printk(KERN_WARNING
5205 "kvm_vm_ioctl_set_memory_region: "
5206 "failed to munmap memory\n");
5207 }
5208 } 5749 }
5209 } 5750 }
5210 5751
5752
5753 return 0;
5754}
5755
5756void kvm_arch_commit_memory_region(struct kvm *kvm,
5757 struct kvm_userspace_memory_region *mem,
5758 struct kvm_memory_slot old,
5759 int user_alloc)
5760{
5761
5762 int npages = mem->memory_size >> PAGE_SHIFT;
5763
5764 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
5765 int ret;
5766
5767 down_write(&current->mm->mmap_sem);
5768 ret = do_munmap(current->mm, old.userspace_addr,
5769 old.npages * PAGE_SIZE);
5770 up_write(&current->mm->mmap_sem);
5771 if (ret < 0)
5772 printk(KERN_WARNING
5773 "kvm_vm_ioctl_set_memory_region: "
5774 "failed to munmap memory\n");
5775 }
5776
5211 spin_lock(&kvm->mmu_lock); 5777 spin_lock(&kvm->mmu_lock);
5212 if (!kvm->arch.n_requested_mmu_pages) { 5778 if (!kvm->arch.n_requested_mmu_pages) {
5213 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 5779 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -5216,8 +5782,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
5216 5782
5217 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 5783 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
5218 spin_unlock(&kvm->mmu_lock); 5784 spin_unlock(&kvm->mmu_lock);
5219
5220 return 0;
5221} 5785}
5222 5786
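The old single kvm_arch_set_memory_region is split into a prepare step that may fail before the new slot is visible and a commit step that runs after it has been installed. A generic, self-contained sketch of that two-phase pattern is below; the names and the slot type are invented for illustration and do not mirror the KVM generic code.

    #include <stdio.h>

    struct slot { long base; long npages; };

    /* may fail; must not touch any published state */
    static int prepare_region(const struct slot *new_slot)
    {
        if (new_slot->npages == 0)
            return -1;
        return 0;
    }

    /* runs only after the new slot is installed; must not fail */
    static void commit_region(const struct slot *old_slot, const struct slot *new_slot)
    {
        printf("replaced [%ld +%ld] with [%ld +%ld]\n",
               old_slot->base, old_slot->npages,
               new_slot->base, new_slot->npages);
    }

    int main(void)
    {
        struct slot installed = { 0, 16 };
        struct slot update = { 0x100000, 32 };
        struct slot old_slot;

        if (prepare_region(&update))
            return 1;
        old_slot = installed;
        installed = update;     /* publish the new slot */
        commit_region(&old_slot, &installed);
        return 0;
    }
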
5223void kvm_arch_flush_shadow(struct kvm *kvm) 5787void kvm_arch_flush_shadow(struct kvm *kvm)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5eadea585d2..2d101639bd8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,6 +2,7 @@
2#define ARCH_X86_KVM_X86_H 2#define ARCH_X86_KVM_X86_H
3 3
4#include <linux/kvm_host.h> 4#include <linux/kvm_host.h>
5#include "kvm_cache_regs.h"
5 6
6static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) 7static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
7{ 8{
@@ -35,4 +36,33 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
35struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 36struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
36 u32 function, u32 index); 37 u32 function, u32 index);
37 38
39static inline bool is_protmode(struct kvm_vcpu *vcpu)
40{
41 return kvm_read_cr0_bits(vcpu, X86_CR0_PE);
42}
43
44static inline int is_long_mode(struct kvm_vcpu *vcpu)
45{
46#ifdef CONFIG_X86_64
47 return vcpu->arch.efer & EFER_LMA;
48#else
49 return 0;
50#endif
51}
52
53static inline int is_pae(struct kvm_vcpu *vcpu)
54{
55 return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
56}
57
58static inline int is_pse(struct kvm_vcpu *vcpu)
59{
60 return kvm_read_cr4_bits(vcpu, X86_CR4_PSE);
61}
62
63static inline int is_paging(struct kvm_vcpu *vcpu)
64{
65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
66}
67
38#endif 68#endif
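
The helpers added to x86.h reduce open-coded control-register tests, such as the CR0.PE check replaced earlier, to one-line predicates over the cached register values. Their shape, stripped down to a standalone illustration (the caching layer is stubbed with a plain field; struct and helper names are stand-ins):

    #include <stdbool.h>
    #include <stdio.h>

    #define X86_CR0_PE  0x00000001UL
    #define X86_CR0_PG  0x80000000UL

    struct vcpu_stub { unsigned long cr0; }; /* stand-in for struct kvm_vcpu */

    /* stand-in for kvm_read_cr0_bits(): mask the cached value */
    static unsigned long read_cr0_bits(const struct vcpu_stub *v, unsigned long mask)
    {
        return v->cr0 & mask;
    }

    static bool is_protmode(const struct vcpu_stub *v)
    {
        return read_cr0_bits(v, X86_CR0_PE);
    }

    static bool is_paging(const struct vcpu_stub *v)
    {
        return read_cr0_bits(v, X86_CR0_PG);
    }

    int main(void)
    {
        struct vcpu_stub v = { .cr0 = X86_CR0_PE };

        printf("protmode=%d paging=%d\n", is_protmode(&v), is_paging(&v));
        return 0;
    }
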
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 0b7d3e9593e..b110d97fb92 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -13,6 +13,8 @@ obj-$(CONFIG_X86_VISWS) += visws.o
13 13
14obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 14obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
15 15
16obj-$(CONFIG_X86_MRST) += mrst.o
17
16obj-y += common.o early.o 18obj-y += common.o early.o
17obj-y += amd_bus.o bus_numa.o 19obj-y += amd_bus.o bus_numa.o
18 20
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 5f11ff6f538..6e22454bfaa 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -298,17 +298,14 @@ int __init pci_acpi_init(void)
298{ 298{
299 struct pci_dev *dev = NULL; 299 struct pci_dev *dev = NULL;
300 300
301 if (pcibios_scanned)
302 return 0;
303
304 if (acpi_noirq) 301 if (acpi_noirq)
305 return 0; 302 return -ENODEV;
306 303
307 printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); 304 printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
308 acpi_irq_penalty_init(); 305 acpi_irq_penalty_init();
309 pcibios_scanned++;
310 pcibios_enable_irq = acpi_pci_irq_enable; 306 pcibios_enable_irq = acpi_pci_irq_enable;
311 pcibios_disable_irq = acpi_pci_irq_disable; 307 pcibios_disable_irq = acpi_pci_irq_disable;
308 x86_init.pci.init_irq = x86_init_noop;
312 309
313 if (pci_routeirq) { 310 if (pci_routeirq) {
314 /* 311 /*
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 3736176acaa..294e10cb11e 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -72,12 +72,6 @@ struct pci_ops pci_root_ops = {
72}; 72};
73 73
74/* 74/*
75 * legacy, numa, and acpi all want to call pcibios_scan_root
76 * from their initcalls. This flag prevents that.
77 */
78int pcibios_scanned;
79
80/*
81 * This interrupt-safe spinlock protects all accesses to PCI 75 * This interrupt-safe spinlock protects all accesses to PCI
82 * configuration space. 76 * configuration space.
83 */ 77 */
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index 25a1f8efed4..adb62aaa7ec 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,6 +1,7 @@
1#include <linux/pci.h> 1#include <linux/pci.h>
2#include <linux/init.h> 2#include <linux/init.h>
3#include <asm/pci_x86.h> 3#include <asm/pci_x86.h>
4#include <asm/x86_init.h>
4 5
5/* arch_initcall has too random ordering, so call the initializers 6/* arch_initcall has too random ordering, so call the initializers
6 in the right sequence from here. */ 7 in the right sequence from here. */
@@ -15,10 +16,9 @@ static __init int pci_arch_init(void)
15 if (!(pci_probe & PCI_PROBE_NOEARLY)) 16 if (!(pci_probe & PCI_PROBE_NOEARLY))
16 pci_mmcfg_early_init(); 17 pci_mmcfg_early_init();
17 18
18#ifdef CONFIG_PCI_OLPC 19 if (x86_init.pci.arch_init && !x86_init.pci.arch_init())
19 if (!pci_olpc_init()) 20 return 0;
20 return 0; /* skip additional checks if it's an XO */ 21
21#endif
22#ifdef CONFIG_PCI_BIOS 22#ifdef CONFIG_PCI_BIOS
23 pci_pcbios_init(); 23 pci_pcbios_init();
24#endif 24#endif
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index b02f6d8ac92..8b107521d24 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -53,7 +53,7 @@ struct irq_router_handler {
53 int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); 53 int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
54}; 54};
55 55
56int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL; 56int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq;
57void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; 57void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL;
58 58
59/* 59/*
@@ -1018,7 +1018,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
1018 return 1; 1018 return 1;
1019} 1019}
1020 1020
1021static void __init pcibios_fixup_irqs(void) 1021void __init pcibios_fixup_irqs(void)
1022{ 1022{
1023 struct pci_dev *dev = NULL; 1023 struct pci_dev *dev = NULL;
1024 u8 pin; 1024 u8 pin;
@@ -1112,12 +1112,12 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = {
1112 { } 1112 { }
1113}; 1113};
1114 1114
1115int __init pcibios_irq_init(void) 1115void __init pcibios_irq_init(void)
1116{ 1116{
1117 DBG(KERN_DEBUG "PCI: IRQ init\n"); 1117 DBG(KERN_DEBUG "PCI: IRQ init\n");
1118 1118
1119 if (pcibios_enable_irq || raw_pci_ops == NULL) 1119 if (raw_pci_ops == NULL)
1120 return 0; 1120 return;
1121 1121
1122 dmi_check_system(pciirq_dmi_table); 1122 dmi_check_system(pciirq_dmi_table);
1123 1123
@@ -1144,9 +1144,7 @@ int __init pcibios_irq_init(void)
1144 pirq_table = NULL; 1144 pirq_table = NULL;
1145 } 1145 }
1146 1146
1147 pcibios_enable_irq = pirq_enable_irq; 1147 x86_init.pci.fixup_irqs();
1148
1149 pcibios_fixup_irqs();
1150 1148
1151 if (io_apic_assign_pci_irqs && pci_routeirq) { 1149 if (io_apic_assign_pci_irqs && pci_routeirq) {
1152 struct pci_dev *dev = NULL; 1150 struct pci_dev *dev = NULL;
@@ -1159,8 +1157,6 @@ int __init pcibios_irq_init(void)
1159 for_each_pci_dev(dev) 1157 for_each_pci_dev(dev)
1160 pirq_enable_irq(dev); 1158 pirq_enable_irq(dev);
1161 } 1159 }
1162
1163 return 0;
1164} 1160}
1165 1161
1166static void pirq_penalize_isa_irq(int irq, int active) 1162static void pirq_penalize_isa_irq(int irq, int active)
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 4061bb0f267..0db5eaf5456 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -35,16 +35,13 @@ static void __devinit pcibios_fixup_peer_bridges(void)
35 } 35 }
36} 36}
37 37
38static int __init pci_legacy_init(void) 38int __init pci_legacy_init(void)
39{ 39{
40 if (!raw_pci_ops) { 40 if (!raw_pci_ops) {
41 printk("PCI: System does not support PCI\n"); 41 printk("PCI: System does not support PCI\n");
42 return 0; 42 return 0;
43 } 43 }
44 44
45 if (pcibios_scanned++)
46 return 0;
47
48 printk("PCI: Probing PCI hardware\n"); 45 printk("PCI: Probing PCI hardware\n");
49 pci_root_bus = pcibios_scan_root(0); 46 pci_root_bus = pcibios_scan_root(0);
50 if (pci_root_bus) 47 if (pci_root_bus)
@@ -55,18 +52,15 @@ static int __init pci_legacy_init(void)
55 52
56int __init pci_subsys_init(void) 53int __init pci_subsys_init(void)
57{ 54{
58#ifdef CONFIG_X86_NUMAQ 55 /*
59 pci_numaq_init();	 56	 * The init function returns a non-zero value when
60#endif 57 * pci_legacy_init should be invoked.
61#ifdef CONFIG_ACPI 58 */
62 pci_acpi_init(); 59 if (x86_init.pci.init())
63#endif 60 pci_legacy_init();
64#ifdef CONFIG_X86_VISWS 61
65 pci_visws_init();
66#endif
67 pci_legacy_init();
68 pcibios_fixup_peer_bridges(); 62 pcibios_fixup_peer_bridges();
69 pcibios_irq_init(); 63 x86_init.pci.init_irq();
70 pcibios_init(); 64 pcibios_init();
71 65
72 return 0; 66 return 0;
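
pci_subsys_init now routes everything through a single x86_init.pci.init hook, whose non-zero return requests the legacy probe as a fallback. The dispatch-with-fallback shape in standalone form (the ops struct and function names below are illustrative, not the kernel's x86_init definitions):

    #include <stdio.h>

    struct pci_init_ops {
        int (*init)(void);   /* non-zero return: fall back to the legacy probe */
        void (*init_irq)(void);
    };

    static int acpi_like_init(void)     { printf("ACPI PCI init\n"); return 0; }
    static int default_init(void)       { return 1; /* nothing claimed the bus */ }
    static void default_init_irq(void)  { printf("legacy IRQ init\n"); }
    static void legacy_init(void)       { printf("legacy PCI probe\n"); }

    static struct pci_init_ops pci_ops = {
        .init     = default_init,
        .init_irq = default_init_irq,
    };

    int main(void)
    {
        /* a platform (ACPI, NUMAQ, VISWS, ...) may override the default at boot */
        pci_ops.init = acpi_like_init;

        if (pci_ops.init())
            legacy_init();
        pci_ops.init_irq();
        return 0;
    }
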
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
new file mode 100644
index 00000000000..8bf2fcb88d0
--- /dev/null
+++ b/arch/x86/pci/mrst.c
@@ -0,0 +1,262 @@
1/*
2 * Moorestown PCI support
3 * Copyright (c) 2008 Intel Corporation
4 * Jesse Barnes <jesse.barnes@intel.com>
5 *
6 * Moorestown has an interesting PCI implementation:
7 * - configuration space is memory mapped (as defined by MCFG)
8 * - Lincroft devices also have a real, type 1 configuration space
9 * - Early Lincroft silicon has a type 1 access bug that will cause
10 * a hang if non-existent devices are accessed
11 * - some devices have the "fixed BAR" capability, which means
12 * they can't be relocated or modified; check for that during
13 * BAR sizing
14 *
15 * So, we use the MCFG space for all reads and writes, but also send
16 * Lincroft writes to type 1 space. But only read/write if the device
17 * actually exists, otherwise return all 1s for reads and bit bucket
18 * the writes.
19 */
20
21#include <linux/sched.h>
22#include <linux/pci.h>
23#include <linux/ioport.h>
24#include <linux/init.h>
25#include <linux/dmi.h>
26
27#include <asm/acpi.h>
28#include <asm/segment.h>
29#include <asm/io.h>
30#include <asm/smp.h>
31#include <asm/pci_x86.h>
32#include <asm/hw_irq.h>
33#include <asm/io_apic.h>
34
35#define PCIE_CAP_OFFSET 0x100
36
37/* Fixed BAR fields */
38#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */
39#define PCI_FIXED_BAR_0_SIZE 0x04
40#define PCI_FIXED_BAR_1_SIZE 0x08
41#define PCI_FIXED_BAR_2_SIZE 0x0c
42#define PCI_FIXED_BAR_3_SIZE 0x10
43#define PCI_FIXED_BAR_4_SIZE 0x14
44#define PCI_FIXED_BAR_5_SIZE 0x1c
45
46/**
47 * fixed_bar_cap - return the offset of the fixed BAR cap if found
48 * @bus: PCI bus
49 * @devfn: device in question
50 *
51 * Look for the fixed BAR cap on @bus and @devfn, returning its offset
52 * if found or 0 otherwise.
53 */
54static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
55{
56 int pos;
57 u32 pcie_cap = 0, cap_data;
58
59 pos = PCIE_CAP_OFFSET;
60
61 if (!raw_pci_ext_ops)
62 return 0;
63
64 while (pos) {
65 if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
66 devfn, pos, 4, &pcie_cap))
67 return 0;
68
69 if (pcie_cap == 0xffffffff)
70 return 0;
71
72 if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
73 raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
74 devfn, pos + 4, 4, &cap_data);
75 if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR)
76 return pos;
77 }
78
79 pos = pcie_cap >> 20;
80 }
81
82 return 0;
83}
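
fixed_bar_cap walks the PCIe extended capability list: each 32-bit header carries the capability ID in its low bits and the offset of the next capability in bits 31:20. A standalone walk over a fake config space, with the same termination checks as above (the array-backed config space is, of course, an invention for the example):

    #include <stdint.h>
    #include <stdio.h>

    #define CAP_ID(hdr)    ((hdr) & 0xffff)
    #define CAP_NEXT(hdr)  ((hdr) >> 20)

    static uint32_t cfg[0x1000 / 4];  /* fake 4 KiB extended config space */

    static uint32_t cfg_read(unsigned int off)
    {
        return cfg[off / 4];
    }

    /* return the offset of the capability with the given ID, or 0 */
    static unsigned int find_ext_cap(uint16_t id)
    {
        unsigned int pos = 0x100;

        while (pos) {
            uint32_t hdr = cfg_read(pos);

            if (hdr == 0 || hdr == 0xffffffff)
                return 0;
            if (CAP_ID(hdr) == id)
                return pos;
            pos = CAP_NEXT(hdr);
        }
        return 0;
    }

    int main(void)
    {
        /* capability 0x000b at 0x100, chained to capability 0x0003 at 0x148 */
        cfg[0x100 / 4] = (0x148u << 20) | 0x000b;
        cfg[0x148 / 4] = 0x0003;

        printf("cap 0x0003 found at %#x\n", find_ext_cap(0x0003));
        return 0;
    }
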
84
85static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn,
86 int reg, int len, u32 val, int offset)
87{
88 u32 size;
89 unsigned int domain, busnum;
90 int bar = (reg - PCI_BASE_ADDRESS_0) >> 2;
91
92 domain = pci_domain_nr(bus);
93 busnum = bus->number;
94
95 if (val == ~0 && len == 4) {
96 unsigned long decode;
97
98 raw_pci_ext_ops->read(domain, busnum, devfn,
99 offset + 8 + (bar * 4), 4, &size);
100
101 /* Turn the size into a decode pattern for the sizing code */
102 if (size) {
103 decode = size - 1;
104 decode |= decode >> 1;
105 decode |= decode >> 2;
106 decode |= decode >> 4;
107 decode |= decode >> 8;
108 decode |= decode >> 16;
109 decode++;
110 decode = ~(decode - 1);
111 } else {
112 decode = ~0;
113 }
114
115 /*
116 * If val is all ones, the core code is trying to size the reg,
117 * so update the mmconfig space with the real size.
118 *
119 * Note: this assumes the fixed size we got is a power of two.
120 */
121 return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4,
122 decode);
123 }
124
125 /* This is some other kind of BAR write, so just do it. */
126 return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val);
127}
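
When the core BAR-sizing code writes all ones, pci_device_update_fixed answers with a decode pattern derived from the fixed size: round the size up to a power of two, then clear the low bits. The bit trick on its own, as a small standalone check:

    #include <stdint.h>
    #include <stdio.h>

    /* same computation as the hunk above: size -> BAR decode mask */
    static uint32_t size_to_decode(uint32_t size)
    {
        uint32_t decode;

        if (!size)
            return ~0u;

        decode = size - 1;
        decode |= decode >> 1;
        decode |= decode >> 2;
        decode |= decode >> 4;
        decode |= decode >> 8;
        decode |= decode >> 16;
        decode++;               /* next power of two >= size */
        return ~(decode - 1);
    }

    int main(void)
    {
        printf("size 0x1000 -> decode %#x\n", size_to_decode(0x1000)); /* 0xfffff000 */
        printf("size 0x0600 -> decode %#x\n", size_to_decode(0x600));  /* 0xfffff800 */
        return 0;
    }
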
128
129/**
130 * type1_access_ok - check whether to use type 1
131 * @bus: bus number
132 * @devfn: device & function in question
133 *
134 * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at
135 * all, then we can go ahead with any reads & writes. If it's on a Lincroft,
136 * but doesn't exist, avoid the access altogether to keep the chip from
137 * hanging.
138 */
139static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
140{
141	/* This is a workaround for an A0 LNC bug where the PCI status register does
142	 * not have the new CAP bit set and cannot be written by SW either.
143 *
144 * PCI header type in real LNC indicates a single function device, this
145 * will prevent probing other devices under the same function in PCI
146 * shim. Therefore, use the header type in shim instead.
147 */
148 if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
149 return 0;
150 if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0)))
151 return 1;
152 return 0; /* langwell on others */
153}
154
155static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
156 int size, u32 *value)
157{
158 if (type1_access_ok(bus->number, devfn, where))
159 return pci_direct_conf1.read(pci_domain_nr(bus), bus->number,
160 devfn, where, size, value);
161 return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
162 devfn, where, size, value);
163}
164
165static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
166 int size, u32 value)
167{
168 int offset;
169
170	/* On MRST there is no PCI ROM BAR: drop the write so a subsequent
171	 * ROM BAR read returns 0 and the ROM BAR is simply ignored.
172 */
173 if (where == PCI_ROM_ADDRESS)
174 return 0;
175
176 /*
177 * Devices with fixed BARs need special handling:
178 * - BAR sizing code will save, write ~0, read size, restore
179 * - so writes to fixed BARs need special handling
180 * - other writes to fixed BAR devices should go through mmconfig
181 */
182 offset = fixed_bar_cap(bus, devfn);
183 if (offset &&
184 (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) {
185 return pci_device_update_fixed(bus, devfn, where, size, value,
186 offset);
187 }
188
189 /*
190 * On Moorestown update both real & mmconfig space
191 * Note: early Lincroft silicon can't handle type 1 accesses to
192 * non-existent devices, so just eat the write in that case.
193 */
194 if (type1_access_ok(bus->number, devfn, where))
195 return pci_direct_conf1.write(pci_domain_nr(bus), bus->number,
196 devfn, where, size, value);
197 return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn,
198 where, size, value);
199}
200
201static int mrst_pci_irq_enable(struct pci_dev *dev)
202{
203 u8 pin;
204 struct io_apic_irq_attr irq_attr;
205
206 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
207
208	/* MRST only has an IOAPIC; the PCI IRQ lines are mapped 1:1 to
209	 * IOAPIC RTE entries, so we just enable the RTE for the device.
210 */
211 irq_attr.ioapic = mp_find_ioapic(dev->irq);
212 irq_attr.ioapic_pin = dev->irq;
213 irq_attr.trigger = 1; /* level */
214 irq_attr.polarity = 1; /* active low */
215 io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr);
216
217 return 0;
218}
219
220struct pci_ops pci_mrst_ops = {
221 .read = pci_read,
222 .write = pci_write,
223};
224
225/**
226 * pci_mrst_init - installs pci_mrst_ops
227 *
228 * Moorestown has an interesting PCI implementation (see above).
229 * Called when the early platform detection installs it.
230 * Called by early platform detection when a Moorestown platform is found.
231int __init pci_mrst_init(void)
232{
233 printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n");
234 pci_mmcfg_late_init();
235 pcibios_enable_irq = mrst_pci_irq_enable;
236 pci_root_ops = pci_mrst_ops;
237 /* Continue with standard init */
238 return 1;
239}
240
241/*
242 * Langwell devices reside at fixed offsets, don't try to move them.
243 */
244static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
245{
246 unsigned long offset;
247 u32 size;
248 int i;
249
250 /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */
251 offset = fixed_bar_cap(dev->bus, dev->devfn);
252 if (!offset || PCI_DEVFN(2, 0) == dev->devfn ||
253 PCI_DEVFN(2, 2) == dev->devfn)
254 return;
255
256 for (i = 0; i < PCI_ROM_RESOURCE; i++) {
257 pci_read_config_dword(dev, offset + 8 + (i * 4), &size);
258 dev->resource[i].end = dev->resource[i].start + size - 1;
259 dev->resource[i].flags |= IORESOURCE_PCI_FIXED;
260 }
261}
262DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup);
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 8884a1c1ada..8223738ad80 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -148,14 +148,8 @@ int __init pci_numaq_init(void)
148{ 148{
149 int quad; 149 int quad;
150 150
151 if (!found_numaq)
152 return 0;
153
154 raw_pci_ops = &pci_direct_conf1_mq; 151 raw_pci_ops = &pci_direct_conf1_mq;
155 152
156 if (pcibios_scanned++)
157 return 0;
158
159 pci_root_bus = pcibios_scan_root(0); 153 pci_root_bus = pcibios_scan_root(0);
160 if (pci_root_bus) 154 if (pci_root_bus)
161 pci_bus_add_devices(pci_root_bus); 155 pci_bus_add_devices(pci_root_bus);
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index b889d824f7c..b34815408f5 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -304,9 +304,6 @@ static struct pci_raw_ops pci_olpc_conf = {
304 304
305int __init pci_olpc_init(void) 305int __init pci_olpc_init(void)
306{ 306{
307 if (!machine_is_olpc() || olpc_has_vsa())
308 return -ENODEV;
309
310 printk(KERN_INFO "PCI: Using configuration type OLPC\n"); 307 printk(KERN_INFO "PCI: Using configuration type OLPC\n");
311 raw_pci_ops = &pci_olpc_conf; 308 raw_pci_ops = &pci_olpc_conf;
312 is_lx = is_geode_lx(); 309 is_lx = is_geode_lx();
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index bcead7a4687..03008f72eb0 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -69,9 +69,6 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
69 69
70int __init pci_visws_init(void) 70int __init pci_visws_init(void)
71{ 71{
72 if (!is_visws_box())
73 return -1;
74
75 pcibios_enable_irq = &pci_visws_enable_irq; 72 pcibios_enable_irq = &pci_visws_enable_irq;
76 pcibios_disable_irq = &pci_visws_disable_irq; 73 pcibios_disable_irq = &pci_visws_disable_irq;
77 74
@@ -90,5 +87,6 @@ int __init pci_visws_init(void)
90 pci_scan_bus_with_sysdata(pci_bus1); 87 pci_scan_bus_with_sysdata(pci_bus1);
91 pci_fixup_irqs(pci_common_swizzle, visws_map_irq); 88 pci_fixup_irqs(pci_common_swizzle, visws_map_irq);
92 pcibios_resource_survey(); 89 pcibios_resource_survey();
93 return 0; 90 /* Request bus scan */
91 return 1;
94} 92}