Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                      |   4
-rw-r--r--  arch/x86/Kconfig.debug                |  20
-rw-r--r--  arch/x86/configs/i386_defconfig       |   1
-rw-r--r--  arch/x86/configs/x86_64_defconfig     |   1
-rw-r--r--  arch/x86/include/asm/io.h             |  24
-rw-r--r--  arch/x86/include/asm/kgdb.h           |   1
-rw-r--r--  arch/x86/include/asm/ptrace.h         |  18
-rw-r--r--  arch/x86/include/asm/tsc.h            |   4
-rw-r--r--  arch/x86/include/asm/vdso.h           |  14
-rw-r--r--  arch/x86/include/asm/vgtod.h          |   2
-rw-r--r--  arch/x86/include/asm/vsyscall.h       |  12
-rw-r--r--  arch/x86/include/asm/vvar.h           |  52
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h  |   7
-rw-r--r--  arch/x86/kernel/Makefile              |   8
-rw-r--r--  arch/x86/kernel/setup.c               |   2
-rw-r--r--  arch/x86/kernel/tboot.c               |   1
-rw-r--r--  arch/x86/kernel/time.c                |   2
-rw-r--r--  arch/x86/kernel/tsc.c                 |  19
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S         |  34
-rw-r--r--  arch/x86/kernel/vread_tsc_64.c        |  36
-rw-r--r--  arch/x86/kernel/vsyscall_64.c         |  48
-rw-r--r--  arch/x86/kvm/mmu.c                    |   3
-rw-r--r--  arch/x86/mm/fault.c                   |  12
-rw-r--r--  arch/x86/mm/hugetlbpage.c             |   4
-rw-r--r--  arch/x86/mm/init.c                    |   2
-rw-r--r--  arch/x86/vdso/Makefile                |  17
-rw-r--r--  arch/x86/vdso/vclock_gettime.c        |  74
-rw-r--r--  arch/x86/vdso/vdso.lds.S              |   9
-rw-r--r--  arch/x86/vdso/vextern.h               |  16
-rw-r--r--  arch/x86/vdso/vgetcpu.c               |   3
-rw-r--r--  arch/x86/vdso/vma.c                   |  27
-rw-r--r--  arch/x86/vdso/vvar.c                  |  12
-rw-r--r--  arch/x86/xen/mmu.c                    | 284
-rw-r--r--  arch/x86/xen/mmu.h                    |  37
34 files changed, 286 insertions(+), 524 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 880fcb6c86f4..da349723d411 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -17,8 +17,6 @@ config X86_64
 config X86
 	def_bool y
 	select HAVE_AOUT if X86_32
-	select HAVE_READQ
-	select HAVE_WRITEQ
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
@@ -66,7 +64,6 @@ config X86
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_SPARSE_IRQ
 	select GENERIC_FIND_FIRST_BIT
-	select GENERIC_FIND_NEXT_BIT
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
@@ -917,6 +914,7 @@ config TOSHIBA
 
 config I8K
 	tristate "Dell laptop support"
+	select HWMON
 	---help---
 	  This adds a driver to safely access the System Management Mode
 	  of the CPU on the Dell Inspiron 8000. The System Management Mode
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 615e18810f48..c0f8a5c88910 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -66,26 +66,6 @@ config DEBUG_STACKOVERFLOW
 	  This option will cause messages to be printed if free stack space
 	  drops below a certain limit.
 
-config DEBUG_STACK_USAGE
-	bool "Stack utilization instrumentation"
-	depends on DEBUG_KERNEL
-	---help---
-	  Enables the display of the minimum amount of free stack which each
-	  task has ever had available in the sysrq-T and sysrq-P debug output.
-
-	  This option will slow down process creation somewhat.
-
-config DEBUG_PER_CPU_MAPS
-	bool "Debug access to per_cpu maps"
-	depends on DEBUG_KERNEL
-	depends on SMP
-	---help---
-	  Say Y to verify that the per_cpu map being accessed has
-	  been setup. Adds a fair amount of code to kernel memory
-	  and decreases performance.
-
-	  Say N if unsure.
-
 config X86_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 6f9872658dd2..2bf18059fbea 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -10,7 +10,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
-CONFIG_CGROUP_NS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index ee01a9d5d4f0..22a0dc8e51dd 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -11,7 +11,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
-CONFIG_CGROUP_NS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 072273082528..d02804d650c4 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -38,7 +38,6 @@
 
 #include <linux/string.h>
 #include <linux/compiler.h>
-#include <asm-generic/int-ll64.h>
 #include <asm/page.h>
 
 #include <xen/xen.h>
@@ -87,27 +86,6 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
 build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
 build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
 
-#else
-
-static inline __u64 readq(const volatile void __iomem *addr)
-{
-	const volatile u32 __iomem *p = addr;
-	u32 low, high;
-
-	low = readl(p);
-	high = readl(p + 1);
-
-	return low + ((u64)high << 32);
-}
-
-static inline void writeq(__u64 val, volatile void __iomem *addr)
-{
-	writel(val, addr);
-	writel(val >> 32, addr+4);
-}
-
-#endif
-
 #define readq_relaxed(a)	readq(a)
 
 #define __raw_readq(a)		readq(a)
@@ -117,6 +95,8 @@ static inline void writeq(__u64 val, volatile void __iomem *addr)
 #define readq			readq
 #define writeq			writeq
 
+#endif
+
 /**
  *	virt_to_phys	-	map virtual addresses to physical
  *	@address: address to remap
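
[Note: with the generic 32-bit fallback above removed, readq/writeq now exist only where the architecture really has 64-bit MMIO. The "#define readq readq" / "#define writeq writeq" lines are what let portable drivers test for the accessors with the preprocessor. A sketch of that detection idiom follows; the helper name my_readq and the split-access fallback are illustrative assumptions, not part of this patch:

	/* Driver-side sketch: fall back to two 32-bit reads when the
	 * platform provides no native readq.  The split access is not
	 * atomic; whether that is acceptable is device-specific. */
	static inline u64 my_readq(const volatile void __iomem *addr)
	{
	#ifdef readq
		return readq(addr);
	#else
		u32 low  = readl(addr);
		u32 high = readl(addr + 4);
		return ((u64)high << 32) | low;
	#endif
	}
]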
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index 396f5b5fc4d7..77e95f54570a 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -77,6 +77,7 @@ static inline void arch_kgdb_breakpoint(void)
 }
 #define BREAK_INSTR_SIZE	1
 #define CACHE_FLUSH_IS_SAFE	1
+#define GDB_ADJUSTS_BREAK_OFFSET
 
 extern int kgdb_ll_trap(int cmd, const char *str,
 			struct pt_regs *regs, long err, int trap, int sig);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 1babf8adecdf..94e7618fcac8 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -136,6 +136,7 @@ struct cpuinfo_x86;
 struct task_struct;
 
 extern unsigned long profile_pc(struct pt_regs *regs);
+#define profile_pc profile_pc
 
 extern unsigned long
 convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
@@ -202,20 +203,11 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 #endif
 }
 
-static inline unsigned long instruction_pointer(struct pt_regs *regs)
-{
-	return regs->ip;
-}
-
-static inline unsigned long frame_pointer(struct pt_regs *regs)
-{
-	return regs->bp;
-}
+#define GET_IP(regs) ((regs)->ip)
+#define GET_FP(regs) ((regs)->bp)
+#define GET_USP(regs) ((regs)->sp)
 
-static inline unsigned long user_stack_pointer(struct pt_regs *regs)
-{
-	return regs->sp;
-}
+#include <asm-generic/ptrace.h>
 
 /* Query offset/name of register from its name/offset */
 extern int regs_query_register_offset(const char *name);
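
[Note: the new GET_IP/GET_FP/GET_USP macros are consumed by asm-generic/ptrace.h, which regenerates the inline helpers deleted above. A rough sketch of that generic pattern -- simplified, and reconstructed from the shape of the header rather than quoted from it (the real header also covers the *_set variants):

	/* Sketch: the generic header builds the accessors from the
	 * arch-provided GET_* macros, so each architecture only has to
	 * name its registers. */
	static inline unsigned long instruction_pointer(struct pt_regs *regs)
	{
		return GET_IP(regs);
	}

	static inline unsigned long frame_pointer(struct pt_regs *regs)
	{
		return GET_FP(regs);
	}

	static inline unsigned long user_stack_pointer(struct pt_regs *regs)
	{
		return GET_USP(regs);
	}
]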
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 83e2efd181e2..9db5583b6d38 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,6 +51,10 @@ extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
 extern unsigned long native_calibrate_tsc(void);
 
+#ifdef CONFIG_X86_64
+extern cycles_t vread_tsc(void);
+#endif
+
 /*
  * Boot-time check whether the TSCs are synchronized across
  * all CPUs/cores:
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 9064052b73de..bb0522850b74 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -1,20 +1,6 @@
 #ifndef _ASM_X86_VDSO_H
 #define _ASM_X86_VDSO_H
 
-#ifdef CONFIG_X86_64
-extern const char VDSO64_PRELINK[];
-
-/*
- * Given a pointer to the vDSO image, find the pointer to VDSO64_name
- * as that symbol is defined in the vDSO sources or linker script.
- */
-#define VDSO64_SYMBOL(base, name)					\
-({									\
-	extern const char VDSO64_##name[];				\
-	(void *)(VDSO64_##name - VDSO64_PRELINK + (unsigned long)(base)); \
-})
-#endif
-
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
 extern const char VDSO32_PRELINK[];
 
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 3d61e204826f..646b4c1ca695 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -23,8 +23,6 @@ struct vsyscall_gtod_data {
 	struct timespec wall_to_monotonic;
 	struct timespec wall_time_coarse;
 };
-extern struct vsyscall_gtod_data __vsyscall_gtod_data
-__section_vsyscall_gtod_data;
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d0983d255fbd..d55597351f6a 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -16,27 +16,19 @@ enum vsyscall_num {
 #ifdef __KERNEL__
 #include <linux/seqlock.h>
 
-#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
-#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
-
 /* Definitions for CONFIG_GENERIC_TIME definitions */
-#define __section_vsyscall_gtod_data __attribute__ \
-	((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
-#define __section_vsyscall_clock __attribute__ \
-	((unused, __section__ (".vsyscall_clock"),aligned(16)))
 #define __vsyscall_fn \
 	__attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
 
 #define VGETCPU_RDTSCP	1
 #define VGETCPU_LSL	2
 
-extern int __vgetcpu_mode;
-extern volatile unsigned long __jiffies;
-
 /* kernel space (writeable) */
 extern int vgetcpu_mode;
 extern struct timezone sys_tz;
 
+#include <asm/vvar.h>
+
 extern void map_vsyscall(void);
 
 #endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
new file mode 100644
index 000000000000..341b3559452b
--- /dev/null
+++ b/arch/x86/include/asm/vvar.h
@@ -0,0 +1,52 @@
+/*
+ * vvar.h: Shared vDSO/kernel variable declarations
+ * Copyright (c) 2011 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * A handful of variables are accessible (read-only) from userspace
+ * code in the vsyscall page and the vdso.  They are declared here.
+ * Some other file must define them with DEFINE_VVAR.
+ *
+ * In normal kernel code, they are used like any other variable.
+ * In user code, they are accessed through the VVAR macro.
+ *
+ * Each of these variables lives in the vsyscall page, and each
+ * one needs a unique offset within the little piece of the page
+ * reserved for vvars.  Specify that offset in DECLARE_VVAR.
+ * (There are 896 bytes available.  If you mess up, the linker will
+ *  catch it.)
+ */
+
+/* Offset of vars within vsyscall page */
+#define VSYSCALL_VARS_OFFSET (3072 + 128)
+
+#if defined(__VVAR_KERNEL_LDS)
+
+/* The kernel linker script defines its own magic to put vvars in the
+ * right place.
+ */
+#define DECLARE_VVAR(offset, type, name) \
+	EMIT_VVAR(name, VSYSCALL_VARS_OFFSET + offset)
+
+#else
+
+#define DECLARE_VVAR(offset, type, name)				\
+	static type const * const vvaraddr_ ## name =			\
+		(void *)(VSYSCALL_START + VSYSCALL_VARS_OFFSET + (offset));
+
+#define DEFINE_VVAR(type, name)						\
+	type __vvar_ ## name						\
+	__attribute__((section(".vsyscall_var_" #name), aligned(16)))
+
+#define VVAR(name) (*vvaraddr_ ## name)
+
+#endif
+
+/* DECLARE_VVAR(offset, type, name) */
+
+DECLARE_VVAR(0, volatile unsigned long, jiffies)
+DECLARE_VVAR(8, int, vgetcpu_mode)
+DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
+
+#undef DECLARE_VVAR
+#undef VSYSCALL_VARS_OFFSET
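
[Note: the trick here is that one DECLARE_VVAR line expands two ways. In the kernel linker script (via EMIT_VVAR) it places the variable at a fixed offset in the vsyscall page; in user-context code it becomes a const pointer to that fixed address. A stripped-down sketch of what the user-side expansion of DECLARE_VVAR(8, int, vgetcpu_mode) boils down to, with the address arithmetic taken from the macros above:

	/* Sketch of the user-context expansion; VSYSCALL_START is the
	 * fixed vsyscall base. */
	static int const * const vvaraddr_vgetcpu_mode =
		(void *)(VSYSCALL_START + (3072 + 128) + 8);
	#define VVAR(name) (*vvaraddr_ ## name)

	/* vDSO code then reads it like a normal variable:
	 *	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) ...
	 */
]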
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 8508bfe52296..d240ea950519 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -447,6 +447,13 @@ HYPERVISOR_hvm_op(int op, void *arg)
 	return _hypercall2(unsigned long, hvm_op, op, arg);
 }
 
+static inline int
+HYPERVISOR_tmem_op(
+	struct tmem_op *op)
+{
+	return _hypercall1(int, tmem_op, op);
+}
+
 static inline void
 MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
 {
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 250806472a7e..f5abe3a245b8 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -8,7 +8,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
-CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_pvclock.o = -pg
@@ -24,13 +23,16 @@ endif
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_vsyscall_64.o	:= $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o		:= $(nostackp)
-CFLAGS_tsc.o		:= $(nostackp)
+CFLAGS_vread_tsc_64.o	:= $(nostackp)
 CFLAGS_paravirt.o	:= $(nostackp)
 GCOV_PROFILE_vsyscall_64.o	:= n
 GCOV_PROFILE_hpet.o		:= n
 GCOV_PROFILE_tsc.o		:= n
 GCOV_PROFILE_paravirt.o		:= n
 
+# vread_tsc_64 is hot and should be fully optimized:
+CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls
+
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o
@@ -39,7 +41,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-y			+= probe_roms.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
-obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
+obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o vread_tsc_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 605e5ae19c7f..a3e5948670c2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -946,6 +946,8 @@ void __init setup_arch(char **cmdline_p)
 	if (init_ohci1394_dma_early)
 		init_ohci1394_dma_on_all_controllers();
 #endif
+	/* Allocate bigger log buffer */
+	setup_log_buf(1);
 
 	reserve_initrd();
 
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 998e972f3b1a..30ac65df7d4e 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -110,7 +110,6 @@ static struct mm_struct tboot_mm = {
 	.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
 	.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
-	.cpu_vm_mask = CPU_MASK_ALL,
 };
 
 static inline void switch_to_tboot_pt(void)
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 25a28a245937..00cbb272627f 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_64
-volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 9335bf7dd2e7..6cc6922262af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -763,25 +763,6 @@ static cycle_t read_tsc(struct clocksource *cs)
 		ret : clocksource_tsc.cycle_last;
 }
 
-#ifdef CONFIG_X86_64
-static cycle_t __vsyscall_fn vread_tsc(void)
-{
-	cycle_t ret;
-
-	/*
-	 * Surround the RDTSC by barriers, to make sure it's not
-	 * speculated to outside the seqlock critical section and
-	 * does not cause time warps:
-	 */
-	rdtsc_barrier();
-	ret = (cycle_t)vget_cycles();
-	rdtsc_barrier();
-
-	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
-		ret : __vsyscall_gtod_data.clock.cycle_last;
-}
-#endif
-
 static void resume_tsc(struct clocksource *cs)
 {
 	clocksource_tsc.cycle_last = 0;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 61682f0ac264..89aed99aafce 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -161,6 +161,12 @@ SECTIONS
 
 #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
 #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+#define EMIT_VVAR(x, offset) .vsyscall_var_ ## x	\
+	ADDR(.vsyscall_0) + offset			\
+	: AT(VLOAD(.vsyscall_var_ ## x)) {		\
+		*(.vsyscall_var_ ## x)			\
+	}						\
+	x = VVIRT(.vsyscall_var_ ## x);
 
 	. = ALIGN(4096);
 	__vsyscall_0 = .;
@@ -175,18 +181,6 @@
 		*(.vsyscall_fn)
 	}
 
-	. = ALIGN(L1_CACHE_BYTES);
-	.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
-		*(.vsyscall_gtod_data)
-	}
-
-	vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
-	.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
-		*(.vsyscall_clock)
-	}
-	vsyscall_clock = VVIRT(.vsyscall_clock);
-
-
 	.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
 		*(.vsyscall_1)
 	}
@@ -194,21 +188,14 @@
 		*(.vsyscall_2)
 	}
 
-	.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
-		*(.vgetcpu_mode)
-	}
-	vgetcpu_mode = VVIRT(.vgetcpu_mode);
-
-	. = ALIGN(L1_CACHE_BYTES);
-	.jiffies : AT(VLOAD(.jiffies)) {
-		*(.jiffies)
-	}
-	jiffies = VVIRT(.jiffies);
-
 	.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
 		*(.vsyscall_3)
 	}
 
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+
 	. = __vsyscall_0 + PAGE_SIZE;
 
 #undef VSYSCALL_ADDR
@@ -216,6 +203,7 @@
 #undef VLOAD
 #undef VVIRT_OFFSET
 #undef VVIRT
+#undef EMIT_VVAR
 
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c
new file mode 100644
index 000000000000..a81aa9e9894c
--- /dev/null
+++ b/arch/x86/kernel/vread_tsc_64.c
@@ -0,0 +1,36 @@
+/* This code runs in userspace. */
+
+#define DISABLE_BRANCH_PROFILING
+#include <asm/vgtod.h>
+
+notrace cycle_t __vsyscall_fn vread_tsc(void)
+{
+	cycle_t ret;
+	u64 last;
+
+	/*
+	 * Empirically, a fence (of type that depends on the CPU)
+	 * before rdtsc is enough to ensure that rdtsc is ordered
+	 * with respect to loads.  The various CPU manuals are unclear
+	 * as to whether rdtsc can be reordered with later loads,
+	 * but no one has ever seen it happen.
+	 */
+	rdtsc_barrier();
+	ret = (cycle_t)vget_cycles();
+
+	last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	/*
+	 * GCC likes to generate cmov here, but this branch is extremely
+	 * predictable (it's just a function of time and the likely is
+	 * very likely) and there's a data dependence, so force GCC
+	 * to generate a branch instead.  I don't barrier() because
+	 * we don't actually need a barrier, and if this function
+	 * ever gets inlined it will generate worse code.
+	 */
+	asm volatile ("");
+	return last;
+}
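
[Note: the empty asm at the end is worth calling out -- it is a compiler-scheduling trick, not a memory barrier. Without it, GCC tends to compile the clamp as a cmov, which puts the load of the (almost never needed) last-cycle value on every call's critical path. A minimal sketch of the same idiom in isolation, assuming a GCC-class compiler:

	/* Sketch: force a predictable branch instead of cmov so the
	 * cold path (time appearing to go backwards) costs nothing on
	 * the hot path. */
	static inline u64 clamp_monotonic(u64 now, u64 last)
	{
		if (likely(now >= last))
			return now;

		asm volatile ("");	/* optimization barrier only */
		return last;
	}
]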
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c4b694..3e682184d76c 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -49,17 +49,10 @@
 	__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
 #define __syscall_clobber "r11","cx","memory"
 
-/*
- * vsyscall_gtod_data contains data that is :
- * - readonly from vsyscalls
- * - written by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
- * Try to keep this structure as small as possible to avoid cache line ping pongs
- */
-int __vgetcpu_mode __section_vgetcpu_mode;
-
-struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
+DEFINE_VVAR(int, vgetcpu_mode);
+DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
 {
-	.lock = SEQLOCK_UNLOCKED,
+	.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
 	.sysctl_enabled = 1,
 };
 
@@ -97,7 +90,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
  */
 static __always_inline void do_get_tz(struct timezone * tz)
 {
-	*tz = __vsyscall_gtod_data.sys_tz;
+	*tz = VVAR(vsyscall_gtod_data).sys_tz;
 }
 
 static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
@@ -126,23 +119,24 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
 	unsigned long mult, shift, nsec;
 	cycle_t (*vread)(void);
 	do {
-		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+		seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
 
-		vread = __vsyscall_gtod_data.clock.vread;
-		if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
+		vread = VVAR(vsyscall_gtod_data).clock.vread;
+		if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled ||
+			     !vread)) {
 			gettimeofday(tv,NULL);
 			return;
 		}
 
 		now = vread();
-		base = __vsyscall_gtod_data.clock.cycle_last;
-		mask = __vsyscall_gtod_data.clock.mask;
-		mult = __vsyscall_gtod_data.clock.mult;
-		shift = __vsyscall_gtod_data.clock.shift;
+		base = VVAR(vsyscall_gtod_data).clock.cycle_last;
+		mask = VVAR(vsyscall_gtod_data).clock.mask;
+		mult = VVAR(vsyscall_gtod_data).clock.mult;
+		shift = VVAR(vsyscall_gtod_data).clock.shift;
 
-		tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
-		nsec = __vsyscall_gtod_data.wall_time_nsec;
-	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+		tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec;
+		nsec = VVAR(vsyscall_gtod_data).wall_time_nsec;
+	} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
 
 	/* calculate interval: */
 	cycle_delta = (now - base) & mask;
@@ -171,15 +165,15 @@ time_t __vsyscall(1) vtime(time_t *t)
 {
 	unsigned seq;
 	time_t result;
-	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
+	if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
 		return time_syscall(t);
 
 	do {
-		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+		seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
 
-		result = __vsyscall_gtod_data.wall_time_sec;
+		result = VVAR(vsyscall_gtod_data).wall_time_sec;
 
-	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+	} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
 
 	if (t)
 		*t = result;
@@ -208,9 +202,9 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 	   We do this here because otherwise user space would do it on
 	   its own in a likely inferior way (no access to jiffies).
 	   If you don't like it pass NULL. */
-	if (tcache && tcache->blob[0] == (j = __jiffies)) {
+	if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) {
 		p = tcache->blob[1];
-	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
+	} else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
 		native_read_tscp(&p);
 	} else {
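
[Note: every reader in this file uses the same lockless seqlock pattern against the now-VVAR-hosted gtod data -- sample the sequence counter, copy the fields, retry if a writer interleaved. The skeleton, extracted for clarity with the same names as the code above:

	unsigned seq;
	time_t secs;

	do {
		seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
		secs = VVAR(vsyscall_gtod_data).wall_time_sec;
	} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
]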
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 28418054b880..bd14bb4c8594 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3545,10 +3545,11 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
 	return kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
 }
 
-static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct kvm *kvm;
 	struct kvm *kvm_freed = NULL;
+	int nr_to_scan = sc->nr_to_scan;
 
 	if (nr_to_scan == 0)
 		goto out;
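
[Note: this tracks the tree-wide shrinker interface change -- the per-call arguments are now bundled in struct shrink_control instead of being passed individually. As this series assumes it, the structure carries roughly the following; treat the exact layout as a sketch of the mm API of this period:

	struct shrink_control {
		gfp_t gfp_mask;			/* allocation context */
		unsigned long nr_to_scan;	/* objects to try to free */
	};
]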
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index bcb394dfbb35..f7a2a054a3c0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -965,7 +965,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	struct mm_struct *mm;
 	int fault;
 	int write = error_code & PF_WRITE;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY |
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
 					(write ? FAULT_FLAG_WRITE : 0);
 
 	tsk = current;
@@ -1139,6 +1139,16 @@ good_area:
 	}
 
 	/*
+	 * Pagefault was interrupted by SIGKILL. We have no reason to
+	 * continue pagefault.
+	 */
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+		if (!(error_code & PF_USER))
+			no_context(regs, error_code, address);
+		return;
+	}
+
+	/*
 	 * Major/minor page fault accounting is only done on the
 	 * initial attempt. If we go through a retry, it is extremely
 	 * likely that the page will be found in page cache at that point.
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index d4203988504a..f581a18c0d4d 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -72,7 +72,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (!vma_shareable(vma, addr))
 		return;
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
 		if (svma == vma)
 			continue;
@@ -97,7 +97,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	put_page(virt_to_page(spte));
 	spin_unlock(&mm->page_table_lock);
 out:
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 }
 
 /*
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 37b8b0fe8320..30326443ab81 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -16,8 +16,6 @@
 #include <asm/tlb.h>
 #include <asm/proto.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 unsigned long __initdata pgt_buf_start;
 unsigned long __meminitdata pgt_buf_end;
 unsigned long __meminitdata pgt_buf_top;
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index b6552b189bcd..bef0bc962400 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -11,7 +11,7 @@ vdso-install-$(VDSO32-y) += $(vdso32-images)
 
 
 # files to link into the vdso
-vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
 
 # files to link into kernel
 obj-$(VDSO64-y)		+= vma.o vdso.o
@@ -37,11 +37,24 @@ $(obj)/%.so: OBJCOPYFLAGS := -S
 $(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 
+#
+# Don't omit frame pointers for ease of userspace debugging, but do
+# optimize sibling calls.
+#
 CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
-       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector)
+       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
+       -fno-omit-frame-pointer -foptimize-sibling-calls
 
 $(vobjs): KBUILD_CFLAGS += $(CFL)
 
+#
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+#
+CFLAGS_REMOVE_vdso-note.o = -pg
+CFLAGS_REMOVE_vclock_gettime.o = -pg
+CFLAGS_REMOVE_vgetcpu.o = -pg
+CFLAGS_REMOVE_vvar.o = -pg
+
 targets += vdso-syms.lds
 obj-$(VDSO64-y)			+= vdso-syms.lds
 
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index ee55754cc3c5..a724905fdae7 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -2,7 +2,7 @@
  * Copyright 2006 Andi Kleen, SUSE Labs.
  * Subject to the GNU Public License, v.2
  *
- * Fast user context implementation of clock_gettime and gettimeofday.
+ * Fast user context implementation of clock_gettime, gettimeofday, and time.
  *
  * The code should have no internal unresolved relocations.
  * Check with readelf after changing.
@@ -22,9 +22,8 @@
 #include <asm/hpet.h>
 #include <asm/unistd.h>
 #include <asm/io.h>
-#include "vextern.h"
 
-#define gtod vdso_vsyscall_gtod_data
+#define gtod (&VVAR(vsyscall_gtod_data))
 
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
@@ -56,22 +55,6 @@ notrace static noinline int do_realtime(struct timespec *ts)
 	return 0;
 }
 
-/* Copy of the version in kernel/time.c which we cannot directly access */
-notrace static void
-vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
-{
-	while (nsec >= NSEC_PER_SEC) {
-		nsec -= NSEC_PER_SEC;
-		++sec;
-	}
-	while (nsec < 0) {
-		nsec += NSEC_PER_SEC;
-		--sec;
-	}
-	ts->tv_sec = sec;
-	ts->tv_nsec = nsec;
-}
-
 notrace static noinline int do_monotonic(struct timespec *ts)
 {
 	unsigned long seq, ns, secs;
@@ -82,7 +65,17 @@ notrace static noinline int do_monotonic(struct timespec *ts)
 		secs += gtod->wall_to_monotonic.tv_sec;
 		ns += gtod->wall_to_monotonic.tv_nsec;
 	} while (unlikely(read_seqretry(&gtod->lock, seq)));
-	vset_normalized_timespec(ts, secs, ns);
+
+	/* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec
+	 * are all guaranteed to be nonnegative.
+	 */
+	while (ns >= NSEC_PER_SEC) {
+		ns -= NSEC_PER_SEC;
+		++secs;
+	}
+	ts->tv_sec = secs;
+	ts->tv_nsec = ns;
+
 	return 0;
 }
 
@@ -107,7 +100,17 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts)
 		secs += gtod->wall_to_monotonic.tv_sec;
 		ns += gtod->wall_to_monotonic.tv_nsec;
 	} while (unlikely(read_seqretry(&gtod->lock, seq)));
-	vset_normalized_timespec(ts, secs, ns);
+
+	/* wall_time_nsec and wall_to_monotonic.tv_nsec are
+	 * guaranteed to be between 0 and NSEC_PER_SEC.
+	 */
+	if (ns >= NSEC_PER_SEC) {
+		ns -= NSEC_PER_SEC;
+		++secs;
+	}
+	ts->tv_sec = secs;
+	ts->tv_nsec = ns;
+
 	return 0;
 }
 
@@ -157,3 +160,32 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 }
 int gettimeofday(struct timeval *, struct timezone *)
 	__attribute__((weak, alias("__vdso_gettimeofday")));
+
+/* This will break when the xtime seconds get inaccurate, but that is
+ * unlikely */
+
+static __always_inline long time_syscall(long *t)
+{
+	long secs;
+	asm volatile("syscall"
+		     : "=a" (secs)
+		     : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory");
+	return secs;
+}
+
+notrace time_t __vdso_time(time_t *t)
+{
+	time_t result;
+
+	if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
+		return time_syscall(t);
+
+	/* This is atomic on x86_64 so we don't need any locks. */
+	result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
+
+	if (t)
+		*t = result;
+	return result;
+}
+int time(time_t *t)
+	__attribute__((weak, alias("__vdso_time")));
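
[Note: from userspace these entry points are ordinary weak symbols in the vDSO image; once the C library routes time() and clock_gettime() through them, neither call needs to enter the kernel when the TSC clocksource is usable. A hedged usage sketch from the calling side:

	#include <stdio.h>
	#include <time.h>

	int main(void)
	{
		struct timespec ts;
		time_t t = time(NULL);		/* may resolve to __vdso_time */

		clock_gettime(CLOCK_MONOTONIC, &ts); /* __vdso_clock_gettime */
		printf("%ld %ld.%09ld\n", (long)t,
		       (long)ts.tv_sec, ts.tv_nsec);
		return 0;
	}
]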
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
index 4e5dd3b4de7f..b96b2677cad8 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/vdso/vdso.lds.S
@@ -23,15 +23,10 @@ VERSION {
 		__vdso_gettimeofday;
 		getcpu;
 		__vdso_getcpu;
+		time;
+		__vdso_time;
 	local: *;
 	};
 }
 
 VDSO64_PRELINK = VDSO_PRELINK;
-
-/*
- * Define VDSO64_x for each VEXTERN(x), for use via VDSO64_SYMBOL.
- */
-#define VEXTERN(x)	VDSO64_ ## x = vdso_ ## x;
-#include "vextern.h"
-#undef VEXTERN
diff --git a/arch/x86/vdso/vextern.h b/arch/x86/vdso/vextern.h
deleted file mode 100644
index 1683ba2ae3e8..000000000000
--- a/arch/x86/vdso/vextern.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef VEXTERN
-#include <asm/vsyscall.h>
-#define VEXTERN(x) \
-	extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden")));
-#endif
-
-#define VMAGIC 0xfeedbabeabcdefabUL
-
-/* Any kernel variables used in the vDSO must be exported in the main
-   kernel's vmlinux.lds.S/vsyscall.h/proper __section and
-   put into vextern.h and be referenced as a pointer with vdso prefix.
-   The main kernel later fills in the values. */
-
-VEXTERN(jiffies)
-VEXTERN(vgetcpu_mode)
-VEXTERN(vsyscall_gtod_data)
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 9fbc6b20026b..5463ad558573 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -11,14 +11,13 @@
 #include <linux/time.h>
 #include <asm/vsyscall.h>
 #include <asm/vgtod.h>
-#include "vextern.h"
 
 notrace long
 __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
 	unsigned int p;
 
-	if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
+	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
 		native_read_tscp(&p);
 	} else {
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 4b5d26f108bb..7abd2be0f9b9 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -15,9 +15,6 @@
 #include <asm/proto.h>
 #include <asm/vdso.h>
 
-#include "vextern.h"		/* Just for VMAGIC. */
-#undef VEXTERN
-
 unsigned int __read_mostly vdso_enabled = 1;
 
 extern char vdso_start[], vdso_end[];
@@ -26,20 +23,10 @@ extern unsigned short vdso_sync_cpuid;
 static struct page **vdso_pages;
 static unsigned vdso_size;
 
-static inline void *var_ref(void *p, char *name)
-{
-	if (*(void **)p != (void *)VMAGIC) {
-		printk("VDSO: variable %s broken\n", name);
-		vdso_enabled = 0;
-	}
-	return p;
-}
-
 static int __init init_vdso_vars(void)
 {
 	int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
 	int i;
-	char *vbase;
 
 	vdso_size = npages << PAGE_SHIFT;
 	vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
@@ -54,20 +41,6 @@ static int __init init_vdso_vars(void)
 		copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
 	}
 
-	vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
-	if (!vbase)
-		goto oom;
-
-	if (memcmp(vbase, "\177ELF", 4)) {
-		printk("VDSO: I'm broken; not ELF\n");
-		vdso_enabled = 0;
-	}
-
-#define VEXTERN(x) \
-	*(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x;
-#include "vextern.h"
-#undef VEXTERN
-	vunmap(vbase);
 	return 0;
 
  oom:
diff --git a/arch/x86/vdso/vvar.c b/arch/x86/vdso/vvar.c
deleted file mode 100644
index 1b7e703684f9..000000000000
--- a/arch/x86/vdso/vvar.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/* Define pointer to external vDSO variables.
-   These are part of the vDSO. The kernel fills in the real addresses
-   at boot time. This is done because when the vdso is linked the
-   kernel isn't yet and we don't know the final addresses. */
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <asm/vsyscall.h>
-#include <asm/timex.h>
-#include <asm/vgtod.h>
-
-#define VEXTERN(x) typeof (__ ## x) *const vdso_ ## x = (void *)VMAGIC;
-#include "vextern.h"
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 02d752460371..dc708dcc62f1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -75,67 +75,12 @@
 #include "mmu.h"
 #include "debugfs.h"
 
-#define MMU_UPDATE_HISTO	30
-
 /*
  * Protects atomic reservation decrease/increase against concurrent increases.
  * Also protects non-atomic updates of current_pages and balloon lists.
  */
 DEFINE_SPINLOCK(xen_reservation_lock);
 
-#ifdef CONFIG_XEN_DEBUG_FS
-
-static struct {
-	u32 pgd_update;
-	u32 pgd_update_pinned;
-	u32 pgd_update_batched;
-
-	u32 pud_update;
-	u32 pud_update_pinned;
-	u32 pud_update_batched;
-
-	u32 pmd_update;
-	u32 pmd_update_pinned;
-	u32 pmd_update_batched;
-
-	u32 pte_update;
-	u32 pte_update_pinned;
-	u32 pte_update_batched;
-
-	u32 mmu_update;
-	u32 mmu_update_extended;
-	u32 mmu_update_histo[MMU_UPDATE_HISTO];
-
-	u32 prot_commit;
-	u32 prot_commit_batched;
-
-	u32 set_pte_at;
-	u32 set_pte_at_batched;
-	u32 set_pte_at_pinned;
-	u32 set_pte_at_current;
-	u32 set_pte_at_kernel;
-} mmu_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
-	if (unlikely(zero_stats)) {
-		memset(&mmu_stats, 0, sizeof(mmu_stats));
-		zero_stats = 0;
-	}
-}
-
-#define ADD_STATS(elem, val)			\
-	do { check_zero(); mmu_stats.elem += (val); } while(0)
-
-#else  /* !CONFIG_XEN_DEBUG_FS */
-
-#define ADD_STATS(elem, val)	do { (void)(val); } while(0)
-
-#endif /* CONFIG_XEN_DEBUG_FS */
-
-
 /*
  * Identity map, in addition to plain kernel map.  This needs to be
  * large enough to allocate page table pages to allocate the rest.
@@ -243,11 +188,6 @@ static bool xen_page_pinned(void *ptr)
 	return PagePinned(page);
 }
 
-static bool xen_iomap_pte(pte_t pte)
-{
-	return pte_flags(pte) & _PAGE_IOMAP;
-}
-
 void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
 {
 	struct multicall_space mcs;
@@ -257,7 +197,7 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
 	u = mcs.args;
 
 	/* ptep might be kmapped when using 32-bit HIGHPTE */
-	u->ptr = arbitrary_virt_to_machine(ptep).maddr;
+	u->ptr = virt_to_machine(ptep).maddr;
 	u->val = pte_val_ma(pteval);
 
 	MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
@@ -266,11 +206,6 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
 }
 EXPORT_SYMBOL_GPL(xen_set_domain_pte);
 
-static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
-{
-	xen_set_domain_pte(ptep, pteval, DOMID_IO);
-}
-
 static void xen_extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
@@ -279,27 +214,17 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
 	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
 
 	if (mcs.mc != NULL) {
-		ADD_STATS(mmu_update_extended, 1);
-		ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1);
-
 		mcs.mc->args[1]++;
-
-		if (mcs.mc->args[1] < MMU_UPDATE_HISTO)
-			ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1);
-		else
-			ADD_STATS(mmu_update_histo[0], 1);
 	} else {
-		ADD_STATS(mmu_update, 1);
 		mcs = __xen_mc_entry(sizeof(*u));
 		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
-		ADD_STATS(mmu_update_histo[1], 1);
 	}
 
 	u = mcs.args;
 	*u = *update;
 }
 
-void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 {
 	struct mmu_update u;
 
@@ -312,17 +237,13 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 	u.val = pmd_val_ma(val);
 	xen_extend_mmu_update(&u);
 
-	ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
 }
 
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static void xen_set_pmd(pmd_t *ptr, pmd_t val)
 {
-	ADD_STATS(pmd_update, 1);
-
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!xen_page_pinned(ptr)) {
@@ -330,8 +251,6 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
 		return;
 	}
 
-	ADD_STATS(pmd_update_pinned, 1);
-
 	xen_set_pmd_hyper(ptr, val);
 }
 
@@ -344,35 +263,34 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 	set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
 }
 
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
-		    pte_t *ptep, pte_t pteval)
+static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
 {
-	if (xen_iomap_pte(pteval)) {
-		xen_set_iomap_pte(ptep, pteval);
-		goto out;
-	}
+	struct mmu_update u;
 
-	ADD_STATS(set_pte_at, 1);
-//	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
-	ADD_STATS(set_pte_at_current, mm == current->mm);
-	ADD_STATS(set_pte_at_kernel, mm == &init_mm);
+	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
+		return false;
 
-	if (mm == current->mm || mm == &init_mm) {
-		if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-			struct multicall_space mcs;
-			mcs = xen_mc_entry(0);
+	xen_mc_batch();
 
-			MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
-			ADD_STATS(set_pte_at_batched, 1);
-			xen_mc_issue(PARAVIRT_LAZY_MMU);
-			goto out;
-		} else
-			if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
-				goto out;
-	}
-	xen_set_pte(ptep, pteval);
+	u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
+	u.val = pte_val_ma(pteval);
+	xen_extend_mmu_update(&u);
 
-out:	return;
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	return true;
+}
+
+static void xen_set_pte(pte_t *ptep, pte_t pteval)
+{
+	if (!xen_batched_set_pte(ptep, pteval))
+		native_set_pte(ptep, pteval);
+}
+
+static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
+		 pte_t *ptep, pte_t pteval)
+{
+	xen_set_pte(ptep, pteval);
 }
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@ -389,13 +307,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 
 	xen_mc_batch();
 
-	u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
 	u.val = pte_val_ma(pte);
 	xen_extend_mmu_update(&u);
 
-	ADD_STATS(prot_commit, 1);
-	ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
@@ -463,7 +378,7 @@ static pteval_t iomap_pte(pteval_t val)
 	return val;
 }
 
-pteval_t xen_pte_val(pte_t pte)
+static pteval_t xen_pte_val(pte_t pte)
 {
 	pteval_t pteval = pte.pte;
 
@@ -480,7 +395,7 @@ pteval_t xen_pte_val(pte_t pte)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
 
-pgdval_t xen_pgd_val(pgd_t pgd)
+static pgdval_t xen_pgd_val(pgd_t pgd)
 {
 	return pte_mfn_to_pfn(pgd.pgd);
 }
@@ -511,7 +426,7 @@ void xen_set_pat(u64 pat)
 	WARN_ON(pat != 0x0007010600070106ull);
 }
 
-pte_t xen_make_pte(pteval_t pte)
+static pte_t xen_make_pte(pteval_t pte)
 {
 	phys_addr_t addr = (pte & PTE_PFN_MASK);
 
@@ -581,20 +496,20 @@ pte_t xen_make_pte_debug(pteval_t pte)
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
 #endif
 
-pgd_t xen_make_pgd(pgdval_t pgd)
+static pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	pgd = pte_pfn_to_mfn(pgd);
 	return native_make_pgd(pgd);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
 
-pmdval_t xen_pmd_val(pmd_t pmd)
+static pmdval_t xen_pmd_val(pmd_t pmd)
 {
 	return pte_mfn_to_pfn(pmd.pmd);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
 
-void xen_set_pud_hyper(pud_t *ptr, pud_t val)
+static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 {
 	struct mmu_update u;
 
@@ -607,17 +522,13 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 	u.val = pud_val_ma(val);
 	xen_extend_mmu_update(&u);
 
-	ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
 }
 
-void xen_set_pud(pud_t *ptr, pud_t val)
+static void xen_set_pud(pud_t *ptr, pud_t val)
 {
-	ADD_STATS(pud_update, 1);
-
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!xen_page_pinned(ptr)) {
@@ -625,56 +536,28 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 		return;
 	}
 
-	ADD_STATS(pud_update_pinned, 1);
-
 	xen_set_pud_hyper(ptr, val);
 }
 
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
-	if (xen_iomap_pte(pte)) {
-		xen_set_iomap_pte(ptep, pte);
-		return;
-	}
-
-	ADD_STATS(pte_update, 1);
-//	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
-	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
 #ifdef CONFIG_X86_PAE
-	ptep->pte_high = pte.pte_high;
-	smp_wmb();
-	ptep->pte_low = pte.pte_low;
-#else
-	*ptep = pte;
-#endif
-}
-
-#ifdef CONFIG_X86_PAE
-void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
+static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
-	if (xen_iomap_pte(pte)) {
-		xen_set_iomap_pte(ptep, pte);
-		return;
-	}
-
 	set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
-void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	ptep->pte_low = 0;
-	smp_wmb();		/* make sure low gets written first */
-	ptep->pte_high = 0;
+	if (!xen_batched_set_pte(ptep, native_make_pte(0)))
+		native_pte_clear(mm, addr, ptep);
 }
 
-void xen_pmd_clear(pmd_t *pmdp)
+static void xen_pmd_clear(pmd_t *pmdp)
 {
 	set_pmd(pmdp, __pmd(0));
 }
 #endif /* CONFIG_X86_PAE */
 
-pmd_t xen_make_pmd(pmdval_t pmd)
+static pmd_t xen_make_pmd(pmdval_t pmd)
 {
 	pmd = pte_pfn_to_mfn(pmd);
 	return native_make_pmd(pmd);
@@ -682,13 +565,13 @@ pmd_t xen_make_pmd(pmdval_t pmd)
682PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); 565PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
683 566
684#if PAGETABLE_LEVELS == 4 567#if PAGETABLE_LEVELS == 4
685pudval_t xen_pud_val(pud_t pud) 568static pudval_t xen_pud_val(pud_t pud)
686{ 569{
687 return pte_mfn_to_pfn(pud.pud); 570 return pte_mfn_to_pfn(pud.pud);
688} 571}
689PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); 572PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
690 573
691pud_t xen_make_pud(pudval_t pud) 574static pud_t xen_make_pud(pudval_t pud)
692{ 575{
693 pud = pte_pfn_to_mfn(pud); 576 pud = pte_pfn_to_mfn(pud);
694 577
@@ -696,7 +579,7 @@ pud_t xen_make_pud(pudval_t pud)
696} 579}
697PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); 580PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
698 581
699pgd_t *xen_get_user_pgd(pgd_t *pgd) 582static pgd_t *xen_get_user_pgd(pgd_t *pgd)
700{ 583{
701 pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); 584 pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
702 unsigned offset = pgd - pgd_page; 585 unsigned offset = pgd - pgd_page;
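
xen_make_pud() and xen_pud_val() differ from their native counterparts only in translating the frame number between the guest's pseudo-physical space (pfn) and real machine frames (mfn) while leaving the flag bits untouched. A toy model of that round trip; the four-entry table and the shift are illustrative, where the kernel uses its p2m tree and the hypervisor-maintained m2p array:

#include <stdint.h>
#include <stdio.h>

#define FRAME_SHIFT 12
#define FLAGS_MASK  ((1ULL << FRAME_SHIFT) - 1)

static const uint64_t p2m[4] = { 7, 3, 9, 1 };    /* toy pfn -> mfn table */

static uint64_t entry_pfn_to_mfn(uint64_t e)      /* like pte_pfn_to_mfn() */
{
        return (p2m[e >> FRAME_SHIFT] << FRAME_SHIFT) | (e & FLAGS_MASK);
}

static uint64_t entry_mfn_to_pfn(uint64_t e)      /* like pte_mfn_to_pfn() */
{
        uint64_t mfn = e >> FRAME_SHIFT, pfn;

        for (pfn = 0; p2m[pfn] != mfn; pfn++)
                ;                                 /* toy m2p lookup */
        return (pfn << FRAME_SHIFT) | (e & FLAGS_MASK);
}

int main(void)
{
        uint64_t pud = (2ULL << FRAME_SHIFT) | 0x63;   /* pfn 2 plus flags */
        uint64_t machine = entry_pfn_to_mfn(pud);

        printf("%llx -> %llx -> %llx\n",
               (unsigned long long)pud,
               (unsigned long long)machine,
               (unsigned long long)entry_mfn_to_pfn(machine));
        return 0;
}
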
@@ -728,7 +611,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
728 * 2. It is always pinned 611 * 2. It is always pinned
729 * 3. It has no user pagetable attached to it 612 * 3. It has no user pagetable attached to it
730 */ 613 */
731void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) 614static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
732{ 615{
733 preempt_disable(); 616 preempt_disable();
734 617
@@ -741,12 +624,10 @@ void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
741 preempt_enable(); 624 preempt_enable();
742} 625}
743 626
744void xen_set_pgd(pgd_t *ptr, pgd_t val) 627static void xen_set_pgd(pgd_t *ptr, pgd_t val)
745{ 628{
746 pgd_t *user_ptr = xen_get_user_pgd(ptr); 629 pgd_t *user_ptr = xen_get_user_pgd(ptr);
747 630
748 ADD_STATS(pgd_update, 1);
749
750 /* If page is not pinned, we can just update the entry 631 /* If page is not pinned, we can just update the entry
751 directly */ 632 directly */
752 if (!xen_page_pinned(ptr)) { 633 if (!xen_page_pinned(ptr)) {
@@ -758,9 +639,6 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
758 return; 639 return;
759 } 640 }
760 641
761 ADD_STATS(pgd_update_pinned, 1);
762 ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
763
764 /* If it's pinned, then we can at least batch the kernel and 642 /* If it's pinned, then we can at least batch the kernel and
765 user updates together. */ 643 user updates together. */
766 xen_mc_batch(); 644 xen_mc_batch();
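
The batch that xen_set_pgd() opens here lets the kernel pgd write and the optional user pgd write reach Xen as one multicall instead of two hypercalls. A sketch of the queue-then-issue idea, with a toy in-memory queue standing in for the kernel's per-CPU multicall buffer (all names are illustrative):

struct update {
        unsigned long *ptr;
        unsigned long val;
};

static struct update queue[8];
static int queued;

static void mc_batch(void)                /* stand-in for xen_mc_batch() */
{
        queued = 0;
}

static void mc_queue(unsigned long *p, unsigned long v)
{
        queue[queued].ptr = p;
        queue[queued].val = v;
        queued++;
}

static void mc_issue(void)                /* stand-in for xen_mc_issue() */
{
        int i;

        /* One simulated boundary crossing applies every queued update. */
        for (i = 0; i < queued; i++)
                *queue[i].ptr = queue[i].val;
        queued = 0;
}

int main(void)
{
        unsigned long kernel_pgd = 0, user_pgd = 0;

        mc_batch();
        mc_queue(&kernel_pgd, 0x1000);    /* kernel half */
        mc_queue(&user_pgd, 0x2000);      /* user half, when present */
        mc_issue();
        return kernel_pgd == 0x1000 && user_pgd == 0x2000 ? 0 : 1;
}
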
@@ -1162,14 +1040,14 @@ void xen_mm_unpin_all(void)
1162 spin_unlock(&pgd_lock); 1040 spin_unlock(&pgd_lock);
1163} 1041}
1164 1042
1165void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) 1043static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
1166{ 1044{
1167 spin_lock(&next->page_table_lock); 1045 spin_lock(&next->page_table_lock);
1168 xen_pgd_pin(next); 1046 xen_pgd_pin(next);
1169 spin_unlock(&next->page_table_lock); 1047 spin_unlock(&next->page_table_lock);
1170} 1048}
1171 1049
1172void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 1050static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
1173{ 1051{
1174 spin_lock(&mm->page_table_lock); 1052 spin_lock(&mm->page_table_lock);
1175 xen_pgd_pin(mm); 1053 xen_pgd_pin(mm);
@@ -1256,7 +1134,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1256 * pagetable because of lazy tlb flushing. This means we need to 1134 * pagetable because of lazy tlb flushing. This means we need to
1257 * switch all CPUs off this pagetable before we can unpin it. 1135 * switch all CPUs off this pagetable before we can unpin it.
1258 */ 1136 */
1259void xen_exit_mmap(struct mm_struct *mm) 1137static void xen_exit_mmap(struct mm_struct *mm)
1260{ 1138{
1261 get_cpu(); /* make sure we don't move around */ 1139 get_cpu(); /* make sure we don't move around */
1262 xen_drop_mm_ref(mm); 1140 xen_drop_mm_ref(mm);
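
get_cpu() in xen_exit_mmap() is preempt_disable() plus the current CPU id: the task must not migrate while it is telling every other CPU to stop referencing the pagetable it is about to unpin. A schematic of the bracket only, with stubs in place of the real preemption and cross-CPU machinery (nothing here is a kernel symbol):

static void migration_off(void)           /* stands in for get_cpu() */
{
        /* preempt_disable() in the kernel; also returns the CPU id */
}

static void migration_on(void)            /* stands in for put_cpu() */
{
        /* preempt_enable() */
}

static void drop_mm_ref_everywhere(void)
{
        /* In the kernel: drop this CPU's reference directly, then IPI
         * each CPU whose cr3 or lazy-TLB state still uses the mm. */
}

static void exit_mmap_sketch(void)
{
        migration_off();                  /* stay on one CPU throughout */
        drop_mm_ref_everywhere();
        migration_on();
}

int main(void)
{
        exit_mmap_sketch();
        return 0;
}
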
@@ -2371,7 +2249,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
2371 struct remap_data *rmd = data; 2249 struct remap_data *rmd = data;
2372 pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); 2250 pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
2373 2251
2374 rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; 2252 rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
2375 rmd->mmu_update->val = pte_val_ma(pte); 2253 rmd->mmu_update->val = pte_val_ma(pte);
2376 rmd->mmu_update++; 2254 rmd->mmu_update++;
2377 2255
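
remap_area_mfn_pte_fn() only appends one request per pte; its caller later flushes the whole array to Xen in a single batched hypercall. A sketch of filling such a batch: the two-field layout mirrors Xen's public struct mmu_update, while the function, parameters, and constants are illustrative:

#include <stdint.h>

struct mmu_update_sketch {
        uint64_t ptr;                     /* machine address of the pte slot */
        uint64_t val;                     /* new pte value */
};

/* Fill n consecutive requests, bumping the frame number the way
 * rmd->mfn++ does above; pte_maddr stands in for the machine address
 * that virt_to_machine(ptep).maddr yields in the kernel. */
static int fill_remap_batch(struct mmu_update_sketch *u, int n,
                            uint64_t pte_maddr, uint64_t mfn, uint64_t prot)
{
        int i;

        for (i = 0; i < n; i++) {
                u[i].ptr = pte_maddr + i * sizeof(uint64_t);
                u[i].val = ((mfn + i) << 12) | prot;
        }
        return n;                         /* count for one batched hypercall */
}

int main(void)
{
        struct mmu_update_sketch reqs[4];

        return fill_remap_batch(reqs, 4, 0x7f000ULL, 100, 0x67) == 4 ? 0 : 1;
}
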
@@ -2425,7 +2303,6 @@ out:
2425EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); 2303EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
2426 2304
2427#ifdef CONFIG_XEN_DEBUG_FS 2305#ifdef CONFIG_XEN_DEBUG_FS
2428
2429static int p2m_dump_open(struct inode *inode, struct file *filp) 2306static int p2m_dump_open(struct inode *inode, struct file *filp)
2430{ 2307{
2431 return single_open(filp, p2m_dump_show, NULL); 2308 return single_open(filp, p2m_dump_show, NULL);
@@ -2437,65 +2314,4 @@ static const struct file_operations p2m_dump_fops = {
2437 .llseek = seq_lseek, 2314 .llseek = seq_lseek,
2438 .release = single_release, 2315 .release = single_release,
2439}; 2316};
2440 2317#endif /* CONFIG_XEN_DEBUG_FS */
2441static struct dentry *d_mmu_debug;
2442
2443static int __init xen_mmu_debugfs(void)
2444{
2445 struct dentry *d_xen = xen_init_debugfs();
2446
2447 if (d_xen == NULL)
2448 return -ENOMEM;
2449
2450 d_mmu_debug = debugfs_create_dir("mmu", d_xen);
2451
2452 debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats);
2453
2454 debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update);
2455 debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug,
2456 &mmu_stats.pgd_update_pinned);
2457 debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug,
2458 &mmu_stats.pgd_update_batched);
2459
2460 debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update);
2461 debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug,
2462 &mmu_stats.pud_update_pinned);
2463 debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug,
2464 &mmu_stats.pud_update_batched);
2465
2466 debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update);
2467 debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug,
2468 &mmu_stats.pmd_update_pinned);
2469 debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug,
2470 &mmu_stats.pmd_update_batched);
2471
2472 debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update);
2473// debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug,
2474// &mmu_stats.pte_update_pinned);
2475 debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug,
2476 &mmu_stats.pte_update_batched);
2477
2478 debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update);
2479 debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug,
2480 &mmu_stats.mmu_update_extended);
2481 xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
2482 mmu_stats.mmu_update_histo, 20);
2483
2484 debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
2485 debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
2486 &mmu_stats.set_pte_at_batched);
2487 debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
2488 &mmu_stats.set_pte_at_current);
2489 debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
2490 &mmu_stats.set_pte_at_kernel);
2491
2492 debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
2493 debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
2494 &mmu_stats.prot_commit_batched);
2495
2496 debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
2497 return 0;
2498}
2499fs_initcall(xen_mmu_debugfs);
2500
2501#endif /* CONFIG_XEN_DEBUG_FS */
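
The deleted block was the stock debugfs pattern: create a directory, then register plain u32 counters inside it. A minimal standalone module doing the same with one counter; the directory name, file name, and counter are made up for the sketch:

#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/module.h>

static struct dentry *demo_dir;
static u32 demo_counter;

static int __init demo_debugfs_init(void)
{
        demo_dir = debugfs_create_dir("mmu_demo", NULL);
        if (IS_ERR_OR_NULL(demo_dir))
                return -ENOMEM;

        /* Read-only at /sys/kernel/debug/mmu_demo/count, like the
         * 0444 stat files removed above. */
        debugfs_create_u32("count", 0444, demo_dir, &demo_counter);
        return 0;
}

static void __exit demo_debugfs_exit(void)
{
        debugfs_remove_recursive(demo_dir);
}

module_init(demo_debugfs_init);
module_exit(demo_debugfs_exit);
MODULE_LICENSE("GPL");
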
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 537bb9aab777..73809bb951b4 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -15,43 +15,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
15 15
16void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 16void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
17 17
18
19void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
20void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
21void xen_exit_mmap(struct mm_struct *mm);
22
23pteval_t xen_pte_val(pte_t);
24pmdval_t xen_pmd_val(pmd_t);
25pgdval_t xen_pgd_val(pgd_t);
26
27pte_t xen_make_pte(pteval_t);
28pmd_t xen_make_pmd(pmdval_t);
29pgd_t xen_make_pgd(pgdval_t);
30
31void xen_set_pte(pte_t *ptep, pte_t pteval);
32void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
33 pte_t *ptep, pte_t pteval);
34
35#ifdef CONFIG_X86_PAE
36void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
37void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
38void xen_pmd_clear(pmd_t *pmdp);
39#endif /* CONFIG_X86_PAE */
40
41void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
42void xen_set_pud(pud_t *ptr, pud_t val);
43void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
44void xen_set_pud_hyper(pud_t *ptr, pud_t val);
45
46#if PAGETABLE_LEVELS == 4
47pudval_t xen_pud_val(pud_t pud);
48pud_t xen_make_pud(pudval_t pudval);
49void xen_set_pgd(pgd_t *pgdp, pgd_t pgd);
50void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd);
51#endif
52
53pgd_t *xen_get_user_pgd(pgd_t *pgd);
54
55pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 18pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
56void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, 19void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
57 pte_t *ptep, pte_t pte); 20 pte_t *ptep, pte_t pte);
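
The two prototypes that survive in mmu.h form a read-modify-write transaction: xen_ptep_modify_prot_start() reads the live pte, the caller edits the value, and xen_ptep_modify_prot_commit() writes it back (in the kernel, as a batched MMU_PT_UPDATE_PRESERVE_AD update so accessed/dirty bits set in between are not lost). A sketch of how a caller drives the pair; the helpers and the flag bit are illustrative:

#include <stdint.h>

typedef uint64_t pte_sketch_t;

static pte_sketch_t prot_start(pte_sketch_t *ptep)
{
        return *ptep;                     /* snapshot the live entry */
}

static void prot_commit(pte_sketch_t *ptep, pte_sketch_t pte)
{
        *ptep = pte;                      /* batched hypercall in the kernel */
}

/* Typical caller: flip a protection bit between start and commit. */
static void make_readonly(pte_sketch_t *ptep)
{
        pte_sketch_t pte = prot_start(ptep);

        pte &= ~(pte_sketch_t)0x2;        /* clear an assumed write bit */
        prot_commit(ptep, pte);
}

int main(void)
{
        pte_sketch_t pte = 0x1003;        /* present plus write, say */

        make_readonly(&pte);
        return pte == 0x1001 ? 0 : 1;
}
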