Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/asm/paravirt.h            22
-rw-r--r--  arch/x86/include/asm/pgtable.h              2
-rw-r--r--  arch/x86/include/asm/required-features.h    8
-rw-r--r--  arch/x86/include/asm/thread_info.h          4
-rw-r--r--  arch/x86/include/asm/xen/page.h             3
-rw-r--r--  arch/x86/kernel/kvm.c                       2
-rw-r--r--  arch/x86/kernel/paravirt.c                 56
-rw-r--r--  arch/x86/kernel/process_32.c                2
-rw-r--r--  arch/x86/kernel/process_64.c                2
-rw-r--r--  arch/x86/kernel/vmi_32.c                   20
-rw-r--r--  arch/x86/lguest/boot.c                     16
-rw-r--r--  arch/x86/mm/fault.c                         6
-rw-r--r--  arch/x86/mm/highmem_32.c                    2
-rw-r--r--  arch/x86/mm/iomap_32.c                      1
-rw-r--r--  arch/x86/mm/pageattr.c                     14
-rw-r--r--  arch/x86/xen/enlighten.c                   99
-rw-r--r--  arch/x86/xen/mmu.c                        134
-rw-r--r--  arch/x86/xen/mmu.h                          3
-rw-r--r--  arch/x86/xen/smp.c                          4
-rw-r--r--  arch/x86/xen/xen-ops.h                      3
20 files changed, 248 insertions(+), 155 deletions(-)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 7727aa8b7dda..bc384be6aa44 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -56,6 +56,7 @@ struct desc_ptr;
 struct tss_struct;
 struct mm_struct;
 struct desc_struct;
+struct task_struct;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -203,7 +204,8 @@ struct pv_cpu_ops {
 
 	void (*swapgs)(void);
 
-	struct pv_lazy_ops lazy_mode;
+	void (*start_context_switch)(struct task_struct *prev);
+	void (*end_context_switch)(struct task_struct *next);
 };
 
 struct pv_irq_ops {
@@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode {
 };
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_enter_lazy_cpu(void);
-void paravirt_leave_lazy_cpu(void);
+void paravirt_start_context_switch(struct task_struct *prev);
+void paravirt_end_context_switch(struct task_struct *next);
+
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
 
-#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-static inline void arch_enter_lazy_cpu_mode(void)
+#define __HAVE_ARCH_START_CONTEXT_SWITCH
+static inline void arch_start_context_switch(struct task_struct *prev)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
+	PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
 }
 
-static inline void arch_leave_lazy_cpu_mode(void)
+static inline void arch_end_context_switch(struct task_struct *next)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
+	PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
 }
 
-void arch_flush_lazy_cpu_mode(void);
-
 #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 static inline void arch_enter_lazy_mmu_mode(void)
 {
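
For context, the two hooks introduced above are meant to bracket a task
switch so a hypervisor backend can batch the CPU-state updates (GDT, LDT
and segment reloads) issued between them. A minimal sketch of the expected
calling pattern follows; it is not part of this commit, and
example_switch_to() is only an illustrative stand-in for the scheduler's
real switch path:

	static void example_switch_to(struct task_struct *prev,
				      struct task_struct *next)
	{
		arch_start_context_switch(prev);	/* begin batching */
		/* ... reload segments, TLS descriptors, debug registers ... */
		arch_end_context_switch(next);		/* flush the batch */
	}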
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 29d96d168bc0..b27c4f29b5e0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
 #define pte_val(x)	native_pte_val(x)
 #define __pte(x)	native_make_pte(x)
 
+#define arch_end_context_switch(prev)	do {} while(0)
+
 #endif	/* CONFIG_PARAVIRT */
 
 /*
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index d5cd6c586881..64cf2d24fad1 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -48,9 +48,15 @@
 #endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT
+/* Paravirtualized systems may not have PSE or PGE available */
 #define NEED_PSE	0
+#define NEED_PGE	0
+#else
+#define NEED_PSE	(1<<(X86_FEATURE_PSE & 31))
+#define NEED_PGE	(1<<(X86_FEATURE_PGE & 31))
+#endif
 #define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
-#define NEED_PGE	(1<<(X86_FEATURE_PGE & 31))
 #define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
 #define NEED_XMM	(1<<(X86_FEATURE_XMM & 31))
 #define NEED_XMM2	(1<<(X86_FEATURE_XMM2 & 31))
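
As a worked example of the mask arithmetic above, assuming the usual
cpufeature encoding where X86_FEATURE_PGE is bit 13 of feature word 0:

	/* (1 << (X86_FEATURE_PGE & 31)) == (1 << 13) == 0x2000, so a
	 * non-paravirt 64-bit kernel demands PGE in its required-feature
	 * mask; with CONFIG_PARAVIRT the requirement is defined away to 0,
	 * since a hypervisor may not expose global pages to guests. */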
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8820a73ae090..602c769fc98c 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -94,7 +94,8 @@ struct thread_info {
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
 #define TIF_DS_AREA_MSR		26	/* uses thread_struct.ds_area_msr */
-#define TIF_SYSCALL_FTRACE	27	/* for ftrace syscall instrumentation */
+#define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
+#define TIF_SYSCALL_FTRACE	28	/* for ftrace syscall instrumentation */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -116,6 +117,7 @@ struct thread_info {
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
+#define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_SYSCALL_FTRACE	(1 << TIF_SYSCALL_FTRACE)
 
 /* work to do in syscall_trace_enter() */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 1a918dde46b5..018a0a400799 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
-#define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
+#define virt_to_pfn(v)		(PFN_DOWN(__pa(v)))
+#define virt_to_mfn(v)		(pfn_to_mfn(virt_to_pfn(v)))
 #define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
 static inline unsigned long pte_mfn(pte_t pte)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 33019ddb56b4..6551dedee20c 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -195,7 +195,7 @@ static void kvm_leave_lazy_mmu(void)
 	struct kvm_para_state *state = kvm_para_state();
 
 	mmu_queue_flush(state);
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
+	paravirt_leave_lazy_mmu();
 	state->mode = paravirt_get_lazy_mode();
 }
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8e45f4464880..aa3442340705 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -246,18 +246,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
 
 static inline void enter_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
-	BUG_ON(preemptible());
+	BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
 
-	__get_cpu_var(paravirt_lazy_mode) = mode;
+	percpu_write(paravirt_lazy_mode, mode);
 }
 
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
+static void leave_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
-	BUG_ON(preemptible());
+	BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
 
-	__get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
+	percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
 }
 
 void paravirt_enter_lazy_mmu(void)
@@ -267,22 +265,36 @@ void paravirt_enter_lazy_mmu(void)
 
 void paravirt_leave_lazy_mmu(void)
 {
-	paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
+	leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
-void paravirt_enter_lazy_cpu(void)
+void paravirt_start_context_switch(struct task_struct *prev)
 {
+	BUG_ON(preemptible());
+
+	if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
+		arch_leave_lazy_mmu_mode();
+		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
+	}
 	enter_lazy(PARAVIRT_LAZY_CPU);
 }
 
-void paravirt_leave_lazy_cpu(void)
+void paravirt_end_context_switch(struct task_struct *next)
 {
-	paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
+	BUG_ON(preemptible());
+
+	leave_lazy(PARAVIRT_LAZY_CPU);
+
+	if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
+		arch_enter_lazy_mmu_mode();
 }
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 {
-	return __get_cpu_var(paravirt_lazy_mode);
+	if (in_interrupt())
+		return PARAVIRT_LAZY_NONE;
+
+	return percpu_read(paravirt_lazy_mode);
 }
 
 void arch_flush_lazy_mmu_mode(void)
@@ -290,7 +302,6 @@ void arch_flush_lazy_mmu_mode(void)
 	preempt_disable();
 
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-		WARN_ON(preempt_count() == 1);
 		arch_leave_lazy_mmu_mode();
 		arch_enter_lazy_mmu_mode();
 	}
@@ -298,19 +309,6 @@ void arch_flush_lazy_mmu_mode(void)
 	preempt_enable();
 }
 
-void arch_flush_lazy_cpu_mode(void)
-{
-	preempt_disable();
-
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-		WARN_ON(preempt_count() == 1);
-		arch_leave_lazy_cpu_mode();
-		arch_enter_lazy_cpu_mode();
-	}
-
-	preempt_enable();
-}
-
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
@@ -402,10 +400,8 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.set_iopl_mask = native_set_iopl_mask,
 	.io_delay = native_io_delay,
 
-	.lazy_mode = {
-		.enter = paravirt_nop,
-		.leave = paravirt_nop,
-	},
+	.start_context_switch = paravirt_nop,
+	.end_context_switch = paravirt_nop,
 };
 
 struct pv_apic_ops pv_apic_ops = {
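
For context, the TIF_LAZY_MMU_UPDATES handling above is what lets a task be
preempted in the middle of a lazy MMU batch. A condensed sketch of the
lifecycle, using only functions defined in this diff (task A is mid-batch
when the scheduler switches to task B):

	arch_enter_lazy_mmu_mode();		/* A starts batching PTE updates */
	/* ...preemption: switching A -> B... */
	paravirt_start_context_switch(A);	/* flushes A's MMU batch, sets
						   TIF_LAZY_MMU_UPDATES on A,
						   then enters LAZY_CPU mode */
	paravirt_end_context_switch(B);		/* leaves LAZY_CPU mode */
	/* ...later: switching back to A... */
	paravirt_end_context_switch(A);		/* the flag was set on A, so
						   lazy MMU mode is re-entered */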
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 76f8f84043a2..5de30f0960fb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b751a41392b1..66ad06791d6f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/*
 	 * Switch FS and GS.
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 95deb9f2211e..b263423fbe2a 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -462,22 +462,28 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
 }
 #endif
 
-static void vmi_enter_lazy_cpu(void)
+static void vmi_start_context_switch(struct task_struct *prev)
 {
-	paravirt_enter_lazy_cpu();
+	paravirt_start_context_switch(prev);
 	vmi_ops.set_lazy_mode(2);
 }
 
+static void vmi_end_context_switch(struct task_struct *next)
+{
+	vmi_ops.set_lazy_mode(0);
+	paravirt_end_context_switch(next);
+}
+
 static void vmi_enter_lazy_mmu(void)
 {
 	paravirt_enter_lazy_mmu();
 	vmi_ops.set_lazy_mode(1);
 }
 
-static void vmi_leave_lazy(void)
+static void vmi_leave_lazy_mmu(void)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	vmi_ops.set_lazy_mode(0);
+	paravirt_leave_lazy_mmu();
 }
 
 static inline int __init check_vmi_rom(struct vrom_header *rom)
@@ -711,14 +717,14 @@ static inline int __init activate_vmi(void)
 	para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
 	para_fill(pv_cpu_ops.io_delay, IODelay);
 
-	para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
+	para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
+	para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
 		  set_lazy_mode, SetLazyMode);
 
 	para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
+	para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
 		  set_lazy_mode, SetLazyMode);
 
 	/* user and kernel flush are just handled with different flags to FlushTLB */
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index e94a11e42f98..5ab239711cc2 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -166,10 +166,16 @@ static void lazy_hcall3(unsigned long call,
 
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
  * issue the do-nothing hypercall to flush any stored calls. */
-static void lguest_leave_lazy_mode(void)
+static void lguest_leave_lazy_mmu_mode(void)
+{
+	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
+	paravirt_leave_lazy_mmu();
+}
+
+static void lguest_end_context_switch(struct task_struct *next)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	kvm_hypercall0(LHCALL_FLUSH_ASYNC);
+	paravirt_end_context_switch(next);
 }
 
 /*G:033
@@ -1051,8 +1057,8 @@ __init void lguest_init(void)
 	pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
 	pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
 	pv_cpu_ops.wbinvd = lguest_wbinvd;
-	pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
-	pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+	pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
+	pv_cpu_ops.end_context_switch = lguest_end_context_switch;
 
 	/* pagetable management */
 	pv_mmu_ops.write_cr3 = lguest_write_cr3;
@@ -1065,7 +1071,7 @@ __init void lguest_init(void)
 	pv_mmu_ops.read_cr2 = lguest_read_cr2;
 	pv_mmu_ops.read_cr3 = lguest_read_cr3;
 	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
-	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
 	pv_mmu_ops.pte_update = lguest_pte_update;
 	pv_mmu_ops.pte_update_defer = lguest_pte_update;
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b7279efa0..cfbb4a738011 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -225,12 +225,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	if (!pmd_present(*pmd_k))
 		return NULL;
 
-	if (!pmd_present(*pmd)) {
+	if (!pmd_present(*pmd))
 		set_pmd(pmd, *pmd_k);
-		arch_flush_lazy_mmu_mode();
-	} else {
+	else
 		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-	}
 
 	return pmd_k;
 }
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 8126e8d1a2a4..58f621e81919 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte(kmap_pte-idx, mk_pte(page, prot));
-	arch_flush_lazy_mmu_mode();
 
 	return (void *)vaddr;
 }
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 #endif
 	}
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 8056545e2d39..fe6f84ca121e 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type)
 	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
 		kpte_clear_flush(kmap_pte-idx, vaddr);
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d71e1b636ce6..660cac75ae11 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -844,13 +844,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	vm_unmap_aliases();
 
-	/*
-	 * If we're called with lazy mmu updates enabled, the
-	 * in-memory pte state may be stale. Flush pending updates to
-	 * bring them up to date.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 	cpa.vaddr = addr;
 	cpa.pages = pages;
 	cpa.numpages = numpages;
@@ -895,13 +888,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	} else
 		cpa_flush_all(cache);
 
-	/*
-	 * If we've been called with lazy mmu updates enabled, then
-	 * make sure that everything gets flushed out before we
-	 * return.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 out:
 	return ret;
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 82cd39a6cbd3..12a3159333bc 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -42,6 +42,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
+#include <asm/proto.h>
 #include <asm/msr-index.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
@@ -168,21 +169,23 @@ static void __init xen_banner(void)
 		       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
+static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
+static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
+
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		      unsigned int *cx, unsigned int *dx)
 {
+	unsigned maskecx = ~0;
 	unsigned maskedx = ~0;
 
 	/*
 	 * Mask out inconvenient features, to try and disable as many
 	 * unsupported kernel subsystems as possible.
 	 */
-	if (*ax == 1)
-		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
-			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-			    (1 << X86_FEATURE_MCE)  |  /* disable MCE */
-			    (1 << X86_FEATURE_MCA)  |  /* disable MCA */
-			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+	if (*ax == 1) {
+		maskecx = cpuid_leaf1_ecx_mask;
+		maskedx = cpuid_leaf1_edx_mask;
+	}
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
 		: "=a" (*ax),
@@ -190,9 +193,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		  "=c" (*cx),
 		  "=d" (*dx)
 		: "0" (*ax), "2" (*cx));
+
+	*cx &= maskecx;
 	*dx &= maskedx;
 }
 
+static __init void xen_init_cpuid_mask(void)
+{
+	unsigned int ax, bx, cx, dx;
+
+	cpuid_leaf1_edx_mask =
+		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
+		  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
+		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+
+	if (!xen_initial_domain())
+		cpuid_leaf1_edx_mask &=
+			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
+			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
+
+	ax = 1;
+	xen_cpuid(&ax, &bx, &cx, &dx);
+
+	/* cpuid claims we support xsave; try enabling it to see what happens */
+	if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
+		unsigned long cr4;
+
+		set_in_cr4(X86_CR4_OSXSAVE);
+
+		cr4 = read_cr4();
+
+		if ((cr4 & X86_CR4_OSXSAVE) == 0)
+			cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
+
+		clear_in_cr4(X86_CR4_OSXSAVE);
+	}
+}
+
 static void xen_set_debugreg(int reg, unsigned long val)
 {
 	HYPERVISOR_set_debugreg(reg, val);
@@ -203,10 +240,10 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-void xen_leave_lazy(void)
+static void xen_end_context_switch(struct task_struct *next)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	xen_mc_flush();
+	paravirt_end_context_switch(next);
 }
 
 static unsigned long xen_store_tr(void)
@@ -284,12 +321,11 @@ static void xen_set_ldt(const void *addr, unsigned entries)
 
 static void xen_load_gdt(const struct desc_ptr *dtr)
 {
-	unsigned long *frames;
 	unsigned long va = dtr->address;
 	unsigned int size = dtr->size + 1;
 	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	unsigned long frames[pages];
 	int f;
-	struct multicall_space mcs;
 
 	/* A GDT can be up to 64k in size, which corresponds to 8192
 	   8-byte entries, or 16 4k pages.. */
@@ -297,19 +333,26 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 	BUG_ON(size > 65536);
 	BUG_ON(va & ~PAGE_MASK);
 
-	mcs = xen_mc_entry(sizeof(*frames) * pages);
-	frames = mcs.args;
-
 	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
-		frames[f] = arbitrary_virt_to_mfn((void *)va);
+		int level;
+		pte_t *ptep = lookup_address(va, &level);
+		unsigned long pfn, mfn;
+		void *virt;
+
+		BUG_ON(ptep == NULL);
+
+		pfn = pte_pfn(*ptep);
+		mfn = pfn_to_mfn(pfn);
+		virt = __va(PFN_PHYS(pfn));
+
+		frames[f] = mfn;
 
 		make_lowmem_page_readonly((void *)va);
-		make_lowmem_page_readonly(mfn_to_virt(frames[f]));
+		make_lowmem_page_readonly(virt);
 	}
 
-	MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
+	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+		BUG();
 }
 
 static void load_TLS_descriptor(struct thread_struct *t,
@@ -385,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
-	if (val->type != 0xf && val->type != 0xe)
+	if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
 		return 0;
 
 	info->vector = vector;
@@ -393,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 	info->cs = gate_segment(*val);
 	info->flags = val->dpl;
 	/* interrupt gates clear IF */
-	if (val->type == 0xe)
-		info->flags |= 4;
+	if (val->type == GATE_INTERRUPT)
+		info->flags |= 1 << 2;
 
 	return 1;
 }
@@ -817,10 +860,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	/* Xen takes care of %gs when switching to usermode for us */
 	.swapgs = paravirt_nop,
 
-	.lazy_mode = {
-		.enter = paravirt_enter_lazy_cpu,
-		.leave = xen_leave_lazy,
-	},
+	.start_context_switch = paravirt_start_context_switch,
+	.end_context_switch = xen_end_context_switch,
 };
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
@@ -872,7 +913,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
 	.emergency_restart = xen_emergency_restart,
 };
 
-
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -897,6 +937,8 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_init_irq_ops();
 
+	xen_init_cpuid_mask();
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * set up the basic apic ops.
@@ -938,6 +980,11 @@ asmlinkage void __init xen_start_kernel(void)
 	if (!xen_initial_domain())
 		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
+#ifdef CONFIG_X86_64
+	/* Work out if we support NX */
+	check_efer();
+#endif
+
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
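
For context, xen_init_cpuid_mask() above probes xsave rather than trusting
cpuid, because a hypervisor may advertise the feature yet refuse the
control-register write. The probe pattern, restated with the same calls the
diff uses:

	set_in_cr4(X86_CR4_OSXSAVE);			/* request OSXSAVE */
	if ((read_cr4() & X86_CR4_OSXSAVE) == 0)	/* did the bit stick? */
		cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
	clear_in_cr4(X86_CR4_OSXSAVE);			/* undo the probe */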
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index db3802fb7b84..77b242c9a11e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn)
 }
 
 /* Build the parallel p2m_top_mfn structures */
-void xen_setup_mfn_list_list(void)
+static void __init xen_build_mfn_list_list(void)
 {
 	unsigned pfn, idx;
 
@@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void)
 		unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
 		p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
 	}
+}
 
+void xen_setup_mfn_list_list(void)
+{
 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 
 	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void)
 
 		p2m_top[topidx] = &mfn_list[pfn];
 	}
+
+	xen_build_mfn_list_list();
 }
 
 unsigned long get_phys_to_machine(unsigned long pfn)
@@ -233,47 +238,74 @@ unsigned long get_phys_to_machine(unsigned long pfn)
 }
 EXPORT_SYMBOL_GPL(get_phys_to_machine);
 
-static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+/* install a new p2m_top page */
+bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
 {
-	unsigned long *p;
+	unsigned topidx = p2m_top_index(pfn);
+	unsigned long **pfnp, *mfnp;
 	unsigned i;
 
-	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
-	BUG_ON(p == NULL);
+	pfnp = &p2m_top[topidx];
+	mfnp = &p2m_top_mfn[topidx];
 
 	for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
 		p[i] = INVALID_P2M_ENTRY;
 
-	if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
-		free_page((unsigned long)p);
-	else
+	if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
 		*mfnp = virt_to_mfn(p);
+		return true;
+	}
+
+	return false;
 }
 
-void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+static void alloc_p2m(unsigned long pfn)
 {
-	unsigned topidx, idx;
+	unsigned long *p;
 
-	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-		return;
-	}
+	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+	BUG_ON(p == NULL);
+
+	if (!install_p2mtop_page(pfn, p))
+		free_page((unsigned long)p);
+}
+
+/* Try to install p2m mapping; fail if intermediate bits missing */
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	unsigned topidx, idx;
 
 	if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
 		BUG_ON(mfn != INVALID_P2M_ENTRY);
-		return;
+		return true;
 	}
 
 	topidx = p2m_top_index(pfn);
 	if (p2m_top[topidx] == p2m_missing) {
-		/* no need to allocate a page to store an invalid entry */
 		if (mfn == INVALID_P2M_ENTRY)
-			return;
-		alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+			return true;
+		return false;
 	}
 
 	idx = p2m_index(pfn);
 	p2m_top[topidx][idx] = mfn;
+
+	return true;
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
+		alloc_p2m(pfn);
+
+		if (!__set_phys_to_machine(pfn, mfn))
+			BUG();
+	}
 }
 
 unsigned long arbitrary_virt_to_mfn(void *vaddr)
@@ -419,10 +451,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
-	/* updates to init_mm may be done without lock */
-	if (mm == &init_mm)
-		preempt_disable();
-
 	ADD_STATS(set_pte_at, 1);
 //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
 	ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -443,9 +471,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	}
 	xen_set_pte(ptep, pteval);
 
-out:
-	if (mm == &init_mm)
-		preempt_enable();
+out:	return;
 }
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@ -987,7 +1013,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
 	return 0;
 }
 
-void __init xen_mark_init_mm_pinned(void)
+static void __init xen_mark_init_mm_pinned(void)
 {
 	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
@@ -1119,10 +1145,8 @@ static void drop_other_mm_ref(void *info)
 
 	/* If this cpu still has a stale cr3 reference, then make sure
 	   it has been flushed. */
-	if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
+	if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
 		load_cr3(swapper_pg_dir);
-		arch_flush_lazy_cpu_mode();
-	}
 }
 
 static void xen_drop_mm_ref(struct mm_struct *mm)
@@ -1135,7 +1159,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 			load_cr3(swapper_pg_dir);
 		else
 			leave_mm(smp_processor_id());
-		arch_flush_lazy_cpu_mode();
 	}
 
 	/* Get the "official" set of cpus referring to our pagetable. */
@@ -1270,8 +1293,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	} *args;
 	struct multicall_space mcs;
 
-	BUG_ON(cpumask_empty(cpus));
-	BUG_ON(!mm);
+	if (cpumask_empty(cpus))
+		return;		/* nothing to do */
 
 	mcs = xen_mc_entry(sizeof(*args));
 	args = mcs.args;
@@ -1438,6 +1461,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 }
 #endif
 
+static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+{
+	struct mmuext_op op;
+	op.cmd = cmd;
+	op.arg1.mfn = pfn_to_mfn(pfn);
+	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+		BUG();
+}
+
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1446,22 +1478,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 	BUG_ON(mem_map);	/* should only be used early */
 #endif
 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+	pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+}
+
+/* Used for pmd and pud */
+static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
+{
+#ifdef CONFIG_FLATMEM
+	BUG_ON(mem_map);	/* should only be used early */
+#endif
+	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
 /* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(unsigned long pfn)
+static __init void xen_release_pte_init(unsigned long pfn)
 {
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
-static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+static __init void xen_release_pmd_init(unsigned long pfn)
 {
-	struct mmuext_op op;
-	op.cmd = cmd;
-	op.arg1.mfn = pfn_to_mfn(pfn);
-	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
-		BUG();
+	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
 /* This needs to make sure the new pte page is pinned iff its being
@@ -1819,6 +1858,13 @@ __init void xen_post_allocator_init(void)
 	xen_mark_init_mm_pinned();
 }
 
+static void xen_leave_lazy_mmu(void)
+{
+	preempt_disable();
+	xen_mc_flush();
+	paravirt_leave_lazy_mmu();
+	preempt_enable();
+}
 
 const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pagetable_setup_start = xen_pagetable_setup_start,
@@ -1843,9 +1889,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
-	.alloc_pmd = xen_alloc_pte_init,
+	.alloc_pmd = xen_alloc_pmd_init,
 	.alloc_pmd_clone = paravirt_nop,
-	.release_pmd = xen_release_pte_init,
+	.release_pmd = xen_release_pmd_init,
 
 #ifdef CONFIG_HIGHPTE
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
@@ -1883,8 +1929,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_pgd = xen_set_pgd_hyper,
 
-	.alloc_pud = xen_alloc_pte_init,
-	.release_pud = xen_release_pte_init,
+	.alloc_pud = xen_alloc_pmd_init,
+	.release_pud = xen_release_pmd_init,
 #endif	/* PAGETABLE_LEVELS == 4 */
 
 	.activate_mm = xen_activate_mm,
@@ -1893,7 +1939,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.lazy_mode = {
 		.enter = paravirt_enter_lazy_mmu,
-		.leave = xen_leave_lazy,
+		.leave = xen_leave_lazy_mmu,
 	},
 
 	.set_fixmap = xen_set_fixmap,
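
For context, the p2m rework above splits set_phys_to_machine() into a fast
path that only touches an existing leaf page and a slow path that allocates
one. A condensed view of the resulting control flow, with the
auto-translated-physmap early return omitted (all names are from this diff):

	if (!__set_phys_to_machine(pfn, mfn)) {	/* fast path missed: no leaf */
		alloc_p2m(pfn);			/* install a leaf page via
						   install_p2mtop_page()/cmpxchg */
		if (!__set_phys_to_machine(pfn, mfn))
			BUG();			/* leaf present now; must succeed */
	}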
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 24d1b44a337d..da7302624897 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -11,6 +11,9 @@ enum pt_level {
 };
 
 
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+bool install_p2mtop_page(unsigned long pfn, unsigned long *p);
+
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 585a6e330837..429834ec1687 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	BUG_ON(rc);
 
 	while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
-		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+		HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 		barrier();
 	}
 
@@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 	/* Make sure other vcpus get a chance to run if they need to. */
 	for_each_cpu(cpu, mask) {
 		if (xen_vcpu_stolen(cpu)) {
-			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+			HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 			break;
 		}
 	}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f5ef2632ea2..5c50a1017a37 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_ident_map_ISA(void);
 void xen_reserve_top(void);
 
-void xen_leave_lazy(void);
 void xen_post_allocator_init(void);
 
 char * __init xen_memory_setup(void);
@@ -57,8 +56,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
 bool xen_vcpu_stolen(int vcpu);
 
-void xen_mark_init_mm_pinned(void);
-
 void xen_setup_vcpu_info_placement(void);
 
 #ifdef CONFIG_SMP