aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/paravirt.h22
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/required-features.h8
-rw-r--r--arch/x86/include/asm/thread_info.h4
-rw-r--r--arch/x86/include/asm/xen/page.h3
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/paravirt.c56
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/vmi_32.c20
-rw-r--r--arch/x86/lguest/boot.c16
-rw-r--r--arch/x86/mm/fault.c6
-rw-r--r--arch/x86/mm/highmem_32.c2
-rw-r--r--arch/x86/mm/iomap_32.c1
-rw-r--r--arch/x86/mm/pageattr.c14
-rw-r--r--arch/x86/xen/enlighten.c99
-rw-r--r--arch/x86/xen/mmu.c134
-rw-r--r--arch/x86/xen/mmu.h3
-rw-r--r--arch/x86/xen/smp.c4
-rw-r--r--arch/x86/xen/xen-ops.h3
-rw-r--r--drivers/xen/Kconfig20
-rw-r--r--drivers/xen/Makefile4
-rw-r--r--drivers/xen/cpu_hotplug.c40
-rw-r--r--drivers/xen/events.c6
-rw-r--r--drivers/xen/evtchn.c507
-rw-r--r--drivers/xen/manage.c14
-rw-r--r--drivers/xen/sys-hypervisor.c445
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c61
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c2
-rw-r--r--drivers/xen/xenfs/super.c19
-rw-r--r--include/Kbuild1
-rw-r--r--include/asm-frv/pgtable.h4
-rw-r--r--include/asm-generic/pgtable.h21
-rw-r--r--include/xen/Kbuild1
-rw-r--r--include/xen/events.h3
-rw-r--r--include/xen/evtchn.h88
-rw-r--r--include/xen/interface/version.h3
-rw-r--r--include/xen/xenbus.h3
-rw-r--r--kernel/sched.c2
39 files changed, 1407 insertions, 240 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 7727aa8b7dda..bc384be6aa44 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -56,6 +56,7 @@ struct desc_ptr;
56struct tss_struct; 56struct tss_struct;
57struct mm_struct; 57struct mm_struct;
58struct desc_struct; 58struct desc_struct;
59struct task_struct;
59 60
60/* 61/*
61 * Wrapper type for pointers to code which uses the non-standard 62 * Wrapper type for pointers to code which uses the non-standard
@@ -203,7 +204,8 @@ struct pv_cpu_ops {
203 204
204 void (*swapgs)(void); 205 void (*swapgs)(void);
205 206
206 struct pv_lazy_ops lazy_mode; 207 void (*start_context_switch)(struct task_struct *prev);
208 void (*end_context_switch)(struct task_struct *next);
207}; 209};
208 210
209struct pv_irq_ops { 211struct pv_irq_ops {
@@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode {
1399}; 1401};
1400 1402
1401enum paravirt_lazy_mode paravirt_get_lazy_mode(void); 1403enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
1402void paravirt_enter_lazy_cpu(void); 1404void paravirt_start_context_switch(struct task_struct *prev);
1403void paravirt_leave_lazy_cpu(void); 1405void paravirt_end_context_switch(struct task_struct *next);
1406
1404void paravirt_enter_lazy_mmu(void); 1407void paravirt_enter_lazy_mmu(void);
1405void paravirt_leave_lazy_mmu(void); 1408void paravirt_leave_lazy_mmu(void);
1406void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
1407 1409
1408#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE 1410#define __HAVE_ARCH_START_CONTEXT_SWITCH
1409static inline void arch_enter_lazy_cpu_mode(void) 1411static inline void arch_start_context_switch(struct task_struct *prev)
1410{ 1412{
1411 PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); 1413 PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
1412} 1414}
1413 1415
1414static inline void arch_leave_lazy_cpu_mode(void) 1416static inline void arch_end_context_switch(struct task_struct *next)
1415{ 1417{
1416 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); 1418 PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
1417} 1419}
1418 1420
1419void arch_flush_lazy_cpu_mode(void);
1420
1421#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE 1421#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
1422static inline void arch_enter_lazy_mmu_mode(void) 1422static inline void arch_enter_lazy_mmu_mode(void)
1423{ 1423{
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 29d96d168bc0..b27c4f29b5e0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
81#define pte_val(x) native_pte_val(x) 81#define pte_val(x) native_pte_val(x)
82#define __pte(x) native_make_pte(x) 82#define __pte(x) native_make_pte(x)
83 83
84#define arch_end_context_switch(prev) do {} while(0)
85
84#endif /* CONFIG_PARAVIRT */ 86#endif /* CONFIG_PARAVIRT */
85 87
86/* 88/*
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index d5cd6c586881..64cf2d24fad1 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -48,9 +48,15 @@
48#endif 48#endif
49 49
50#ifdef CONFIG_X86_64 50#ifdef CONFIG_X86_64
51#ifdef CONFIG_PARAVIRT
52/* Paravirtualized systems may not have PSE or PGE available */
51#define NEED_PSE 0 53#define NEED_PSE 0
54#define NEED_PGE 0
55#else
56#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
57#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
58#endif
52#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) 59#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
53#define NEED_PGE (1<<(X86_FEATURE_PGE & 31))
54#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) 60#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
55#define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) 61#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
56#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) 62#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8820a73ae090..602c769fc98c 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -94,7 +94,8 @@ struct thread_info {
94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ 94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
95#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ 95#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
96#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ 96#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
97#define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */ 97#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
98#define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */
98 99
99#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 100#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
100#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) 101#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -116,6 +117,7 @@ struct thread_info {
116#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 117#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
117#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) 118#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
118#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) 119#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
120#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
119#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) 121#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE)
120 122
121/* work to do in syscall_trace_enter() */ 123/* work to do in syscall_trace_enter() */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 1a918dde46b5..018a0a400799 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
124 124
125/* VIRT <-> MACHINE conversion */ 125/* VIRT <-> MACHINE conversion */
126#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) 126#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v))))
127#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) 127#define virt_to_pfn(v) (PFN_DOWN(__pa(v)))
128#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v)))
128#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) 129#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
129 130
130static inline unsigned long pte_mfn(pte_t pte) 131static inline unsigned long pte_mfn(pte_t pte)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 33019ddb56b4..6551dedee20c 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -195,7 +195,7 @@ static void kvm_leave_lazy_mmu(void)
195 struct kvm_para_state *state = kvm_para_state(); 195 struct kvm_para_state *state = kvm_para_state();
196 196
197 mmu_queue_flush(state); 197 mmu_queue_flush(state);
198 paravirt_leave_lazy(paravirt_get_lazy_mode()); 198 paravirt_leave_lazy_mmu();
199 state->mode = paravirt_get_lazy_mode(); 199 state->mode = paravirt_get_lazy_mode();
200} 200}
201 201
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8e45f4464880..aa3442340705 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -246,18 +246,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
246 246
247static inline void enter_lazy(enum paravirt_lazy_mode mode) 247static inline void enter_lazy(enum paravirt_lazy_mode mode)
248{ 248{
249 BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); 249 BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
250 BUG_ON(preemptible());
251 250
252 __get_cpu_var(paravirt_lazy_mode) = mode; 251 percpu_write(paravirt_lazy_mode, mode);
253} 252}
254 253
255void paravirt_leave_lazy(enum paravirt_lazy_mode mode) 254static void leave_lazy(enum paravirt_lazy_mode mode)
256{ 255{
257 BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); 256 BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
258 BUG_ON(preemptible());
259 257
260 __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; 258 percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
261} 259}
262 260
263void paravirt_enter_lazy_mmu(void) 261void paravirt_enter_lazy_mmu(void)
@@ -267,22 +265,36 @@ void paravirt_enter_lazy_mmu(void)
267 265
268void paravirt_leave_lazy_mmu(void) 266void paravirt_leave_lazy_mmu(void)
269{ 267{
270 paravirt_leave_lazy(PARAVIRT_LAZY_MMU); 268 leave_lazy(PARAVIRT_LAZY_MMU);
271} 269}
272 270
273void paravirt_enter_lazy_cpu(void) 271void paravirt_start_context_switch(struct task_struct *prev)
274{ 272{
273 BUG_ON(preemptible());
274
275 if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
276 arch_leave_lazy_mmu_mode();
277 set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
278 }
275 enter_lazy(PARAVIRT_LAZY_CPU); 279 enter_lazy(PARAVIRT_LAZY_CPU);
276} 280}
277 281
278void paravirt_leave_lazy_cpu(void) 282void paravirt_end_context_switch(struct task_struct *next)
279{ 283{
280 paravirt_leave_lazy(PARAVIRT_LAZY_CPU); 284 BUG_ON(preemptible());
285
286 leave_lazy(PARAVIRT_LAZY_CPU);
287
288 if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
289 arch_enter_lazy_mmu_mode();
281} 290}
282 291
283enum paravirt_lazy_mode paravirt_get_lazy_mode(void) 292enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
284{ 293{
285 return __get_cpu_var(paravirt_lazy_mode); 294 if (in_interrupt())
295 return PARAVIRT_LAZY_NONE;
296
297 return percpu_read(paravirt_lazy_mode);
286} 298}
287 299
288void arch_flush_lazy_mmu_mode(void) 300void arch_flush_lazy_mmu_mode(void)
@@ -290,7 +302,6 @@ void arch_flush_lazy_mmu_mode(void)
290 preempt_disable(); 302 preempt_disable();
291 303
292 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { 304 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
293 WARN_ON(preempt_count() == 1);
294 arch_leave_lazy_mmu_mode(); 305 arch_leave_lazy_mmu_mode();
295 arch_enter_lazy_mmu_mode(); 306 arch_enter_lazy_mmu_mode();
296 } 307 }
@@ -298,19 +309,6 @@ void arch_flush_lazy_mmu_mode(void)
298 preempt_enable(); 309 preempt_enable();
299} 310}
300 311
301void arch_flush_lazy_cpu_mode(void)
302{
303 preempt_disable();
304
305 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
306 WARN_ON(preempt_count() == 1);
307 arch_leave_lazy_cpu_mode();
308 arch_enter_lazy_cpu_mode();
309 }
310
311 preempt_enable();
312}
313
314struct pv_info pv_info = { 312struct pv_info pv_info = {
315 .name = "bare hardware", 313 .name = "bare hardware",
316 .paravirt_enabled = 0, 314 .paravirt_enabled = 0,
@@ -402,10 +400,8 @@ struct pv_cpu_ops pv_cpu_ops = {
402 .set_iopl_mask = native_set_iopl_mask, 400 .set_iopl_mask = native_set_iopl_mask,
403 .io_delay = native_io_delay, 401 .io_delay = native_io_delay,
404 402
405 .lazy_mode = { 403 .start_context_switch = paravirt_nop,
406 .enter = paravirt_nop, 404 .end_context_switch = paravirt_nop,
407 .leave = paravirt_nop,
408 },
409}; 405};
410 406
411struct pv_apic_ops pv_apic_ops = { 407struct pv_apic_ops pv_apic_ops = {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 76f8f84043a2..5de30f0960fb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
407 * done before math_state_restore, so the TS bit is up 407 * done before math_state_restore, so the TS bit is up
408 * to date. 408 * to date.
409 */ 409 */
410 arch_leave_lazy_cpu_mode(); 410 arch_end_context_switch(next_p);
411 411
412 /* If the task has used fpu the last 5 timeslices, just do a full 412 /* If the task has used fpu the last 5 timeslices, just do a full
413 * restore of the math state immediately to avoid the trap; the 413 * restore of the math state immediately to avoid the trap; the
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b751a41392b1..66ad06791d6f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
428 * done before math_state_restore, so the TS bit is up 428 * done before math_state_restore, so the TS bit is up
429 * to date. 429 * to date.
430 */ 430 */
431 arch_leave_lazy_cpu_mode(); 431 arch_end_context_switch(next_p);
432 432
433 /* 433 /*
434 * Switch FS and GS. 434 * Switch FS and GS.
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 95deb9f2211e..b263423fbe2a 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -462,22 +462,28 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
462} 462}
463#endif 463#endif
464 464
465static void vmi_enter_lazy_cpu(void) 465static void vmi_start_context_switch(struct task_struct *prev)
466{ 466{
467 paravirt_enter_lazy_cpu(); 467 paravirt_start_context_switch(prev);
468 vmi_ops.set_lazy_mode(2); 468 vmi_ops.set_lazy_mode(2);
469} 469}
470 470
471static void vmi_end_context_switch(struct task_struct *next)
472{
473 vmi_ops.set_lazy_mode(0);
474 paravirt_end_context_switch(next);
475}
476
471static void vmi_enter_lazy_mmu(void) 477static void vmi_enter_lazy_mmu(void)
472{ 478{
473 paravirt_enter_lazy_mmu(); 479 paravirt_enter_lazy_mmu();
474 vmi_ops.set_lazy_mode(1); 480 vmi_ops.set_lazy_mode(1);
475} 481}
476 482
477static void vmi_leave_lazy(void) 483static void vmi_leave_lazy_mmu(void)
478{ 484{
479 paravirt_leave_lazy(paravirt_get_lazy_mode());
480 vmi_ops.set_lazy_mode(0); 485 vmi_ops.set_lazy_mode(0);
486 paravirt_leave_lazy_mmu();
481} 487}
482 488
483static inline int __init check_vmi_rom(struct vrom_header *rom) 489static inline int __init check_vmi_rom(struct vrom_header *rom)
@@ -711,14 +717,14 @@ static inline int __init activate_vmi(void)
711 para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); 717 para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
712 para_fill(pv_cpu_ops.io_delay, IODelay); 718 para_fill(pv_cpu_ops.io_delay, IODelay);
713 719
714 para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, 720 para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
715 set_lazy_mode, SetLazyMode); 721 set_lazy_mode, SetLazyMode);
716 para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, 722 para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
717 set_lazy_mode, SetLazyMode); 723 set_lazy_mode, SetLazyMode);
718 724
719 para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, 725 para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
720 set_lazy_mode, SetLazyMode); 726 set_lazy_mode, SetLazyMode);
721 para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, 727 para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
722 set_lazy_mode, SetLazyMode); 728 set_lazy_mode, SetLazyMode);
723 729
724 /* user and kernel flush are just handled with different flags to FlushTLB */ 730 /* user and kernel flush are just handled with different flags to FlushTLB */
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index e94a11e42f98..5ab239711cc2 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -166,10 +166,16 @@ static void lazy_hcall3(unsigned long call,
166 166
167/* When lazy mode is turned off reset the per-cpu lazy mode variable and then 167/* When lazy mode is turned off reset the per-cpu lazy mode variable and then
168 * issue the do-nothing hypercall to flush any stored calls. */ 168 * issue the do-nothing hypercall to flush any stored calls. */
169static void lguest_leave_lazy_mode(void) 169static void lguest_leave_lazy_mmu_mode(void)
170{
171 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
172 paravirt_leave_lazy_mmu();
173}
174
175static void lguest_end_context_switch(struct task_struct *next)
170{ 176{
171 paravirt_leave_lazy(paravirt_get_lazy_mode());
172 kvm_hypercall0(LHCALL_FLUSH_ASYNC); 177 kvm_hypercall0(LHCALL_FLUSH_ASYNC);
178 paravirt_end_context_switch(next);
173} 179}
174 180
175/*G:033 181/*G:033
@@ -1051,8 +1057,8 @@ __init void lguest_init(void)
1051 pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; 1057 pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
1052 pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; 1058 pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
1053 pv_cpu_ops.wbinvd = lguest_wbinvd; 1059 pv_cpu_ops.wbinvd = lguest_wbinvd;
1054 pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; 1060 pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
1055 pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; 1061 pv_cpu_ops.end_context_switch = lguest_end_context_switch;
1056 1062
1057 /* pagetable management */ 1063 /* pagetable management */
1058 pv_mmu_ops.write_cr3 = lguest_write_cr3; 1064 pv_mmu_ops.write_cr3 = lguest_write_cr3;
@@ -1065,7 +1071,7 @@ __init void lguest_init(void)
1065 pv_mmu_ops.read_cr2 = lguest_read_cr2; 1071 pv_mmu_ops.read_cr2 = lguest_read_cr2;
1066 pv_mmu_ops.read_cr3 = lguest_read_cr3; 1072 pv_mmu_ops.read_cr3 = lguest_read_cr3;
1067 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; 1073 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
1068 pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; 1074 pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
1069 pv_mmu_ops.pte_update = lguest_pte_update; 1075 pv_mmu_ops.pte_update = lguest_pte_update;
1070 pv_mmu_ops.pte_update_defer = lguest_pte_update; 1076 pv_mmu_ops.pte_update_defer = lguest_pte_update;
1071 1077
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b7279efa0..cfbb4a738011 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -225,12 +225,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
225 if (!pmd_present(*pmd_k)) 225 if (!pmd_present(*pmd_k))
226 return NULL; 226 return NULL;
227 227
228 if (!pmd_present(*pmd)) { 228 if (!pmd_present(*pmd))
229 set_pmd(pmd, *pmd_k); 229 set_pmd(pmd, *pmd_k);
230 arch_flush_lazy_mmu_mode(); 230 else
231 } else {
232 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); 231 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
233 }
234 232
235 return pmd_k; 233 return pmd_k;
236} 234}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 8126e8d1a2a4..58f621e81919 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
45 BUG_ON(!pte_none(*(kmap_pte-idx))); 45 BUG_ON(!pte_none(*(kmap_pte-idx)));
46 set_pte(kmap_pte-idx, mk_pte(page, prot)); 46 set_pte(kmap_pte-idx, mk_pte(page, prot));
47 arch_flush_lazy_mmu_mode();
48 47
49 return (void *)vaddr; 48 return (void *)vaddr;
50} 49}
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
74#endif 73#endif
75 } 74 }
76 75
77 arch_flush_lazy_mmu_mode();
78 pagefault_enable(); 76 pagefault_enable();
79} 77}
80 78
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 8056545e2d39..fe6f84ca121e 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type)
82 if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) 82 if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
83 kpte_clear_flush(kmap_pte-idx, vaddr); 83 kpte_clear_flush(kmap_pte-idx, vaddr);
84 84
85 arch_flush_lazy_mmu_mode();
86 pagefault_enable(); 85 pagefault_enable();
87} 86}
88EXPORT_SYMBOL_GPL(iounmap_atomic); 87EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d71e1b636ce6..660cac75ae11 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -844,13 +844,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
844 844
845 vm_unmap_aliases(); 845 vm_unmap_aliases();
846 846
847 /*
848 * If we're called with lazy mmu updates enabled, the
849 * in-memory pte state may be stale. Flush pending updates to
850 * bring them up to date.
851 */
852 arch_flush_lazy_mmu_mode();
853
854 cpa.vaddr = addr; 847 cpa.vaddr = addr;
855 cpa.pages = pages; 848 cpa.pages = pages;
856 cpa.numpages = numpages; 849 cpa.numpages = numpages;
@@ -895,13 +888,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
895 } else 888 } else
896 cpa_flush_all(cache); 889 cpa_flush_all(cache);
897 890
898 /*
899 * If we've been called with lazy mmu updates enabled, then
900 * make sure that everything gets flushed out before we
901 * return.
902 */
903 arch_flush_lazy_mmu_mode();
904
905out: 891out:
906 return ret; 892 return ret;
907} 893}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 82cd39a6cbd3..12a3159333bc 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -42,6 +42,7 @@
42#include <asm/xen/hypervisor.h> 42#include <asm/xen/hypervisor.h>
43#include <asm/fixmap.h> 43#include <asm/fixmap.h>
44#include <asm/processor.h> 44#include <asm/processor.h>
45#include <asm/proto.h>
45#include <asm/msr-index.h> 46#include <asm/msr-index.h>
46#include <asm/setup.h> 47#include <asm/setup.h>
47#include <asm/desc.h> 48#include <asm/desc.h>
@@ -168,21 +169,23 @@ static void __init xen_banner(void)
168 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); 169 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
169} 170}
170 171
172static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
173static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
174
171static void xen_cpuid(unsigned int *ax, unsigned int *bx, 175static void xen_cpuid(unsigned int *ax, unsigned int *bx,
172 unsigned int *cx, unsigned int *dx) 176 unsigned int *cx, unsigned int *dx)
173{ 177{
178 unsigned maskecx = ~0;
174 unsigned maskedx = ~0; 179 unsigned maskedx = ~0;
175 180
176 /* 181 /*
177 * Mask out inconvenient features, to try and disable as many 182 * Mask out inconvenient features, to try and disable as many
178 * unsupported kernel subsystems as possible. 183 * unsupported kernel subsystems as possible.
179 */ 184 */
180 if (*ax == 1) 185 if (*ax == 1) {
181 maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ 186 maskecx = cpuid_leaf1_ecx_mask;
182 (1 << X86_FEATURE_ACPI) | /* disable ACPI */ 187 maskedx = cpuid_leaf1_edx_mask;
183 (1 << X86_FEATURE_MCE) | /* disable MCE */ 188 }
184 (1 << X86_FEATURE_MCA) | /* disable MCA */
185 (1 << X86_FEATURE_ACC)); /* thermal monitoring */
186 189
187 asm(XEN_EMULATE_PREFIX "cpuid" 190 asm(XEN_EMULATE_PREFIX "cpuid"
188 : "=a" (*ax), 191 : "=a" (*ax),
@@ -190,9 +193,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
190 "=c" (*cx), 193 "=c" (*cx),
191 "=d" (*dx) 194 "=d" (*dx)
192 : "0" (*ax), "2" (*cx)); 195 : "0" (*ax), "2" (*cx));
196
197 *cx &= maskecx;
193 *dx &= maskedx; 198 *dx &= maskedx;
194} 199}
195 200
201static __init void xen_init_cpuid_mask(void)
202{
203 unsigned int ax, bx, cx, dx;
204
205 cpuid_leaf1_edx_mask =
206 ~((1 << X86_FEATURE_MCE) | /* disable MCE */
207 (1 << X86_FEATURE_MCA) | /* disable MCA */
208 (1 << X86_FEATURE_ACC)); /* thermal monitoring */
209
210 if (!xen_initial_domain())
211 cpuid_leaf1_edx_mask &=
212 ~((1 << X86_FEATURE_APIC) | /* disable local APIC */
213 (1 << X86_FEATURE_ACPI)); /* disable ACPI */
214
215 ax = 1;
216 xen_cpuid(&ax, &bx, &cx, &dx);
217
218 /* cpuid claims we support xsave; try enabling it to see what happens */
219 if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
220 unsigned long cr4;
221
222 set_in_cr4(X86_CR4_OSXSAVE);
223
224 cr4 = read_cr4();
225
226 if ((cr4 & X86_CR4_OSXSAVE) == 0)
227 cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
228
229 clear_in_cr4(X86_CR4_OSXSAVE);
230 }
231}
232
196static void xen_set_debugreg(int reg, unsigned long val) 233static void xen_set_debugreg(int reg, unsigned long val)
197{ 234{
198 HYPERVISOR_set_debugreg(reg, val); 235 HYPERVISOR_set_debugreg(reg, val);
@@ -203,10 +240,10 @@ static unsigned long xen_get_debugreg(int reg)
203 return HYPERVISOR_get_debugreg(reg); 240 return HYPERVISOR_get_debugreg(reg);
204} 241}
205 242
206void xen_leave_lazy(void) 243static void xen_end_context_switch(struct task_struct *next)
207{ 244{
208 paravirt_leave_lazy(paravirt_get_lazy_mode());
209 xen_mc_flush(); 245 xen_mc_flush();
246 paravirt_end_context_switch(next);
210} 247}
211 248
212static unsigned long xen_store_tr(void) 249static unsigned long xen_store_tr(void)
@@ -284,12 +321,11 @@ static void xen_set_ldt(const void *addr, unsigned entries)
284 321
285static void xen_load_gdt(const struct desc_ptr *dtr) 322static void xen_load_gdt(const struct desc_ptr *dtr)
286{ 323{
287 unsigned long *frames;
288 unsigned long va = dtr->address; 324 unsigned long va = dtr->address;
289 unsigned int size = dtr->size + 1; 325 unsigned int size = dtr->size + 1;
290 unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; 326 unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
327 unsigned long frames[pages];
291 int f; 328 int f;
292 struct multicall_space mcs;
293 329
294 /* A GDT can be up to 64k in size, which corresponds to 8192 330 /* A GDT can be up to 64k in size, which corresponds to 8192
295 8-byte entries, or 16 4k pages.. */ 331 8-byte entries, or 16 4k pages.. */
@@ -297,19 +333,26 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
297 BUG_ON(size > 65536); 333 BUG_ON(size > 65536);
298 BUG_ON(va & ~PAGE_MASK); 334 BUG_ON(va & ~PAGE_MASK);
299 335
300 mcs = xen_mc_entry(sizeof(*frames) * pages);
301 frames = mcs.args;
302
303 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { 336 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
304 frames[f] = arbitrary_virt_to_mfn((void *)va); 337 int level;
338 pte_t *ptep = lookup_address(va, &level);
339 unsigned long pfn, mfn;
340 void *virt;
341
342 BUG_ON(ptep == NULL);
343
344 pfn = pte_pfn(*ptep);
345 mfn = pfn_to_mfn(pfn);
346 virt = __va(PFN_PHYS(pfn));
347
348 frames[f] = mfn;
305 349
306 make_lowmem_page_readonly((void *)va); 350 make_lowmem_page_readonly((void *)va);
307 make_lowmem_page_readonly(mfn_to_virt(frames[f])); 351 make_lowmem_page_readonly(virt);
308 } 352 }
309 353
310 MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); 354 if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
311 355 BUG();
312 xen_mc_issue(PARAVIRT_LAZY_CPU);
313} 356}
314 357
315static void load_TLS_descriptor(struct thread_struct *t, 358static void load_TLS_descriptor(struct thread_struct *t,
@@ -385,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
385static int cvt_gate_to_trap(int vector, const gate_desc *val, 428static int cvt_gate_to_trap(int vector, const gate_desc *val,
386 struct trap_info *info) 429 struct trap_info *info)
387{ 430{
388 if (val->type != 0xf && val->type != 0xe) 431 if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
389 return 0; 432 return 0;
390 433
391 info->vector = vector; 434 info->vector = vector;
@@ -393,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
393 info->cs = gate_segment(*val); 436 info->cs = gate_segment(*val);
394 info->flags = val->dpl; 437 info->flags = val->dpl;
395 /* interrupt gates clear IF */ 438 /* interrupt gates clear IF */
396 if (val->type == 0xe) 439 if (val->type == GATE_INTERRUPT)
397 info->flags |= 4; 440 info->flags |= 1 << 2;
398 441
399 return 1; 442 return 1;
400} 443}
@@ -817,10 +860,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
817 /* Xen takes care of %gs when switching to usermode for us */ 860 /* Xen takes care of %gs when switching to usermode for us */
818 .swapgs = paravirt_nop, 861 .swapgs = paravirt_nop,
819 862
820 .lazy_mode = { 863 .start_context_switch = paravirt_start_context_switch,
821 .enter = paravirt_enter_lazy_cpu, 864 .end_context_switch = xen_end_context_switch,
822 .leave = xen_leave_lazy,
823 },
824}; 865};
825 866
826static const struct pv_apic_ops xen_apic_ops __initdata = { 867static const struct pv_apic_ops xen_apic_ops __initdata = {
@@ -872,7 +913,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
872 .emergency_restart = xen_emergency_restart, 913 .emergency_restart = xen_emergency_restart,
873}; 914};
874 915
875
876/* First C function to be called on Xen boot */ 916/* First C function to be called on Xen boot */
877asmlinkage void __init xen_start_kernel(void) 917asmlinkage void __init xen_start_kernel(void)
878{ 918{
@@ -897,6 +937,8 @@ asmlinkage void __init xen_start_kernel(void)
897 937
898 xen_init_irq_ops(); 938 xen_init_irq_ops();
899 939
940 xen_init_cpuid_mask();
941
900#ifdef CONFIG_X86_LOCAL_APIC 942#ifdef CONFIG_X86_LOCAL_APIC
901 /* 943 /*
902 * set up the basic apic ops. 944 * set up the basic apic ops.
@@ -938,6 +980,11 @@ asmlinkage void __init xen_start_kernel(void)
938 if (!xen_initial_domain()) 980 if (!xen_initial_domain())
939 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); 981 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
940 982
983#ifdef CONFIG_X86_64
984 /* Work out if we support NX */
985 check_efer();
986#endif
987
941 /* Don't do the full vcpu_info placement stuff until we have a 988 /* Don't do the full vcpu_info placement stuff until we have a
942 possible map and a non-dummy shared_info. */ 989 possible map and a non-dummy shared_info. */
943 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 990 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index db3802fb7b84..77b242c9a11e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn)
184} 184}
185 185
186/* Build the parallel p2m_top_mfn structures */ 186/* Build the parallel p2m_top_mfn structures */
187void xen_setup_mfn_list_list(void) 187static void __init xen_build_mfn_list_list(void)
188{ 188{
189 unsigned pfn, idx; 189 unsigned pfn, idx;
190 190
@@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void)
198 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; 198 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
199 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); 199 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
200 } 200 }
201}
201 202
203void xen_setup_mfn_list_list(void)
204{
202 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); 205 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
203 206
204 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = 207 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void)
218 221
219 p2m_top[topidx] = &mfn_list[pfn]; 222 p2m_top[topidx] = &mfn_list[pfn];
220 } 223 }
224
225 xen_build_mfn_list_list();
221} 226}
222 227
223unsigned long get_phys_to_machine(unsigned long pfn) 228unsigned long get_phys_to_machine(unsigned long pfn)
@@ -233,47 +238,74 @@ unsigned long get_phys_to_machine(unsigned long pfn)
233} 238}
234EXPORT_SYMBOL_GPL(get_phys_to_machine); 239EXPORT_SYMBOL_GPL(get_phys_to_machine);
235 240
236static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) 241/* install a new p2m_top page */
242bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
237{ 243{
238 unsigned long *p; 244 unsigned topidx = p2m_top_index(pfn);
245 unsigned long **pfnp, *mfnp;
239 unsigned i; 246 unsigned i;
240 247
241 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); 248 pfnp = &p2m_top[topidx];
242 BUG_ON(p == NULL); 249 mfnp = &p2m_top_mfn[topidx];
243 250
244 for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) 251 for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
245 p[i] = INVALID_P2M_ENTRY; 252 p[i] = INVALID_P2M_ENTRY;
246 253
247 if (cmpxchg(pp, p2m_missing, p) != p2m_missing) 254 if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
248 free_page((unsigned long)p);
249 else
250 *mfnp = virt_to_mfn(p); 255 *mfnp = virt_to_mfn(p);
256 return true;
257 }
258
259 return false;
251} 260}
252 261
253void set_phys_to_machine(unsigned long pfn, unsigned long mfn) 262static void alloc_p2m(unsigned long pfn)
254{ 263{
255 unsigned topidx, idx; 264 unsigned long *p;
256 265
257 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { 266 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
258 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); 267 BUG_ON(p == NULL);
259 return; 268
260 } 269 if (!install_p2mtop_page(pfn, p))
270 free_page((unsigned long)p);
271}
272
273/* Try to install p2m mapping; fail if intermediate bits missing */
274bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
275{
276 unsigned topidx, idx;
261 277
262 if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { 278 if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
263 BUG_ON(mfn != INVALID_P2M_ENTRY); 279 BUG_ON(mfn != INVALID_P2M_ENTRY);
264 return; 280 return true;
265 } 281 }
266 282
267 topidx = p2m_top_index(pfn); 283 topidx = p2m_top_index(pfn);
268 if (p2m_top[topidx] == p2m_missing) { 284 if (p2m_top[topidx] == p2m_missing) {
269 /* no need to allocate a page to store an invalid entry */
270 if (mfn == INVALID_P2M_ENTRY) 285 if (mfn == INVALID_P2M_ENTRY)
271 return; 286 return true;
272 alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); 287 return false;
273 } 288 }
274 289
275 idx = p2m_index(pfn); 290 idx = p2m_index(pfn);
276 p2m_top[topidx][idx] = mfn; 291 p2m_top[topidx][idx] = mfn;
292
293 return true;
294}
295
296void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
297{
298 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
299 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
300 return;
301 }
302
303 if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
304 alloc_p2m(pfn);
305
306 if (!__set_phys_to_machine(pfn, mfn))
307 BUG();
308 }
277} 309}
278 310
279unsigned long arbitrary_virt_to_mfn(void *vaddr) 311unsigned long arbitrary_virt_to_mfn(void *vaddr)
@@ -419,10 +451,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
419void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 451void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
420 pte_t *ptep, pte_t pteval) 452 pte_t *ptep, pte_t pteval)
421{ 453{
422 /* updates to init_mm may be done without lock */
423 if (mm == &init_mm)
424 preempt_disable();
425
426 ADD_STATS(set_pte_at, 1); 454 ADD_STATS(set_pte_at, 1);
427// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); 455// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
428 ADD_STATS(set_pte_at_current, mm == current->mm); 456 ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -443,9 +471,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
443 } 471 }
444 xen_set_pte(ptep, pteval); 472 xen_set_pte(ptep, pteval);
445 473
446out: 474out: return;
447 if (mm == &init_mm)
448 preempt_enable();
449} 475}
450 476
451pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, 477pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@ -987,7 +1013,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
987 return 0; 1013 return 0;
988} 1014}
989 1015
990void __init xen_mark_init_mm_pinned(void) 1016static void __init xen_mark_init_mm_pinned(void)
991{ 1017{
992 xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); 1018 xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
993} 1019}
@@ -1119,10 +1145,8 @@ static void drop_other_mm_ref(void *info)
1119 1145
1120 /* If this cpu still has a stale cr3 reference, then make sure 1146 /* If this cpu still has a stale cr3 reference, then make sure
1121 it has been flushed. */ 1147 it has been flushed. */
1122 if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { 1148 if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
1123 load_cr3(swapper_pg_dir); 1149 load_cr3(swapper_pg_dir);
1124 arch_flush_lazy_cpu_mode();
1125 }
1126} 1150}
1127 1151
1128static void xen_drop_mm_ref(struct mm_struct *mm) 1152static void xen_drop_mm_ref(struct mm_struct *mm)
@@ -1135,7 +1159,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1135 load_cr3(swapper_pg_dir); 1159 load_cr3(swapper_pg_dir);
1136 else 1160 else
1137 leave_mm(smp_processor_id()); 1161 leave_mm(smp_processor_id());
1138 arch_flush_lazy_cpu_mode();
1139 } 1162 }
1140 1163
1141 /* Get the "official" set of cpus referring to our pagetable. */ 1164 /* Get the "official" set of cpus referring to our pagetable. */
@@ -1270,8 +1293,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1270 } *args; 1293 } *args;
1271 struct multicall_space mcs; 1294 struct multicall_space mcs;
1272 1295
1273 BUG_ON(cpumask_empty(cpus)); 1296 if (cpumask_empty(cpus))
1274 BUG_ON(!mm); 1297 return; /* nothing to do */
1275 1298
1276 mcs = xen_mc_entry(sizeof(*args)); 1299 mcs = xen_mc_entry(sizeof(*args));
1277 args = mcs.args; 1300 args = mcs.args;
@@ -1438,6 +1461,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
1438} 1461}
1439#endif 1462#endif
1440 1463
1464static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1465{
1466 struct mmuext_op op;
1467 op.cmd = cmd;
1468 op.arg1.mfn = pfn_to_mfn(pfn);
1469 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
1470 BUG();
1471}
1472
1441/* Early in boot, while setting up the initial pagetable, assume 1473/* Early in boot, while setting up the initial pagetable, assume
1442 everything is pinned. */ 1474 everything is pinned. */
1443static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) 1475static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1446,22 +1478,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1446 BUG_ON(mem_map); /* should only be used early */ 1478 BUG_ON(mem_map); /* should only be used early */
1447#endif 1479#endif
1448 make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); 1480 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1481 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1482}
1483
1484/* Used for pmd and pud */
1485static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1486{
1487#ifdef CONFIG_FLATMEM
1488 BUG_ON(mem_map); /* should only be used early */
1489#endif
1490 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1449} 1491}
1450 1492
1451/* Early release_pte assumes that all pts are pinned, since there's 1493/* Early release_pte assumes that all pts are pinned, since there's
1452 only init_mm and anything attached to that is pinned. */ 1494 only init_mm and anything attached to that is pinned. */
1453static void xen_release_pte_init(unsigned long pfn) 1495static __init void xen_release_pte_init(unsigned long pfn)
1454{ 1496{
1497 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1455 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 1498 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1456} 1499}
1457 1500
1458static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 1501static __init void xen_release_pmd_init(unsigned long pfn)
1459{ 1502{
1460 struct mmuext_op op; 1503 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1461 op.cmd = cmd;
1462 op.arg1.mfn = pfn_to_mfn(pfn);
1463 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
1464 BUG();
1465} 1504}
1466 1505
1467/* This needs to make sure the new pte page is pinned iff its being 1506/* This needs to make sure the new pte page is pinned iff its being
@@ -1819,6 +1858,13 @@ __init void xen_post_allocator_init(void)
1819 xen_mark_init_mm_pinned(); 1858 xen_mark_init_mm_pinned();
1820} 1859}
1821 1860
1861static void xen_leave_lazy_mmu(void)
1862{
1863 preempt_disable();
1864 xen_mc_flush();
1865 paravirt_leave_lazy_mmu();
1866 preempt_enable();
1867}
1822 1868
1823const struct pv_mmu_ops xen_mmu_ops __initdata = { 1869const struct pv_mmu_ops xen_mmu_ops __initdata = {
1824 .pagetable_setup_start = xen_pagetable_setup_start, 1870 .pagetable_setup_start = xen_pagetable_setup_start,
@@ -1843,9 +1889,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
1843 1889
1844 .alloc_pte = xen_alloc_pte_init, 1890 .alloc_pte = xen_alloc_pte_init,
1845 .release_pte = xen_release_pte_init, 1891 .release_pte = xen_release_pte_init,
1846 .alloc_pmd = xen_alloc_pte_init, 1892 .alloc_pmd = xen_alloc_pmd_init,
1847 .alloc_pmd_clone = paravirt_nop, 1893 .alloc_pmd_clone = paravirt_nop,
1848 .release_pmd = xen_release_pte_init, 1894 .release_pmd = xen_release_pmd_init,
1849 1895
1850#ifdef CONFIG_HIGHPTE 1896#ifdef CONFIG_HIGHPTE
1851 .kmap_atomic_pte = xen_kmap_atomic_pte, 1897 .kmap_atomic_pte = xen_kmap_atomic_pte,
@@ -1883,8 +1929,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
1883 .make_pud = PV_CALLEE_SAVE(xen_make_pud), 1929 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
1884 .set_pgd = xen_set_pgd_hyper, 1930 .set_pgd = xen_set_pgd_hyper,
1885 1931
1886 .alloc_pud = xen_alloc_pte_init, 1932 .alloc_pud = xen_alloc_pmd_init,
1887 .release_pud = xen_release_pte_init, 1933 .release_pud = xen_release_pmd_init,
1888#endif /* PAGETABLE_LEVELS == 4 */ 1934#endif /* PAGETABLE_LEVELS == 4 */
1889 1935
1890 .activate_mm = xen_activate_mm, 1936 .activate_mm = xen_activate_mm,
@@ -1893,7 +1939,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
1893 1939
1894 .lazy_mode = { 1940 .lazy_mode = {
1895 .enter = paravirt_enter_lazy_mmu, 1941 .enter = paravirt_enter_lazy_mmu,
1896 .leave = xen_leave_lazy, 1942 .leave = xen_leave_lazy_mmu,
1897 }, 1943 },
1898 1944
1899 .set_fixmap = xen_set_fixmap, 1945 .set_fixmap = xen_set_fixmap,
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 24d1b44a337d..da7302624897 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -11,6 +11,9 @@ enum pt_level {
11}; 11};
12 12
13 13
14bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
15bool install_p2mtop_page(unsigned long pfn, unsigned long *p);
16
14void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 17void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
15 18
16 19
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 585a6e330837..429834ec1687 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
317 BUG_ON(rc); 317 BUG_ON(rc);
318 318
319 while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { 319 while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
320 HYPERVISOR_sched_op(SCHEDOP_yield, 0); 320 HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
321 barrier(); 321 barrier();
322 } 322 }
323 323
@@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
422 /* Make sure other vcpus get a chance to run if they need to. */ 422 /* Make sure other vcpus get a chance to run if they need to. */
423 for_each_cpu(cpu, mask) { 423 for_each_cpu(cpu, mask) {
424 if (xen_vcpu_stolen(cpu)) { 424 if (xen_vcpu_stolen(cpu)) {
425 HYPERVISOR_sched_op(SCHEDOP_yield, 0); 425 HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
426 break; 426 break;
427 } 427 }
428 } 428 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f5ef2632ea2..5c50a1017a37 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
30void xen_ident_map_ISA(void); 30void xen_ident_map_ISA(void);
31void xen_reserve_top(void); 31void xen_reserve_top(void);
32 32
33void xen_leave_lazy(void);
34void xen_post_allocator_init(void); 33void xen_post_allocator_init(void);
35 34
36char * __init xen_memory_setup(void); 35char * __init xen_memory_setup(void);
@@ -57,8 +56,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
57 56
58bool xen_vcpu_stolen(int vcpu); 57bool xen_vcpu_stolen(int vcpu);
59 58
60void xen_mark_init_mm_pinned(void);
61
62void xen_setup_vcpu_info_placement(void); 59void xen_setup_vcpu_info_placement(void);
63 60
64#ifdef CONFIG_SMP 61#ifdef CONFIG_SMP
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 8ac9cddac575..cab100acf983 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES
18 secure, but slightly less efficient. 18 secure, but slightly less efficient.
19 If in doubt, say yes. 19 If in doubt, say yes.
20 20
21config XEN_DEV_EVTCHN
22 tristate "Xen /dev/xen/evtchn device"
23 depends on XEN
24 default y
25 help
26 The evtchn driver allows a userspace process to triger event
27 channels and to receive notification of an event channel
28 firing.
29 If in doubt, say yes.
30
21config XENFS 31config XENFS
22 tristate "Xen filesystem" 32 tristate "Xen filesystem"
23 depends on XEN 33 depends on XEN
@@ -41,3 +51,13 @@ config XEN_COMPAT_XENFS
41 a xen platform. 51 a xen platform.
42 If in doubt, say yes. 52 If in doubt, say yes.
43 53
54config XEN_SYS_HYPERVISOR
55 bool "Create xen entries under /sys/hypervisor"
56 depends on XEN && SYSFS
57 select SYS_HYPERVISOR
58 default y
59 help
60 Create entries under /sys/hypervisor describing the Xen
61 hypervisor environment. When running native or in another
62 virtual environment, /sys/hypervisor will still be present,
63 but will have no xen contents. \ No newline at end of file
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ff8accc9e103..ec2a39b1e26f 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,4 +4,6 @@ obj-y += xenbus/
4obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o 4obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
5obj-$(CONFIG_XEN_XENCOMM) += xencomm.o 5obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
6obj-$(CONFIG_XEN_BALLOON) += balloon.o 6obj-$(CONFIG_XEN_BALLOON) += balloon.o
7obj-$(CONFIG_XENFS) += xenfs/ \ No newline at end of file 7obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
8obj-$(CONFIG_XENFS) += xenfs/
9obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 5f54c01c1568..bdfd584ad853 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -21,29 +21,41 @@ static void disable_hotplug_cpu(int cpu)
21 set_cpu_present(cpu, false); 21 set_cpu_present(cpu, false);
22} 22}
23 23
24static void vcpu_hotplug(unsigned int cpu) 24static int vcpu_online(unsigned int cpu)
25{ 25{
26 int err; 26 int err;
27 char dir[32], state[32]; 27 char dir[32], state[32];
28 28
29 if (!cpu_possible(cpu))
30 return;
31
32 sprintf(dir, "cpu/%u", cpu); 29 sprintf(dir, "cpu/%u", cpu);
33 err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); 30 err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
34 if (err != 1) { 31 if (err != 1) {
35 printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); 32 printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
36 return; 33 return err;
37 } 34 }
38 35
39 if (strcmp(state, "online") == 0) { 36 if (strcmp(state, "online") == 0)
37 return 1;
38 else if (strcmp(state, "offline") == 0)
39 return 0;
40
41 printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu);
42 return -EINVAL;
43}
44static void vcpu_hotplug(unsigned int cpu)
45{
46 if (!cpu_possible(cpu))
47 return;
48
49 switch (vcpu_online(cpu)) {
50 case 1:
40 enable_hotplug_cpu(cpu); 51 enable_hotplug_cpu(cpu);
41 } else if (strcmp(state, "offline") == 0) { 52 break;
53 case 0:
42 (void)cpu_down(cpu); 54 (void)cpu_down(cpu);
43 disable_hotplug_cpu(cpu); 55 disable_hotplug_cpu(cpu);
44 } else { 56 break;
45 printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", 57 default:
46 state, cpu); 58 break;
47 } 59 }
48} 60}
49 61
@@ -64,12 +76,20 @@ static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
64static int setup_cpu_watcher(struct notifier_block *notifier, 76static int setup_cpu_watcher(struct notifier_block *notifier,
65 unsigned long event, void *data) 77 unsigned long event, void *data)
66{ 78{
79 int cpu;
67 static struct xenbus_watch cpu_watch = { 80 static struct xenbus_watch cpu_watch = {
68 .node = "cpu", 81 .node = "cpu",
69 .callback = handle_vcpu_hotplug_event}; 82 .callback = handle_vcpu_hotplug_event};
70 83
71 (void)register_xenbus_watch(&cpu_watch); 84 (void)register_xenbus_watch(&cpu_watch);
72 85
86 for_each_possible_cpu(cpu) {
87 if (vcpu_online(cpu) == 0) {
88 (void)cpu_down(cpu);
89 cpu_clear(cpu, cpu_present_map);
90 }
91 }
92
73 return NOTIFY_DONE; 93 return NOTIFY_DONE;
74} 94}
75 95
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af5dba0..1cd2a0e15ae8 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq)
151 return info_for_irq(irq)->evtchn; 151 return info_for_irq(irq)->evtchn;
152} 152}
153 153
154unsigned irq_from_evtchn(unsigned int evtchn)
155{
156 return evtchn_to_irq[evtchn];
157}
158EXPORT_SYMBOL_GPL(irq_from_evtchn);
159
154static enum ipi_vector ipi_from_irq(unsigned irq) 160static enum ipi_vector ipi_from_irq(unsigned irq)
155{ 161{
156 struct irq_info *info = info_for_irq(irq); 162 struct irq_info *info = info_for_irq(irq);
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
new file mode 100644
index 000000000000..af031950f9b1
--- /dev/null
+++ b/drivers/xen/evtchn.c
@@ -0,0 +1,507 @@
1/******************************************************************************
2 * evtchn.c
3 *
4 * Driver for receiving and demuxing event-channel signals.
5 *
6 * Copyright (c) 2004-2005, K A Fraser
7 * Multi-process extensions Copyright (c) 2004, Steven Smith
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#include <linux/module.h>
35#include <linux/kernel.h>
36#include <linux/sched.h>
37#include <linux/slab.h>
38#include <linux/string.h>
39#include <linux/errno.h>
40#include <linux/fs.h>
41#include <linux/errno.h>
42#include <linux/miscdevice.h>
43#include <linux/major.h>
44#include <linux/proc_fs.h>
45#include <linux/stat.h>
46#include <linux/poll.h>
47#include <linux/irq.h>
48#include <linux/init.h>
49#include <linux/gfp.h>
50#include <linux/mutex.h>
51#include <linux/cpu.h>
52#include <xen/events.h>
53#include <xen/evtchn.h>
54#include <asm/xen/hypervisor.h>
55
56struct per_user_data {
57 struct mutex bind_mutex; /* serialize bind/unbind operations */
58
59 /* Notification ring, accessed via /dev/xen/evtchn. */
60#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
61#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
62 evtchn_port_t *ring;
63 unsigned int ring_cons, ring_prod, ring_overflow;
64 struct mutex ring_cons_mutex; /* protect against concurrent readers */
65
66 /* Processes wait on this queue when ring is empty. */
67 wait_queue_head_t evtchn_wait;
68 struct fasync_struct *evtchn_async_queue;
69 const char *name;
70};
71
72/* Who's bound to each port? */
73static struct per_user_data *port_user[NR_EVENT_CHANNELS];
74static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
75
76irqreturn_t evtchn_interrupt(int irq, void *data)
77{
78 unsigned int port = (unsigned long)data;
79 struct per_user_data *u;
80
81 spin_lock(&port_user_lock);
82
83 u = port_user[port];
84
85 disable_irq_nosync(irq);
86
87 if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
88 u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
89 wmb(); /* Ensure ring contents visible */
90 if (u->ring_cons == u->ring_prod++) {
91 wake_up_interruptible(&u->evtchn_wait);
92 kill_fasync(&u->evtchn_async_queue,
93 SIGIO, POLL_IN);
94 }
95 } else {
96 u->ring_overflow = 1;
97 }
98
99 spin_unlock(&port_user_lock);
100
101 return IRQ_HANDLED;
102}
103
104static ssize_t evtchn_read(struct file *file, char __user *buf,
105 size_t count, loff_t *ppos)
106{
107 int rc;
108 unsigned int c, p, bytes1 = 0, bytes2 = 0;
109 struct per_user_data *u = file->private_data;
110
111 /* Whole number of ports. */
112 count &= ~(sizeof(evtchn_port_t)-1);
113
114 if (count == 0)
115 return 0;
116
117 if (count > PAGE_SIZE)
118 count = PAGE_SIZE;
119
120 for (;;) {
121 mutex_lock(&u->ring_cons_mutex);
122
123 rc = -EFBIG;
124 if (u->ring_overflow)
125 goto unlock_out;
126
127 c = u->ring_cons;
128 p = u->ring_prod;
129 if (c != p)
130 break;
131
132 mutex_unlock(&u->ring_cons_mutex);
133
134 if (file->f_flags & O_NONBLOCK)
135 return -EAGAIN;
136
137 rc = wait_event_interruptible(u->evtchn_wait,
138 u->ring_cons != u->ring_prod);
139 if (rc)
140 return rc;
141 }
142
143 /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
144 if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
145 bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
146 sizeof(evtchn_port_t);
147 bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
148 } else {
149 bytes1 = (p - c) * sizeof(evtchn_port_t);
150 bytes2 = 0;
151 }
152
153 /* Truncate chunks according to caller's maximum byte count. */
154 if (bytes1 > count) {
155 bytes1 = count;
156 bytes2 = 0;
157 } else if ((bytes1 + bytes2) > count) {
158 bytes2 = count - bytes1;
159 }
160
161 rc = -EFAULT;
162 rmb(); /* Ensure that we see the port before we copy it. */
163 if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
164 ((bytes2 != 0) &&
165 copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
166 goto unlock_out;
167
168 u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
169 rc = bytes1 + bytes2;
170
171 unlock_out:
172 mutex_unlock(&u->ring_cons_mutex);
173 return rc;
174}
175
176static ssize_t evtchn_write(struct file *file, const char __user *buf,
177 size_t count, loff_t *ppos)
178{
179 int rc, i;
180 evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
181 struct per_user_data *u = file->private_data;
182
183 if (kbuf == NULL)
184 return -ENOMEM;
185
186 /* Whole number of ports. */
187 count &= ~(sizeof(evtchn_port_t)-1);
188
189 rc = 0;
190 if (count == 0)
191 goto out;
192
193 if (count > PAGE_SIZE)
194 count = PAGE_SIZE;
195
196 rc = -EFAULT;
197 if (copy_from_user(kbuf, buf, count) != 0)
198 goto out;
199
200 spin_lock_irq(&port_user_lock);
201 for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
202 if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
203 enable_irq(irq_from_evtchn(kbuf[i]));
204 spin_unlock_irq(&port_user_lock);
205
206 rc = count;
207
208 out:
209 free_page((unsigned long)kbuf);
210 return rc;
211}
212
213static int evtchn_bind_to_user(struct per_user_data *u, int port)
214{
215 int rc = 0;
216
217 /*
218 * Ports are never reused, so every caller should pass in a
219 * unique port.
220 *
221 * (Locking not necessary because we haven't registered the
222 * interrupt handler yet, and our caller has already
223 * serialized bind operations.)
224 */
225 BUG_ON(port_user[port] != NULL);
226 port_user[port] = u;
227
228 rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
229 u->name, (void *)(unsigned long)port);
230 if (rc >= 0)
231 rc = 0;
232
233 return rc;
234}
235
236static void evtchn_unbind_from_user(struct per_user_data *u, int port)
237{
238 int irq = irq_from_evtchn(port);
239
240 unbind_from_irqhandler(irq, (void *)(unsigned long)port);
241
242 /* make sure we unbind the irq handler before clearing the port */
243 barrier();
244
245 port_user[port] = NULL;
246}
247
248static long evtchn_ioctl(struct file *file,
249 unsigned int cmd, unsigned long arg)
250{
251 int rc;
252 struct per_user_data *u = file->private_data;
253 void __user *uarg = (void __user *) arg;
254
255 /* Prevent bind from racing with unbind */
256 mutex_lock(&u->bind_mutex);
257
258 switch (cmd) {
259 case IOCTL_EVTCHN_BIND_VIRQ: {
260 struct ioctl_evtchn_bind_virq bind;
261 struct evtchn_bind_virq bind_virq;
262
263 rc = -EFAULT;
264 if (copy_from_user(&bind, uarg, sizeof(bind)))
265 break;
266
267 bind_virq.virq = bind.virq;
268 bind_virq.vcpu = 0;
269 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
270 &bind_virq);
271 if (rc != 0)
272 break;
273
274 rc = evtchn_bind_to_user(u, bind_virq.port);
275 if (rc == 0)
276 rc = bind_virq.port;
277 break;
278 }
279
280 case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
281 struct ioctl_evtchn_bind_interdomain bind;
282 struct evtchn_bind_interdomain bind_interdomain;
283
284 rc = -EFAULT;
285 if (copy_from_user(&bind, uarg, sizeof(bind)))
286 break;
287
288 bind_interdomain.remote_dom = bind.remote_domain;
289 bind_interdomain.remote_port = bind.remote_port;
290 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
291 &bind_interdomain);
292 if (rc != 0)
293 break;
294
295 rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
296 if (rc == 0)
297 rc = bind_interdomain.local_port;
298 break;
299 }
300
301 case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
302 struct ioctl_evtchn_bind_unbound_port bind;
303 struct evtchn_alloc_unbound alloc_unbound;
304
305 rc = -EFAULT;
306 if (copy_from_user(&bind, uarg, sizeof(bind)))
307 break;
308
309 alloc_unbound.dom = DOMID_SELF;
310 alloc_unbound.remote_dom = bind.remote_domain;
311 rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
312 &alloc_unbound);
313 if (rc != 0)
314 break;
315
316 rc = evtchn_bind_to_user(u, alloc_unbound.port);
317 if (rc == 0)
318 rc = alloc_unbound.port;
319 break;
320 }
321
322 case IOCTL_EVTCHN_UNBIND: {
323 struct ioctl_evtchn_unbind unbind;
324
325 rc = -EFAULT;
326 if (copy_from_user(&unbind, uarg, sizeof(unbind)))
327 break;
328
329 rc = -EINVAL;
330 if (unbind.port >= NR_EVENT_CHANNELS)
331 break;
332
333 spin_lock_irq(&port_user_lock);
334
335 rc = -ENOTCONN;
336 if (port_user[unbind.port] != u) {
337 spin_unlock_irq(&port_user_lock);
338 break;
339 }
340
341 evtchn_unbind_from_user(u, unbind.port);
342
343 spin_unlock_irq(&port_user_lock);
344
345 rc = 0;
346 break;
347 }
348
349 case IOCTL_EVTCHN_NOTIFY: {
350 struct ioctl_evtchn_notify notify;
351
352 rc = -EFAULT;
353 if (copy_from_user(&notify, uarg, sizeof(notify)))
354 break;
355
356 if (notify.port >= NR_EVENT_CHANNELS) {
357 rc = -EINVAL;
358 } else if (port_user[notify.port] != u) {
359 rc = -ENOTCONN;
360 } else {
361 notify_remote_via_evtchn(notify.port);
362 rc = 0;
363 }
364 break;
365 }
366
367 case IOCTL_EVTCHN_RESET: {
368 /* Initialise the ring to empty. Clear errors. */
369 mutex_lock(&u->ring_cons_mutex);
370 spin_lock_irq(&port_user_lock);
371 u->ring_cons = u->ring_prod = u->ring_overflow = 0;
372 spin_unlock_irq(&port_user_lock);
373 mutex_unlock(&u->ring_cons_mutex);
374 rc = 0;
375 break;
376 }
377
378 default:
379 rc = -ENOSYS;
380 break;
381 }
382 mutex_unlock(&u->bind_mutex);
383
384 return rc;
385}
386
387static unsigned int evtchn_poll(struct file *file, poll_table *wait)
388{
389 unsigned int mask = POLLOUT | POLLWRNORM;
390 struct per_user_data *u = file->private_data;
391
392 poll_wait(file, &u->evtchn_wait, wait);
393 if (u->ring_cons != u->ring_prod)
394 mask |= POLLIN | POLLRDNORM;
395 if (u->ring_overflow)
396 mask = POLLERR;
397 return mask;
398}
399
400static int evtchn_fasync(int fd, struct file *filp, int on)
401{
402 struct per_user_data *u = filp->private_data;
403 return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
404}
405
406static int evtchn_open(struct inode *inode, struct file *filp)
407{
408 struct per_user_data *u;
409
410 u = kzalloc(sizeof(*u), GFP_KERNEL);
411 if (u == NULL)
412 return -ENOMEM;
413
414 u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
415 if (u->name == NULL) {
416 kfree(u);
417 return -ENOMEM;
418 }
419
420 init_waitqueue_head(&u->evtchn_wait);
421
422 u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
423 if (u->ring == NULL) {
424 kfree(u->name);
425 kfree(u);
426 return -ENOMEM;
427 }
428
429 mutex_init(&u->bind_mutex);
430 mutex_init(&u->ring_cons_mutex);
431
432 filp->private_data = u;
433
434 return 0;
435}
436
437static int evtchn_release(struct inode *inode, struct file *filp)
438{
439 int i;
440 struct per_user_data *u = filp->private_data;
441
442 spin_lock_irq(&port_user_lock);
443
444 free_page((unsigned long)u->ring);
445
446 for (i = 0; i < NR_EVENT_CHANNELS; i++) {
447 if (port_user[i] != u)
448 continue;
449
450 evtchn_unbind_from_user(port_user[i], i);
451 }
452
453 spin_unlock_irq(&port_user_lock);
454
455 kfree(u->name);
456 kfree(u);
457
458 return 0;
459}
460
461static const struct file_operations evtchn_fops = {
462 .owner = THIS_MODULE,
463 .read = evtchn_read,
464 .write = evtchn_write,
465 .unlocked_ioctl = evtchn_ioctl,
466 .poll = evtchn_poll,
467 .fasync = evtchn_fasync,
468 .open = evtchn_open,
469 .release = evtchn_release,
470};
471
472static struct miscdevice evtchn_miscdev = {
473 .minor = MISC_DYNAMIC_MINOR,
474 .name = "evtchn",
475 .fops = &evtchn_fops,
476};
477static int __init evtchn_init(void)
478{
479 int err;
480
481 if (!xen_domain())
482 return -ENODEV;
483
484 spin_lock_init(&port_user_lock);
485 memset(port_user, 0, sizeof(port_user));
486
487 /* Create '/dev/misc/evtchn'. */
488 err = misc_register(&evtchn_miscdev);
489 if (err != 0) {
490 printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
491 return err;
492 }
493
494 printk(KERN_INFO "Event-channel device installed.\n");
495
496 return 0;
497}
498
499static void __exit evtchn_cleanup(void)
500{
501 misc_deregister(&evtchn_miscdev);
502}
503
504module_init(evtchn_init);
505module_exit(evtchn_cleanup);
506
507MODULE_LICENSE("GPL");
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 0d61db1e7b49..fddc2025dece 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -62,14 +62,15 @@ static int xen_suspend(void *data)
62 gnttab_resume(); 62 gnttab_resume();
63 xen_mm_unpin_all(); 63 xen_mm_unpin_all();
64 64
65 sysdev_resume();
66
67 if (!*cancelled) { 65 if (!*cancelled) {
68 xen_irq_resume(); 66 xen_irq_resume();
69 xen_console_resume(); 67 xen_console_resume();
70 xen_timer_resume(); 68 xen_timer_resume();
71 } 69 }
72 70
71 sysdev_resume();
72 device_power_up(PMSG_RESUME);
73
73 return 0; 74 return 0;
74} 75}
75 76
@@ -97,9 +98,8 @@ static void do_suspend(void)
97 goto out; 98 goto out;
98 } 99 }
99 100
100 printk("suspending xenbus...\n"); 101 printk(KERN_DEBUG "suspending xenstore...\n");
101 /* XXX use normal device tree? */ 102 xs_suspend();
102 xenbus_suspend();
103 103
104 err = device_power_down(PMSG_SUSPEND); 104 err = device_power_down(PMSG_SUSPEND);
105 if (err) { 105 if (err) {
@@ -115,9 +115,9 @@ static void do_suspend(void)
115 115
116 if (!cancelled) { 116 if (!cancelled) {
117 xen_arch_resume(); 117 xen_arch_resume();
118 xenbus_resume(); 118 xs_resume();
119 } else 119 } else
120 xenbus_suspend_cancel(); 120 xs_suspend_cancel();
121 121
122 device_power_up(PMSG_RESUME); 122 device_power_up(PMSG_RESUME);
123 123
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
new file mode 100644
index 000000000000..88a60e03ccf0
--- /dev/null
+++ b/drivers/xen/sys-hypervisor.c
@@ -0,0 +1,445 @@
1/*
2 * copyright (c) 2006 IBM Corporation
3 * Authored by: Mike D. Day <ncmike@us.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <linux/kobject.h>
13
14#include <asm/xen/hypervisor.h>
15#include <asm/xen/hypercall.h>
16
17#include <xen/xenbus.h>
18#include <xen/interface/xen.h>
19#include <xen/interface/version.h>
20
21#define HYPERVISOR_ATTR_RO(_name) \
22static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
23
24#define HYPERVISOR_ATTR_RW(_name) \
25static struct hyp_sysfs_attr _name##_attr = \
26 __ATTR(_name, 0644, _name##_show, _name##_store)
27
28struct hyp_sysfs_attr {
29 struct attribute attr;
30 ssize_t (*show)(struct hyp_sysfs_attr *, char *);
31 ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
32 void *hyp_attr_data;
33};
34
35static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
36{
37 return sprintf(buffer, "xen\n");
38}
39
40HYPERVISOR_ATTR_RO(type);
41
42static int __init xen_sysfs_type_init(void)
43{
44 return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
45}
46
47static void xen_sysfs_type_destroy(void)
48{
49 sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
50}
51
52/* xen version attributes */
53static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
54{
55 int version = HYPERVISOR_xen_version(XENVER_version, NULL);
56 if (version)
57 return sprintf(buffer, "%d\n", version >> 16);
58 return -ENODEV;
59}
60
61HYPERVISOR_ATTR_RO(major);
62
63static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
64{
65 int version = HYPERVISOR_xen_version(XENVER_version, NULL);
66 if (version)
67 return sprintf(buffer, "%d\n", version & 0xff);
68 return -ENODEV;
69}
70
71HYPERVISOR_ATTR_RO(minor);
72
73static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
74{
75 int ret = -ENOMEM;
76 char *extra;
77
78 extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
79 if (extra) {
80 ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
81 if (!ret)
82 ret = sprintf(buffer, "%s\n", extra);
83 kfree(extra);
84 }
85
86 return ret;
87}
88
89HYPERVISOR_ATTR_RO(extra);
90
91static struct attribute *version_attrs[] = {
92 &major_attr.attr,
93 &minor_attr.attr,
94 &extra_attr.attr,
95 NULL
96};
97
98static struct attribute_group version_group = {
99 .name = "version",
100 .attrs = version_attrs,
101};
102
103static int __init xen_sysfs_version_init(void)
104{
105 return sysfs_create_group(hypervisor_kobj, &version_group);
106}
107
108static void xen_sysfs_version_destroy(void)
109{
110 sysfs_remove_group(hypervisor_kobj, &version_group);
111}
112
113/* UUID */
114
115static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
116{
117 char *vm, *val;
118 int ret;
119 extern int xenstored_ready;
120
121 if (!xenstored_ready)
122 return -EBUSY;
123
124 vm = xenbus_read(XBT_NIL, "vm", "", NULL);
125 if (IS_ERR(vm))
126 return PTR_ERR(vm);
127 val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
128 kfree(vm);
129 if (IS_ERR(val))
130 return PTR_ERR(val);
131 ret = sprintf(buffer, "%s\n", val);
132 kfree(val);
133 return ret;
134}
135
136HYPERVISOR_ATTR_RO(uuid);
137
138static int __init xen_sysfs_uuid_init(void)
139{
140 return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
141}
142
143static void xen_sysfs_uuid_destroy(void)
144{
145 sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
146}
147
148/* xen compilation attributes */
149
150static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
151{
152 int ret = -ENOMEM;
153 struct xen_compile_info *info;
154
155 info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
156 if (info) {
157 ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
158 if (!ret)
159 ret = sprintf(buffer, "%s\n", info->compiler);
160 kfree(info);
161 }
162
163 return ret;
164}
165
166HYPERVISOR_ATTR_RO(compiler);
167
168static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
169{
170 int ret = -ENOMEM;
171 struct xen_compile_info *info;
172
173 info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
174 if (info) {
175 ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
176 if (!ret)
177 ret = sprintf(buffer, "%s\n", info->compile_by);
178 kfree(info);
179 }
180
181 return ret;
182}
183
184HYPERVISOR_ATTR_RO(compiled_by);
185
186static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
187{
188 int ret = -ENOMEM;
189 struct xen_compile_info *info;
190
191 info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
192 if (info) {
193 ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
194 if (!ret)
195 ret = sprintf(buffer, "%s\n", info->compile_date);
196 kfree(info);
197 }
198
199 return ret;
200}
201
202HYPERVISOR_ATTR_RO(compile_date);
203
204static struct attribute *xen_compile_attrs[] = {
205 &compiler_attr.attr,
206 &compiled_by_attr.attr,
207 &compile_date_attr.attr,
208 NULL
209};
210
211static struct attribute_group xen_compilation_group = {
212 .name = "compilation",
213 .attrs = xen_compile_attrs,
214};
215
216int __init static xen_compilation_init(void)
217{
218 return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
219}
220
221static void xen_compilation_destroy(void)
222{
223 sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
224}
225
226/* xen properties info */
227
228static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
229{
230 int ret = -ENOMEM;
231 char *caps;
232
233 caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
234 if (caps) {
235 ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
236 if (!ret)
237 ret = sprintf(buffer, "%s\n", caps);
238 kfree(caps);
239 }
240
241 return ret;
242}
243
244HYPERVISOR_ATTR_RO(capabilities);
245
246static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
247{
248 int ret = -ENOMEM;
249 char *cset;
250
251 cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
252 if (cset) {
253 ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
254 if (!ret)
255 ret = sprintf(buffer, "%s\n", cset);
256 kfree(cset);
257 }
258
259 return ret;
260}
261
262HYPERVISOR_ATTR_RO(changeset);
263
264static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
265{
266 int ret = -ENOMEM;
267 struct xen_platform_parameters *parms;
268
269 parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
270 if (parms) {
271 ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
272 parms);
273 if (!ret)
274 ret = sprintf(buffer, "%lx\n", parms->virt_start);
275 kfree(parms);
276 }
277
278 return ret;
279}
280
281HYPERVISOR_ATTR_RO(virtual_start);
282
283static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
284{
285 int ret;
286
287 ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
288 if (ret > 0)
289 ret = sprintf(buffer, "%x\n", ret);
290
291 return ret;
292}
293
294HYPERVISOR_ATTR_RO(pagesize);
295
296static ssize_t xen_feature_show(int index, char *buffer)
297{
298 ssize_t ret;
299 struct xen_feature_info info;
300
301 info.submap_idx = index;
302 ret = HYPERVISOR_xen_version(XENVER_get_features, &info);
303 if (!ret)
304 ret = sprintf(buffer, "%08x", info.submap);
305
306 return ret;
307}
308
309static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer)
310{
311 ssize_t len;
312 int i;
313
314 len = 0;
315 for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) {
316 int ret = xen_feature_show(i, buffer + len);
317 if (ret < 0) {
318 if (len == 0)
319 len = ret;
320 break;
321 }
322 len += ret;
323 }
324 if (len > 0)
325 buffer[len++] = '\n';
326
327 return len;
328}
329
330HYPERVISOR_ATTR_RO(features);
331
332static struct attribute *xen_properties_attrs[] = {
333 &capabilities_attr.attr,
334 &changeset_attr.attr,
335 &virtual_start_attr.attr,
336 &pagesize_attr.attr,
337 &features_attr.attr,
338 NULL
339};
340
341static struct attribute_group xen_properties_group = {
342 .name = "properties",
343 .attrs = xen_properties_attrs,
344};
345
346static int __init xen_properties_init(void)
347{
348 return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
349}
350
351static void xen_properties_destroy(void)
352{
353 sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
354}
355
356static int __init hyper_sysfs_init(void)
357{
358 int ret;
359
360 if (!xen_domain())
361 return -ENODEV;
362
363 ret = xen_sysfs_type_init();
364 if (ret)
365 goto out;
366 ret = xen_sysfs_version_init();
367 if (ret)
368 goto version_out;
369 ret = xen_compilation_init();
370 if (ret)
371 goto comp_out;
372 ret = xen_sysfs_uuid_init();
373 if (ret)
374 goto uuid_out;
375 ret = xen_properties_init();
376 if (ret)
377 goto prop_out;
378
379 goto out;
380
381prop_out:
382 xen_sysfs_uuid_destroy();
383uuid_out:
384 xen_compilation_destroy();
385comp_out:
386 xen_sysfs_version_destroy();
387version_out:
388 xen_sysfs_type_destroy();
389out:
390 return ret;
391}
392
393static void __exit hyper_sysfs_exit(void)
394{
395 xen_properties_destroy();
396 xen_compilation_destroy();
397 xen_sysfs_uuid_destroy();
398 xen_sysfs_version_destroy();
399 xen_sysfs_type_destroy();
400
401}
402module_init(hyper_sysfs_init);
403module_exit(hyper_sysfs_exit);
404
405static ssize_t hyp_sysfs_show(struct kobject *kobj,
406 struct attribute *attr,
407 char *buffer)
408{
409 struct hyp_sysfs_attr *hyp_attr;
410 hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
411 if (hyp_attr->show)
412 return hyp_attr->show(hyp_attr, buffer);
413 return 0;
414}
415
416static ssize_t hyp_sysfs_store(struct kobject *kobj,
417 struct attribute *attr,
418 const char *buffer,
419 size_t len)
420{
421 struct hyp_sysfs_attr *hyp_attr;
422 hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
423 if (hyp_attr->store)
424 return hyp_attr->store(hyp_attr, buffer, len);
425 return 0;
426}
427
428static struct sysfs_ops hyp_sysfs_ops = {
429 .show = hyp_sysfs_show,
430 .store = hyp_sysfs_store,
431};
432
433static struct kobj_type hyp_sysfs_kobj_type = {
434 .sysfs_ops = &hyp_sysfs_ops,
435};
436
437static int __init hypervisor_subsys_init(void)
438{
439 if (!xen_domain())
440 return -ENODEV;
441
442 hypervisor_kobj->ktype = &hyp_sysfs_kobj_type;
443 return 0;
444}
445device_initcall(hypervisor_subsys_init);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 773d1cf23283..d42e25d5968d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name);
71 71
72static void xenbus_dev_shutdown(struct device *_dev); 72static void xenbus_dev_shutdown(struct device *_dev);
73 73
74static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
75static int xenbus_dev_resume(struct device *dev);
76
74/* If something in array of ids matches this device, return it. */ 77/* If something in array of ids matches this device, return it. */
75static const struct xenbus_device_id * 78static const struct xenbus_device_id *
76match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) 79match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = {
188 .remove = xenbus_dev_remove, 191 .remove = xenbus_dev_remove,
189 .shutdown = xenbus_dev_shutdown, 192 .shutdown = xenbus_dev_shutdown,
190 .dev_attrs = xenbus_dev_attrs, 193 .dev_attrs = xenbus_dev_attrs,
194
195 .suspend = xenbus_dev_suspend,
196 .resume = xenbus_dev_resume,
191 }, 197 },
192}; 198};
193 199
@@ -654,6 +660,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
654 660
655 kfree(root); 661 kfree(root);
656} 662}
663EXPORT_SYMBOL_GPL(xenbus_dev_changed);
657 664
658static void frontend_changed(struct xenbus_watch *watch, 665static void frontend_changed(struct xenbus_watch *watch,
659 const char **vec, unsigned int len) 666 const char **vec, unsigned int len)
@@ -669,7 +676,7 @@ static struct xenbus_watch fe_watch = {
669 .callback = frontend_changed, 676 .callback = frontend_changed,
670}; 677};
671 678
672static int suspend_dev(struct device *dev, void *data) 679static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
673{ 680{
674 int err = 0; 681 int err = 0;
675 struct xenbus_driver *drv; 682 struct xenbus_driver *drv;
@@ -682,35 +689,14 @@ static int suspend_dev(struct device *dev, void *data)
682 drv = to_xenbus_driver(dev->driver); 689 drv = to_xenbus_driver(dev->driver);
683 xdev = container_of(dev, struct xenbus_device, dev); 690 xdev = container_of(dev, struct xenbus_device, dev);
684 if (drv->suspend) 691 if (drv->suspend)
685 err = drv->suspend(xdev); 692 err = drv->suspend(xdev, state);
686 if (err) 693 if (err)
687 printk(KERN_WARNING 694 printk(KERN_WARNING
688 "xenbus: suspend %s failed: %i\n", dev_name(dev), err); 695 "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
689 return 0; 696 return 0;
690} 697}
691 698
692static int suspend_cancel_dev(struct device *dev, void *data) 699static int xenbus_dev_resume(struct device *dev)
693{
694 int err = 0;
695 struct xenbus_driver *drv;
696 struct xenbus_device *xdev;
697
698 DPRINTK("");
699
700 if (dev->driver == NULL)
701 return 0;
702 drv = to_xenbus_driver(dev->driver);
703 xdev = container_of(dev, struct xenbus_device, dev);
704 if (drv->suspend_cancel)
705 err = drv->suspend_cancel(xdev);
706 if (err)
707 printk(KERN_WARNING
708 "xenbus: suspend_cancel %s failed: %i\n",
709 dev_name(dev), err);
710 return 0;
711}
712
713static int resume_dev(struct device *dev, void *data)
714{ 700{
715 int err; 701 int err;
716 struct xenbus_driver *drv; 702 struct xenbus_driver *drv;
@@ -755,33 +741,6 @@ static int resume_dev(struct device *dev, void *data)
755 return 0; 741 return 0;
756} 742}
757 743
758void xenbus_suspend(void)
759{
760 DPRINTK("");
761
762 bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
763 xenbus_backend_suspend(suspend_dev);
764 xs_suspend();
765}
766EXPORT_SYMBOL_GPL(xenbus_suspend);
767
768void xenbus_resume(void)
769{
770 xb_init_comms();
771 xs_resume();
772 bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
773 xenbus_backend_resume(resume_dev);
774}
775EXPORT_SYMBOL_GPL(xenbus_resume);
776
777void xenbus_suspend_cancel(void)
778{
779 xs_suspend_cancel();
780 bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
781 xenbus_backend_resume(suspend_cancel_dev);
782}
783EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
784
785/* A flag to determine if xenstored is 'ready' (i.e. has started) */ 744/* A flag to determine if xenstored is 'ready' (i.e. has started) */
786int xenstored_ready = 0; 745int xenstored_ready = 0;
787 746
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index e325eab4724d..eab33f1dbdf7 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -673,6 +673,8 @@ void xs_resume(void)
673 struct xenbus_watch *watch; 673 struct xenbus_watch *watch;
674 char token[sizeof(watch) * 2 + 1]; 674 char token[sizeof(watch) * 2 + 1];
675 675
676 xb_init_comms();
677
676 mutex_unlock(&xs_state.response_mutex); 678 mutex_unlock(&xs_state.response_mutex);
677 mutex_unlock(&xs_state.request_mutex); 679 mutex_unlock(&xs_state.request_mutex);
678 up_write(&xs_state.transaction_mutex); 680 up_write(&xs_state.transaction_mutex);
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 515741a8e6b8..6559e0c752ce 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -20,10 +20,27 @@
20MODULE_DESCRIPTION("Xen filesystem"); 20MODULE_DESCRIPTION("Xen filesystem");
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22 22
23static ssize_t capabilities_read(struct file *file, char __user *buf,
24 size_t size, loff_t *off)
25{
26 char *tmp = "";
27
28 if (xen_initial_domain())
29 tmp = "control_d\n";
30
31 return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp));
32}
33
34static const struct file_operations capabilities_file_ops = {
35 .read = capabilities_read,
36};
37
23static int xenfs_fill_super(struct super_block *sb, void *data, int silent) 38static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
24{ 39{
25 static struct tree_descr xenfs_files[] = { 40 static struct tree_descr xenfs_files[] = {
26 [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR}, 41 [1] = {},
42 { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
43 { "capabilities", &capabilities_file_ops, S_IRUGO },
27 {""}, 44 {""},
28 }; 45 };
29 46
diff --git a/include/Kbuild b/include/Kbuild
index d8c3e3cbf416..fe36accd4328 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -8,3 +8,4 @@ header-y += mtd/
8header-y += rdma/ 8header-y += rdma/
9header-y += video/ 9header-y += video/
10header-y += drm/ 10header-y += drm/
11header-y += xen/
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index e16fdb1f4f4f..09887045d03f 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -73,8 +73,8 @@ static inline int pte_file(pte_t pte) { return 0; }
73#define pgtable_cache_init() do {} while (0) 73#define pgtable_cache_init() do {} while (0)
74#define arch_enter_lazy_mmu_mode() do {} while (0) 74#define arch_enter_lazy_mmu_mode() do {} while (0)
75#define arch_leave_lazy_mmu_mode() do {} while (0) 75#define arch_leave_lazy_mmu_mode() do {} while (0)
76#define arch_enter_lazy_cpu_mode() do {} while (0) 76
77#define arch_leave_lazy_cpu_mode() do {} while (0) 77#define arch_start_context_switch(prev) do {} while (0)
78 78
79#else /* !CONFIG_MMU */ 79#else /* !CONFIG_MMU */
80/*****************************************************************************/ 80/*****************************************************************************/
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8e6d0ca70aba..e410f602cab1 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
280#endif 280#endif
281 281
282/* 282/*
283 * A facility to provide batching of the reload of page tables with the 283 * A facility to provide batching of the reload of page tables and
284 * actual context switch code for paravirtualized guests. By convention, 284 * other process state with the actual context switch code for
285 * only one of the lazy modes (CPU, MMU) should be active at any given 285 * paravirtualized guests. By convention, only one of the batched
286 * time, entry should never be nested, and entry and exits should always 286 * update (lazy) modes (CPU, MMU) should be active at any given time,
287 * be paired. This is for sanity of maintaining and reasoning about the 287 * entry should never be nested, and entry and exits should always be
288 * kernel code. 288 * paired. This is for sanity of maintaining and reasoning about the
289 * kernel code. In this case, the exit (end of the context switch) is
290 * in architecture-specific code, and so doesn't need a generic
291 * definition.
289 */ 292 */
290#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE 293#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
291#define arch_enter_lazy_cpu_mode() do {} while (0) 294#define arch_start_context_switch(prev) do {} while (0)
292#define arch_leave_lazy_cpu_mode() do {} while (0)
293#define arch_flush_lazy_cpu_mode() do {} while (0)
294#endif 295#endif
295 296
296#ifndef __HAVE_PFNMAP_TRACKING 297#ifndef __HAVE_PFNMAP_TRACKING
diff --git a/include/xen/Kbuild b/include/xen/Kbuild
new file mode 100644
index 000000000000..4e65c16a445b
--- /dev/null
+++ b/include/xen/Kbuild
@@ -0,0 +1 @@
header-y += evtchn.h
diff --git a/include/xen/events.h b/include/xen/events.h
index 0d5f1adc0363..e68d59a90ca8 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq);
53 irq will be disabled so it won't deliver an interrupt. */ 53 irq will be disabled so it won't deliver an interrupt. */
54void xen_poll_irq(int irq); 54void xen_poll_irq(int irq);
55 55
56/* Determine the IRQ which is bound to an event channel */
57unsigned irq_from_evtchn(unsigned int evtchn);
58
56#endif /* _XEN_EVENTS_H */ 59#endif /* _XEN_EVENTS_H */
diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h
new file mode 100644
index 000000000000..14e833ee4e0b
--- /dev/null
+++ b/include/xen/evtchn.h
@@ -0,0 +1,88 @@
1/******************************************************************************
2 * evtchn.h
3 *
4 * Interface to /dev/xen/evtchn.
5 *
6 * Copyright (c) 2003-2005, K A Fraser
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#ifndef __LINUX_PUBLIC_EVTCHN_H__
34#define __LINUX_PUBLIC_EVTCHN_H__
35
36/*
37 * Bind a fresh port to VIRQ @virq.
38 * Return allocated port.
39 */
40#define IOCTL_EVTCHN_BIND_VIRQ \
41 _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq))
42struct ioctl_evtchn_bind_virq {
43 unsigned int virq;
44};
45
46/*
47 * Bind a fresh port to remote <@remote_domain, @remote_port>.
48 * Return allocated port.
49 */
50#define IOCTL_EVTCHN_BIND_INTERDOMAIN \
51 _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain))
52struct ioctl_evtchn_bind_interdomain {
53 unsigned int remote_domain, remote_port;
54};
55
56/*
57 * Allocate a fresh port for binding to @remote_domain.
58 * Return allocated port.
59 */
60#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \
61 _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port))
62struct ioctl_evtchn_bind_unbound_port {
63 unsigned int remote_domain;
64};
65
66/*
67 * Unbind previously allocated @port.
68 */
69#define IOCTL_EVTCHN_UNBIND \
70 _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind))
71struct ioctl_evtchn_unbind {
72 unsigned int port;
73};
74
75/*
76 * Unbind previously allocated @port.
77 */
78#define IOCTL_EVTCHN_NOTIFY \
79 _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify))
80struct ioctl_evtchn_notify {
81 unsigned int port;
82};
83
84/* Clear and reinitialise the event buffer. Clear error condition. */
85#define IOCTL_EVTCHN_RESET \
86 _IOC(_IOC_NONE, 'E', 5, 0)
87
88#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index 453235e923f0..e8b6519d47e9 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -57,4 +57,7 @@ struct xen_feature_info {
57/* Declares the features reported by XENVER_get_features. */ 57/* Declares the features reported by XENVER_get_features. */
58#include "features.h" 58#include "features.h"
59 59
60/* arg == NULL; returns host memory page size. */
61#define XENVER_pagesize 7
62
60#endif /* __XEN_PUBLIC_VERSION_H__ */ 63#endif /* __XEN_PUBLIC_VERSION_H__ */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index f87f9614844d..b9763badbd77 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -91,8 +91,7 @@ struct xenbus_driver {
91 void (*otherend_changed)(struct xenbus_device *dev, 91 void (*otherend_changed)(struct xenbus_device *dev,
92 enum xenbus_state backend_state); 92 enum xenbus_state backend_state);
93 int (*remove)(struct xenbus_device *dev); 93 int (*remove)(struct xenbus_device *dev);
94 int (*suspend)(struct xenbus_device *dev); 94 int (*suspend)(struct xenbus_device *dev, pm_message_t state);
95 int (*suspend_cancel)(struct xenbus_device *dev);
96 int (*resume)(struct xenbus_device *dev); 95 int (*resume)(struct xenbus_device *dev);
97 int (*uevent)(struct xenbus_device *, char **, int, char *, int); 96 int (*uevent)(struct xenbus_device *, char **, int, char *, int);
98 struct device_driver driver; 97 struct device_driver driver;
diff --git a/kernel/sched.c b/kernel/sched.c
index 6cc1fd5d5072..b38bd96098f6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2754,7 +2754,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
2754 * combine the page table reload and the switch backend into 2754 * combine the page table reload and the switch backend into
2755 * one hypercall. 2755 * one hypercall.
2756 */ 2756 */
2757 arch_enter_lazy_cpu_mode(); 2757 arch_start_context_switch(prev);
2758 2758
2759 if (unlikely(!mm)) { 2759 if (unlikely(!mm)) {
2760 next->active_mm = oldmm; 2760 next->active_mm = oldmm;