diff options
39 files changed, 1407 insertions, 240 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 7727aa8b7dda..bc384be6aa44 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -56,6 +56,7 @@ struct desc_ptr; | |||
56 | struct tss_struct; | 56 | struct tss_struct; |
57 | struct mm_struct; | 57 | struct mm_struct; |
58 | struct desc_struct; | 58 | struct desc_struct; |
59 | struct task_struct; | ||
59 | 60 | ||
60 | /* | 61 | /* |
61 | * Wrapper type for pointers to code which uses the non-standard | 62 | * Wrapper type for pointers to code which uses the non-standard |
@@ -203,7 +204,8 @@ struct pv_cpu_ops { | |||
203 | 204 | ||
204 | void (*swapgs)(void); | 205 | void (*swapgs)(void); |
205 | 206 | ||
206 | struct pv_lazy_ops lazy_mode; | 207 | void (*start_context_switch)(struct task_struct *prev); |
208 | void (*end_context_switch)(struct task_struct *next); | ||
207 | }; | 209 | }; |
208 | 210 | ||
209 | struct pv_irq_ops { | 211 | struct pv_irq_ops { |
@@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode { | |||
1399 | }; | 1401 | }; |
1400 | 1402 | ||
1401 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); | 1403 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); |
1402 | void paravirt_enter_lazy_cpu(void); | 1404 | void paravirt_start_context_switch(struct task_struct *prev); |
1403 | void paravirt_leave_lazy_cpu(void); | 1405 | void paravirt_end_context_switch(struct task_struct *next); |
1406 | |||
1404 | void paravirt_enter_lazy_mmu(void); | 1407 | void paravirt_enter_lazy_mmu(void); |
1405 | void paravirt_leave_lazy_mmu(void); | 1408 | void paravirt_leave_lazy_mmu(void); |
1406 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode); | ||
1407 | 1409 | ||
1408 | #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE | 1410 | #define __HAVE_ARCH_START_CONTEXT_SWITCH |
1409 | static inline void arch_enter_lazy_cpu_mode(void) | 1411 | static inline void arch_start_context_switch(struct task_struct *prev) |
1410 | { | 1412 | { |
1411 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); | 1413 | PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); |
1412 | } | 1414 | } |
1413 | 1415 | ||
1414 | static inline void arch_leave_lazy_cpu_mode(void) | 1416 | static inline void arch_end_context_switch(struct task_struct *next) |
1415 | { | 1417 | { |
1416 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); | 1418 | PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); |
1417 | } | 1419 | } |
1418 | 1420 | ||
1419 | void arch_flush_lazy_cpu_mode(void); | ||
1420 | |||
1421 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE | 1421 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE |
1422 | static inline void arch_enter_lazy_mmu_mode(void) | 1422 | static inline void arch_enter_lazy_mmu_mode(void) |
1423 | { | 1423 | { |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 29d96d168bc0..b27c4f29b5e0 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) | |||
81 | #define pte_val(x) native_pte_val(x) | 81 | #define pte_val(x) native_pte_val(x) |
82 | #define __pte(x) native_make_pte(x) | 82 | #define __pte(x) native_make_pte(x) |
83 | 83 | ||
84 | #define arch_end_context_switch(prev) do {} while(0) | ||
85 | |||
84 | #endif /* CONFIG_PARAVIRT */ | 86 | #endif /* CONFIG_PARAVIRT */ |
85 | 87 | ||
86 | /* | 88 | /* |
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index d5cd6c586881..64cf2d24fad1 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h | |||
@@ -48,9 +48,15 @@ | |||
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | #ifdef CONFIG_X86_64 | 50 | #ifdef CONFIG_X86_64 |
51 | #ifdef CONFIG_PARAVIRT | ||
52 | /* Paravirtualized systems may not have PSE or PGE available */ | ||
51 | #define NEED_PSE 0 | 53 | #define NEED_PSE 0 |
54 | #define NEED_PGE 0 | ||
55 | #else | ||
56 | #define NEED_PSE (1<<(X86_FEATURE_PSE & 31)) | ||
57 | #define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) | ||
58 | #endif | ||
52 | #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) | 59 | #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) |
53 | #define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) | ||
54 | #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) | 60 | #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) |
55 | #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) | 61 | #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) |
56 | #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) | 62 | #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8820a73ae090..602c769fc98c 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -94,7 +94,8 @@ struct thread_info { | |||
94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ |
96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | 96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ |
97 | #define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */ | 97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
98 | #define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */ | ||
98 | 99 | ||
99 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | 100 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) |
100 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | 101 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
@@ -116,6 +117,7 @@ struct thread_info { | |||
116 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 117 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
117 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) |
118 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | 119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) |
120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | ||
119 | #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) | 121 | #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) |
120 | 122 | ||
121 | /* work to do in syscall_trace_enter() */ | 123 | /* work to do in syscall_trace_enter() */ |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 1a918dde46b5..018a0a400799 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | |||
124 | 124 | ||
125 | /* VIRT <-> MACHINE conversion */ | 125 | /* VIRT <-> MACHINE conversion */ |
126 | #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) | 126 | #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) |
127 | #define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) | 127 | #define virt_to_pfn(v) (PFN_DOWN(__pa(v))) |
128 | #define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) | ||
128 | #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) | 129 | #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) |
129 | 130 | ||
130 | static inline unsigned long pte_mfn(pte_t pte) | 131 | static inline unsigned long pte_mfn(pte_t pte) |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 33019ddb56b4..6551dedee20c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -195,7 +195,7 @@ static void kvm_leave_lazy_mmu(void) | |||
195 | struct kvm_para_state *state = kvm_para_state(); | 195 | struct kvm_para_state *state = kvm_para_state(); |
196 | 196 | ||
197 | mmu_queue_flush(state); | 197 | mmu_queue_flush(state); |
198 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | 198 | paravirt_leave_lazy_mmu(); |
199 | state->mode = paravirt_get_lazy_mode(); | 199 | state->mode = paravirt_get_lazy_mode(); |
200 | } | 200 | } |
201 | 201 | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8e45f4464880..aa3442340705 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -246,18 +246,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA | |||
246 | 246 | ||
247 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | 247 | static inline void enter_lazy(enum paravirt_lazy_mode mode) |
248 | { | 248 | { |
249 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); | 249 | BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); |
250 | BUG_ON(preemptible()); | ||
251 | 250 | ||
252 | __get_cpu_var(paravirt_lazy_mode) = mode; | 251 | percpu_write(paravirt_lazy_mode, mode); |
253 | } | 252 | } |
254 | 253 | ||
255 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) | 254 | static void leave_lazy(enum paravirt_lazy_mode mode) |
256 | { | 255 | { |
257 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); | 256 | BUG_ON(percpu_read(paravirt_lazy_mode) != mode); |
258 | BUG_ON(preemptible()); | ||
259 | 257 | ||
260 | __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; | 258 | percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); |
261 | } | 259 | } |
262 | 260 | ||
263 | void paravirt_enter_lazy_mmu(void) | 261 | void paravirt_enter_lazy_mmu(void) |
@@ -267,22 +265,36 @@ void paravirt_enter_lazy_mmu(void) | |||
267 | 265 | ||
268 | void paravirt_leave_lazy_mmu(void) | 266 | void paravirt_leave_lazy_mmu(void) |
269 | { | 267 | { |
270 | paravirt_leave_lazy(PARAVIRT_LAZY_MMU); | 268 | leave_lazy(PARAVIRT_LAZY_MMU); |
271 | } | 269 | } |
272 | 270 | ||
273 | void paravirt_enter_lazy_cpu(void) | 271 | void paravirt_start_context_switch(struct task_struct *prev) |
274 | { | 272 | { |
273 | BUG_ON(preemptible()); | ||
274 | |||
275 | if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { | ||
276 | arch_leave_lazy_mmu_mode(); | ||
277 | set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); | ||
278 | } | ||
275 | enter_lazy(PARAVIRT_LAZY_CPU); | 279 | enter_lazy(PARAVIRT_LAZY_CPU); |
276 | } | 280 | } |
277 | 281 | ||
278 | void paravirt_leave_lazy_cpu(void) | 282 | void paravirt_end_context_switch(struct task_struct *next) |
279 | { | 283 | { |
280 | paravirt_leave_lazy(PARAVIRT_LAZY_CPU); | 284 | BUG_ON(preemptible()); |
285 | |||
286 | leave_lazy(PARAVIRT_LAZY_CPU); | ||
287 | |||
288 | if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) | ||
289 | arch_enter_lazy_mmu_mode(); | ||
281 | } | 290 | } |
282 | 291 | ||
283 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | 292 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) |
284 | { | 293 | { |
285 | return __get_cpu_var(paravirt_lazy_mode); | 294 | if (in_interrupt()) |
295 | return PARAVIRT_LAZY_NONE; | ||
296 | |||
297 | return percpu_read(paravirt_lazy_mode); | ||
286 | } | 298 | } |
287 | 299 | ||
288 | void arch_flush_lazy_mmu_mode(void) | 300 | void arch_flush_lazy_mmu_mode(void) |
@@ -290,7 +302,6 @@ void arch_flush_lazy_mmu_mode(void) | |||
290 | preempt_disable(); | 302 | preempt_disable(); |
291 | 303 | ||
292 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | 304 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { |
293 | WARN_ON(preempt_count() == 1); | ||
294 | arch_leave_lazy_mmu_mode(); | 305 | arch_leave_lazy_mmu_mode(); |
295 | arch_enter_lazy_mmu_mode(); | 306 | arch_enter_lazy_mmu_mode(); |
296 | } | 307 | } |
@@ -298,19 +309,6 @@ void arch_flush_lazy_mmu_mode(void) | |||
298 | preempt_enable(); | 309 | preempt_enable(); |
299 | } | 310 | } |
300 | 311 | ||
301 | void arch_flush_lazy_cpu_mode(void) | ||
302 | { | ||
303 | preempt_disable(); | ||
304 | |||
305 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { | ||
306 | WARN_ON(preempt_count() == 1); | ||
307 | arch_leave_lazy_cpu_mode(); | ||
308 | arch_enter_lazy_cpu_mode(); | ||
309 | } | ||
310 | |||
311 | preempt_enable(); | ||
312 | } | ||
313 | |||
314 | struct pv_info pv_info = { | 312 | struct pv_info pv_info = { |
315 | .name = "bare hardware", | 313 | .name = "bare hardware", |
316 | .paravirt_enabled = 0, | 314 | .paravirt_enabled = 0, |
@@ -402,10 +400,8 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
402 | .set_iopl_mask = native_set_iopl_mask, | 400 | .set_iopl_mask = native_set_iopl_mask, |
403 | .io_delay = native_io_delay, | 401 | .io_delay = native_io_delay, |
404 | 402 | ||
405 | .lazy_mode = { | 403 | .start_context_switch = paravirt_nop, |
406 | .enter = paravirt_nop, | 404 | .end_context_switch = paravirt_nop, |
407 | .leave = paravirt_nop, | ||
408 | }, | ||
409 | }; | 405 | }; |
410 | 406 | ||
411 | struct pv_apic_ops pv_apic_ops = { | 407 | struct pv_apic_ops pv_apic_ops = { |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 76f8f84043a2..5de30f0960fb 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
407 | * done before math_state_restore, so the TS bit is up | 407 | * done before math_state_restore, so the TS bit is up |
408 | * to date. | 408 | * to date. |
409 | */ | 409 | */ |
410 | arch_leave_lazy_cpu_mode(); | 410 | arch_end_context_switch(next_p); |
411 | 411 | ||
412 | /* If the task has used fpu the last 5 timeslices, just do a full | 412 | /* If the task has used fpu the last 5 timeslices, just do a full |
413 | * restore of the math state immediately to avoid the trap; the | 413 | * restore of the math state immediately to avoid the trap; the |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b751a41392b1..66ad06791d6f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
428 | * done before math_state_restore, so the TS bit is up | 428 | * done before math_state_restore, so the TS bit is up |
429 | * to date. | 429 | * to date. |
430 | */ | 430 | */ |
431 | arch_leave_lazy_cpu_mode(); | 431 | arch_end_context_switch(next_p); |
432 | 432 | ||
433 | /* | 433 | /* |
434 | * Switch FS and GS. | 434 | * Switch FS and GS. |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 95deb9f2211e..b263423fbe2a 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -462,22 +462,28 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | |||
462 | } | 462 | } |
463 | #endif | 463 | #endif |
464 | 464 | ||
465 | static void vmi_enter_lazy_cpu(void) | 465 | static void vmi_start_context_switch(struct task_struct *prev) |
466 | { | 466 | { |
467 | paravirt_enter_lazy_cpu(); | 467 | paravirt_start_context_switch(prev); |
468 | vmi_ops.set_lazy_mode(2); | 468 | vmi_ops.set_lazy_mode(2); |
469 | } | 469 | } |
470 | 470 | ||
471 | static void vmi_end_context_switch(struct task_struct *next) | ||
472 | { | ||
473 | vmi_ops.set_lazy_mode(0); | ||
474 | paravirt_end_context_switch(next); | ||
475 | } | ||
476 | |||
471 | static void vmi_enter_lazy_mmu(void) | 477 | static void vmi_enter_lazy_mmu(void) |
472 | { | 478 | { |
473 | paravirt_enter_lazy_mmu(); | 479 | paravirt_enter_lazy_mmu(); |
474 | vmi_ops.set_lazy_mode(1); | 480 | vmi_ops.set_lazy_mode(1); |
475 | } | 481 | } |
476 | 482 | ||
477 | static void vmi_leave_lazy(void) | 483 | static void vmi_leave_lazy_mmu(void) |
478 | { | 484 | { |
479 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
480 | vmi_ops.set_lazy_mode(0); | 485 | vmi_ops.set_lazy_mode(0); |
486 | paravirt_leave_lazy_mmu(); | ||
481 | } | 487 | } |
482 | 488 | ||
483 | static inline int __init check_vmi_rom(struct vrom_header *rom) | 489 | static inline int __init check_vmi_rom(struct vrom_header *rom) |
@@ -711,14 +717,14 @@ static inline int __init activate_vmi(void) | |||
711 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); | 717 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); |
712 | para_fill(pv_cpu_ops.io_delay, IODelay); | 718 | para_fill(pv_cpu_ops.io_delay, IODelay); |
713 | 719 | ||
714 | para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, | 720 | para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, |
715 | set_lazy_mode, SetLazyMode); | 721 | set_lazy_mode, SetLazyMode); |
716 | para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, | 722 | para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, |
717 | set_lazy_mode, SetLazyMode); | 723 | set_lazy_mode, SetLazyMode); |
718 | 724 | ||
719 | para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, | 725 | para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, |
720 | set_lazy_mode, SetLazyMode); | 726 | set_lazy_mode, SetLazyMode); |
721 | para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, | 727 | para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu, |
722 | set_lazy_mode, SetLazyMode); | 728 | set_lazy_mode, SetLazyMode); |
723 | 729 | ||
724 | /* user and kernel flush are just handled with different flags to FlushTLB */ | 730 | /* user and kernel flush are just handled with different flags to FlushTLB */ |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e94a11e42f98..5ab239711cc2 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -166,10 +166,16 @@ static void lazy_hcall3(unsigned long call, | |||
166 | 166 | ||
167 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then | 167 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then |
168 | * issue the do-nothing hypercall to flush any stored calls. */ | 168 | * issue the do-nothing hypercall to flush any stored calls. */ |
169 | static void lguest_leave_lazy_mode(void) | 169 | static void lguest_leave_lazy_mmu_mode(void) |
170 | { | ||
171 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); | ||
172 | paravirt_leave_lazy_mmu(); | ||
173 | } | ||
174 | |||
175 | static void lguest_end_context_switch(struct task_struct *next) | ||
170 | { | 176 | { |
171 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
172 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); | 177 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); |
178 | paravirt_end_context_switch(next); | ||
173 | } | 179 | } |
174 | 180 | ||
175 | /*G:033 | 181 | /*G:033 |
@@ -1051,8 +1057,8 @@ __init void lguest_init(void) | |||
1051 | pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; | 1057 | pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; |
1052 | pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; | 1058 | pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; |
1053 | pv_cpu_ops.wbinvd = lguest_wbinvd; | 1059 | pv_cpu_ops.wbinvd = lguest_wbinvd; |
1054 | pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; | 1060 | pv_cpu_ops.start_context_switch = paravirt_start_context_switch; |
1055 | pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; | 1061 | pv_cpu_ops.end_context_switch = lguest_end_context_switch; |
1056 | 1062 | ||
1057 | /* pagetable management */ | 1063 | /* pagetable management */ |
1058 | pv_mmu_ops.write_cr3 = lguest_write_cr3; | 1064 | pv_mmu_ops.write_cr3 = lguest_write_cr3; |
@@ -1065,7 +1071,7 @@ __init void lguest_init(void) | |||
1065 | pv_mmu_ops.read_cr2 = lguest_read_cr2; | 1071 | pv_mmu_ops.read_cr2 = lguest_read_cr2; |
1066 | pv_mmu_ops.read_cr3 = lguest_read_cr3; | 1072 | pv_mmu_ops.read_cr3 = lguest_read_cr3; |
1067 | pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; | 1073 | pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; |
1068 | pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; | 1074 | pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; |
1069 | pv_mmu_ops.pte_update = lguest_pte_update; | 1075 | pv_mmu_ops.pte_update = lguest_pte_update; |
1070 | pv_mmu_ops.pte_update_defer = lguest_pte_update; | 1076 | pv_mmu_ops.pte_update_defer = lguest_pte_update; |
1071 | 1077 | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..cfbb4a738011 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -225,12 +225,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | |||
225 | if (!pmd_present(*pmd_k)) | 225 | if (!pmd_present(*pmd_k)) |
226 | return NULL; | 226 | return NULL; |
227 | 227 | ||
228 | if (!pmd_present(*pmd)) { | 228 | if (!pmd_present(*pmd)) |
229 | set_pmd(pmd, *pmd_k); | 229 | set_pmd(pmd, *pmd_k); |
230 | arch_flush_lazy_mmu_mode(); | 230 | else |
231 | } else { | ||
232 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); | 231 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); |
233 | } | ||
234 | 232 | ||
235 | return pmd_k; | 233 | return pmd_k; |
236 | } | 234 | } |
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 8126e8d1a2a4..58f621e81919 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c | |||
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) | |||
44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | 44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
45 | BUG_ON(!pte_none(*(kmap_pte-idx))); | 45 | BUG_ON(!pte_none(*(kmap_pte-idx))); |
46 | set_pte(kmap_pte-idx, mk_pte(page, prot)); | 46 | set_pte(kmap_pte-idx, mk_pte(page, prot)); |
47 | arch_flush_lazy_mmu_mode(); | ||
48 | 47 | ||
49 | return (void *)vaddr; | 48 | return (void *)vaddr; |
50 | } | 49 | } |
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type) | |||
74 | #endif | 73 | #endif |
75 | } | 74 | } |
76 | 75 | ||
77 | arch_flush_lazy_mmu_mode(); | ||
78 | pagefault_enable(); | 76 | pagefault_enable(); |
79 | } | 77 | } |
80 | 78 | ||
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 8056545e2d39..fe6f84ca121e 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c | |||
@@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type) | |||
82 | if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) | 82 | if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) |
83 | kpte_clear_flush(kmap_pte-idx, vaddr); | 83 | kpte_clear_flush(kmap_pte-idx, vaddr); |
84 | 84 | ||
85 | arch_flush_lazy_mmu_mode(); | ||
86 | pagefault_enable(); | 85 | pagefault_enable(); |
87 | } | 86 | } |
88 | EXPORT_SYMBOL_GPL(iounmap_atomic); | 87 | EXPORT_SYMBOL_GPL(iounmap_atomic); |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index d71e1b636ce6..660cac75ae11 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -844,13 +844,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
844 | 844 | ||
845 | vm_unmap_aliases(); | 845 | vm_unmap_aliases(); |
846 | 846 | ||
847 | /* | ||
848 | * If we're called with lazy mmu updates enabled, the | ||
849 | * in-memory pte state may be stale. Flush pending updates to | ||
850 | * bring them up to date. | ||
851 | */ | ||
852 | arch_flush_lazy_mmu_mode(); | ||
853 | |||
854 | cpa.vaddr = addr; | 847 | cpa.vaddr = addr; |
855 | cpa.pages = pages; | 848 | cpa.pages = pages; |
856 | cpa.numpages = numpages; | 849 | cpa.numpages = numpages; |
@@ -895,13 +888,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
895 | } else | 888 | } else |
896 | cpa_flush_all(cache); | 889 | cpa_flush_all(cache); |
897 | 890 | ||
898 | /* | ||
899 | * If we've been called with lazy mmu updates enabled, then | ||
900 | * make sure that everything gets flushed out before we | ||
901 | * return. | ||
902 | */ | ||
903 | arch_flush_lazy_mmu_mode(); | ||
904 | |||
905 | out: | 891 | out: |
906 | return ret; | 892 | return ret; |
907 | } | 893 | } |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 82cd39a6cbd3..12a3159333bc 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <asm/xen/hypervisor.h> | 42 | #include <asm/xen/hypervisor.h> |
43 | #include <asm/fixmap.h> | 43 | #include <asm/fixmap.h> |
44 | #include <asm/processor.h> | 44 | #include <asm/processor.h> |
45 | #include <asm/proto.h> | ||
45 | #include <asm/msr-index.h> | 46 | #include <asm/msr-index.h> |
46 | #include <asm/setup.h> | 47 | #include <asm/setup.h> |
47 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
@@ -168,21 +169,23 @@ static void __init xen_banner(void) | |||
168 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | 169 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); |
169 | } | 170 | } |
170 | 171 | ||
172 | static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; | ||
173 | static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; | ||
174 | |||
171 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, | 175 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, |
172 | unsigned int *cx, unsigned int *dx) | 176 | unsigned int *cx, unsigned int *dx) |
173 | { | 177 | { |
178 | unsigned maskecx = ~0; | ||
174 | unsigned maskedx = ~0; | 179 | unsigned maskedx = ~0; |
175 | 180 | ||
176 | /* | 181 | /* |
177 | * Mask out inconvenient features, to try and disable as many | 182 | * Mask out inconvenient features, to try and disable as many |
178 | * unsupported kernel subsystems as possible. | 183 | * unsupported kernel subsystems as possible. |
179 | */ | 184 | */ |
180 | if (*ax == 1) | 185 | if (*ax == 1) { |
181 | maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ | 186 | maskecx = cpuid_leaf1_ecx_mask; |
182 | (1 << X86_FEATURE_ACPI) | /* disable ACPI */ | 187 | maskedx = cpuid_leaf1_edx_mask; |
183 | (1 << X86_FEATURE_MCE) | /* disable MCE */ | 188 | } |
184 | (1 << X86_FEATURE_MCA) | /* disable MCA */ | ||
185 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | ||
186 | 189 | ||
187 | asm(XEN_EMULATE_PREFIX "cpuid" | 190 | asm(XEN_EMULATE_PREFIX "cpuid" |
188 | : "=a" (*ax), | 191 | : "=a" (*ax), |
@@ -190,9 +193,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
190 | "=c" (*cx), | 193 | "=c" (*cx), |
191 | "=d" (*dx) | 194 | "=d" (*dx) |
192 | : "0" (*ax), "2" (*cx)); | 195 | : "0" (*ax), "2" (*cx)); |
196 | |||
197 | *cx &= maskecx; | ||
193 | *dx &= maskedx; | 198 | *dx &= maskedx; |
194 | } | 199 | } |
195 | 200 | ||
201 | static __init void xen_init_cpuid_mask(void) | ||
202 | { | ||
203 | unsigned int ax, bx, cx, dx; | ||
204 | |||
205 | cpuid_leaf1_edx_mask = | ||
206 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ | ||
207 | (1 << X86_FEATURE_MCA) | /* disable MCA */ | ||
208 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | ||
209 | |||
210 | if (!xen_initial_domain()) | ||
211 | cpuid_leaf1_edx_mask &= | ||
212 | ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ | ||
213 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ | ||
214 | |||
215 | ax = 1; | ||
216 | xen_cpuid(&ax, &bx, &cx, &dx); | ||
217 | |||
218 | /* cpuid claims we support xsave; try enabling it to see what happens */ | ||
219 | if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { | ||
220 | unsigned long cr4; | ||
221 | |||
222 | set_in_cr4(X86_CR4_OSXSAVE); | ||
223 | |||
224 | cr4 = read_cr4(); | ||
225 | |||
226 | if ((cr4 & X86_CR4_OSXSAVE) == 0) | ||
227 | cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); | ||
228 | |||
229 | clear_in_cr4(X86_CR4_OSXSAVE); | ||
230 | } | ||
231 | } | ||
232 | |||
196 | static void xen_set_debugreg(int reg, unsigned long val) | 233 | static void xen_set_debugreg(int reg, unsigned long val) |
197 | { | 234 | { |
198 | HYPERVISOR_set_debugreg(reg, val); | 235 | HYPERVISOR_set_debugreg(reg, val); |
@@ -203,10 +240,10 @@ static unsigned long xen_get_debugreg(int reg) | |||
203 | return HYPERVISOR_get_debugreg(reg); | 240 | return HYPERVISOR_get_debugreg(reg); |
204 | } | 241 | } |
205 | 242 | ||
206 | void xen_leave_lazy(void) | 243 | static void xen_end_context_switch(struct task_struct *next) |
207 | { | 244 | { |
208 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
209 | xen_mc_flush(); | 245 | xen_mc_flush(); |
246 | paravirt_end_context_switch(next); | ||
210 | } | 247 | } |
211 | 248 | ||
212 | static unsigned long xen_store_tr(void) | 249 | static unsigned long xen_store_tr(void) |
@@ -284,12 +321,11 @@ static void xen_set_ldt(const void *addr, unsigned entries) | |||
284 | 321 | ||
285 | static void xen_load_gdt(const struct desc_ptr *dtr) | 322 | static void xen_load_gdt(const struct desc_ptr *dtr) |
286 | { | 323 | { |
287 | unsigned long *frames; | ||
288 | unsigned long va = dtr->address; | 324 | unsigned long va = dtr->address; |
289 | unsigned int size = dtr->size + 1; | 325 | unsigned int size = dtr->size + 1; |
290 | unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; | 326 | unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; |
327 | unsigned long frames[pages]; | ||
291 | int f; | 328 | int f; |
292 | struct multicall_space mcs; | ||
293 | 329 | ||
294 | /* A GDT can be up to 64k in size, which corresponds to 8192 | 330 | /* A GDT can be up to 64k in size, which corresponds to 8192 |
295 | 8-byte entries, or 16 4k pages. */ | 331 | 8-byte entries, or 16 4k pages. */ |
@@ -297,19 +333,26 @@ static void xen_load_gdt(const struct desc_ptr *dtr) | |||
297 | BUG_ON(size > 65536); | 333 | BUG_ON(size > 65536); |
298 | BUG_ON(va & ~PAGE_MASK); | 334 | BUG_ON(va & ~PAGE_MASK); |
299 | 335 | ||
300 | mcs = xen_mc_entry(sizeof(*frames) * pages); | ||
301 | frames = mcs.args; | ||
302 | |||
303 | for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { | 336 | for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { |
304 | frames[f] = arbitrary_virt_to_mfn((void *)va); | 337 | int level; |
338 | pte_t *ptep = lookup_address(va, &level); | ||
339 | unsigned long pfn, mfn; | ||
340 | void *virt; | ||
341 | |||
342 | BUG_ON(ptep == NULL); | ||
343 | |||
344 | pfn = pte_pfn(*ptep); | ||
345 | mfn = pfn_to_mfn(pfn); | ||
346 | virt = __va(PFN_PHYS(pfn)); | ||
347 | |||
348 | frames[f] = mfn; | ||
305 | 349 | ||
306 | make_lowmem_page_readonly((void *)va); | 350 | make_lowmem_page_readonly((void *)va); |
307 | make_lowmem_page_readonly(mfn_to_virt(frames[f])); | 351 | make_lowmem_page_readonly(virt); |
308 | } | 352 | } |
309 | 353 | ||
310 | MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); | 354 | if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) |
311 | 355 | BUG(); | |
312 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
313 | } | 356 | } |
314 | 357 | ||
315 | static void load_TLS_descriptor(struct thread_struct *t, | 358 | static void load_TLS_descriptor(struct thread_struct *t, |
@@ -385,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | |||
385 | static int cvt_gate_to_trap(int vector, const gate_desc *val, | 428 | static int cvt_gate_to_trap(int vector, const gate_desc *val, |
386 | struct trap_info *info) | 429 | struct trap_info *info) |
387 | { | 430 | { |
388 | if (val->type != 0xf && val->type != 0xe) | 431 | if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) |
389 | return 0; | 432 | return 0; |
390 | 433 | ||
391 | info->vector = vector; | 434 | info->vector = vector; |
@@ -393,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
393 | info->cs = gate_segment(*val); | 436 | info->cs = gate_segment(*val); |
394 | info->flags = val->dpl; | 437 | info->flags = val->dpl; |
395 | /* interrupt gates clear IF */ | 438 | /* interrupt gates clear IF */ |
396 | if (val->type == 0xe) | 439 | if (val->type == GATE_INTERRUPT) |
397 | info->flags |= 4; | 440 | info->flags |= 1 << 2; |
398 | 441 | ||
399 | return 1; | 442 | return 1; |
400 | } | 443 | } |
@@ -817,10 +860,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
817 | /* Xen takes care of %gs when switching to usermode for us */ | 860 | /* Xen takes care of %gs when switching to usermode for us */ |
818 | .swapgs = paravirt_nop, | 861 | .swapgs = paravirt_nop, |
819 | 862 | ||
820 | .lazy_mode = { | 863 | .start_context_switch = paravirt_start_context_switch, |
821 | .enter = paravirt_enter_lazy_cpu, | 864 | .end_context_switch = xen_end_context_switch, |
822 | .leave = xen_leave_lazy, | ||
823 | }, | ||
824 | }; | 865 | }; |
825 | 866 | ||
826 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 867 | static const struct pv_apic_ops xen_apic_ops __initdata = { |
@@ -872,7 +913,6 @@ static const struct machine_ops __initdata xen_machine_ops = { | |||
872 | .emergency_restart = xen_emergency_restart, | 913 | .emergency_restart = xen_emergency_restart, |
873 | }; | 914 | }; |
874 | 915 | ||
875 | |||
876 | /* First C function to be called on Xen boot */ | 916 | /* First C function to be called on Xen boot */ |
877 | asmlinkage void __init xen_start_kernel(void) | 917 | asmlinkage void __init xen_start_kernel(void) |
878 | { | 918 | { |
@@ -897,6 +937,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
897 | 937 | ||
898 | xen_init_irq_ops(); | 938 | xen_init_irq_ops(); |
899 | 939 | ||
940 | xen_init_cpuid_mask(); | ||
941 | |||
900 | #ifdef CONFIG_X86_LOCAL_APIC | 942 | #ifdef CONFIG_X86_LOCAL_APIC |
901 | /* | 943 | /* |
902 | * set up the basic apic ops. | 944 | * set up the basic apic ops. |
@@ -938,6 +980,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
938 | if (!xen_initial_domain()) | 980 | if (!xen_initial_domain()) |
939 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | 981 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); |
940 | 982 | ||
983 | #ifdef CONFIG_X86_64 | ||
984 | /* Work out if we support NX */ | ||
985 | check_efer(); | ||
986 | #endif | ||
987 | |||
941 | /* Don't do the full vcpu_info placement stuff until we have a | 988 | /* Don't do the full vcpu_info placement stuff until we have a |
942 | possible map and a non-dummy shared_info. */ | 989 | possible map and a non-dummy shared_info. */ |
943 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; | 990 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index db3802fb7b84..77b242c9a11e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn) | |||
184 | } | 184 | } |
185 | 185 | ||
186 | /* Build the parallel p2m_top_mfn structures */ | 186 | /* Build the parallel p2m_top_mfn structures */ |
187 | void xen_setup_mfn_list_list(void) | 187 | static void __init xen_build_mfn_list_list(void) |
188 | { | 188 | { |
189 | unsigned pfn, idx; | 189 | unsigned pfn, idx; |
190 | 190 | ||
@@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void) | |||
198 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; | 198 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; |
199 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); | 199 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); |
200 | } | 200 | } |
201 | } | ||
201 | 202 | ||
203 | void xen_setup_mfn_list_list(void) | ||
204 | { | ||
202 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | 205 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
203 | 206 | ||
204 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | 207 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = |
@@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
218 | 221 | ||
219 | p2m_top[topidx] = &mfn_list[pfn]; | 222 | p2m_top[topidx] = &mfn_list[pfn]; |
220 | } | 223 | } |
224 | |||
225 | xen_build_mfn_list_list(); | ||
221 | } | 226 | } |
222 | 227 | ||
223 | unsigned long get_phys_to_machine(unsigned long pfn) | 228 | unsigned long get_phys_to_machine(unsigned long pfn) |
@@ -233,47 +238,74 @@ unsigned long get_phys_to_machine(unsigned long pfn) | |||
233 | } | 238 | } |
234 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | 239 | EXPORT_SYMBOL_GPL(get_phys_to_machine); |
235 | 240 | ||
236 | static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) | 241 | /* install a new p2m_top page */ |
242 | bool install_p2mtop_page(unsigned long pfn, unsigned long *p) | ||
237 | { | 243 | { |
238 | unsigned long *p; | 244 | unsigned topidx = p2m_top_index(pfn); |
245 | unsigned long **pfnp, *mfnp; | ||
239 | unsigned i; | 246 | unsigned i; |
240 | 247 | ||
241 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); | 248 | pfnp = &p2m_top[topidx]; |
242 | BUG_ON(p == NULL); | 249 | mfnp = &p2m_top_mfn[topidx]; |
243 | 250 | ||
244 | for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) | 251 | for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) |
245 | p[i] = INVALID_P2M_ENTRY; | 252 | p[i] = INVALID_P2M_ENTRY; |
246 | 253 | ||
247 | if (cmpxchg(pp, p2m_missing, p) != p2m_missing) | 254 | if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { |
248 | free_page((unsigned long)p); | ||
249 | else | ||
250 | *mfnp = virt_to_mfn(p); | 255 | *mfnp = virt_to_mfn(p); |
256 | return true; | ||
257 | } | ||
258 | |||
259 | return false; | ||
251 | } | 260 | } |
252 | 261 | ||
253 | void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 262 | static void alloc_p2m(unsigned long pfn) |
254 | { | 263 | { |
255 | unsigned topidx, idx; | 264 | unsigned long *p; |
256 | 265 | ||
257 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | 266 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); |
258 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | 267 | BUG_ON(p == NULL); |
259 | return; | 268 | |
260 | } | 269 | if (!install_p2mtop_page(pfn, p)) |
270 | free_page((unsigned long)p); | ||
271 | } | ||
272 | |||
273 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
274 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
275 | { | ||
276 | unsigned topidx, idx; | ||
261 | 277 | ||
262 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { | 278 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { |
263 | BUG_ON(mfn != INVALID_P2M_ENTRY); | 279 | BUG_ON(mfn != INVALID_P2M_ENTRY); |
264 | return; | 280 | return true; |
265 | } | 281 | } |
266 | 282 | ||
267 | topidx = p2m_top_index(pfn); | 283 | topidx = p2m_top_index(pfn); |
268 | if (p2m_top[topidx] == p2m_missing) { | 284 | if (p2m_top[topidx] == p2m_missing) { |
269 | /* no need to allocate a page to store an invalid entry */ | ||
270 | if (mfn == INVALID_P2M_ENTRY) | 285 | if (mfn == INVALID_P2M_ENTRY) |
271 | return; | 286 | return true; |
272 | alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); | 287 | return false; |
273 | } | 288 | } |
274 | 289 | ||
275 | idx = p2m_index(pfn); | 290 | idx = p2m_index(pfn); |
276 | p2m_top[topidx][idx] = mfn; | 291 | p2m_top[topidx][idx] = mfn; |
292 | |||
293 | return true; | ||
294 | } | ||
295 | |||
296 | void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
297 | { | ||
298 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
299 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
300 | return; | ||
301 | } | ||
302 | |||
303 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
304 | alloc_p2m(pfn); | ||
305 | |||
306 | if (!__set_phys_to_machine(pfn, mfn)) | ||
307 | BUG(); | ||
308 | } | ||
277 | } | 309 | } |
278 | 310 | ||
279 | unsigned long arbitrary_virt_to_mfn(void *vaddr) | 311 | unsigned long arbitrary_virt_to_mfn(void *vaddr) |
@@ -419,10 +451,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) | |||
419 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 451 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
420 | pte_t *ptep, pte_t pteval) | 452 | pte_t *ptep, pte_t pteval) |
421 | { | 453 | { |
422 | /* updates to init_mm may be done without lock */ | ||
423 | if (mm == &init_mm) | ||
424 | preempt_disable(); | ||
425 | |||
426 | ADD_STATS(set_pte_at, 1); | 454 | ADD_STATS(set_pte_at, 1); |
427 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); | 455 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); |
428 | ADD_STATS(set_pte_at_current, mm == current->mm); | 456 | ADD_STATS(set_pte_at_current, mm == current->mm); |
@@ -443,9 +471,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
443 | } | 471 | } |
444 | xen_set_pte(ptep, pteval); | 472 | xen_set_pte(ptep, pteval); |
445 | 473 | ||
446 | out: | 474 | out: return; |
447 | if (mm == &init_mm) | ||
448 | preempt_enable(); | ||
449 | } | 475 | } |
450 | 476 | ||
451 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, | 477 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
@@ -987,7 +1013,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, | |||
987 | return 0; | 1013 | return 0; |
988 | } | 1014 | } |
989 | 1015 | ||
990 | void __init xen_mark_init_mm_pinned(void) | 1016 | static void __init xen_mark_init_mm_pinned(void) |
991 | { | 1017 | { |
992 | xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); | 1018 | xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); |
993 | } | 1019 | } |
@@ -1119,10 +1145,8 @@ static void drop_other_mm_ref(void *info) | |||
1119 | 1145 | ||
1120 | /* If this cpu still has a stale cr3 reference, then make sure | 1146 | /* If this cpu still has a stale cr3 reference, then make sure |
1121 | it has been flushed. */ | 1147 | it has been flushed. */ |
1122 | if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { | 1148 | if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) |
1123 | load_cr3(swapper_pg_dir); | 1149 | load_cr3(swapper_pg_dir); |
1124 | arch_flush_lazy_cpu_mode(); | ||
1125 | } | ||
1126 | } | 1150 | } |
1127 | 1151 | ||
1128 | static void xen_drop_mm_ref(struct mm_struct *mm) | 1152 | static void xen_drop_mm_ref(struct mm_struct *mm) |
@@ -1135,7 +1159,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1135 | load_cr3(swapper_pg_dir); | 1159 | load_cr3(swapper_pg_dir); |
1136 | else | 1160 | else |
1137 | leave_mm(smp_processor_id()); | 1161 | leave_mm(smp_processor_id()); |
1138 | arch_flush_lazy_cpu_mode(); | ||
1139 | } | 1162 | } |
1140 | 1163 | ||
1141 | /* Get the "official" set of cpus referring to our pagetable. */ | 1164 | /* Get the "official" set of cpus referring to our pagetable. */ |
@@ -1270,8 +1293,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1270 | } *args; | 1293 | } *args; |
1271 | struct multicall_space mcs; | 1294 | struct multicall_space mcs; |
1272 | 1295 | ||
1273 | BUG_ON(cpumask_empty(cpus)); | 1296 | if (cpumask_empty(cpus)) |
1274 | BUG_ON(!mm); | 1297 | return; /* nothing to do */ |
1275 | 1298 | ||
1276 | mcs = xen_mc_entry(sizeof(*args)); | 1299 | mcs = xen_mc_entry(sizeof(*args)); |
1277 | args = mcs.args; | 1300 | args = mcs.args; |
@@ -1438,6 +1461,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | |||
1438 | } | 1461 | } |
1439 | #endif | 1462 | #endif |
1440 | 1463 | ||
1464 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | ||
1465 | { | ||
1466 | struct mmuext_op op; | ||
1467 | op.cmd = cmd; | ||
1468 | op.arg1.mfn = pfn_to_mfn(pfn); | ||
1469 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) | ||
1470 | BUG(); | ||
1471 | } | ||
1472 | |||
1441 | /* Early in boot, while setting up the initial pagetable, assume | 1473 | /* Early in boot, while setting up the initial pagetable, assume |
1442 | everything is pinned. */ | 1474 | everything is pinned. */ |
1443 | static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) | 1475 | static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) |
@@ -1446,22 +1478,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) | |||
1446 | BUG_ON(mem_map); /* should only be used early */ | 1478 | BUG_ON(mem_map); /* should only be used early */ |
1447 | #endif | 1479 | #endif |
1448 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | 1480 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); |
1481 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); | ||
1482 | } | ||
1483 | |||
1484 | /* Used for pmd and pud */ | ||
1485 | static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) | ||
1486 | { | ||
1487 | #ifdef CONFIG_FLATMEM | ||
1488 | BUG_ON(mem_map); /* should only be used early */ | ||
1489 | #endif | ||
1490 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | ||
1449 | } | 1491 | } |
1450 | 1492 | ||
1451 | /* Early release_pte assumes that all pts are pinned, since there's | 1493 | /* Early release_pte assumes that all pts are pinned, since there's |
1452 | only init_mm and anything attached to that is pinned. */ | 1494 | only init_mm and anything attached to that is pinned. */ |
1453 | static void xen_release_pte_init(unsigned long pfn) | 1495 | static __init void xen_release_pte_init(unsigned long pfn) |
1454 | { | 1496 | { |
1497 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | ||
1455 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1498 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1456 | } | 1499 | } |
1457 | 1500 | ||
1458 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | 1501 | static __init void xen_release_pmd_init(unsigned long pfn) |
1459 | { | 1502 | { |
1460 | struct mmuext_op op; | 1503 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1461 | op.cmd = cmd; | ||
1462 | op.arg1.mfn = pfn_to_mfn(pfn); | ||
1463 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) | ||
1464 | BUG(); | ||
1465 | } | 1504 | } |
1466 | 1505 | ||
1467 | /* This needs to make sure the new pte page is pinned iff its being | 1506 | /* This needs to make sure the new pte page is pinned iff its being |
@@ -1819,6 +1858,13 @@ __init void xen_post_allocator_init(void) | |||
1819 | xen_mark_init_mm_pinned(); | 1858 | xen_mark_init_mm_pinned(); |
1820 | } | 1859 | } |
1821 | 1860 | ||
1861 | static void xen_leave_lazy_mmu(void) | ||
1862 | { | ||
1863 | preempt_disable(); | ||
1864 | xen_mc_flush(); | ||
1865 | paravirt_leave_lazy_mmu(); | ||
1866 | preempt_enable(); | ||
1867 | } | ||
1822 | 1868 | ||
1823 | const struct pv_mmu_ops xen_mmu_ops __initdata = { | 1869 | const struct pv_mmu_ops xen_mmu_ops __initdata = { |
1824 | .pagetable_setup_start = xen_pagetable_setup_start, | 1870 | .pagetable_setup_start = xen_pagetable_setup_start, |
@@ -1843,9 +1889,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1843 | 1889 | ||
1844 | .alloc_pte = xen_alloc_pte_init, | 1890 | .alloc_pte = xen_alloc_pte_init, |
1845 | .release_pte = xen_release_pte_init, | 1891 | .release_pte = xen_release_pte_init, |
1846 | .alloc_pmd = xen_alloc_pte_init, | 1892 | .alloc_pmd = xen_alloc_pmd_init, |
1847 | .alloc_pmd_clone = paravirt_nop, | 1893 | .alloc_pmd_clone = paravirt_nop, |
1848 | .release_pmd = xen_release_pte_init, | 1894 | .release_pmd = xen_release_pmd_init, |
1849 | 1895 | ||
1850 | #ifdef CONFIG_HIGHPTE | 1896 | #ifdef CONFIG_HIGHPTE |
1851 | .kmap_atomic_pte = xen_kmap_atomic_pte, | 1897 | .kmap_atomic_pte = xen_kmap_atomic_pte, |
@@ -1883,8 +1929,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1883 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), | 1929 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), |
1884 | .set_pgd = xen_set_pgd_hyper, | 1930 | .set_pgd = xen_set_pgd_hyper, |
1885 | 1931 | ||
1886 | .alloc_pud = xen_alloc_pte_init, | 1932 | .alloc_pud = xen_alloc_pmd_init, |
1887 | .release_pud = xen_release_pte_init, | 1933 | .release_pud = xen_release_pmd_init, |
1888 | #endif /* PAGETABLE_LEVELS == 4 */ | 1934 | #endif /* PAGETABLE_LEVELS == 4 */ |
1889 | 1935 | ||
1890 | .activate_mm = xen_activate_mm, | 1936 | .activate_mm = xen_activate_mm, |
@@ -1893,7 +1939,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1893 | 1939 | ||
1894 | .lazy_mode = { | 1940 | .lazy_mode = { |
1895 | .enter = paravirt_enter_lazy_mmu, | 1941 | .enter = paravirt_enter_lazy_mmu, |
1896 | .leave = xen_leave_lazy, | 1942 | .leave = xen_leave_lazy_mmu, |
1897 | }, | 1943 | }, |
1898 | 1944 | ||
1899 | .set_fixmap = xen_set_fixmap, | 1945 | .set_fixmap = xen_set_fixmap, |
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 24d1b44a337d..da7302624897 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -11,6 +11,9 @@ enum pt_level { | |||
11 | }; | 11 | }; |
12 | 12 | ||
13 | 13 | ||
14 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | ||
15 | bool install_p2mtop_page(unsigned long pfn, unsigned long *p); | ||
16 | |||
14 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 17 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
15 | 18 | ||
16 | 19 | ||
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 585a6e330837..429834ec1687 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) | |||
317 | BUG_ON(rc); | 317 | BUG_ON(rc); |
318 | 318 | ||
319 | while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { | 319 | while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { |
320 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | 320 | HYPERVISOR_sched_op(SCHEDOP_yield, NULL); |
321 | barrier(); | 321 | barrier(); |
322 | } | 322 | } |
323 | 323 | ||
@@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) | |||
422 | /* Make sure other vcpus get a chance to run if they need to. */ | 422 | /* Make sure other vcpus get a chance to run if they need to. */ |
423 | for_each_cpu(cpu, mask) { | 423 | for_each_cpu(cpu, mask) { |
424 | if (xen_vcpu_stolen(cpu)) { | 424 | if (xen_vcpu_stolen(cpu)) { |
425 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | 425 | HYPERVISOR_sched_op(SCHEDOP_yield, NULL); |
426 | break; | 426 | break; |
427 | } | 427 | } |
428 | } | 428 | } |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 2f5ef2632ea2..5c50a1017a37 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | |||
30 | void xen_ident_map_ISA(void); | 30 | void xen_ident_map_ISA(void); |
31 | void xen_reserve_top(void); | 31 | void xen_reserve_top(void); |
32 | 32 | ||
33 | void xen_leave_lazy(void); | ||
34 | void xen_post_allocator_init(void); | 33 | void xen_post_allocator_init(void); |
35 | 34 | ||
36 | char * __init xen_memory_setup(void); | 35 | char * __init xen_memory_setup(void); |
@@ -57,8 +56,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); | |||
57 | 56 | ||
58 | bool xen_vcpu_stolen(int vcpu); | 57 | bool xen_vcpu_stolen(int vcpu); |
59 | 58 | ||
60 | void xen_mark_init_mm_pinned(void); | ||
61 | |||
62 | void xen_setup_vcpu_info_placement(void); | 59 | void xen_setup_vcpu_info_placement(void); |
63 | 60 | ||
64 | #ifdef CONFIG_SMP | 61 | #ifdef CONFIG_SMP |
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 8ac9cddac575..cab100acf983 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig | |||
@@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES | |||
18 | secure, but slightly less efficient. | 18 | secure, but slightly less efficient. |
19 | If in doubt, say yes. | 19 | If in doubt, say yes. |
20 | 20 | ||
21 | config XEN_DEV_EVTCHN | ||
22 | tristate "Xen /dev/xen/evtchn device" | ||
23 | depends on XEN | ||
24 | default y | ||
25 | help | ||
26 | The evtchn driver allows a userspace process to trigger event | ||
27 | channels and to receive notification of an event channel | ||
28 | firing. | ||
29 | If in doubt, say yes. | ||
30 | |||
21 | config XENFS | 31 | config XENFS |
22 | tristate "Xen filesystem" | 32 | tristate "Xen filesystem" |
23 | depends on XEN | 33 | depends on XEN |
@@ -41,3 +51,13 @@ config XEN_COMPAT_XENFS | |||
41 | a xen platform. | 51 | a xen platform. |
42 | If in doubt, say yes. | 52 | If in doubt, say yes. |
43 | 53 | ||
54 | config XEN_SYS_HYPERVISOR | ||
55 | bool "Create xen entries under /sys/hypervisor" | ||
56 | depends on XEN && SYSFS | ||
57 | select SYS_HYPERVISOR | ||
58 | default y | ||
59 | help | ||
60 | Create entries under /sys/hypervisor describing the Xen | ||
61 | hypervisor environment. When running native or in another | ||
62 | virtual environment, /sys/hypervisor will still be present, | ||
63 | but will have no xen contents. \ No newline at end of file | ||
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index ff8accc9e103..ec2a39b1e26f 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -4,4 +4,6 @@ obj-y += xenbus/ | |||
4 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | 4 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o |
5 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | 5 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o |
6 | obj-$(CONFIG_XEN_BALLOON) += balloon.o | 6 | obj-$(CONFIG_XEN_BALLOON) += balloon.o |
7 | obj-$(CONFIG_XENFS) += xenfs/ \ No newline at end of file | 7 | obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o |
8 | obj-$(CONFIG_XENFS) += xenfs/ | ||
9 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file | ||
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index 5f54c01c1568..bdfd584ad853 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c | |||
@@ -21,29 +21,41 @@ static void disable_hotplug_cpu(int cpu) | |||
21 | set_cpu_present(cpu, false); | 21 | set_cpu_present(cpu, false); |
22 | } | 22 | } |
23 | 23 | ||
24 | static void vcpu_hotplug(unsigned int cpu) | 24 | static int vcpu_online(unsigned int cpu) |
25 | { | 25 | { |
26 | int err; | 26 | int err; |
27 | char dir[32], state[32]; | 27 | char dir[32], state[32]; |
28 | 28 | ||
29 | if (!cpu_possible(cpu)) | ||
30 | return; | ||
31 | |||
32 | sprintf(dir, "cpu/%u", cpu); | 29 | sprintf(dir, "cpu/%u", cpu); |
33 | err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); | 30 | err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); |
34 | if (err != 1) { | 31 | if (err != 1) { |
35 | printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); | 32 | printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); |
36 | return; | 33 | return err; |
37 | } | 34 | } |
38 | 35 | ||
39 | if (strcmp(state, "online") == 0) { | 36 | if (strcmp(state, "online") == 0) |
37 | return 1; | ||
38 | else if (strcmp(state, "offline") == 0) | ||
39 | return 0; | ||
40 | |||
41 | printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu); | ||
42 | return -EINVAL; | ||
43 | } | ||
44 | static void vcpu_hotplug(unsigned int cpu) | ||
45 | { | ||
46 | if (!cpu_possible(cpu)) | ||
47 | return; | ||
48 | |||
49 | switch (vcpu_online(cpu)) { | ||
50 | case 1: | ||
40 | enable_hotplug_cpu(cpu); | 51 | enable_hotplug_cpu(cpu); |
41 | } else if (strcmp(state, "offline") == 0) { | 52 | break; |
53 | case 0: | ||
42 | (void)cpu_down(cpu); | 54 | (void)cpu_down(cpu); |
43 | disable_hotplug_cpu(cpu); | 55 | disable_hotplug_cpu(cpu); |
44 | } else { | 56 | break; |
45 | printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", | 57 | default: |
46 | state, cpu); | 58 | break; |
47 | } | 59 | } |
48 | } | 60 | } |
49 | 61 | ||
@@ -64,12 +76,20 @@ static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, | |||
64 | static int setup_cpu_watcher(struct notifier_block *notifier, | 76 | static int setup_cpu_watcher(struct notifier_block *notifier, |
65 | unsigned long event, void *data) | 77 | unsigned long event, void *data) |
66 | { | 78 | { |
79 | int cpu; | ||
67 | static struct xenbus_watch cpu_watch = { | 80 | static struct xenbus_watch cpu_watch = { |
68 | .node = "cpu", | 81 | .node = "cpu", |
69 | .callback = handle_vcpu_hotplug_event}; | 82 | .callback = handle_vcpu_hotplug_event}; |
70 | 83 | ||
71 | (void)register_xenbus_watch(&cpu_watch); | 84 | (void)register_xenbus_watch(&cpu_watch); |
72 | 85 | ||
86 | for_each_possible_cpu(cpu) { | ||
87 | if (vcpu_online(cpu) == 0) { | ||
88 | (void)cpu_down(cpu); | ||
89 | cpu_clear(cpu, cpu_present_map); | ||
90 | } | ||
91 | } | ||
92 | |||
73 | return NOTIFY_DONE; | 93 | return NOTIFY_DONE; |
74 | } | 94 | } |
75 | 95 | ||
diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 30963af5dba0..1cd2a0e15ae8 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c | |||
@@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq) | |||
151 | return info_for_irq(irq)->evtchn; | 151 | return info_for_irq(irq)->evtchn; |
152 | } | 152 | } |
153 | 153 | ||
154 | unsigned irq_from_evtchn(unsigned int evtchn) | ||
155 | { | ||
156 | return evtchn_to_irq[evtchn]; | ||
157 | } | ||
158 | EXPORT_SYMBOL_GPL(irq_from_evtchn); | ||
159 | |||
154 | static enum ipi_vector ipi_from_irq(unsigned irq) | 160 | static enum ipi_vector ipi_from_irq(unsigned irq) |
155 | { | 161 | { |
156 | struct irq_info *info = info_for_irq(irq); | 162 | struct irq_info *info = info_for_irq(irq); |
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c new file mode 100644 index 000000000000..af031950f9b1 --- /dev/null +++ b/drivers/xen/evtchn.c | |||
@@ -0,0 +1,507 @@ | |||
1 | /****************************************************************************** | ||
2 | * evtchn.c | ||
3 | * | ||
4 | * Driver for receiving and demuxing event-channel signals. | ||
5 | * | ||
6 | * Copyright (c) 2004-2005, K A Fraser | ||
7 | * Multi-process extensions Copyright (c) 2004, Steven Smith | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/module.h> | ||
35 | #include <linux/kernel.h> | ||
36 | #include <linux/sched.h> | ||
37 | #include <linux/slab.h> | ||
38 | #include <linux/string.h> | ||
39 | #include <linux/errno.h> | ||
40 | #include <linux/fs.h> | ||
41 | #include <linux/errno.h> | ||
42 | #include <linux/miscdevice.h> | ||
43 | #include <linux/major.h> | ||
44 | #include <linux/proc_fs.h> | ||
45 | #include <linux/stat.h> | ||
46 | #include <linux/poll.h> | ||
47 | #include <linux/irq.h> | ||
48 | #include <linux/init.h> | ||
49 | #include <linux/gfp.h> | ||
50 | #include <linux/mutex.h> | ||
51 | #include <linux/cpu.h> | ||
52 | #include <xen/events.h> | ||
53 | #include <xen/evtchn.h> | ||
54 | #include <asm/xen/hypervisor.h> | ||
55 | |||
56 | struct per_user_data { | ||
57 | struct mutex bind_mutex; /* serialize bind/unbind operations */ | ||
58 | |||
59 | /* Notification ring, accessed via /dev/xen/evtchn. */ | ||
60 | #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) | ||
61 | #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) | ||
62 | evtchn_port_t *ring; | ||
63 | unsigned int ring_cons, ring_prod, ring_overflow; | ||
64 | struct mutex ring_cons_mutex; /* protect against concurrent readers */ | ||
65 | |||
66 | /* Processes wait on this queue when ring is empty. */ | ||
67 | wait_queue_head_t evtchn_wait; | ||
68 | struct fasync_struct *evtchn_async_queue; | ||
69 | const char *name; | ||
70 | }; | ||
71 | |||
72 | /* Who's bound to each port? */ | ||
73 | static struct per_user_data *port_user[NR_EVENT_CHANNELS]; | ||
74 | static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ | ||
75 | |||
76 | irqreturn_t evtchn_interrupt(int irq, void *data) | ||
77 | { | ||
78 | unsigned int port = (unsigned long)data; | ||
79 | struct per_user_data *u; | ||
80 | |||
81 | spin_lock(&port_user_lock); | ||
82 | |||
83 | u = port_user[port]; | ||
84 | |||
85 | disable_irq_nosync(irq); | ||
86 | |||
87 | if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { | ||
88 | u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; | ||
89 | wmb(); /* Ensure ring contents visible */ | ||
90 | if (u->ring_cons == u->ring_prod++) { | ||
91 | wake_up_interruptible(&u->evtchn_wait); | ||
92 | kill_fasync(&u->evtchn_async_queue, | ||
93 | SIGIO, POLL_IN); | ||
94 | } | ||
95 | } else { | ||
96 | u->ring_overflow = 1; | ||
97 | } | ||
98 | |||
99 | spin_unlock(&port_user_lock); | ||
100 | |||
101 | return IRQ_HANDLED; | ||
102 | } | ||
103 | |||
104 | static ssize_t evtchn_read(struct file *file, char __user *buf, | ||
105 | size_t count, loff_t *ppos) | ||
106 | { | ||
107 | int rc; | ||
108 | unsigned int c, p, bytes1 = 0, bytes2 = 0; | ||
109 | struct per_user_data *u = file->private_data; | ||
110 | |||
111 | /* Whole number of ports. */ | ||
112 | count &= ~(sizeof(evtchn_port_t)-1); | ||
113 | |||
114 | if (count == 0) | ||
115 | return 0; | ||
116 | |||
117 | if (count > PAGE_SIZE) | ||
118 | count = PAGE_SIZE; | ||
119 | |||
120 | for (;;) { | ||
121 | mutex_lock(&u->ring_cons_mutex); | ||
122 | |||
123 | rc = -EFBIG; | ||
124 | if (u->ring_overflow) | ||
125 | goto unlock_out; | ||
126 | |||
127 | c = u->ring_cons; | ||
128 | p = u->ring_prod; | ||
129 | if (c != p) | ||
130 | break; | ||
131 | |||
132 | mutex_unlock(&u->ring_cons_mutex); | ||
133 | |||
134 | if (file->f_flags & O_NONBLOCK) | ||
135 | return -EAGAIN; | ||
136 | |||
137 | rc = wait_event_interruptible(u->evtchn_wait, | ||
138 | u->ring_cons != u->ring_prod); | ||
139 | if (rc) | ||
140 | return rc; | ||
141 | } | ||
142 | |||
143 | /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ | ||
144 | if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { | ||
145 | bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * | ||
146 | sizeof(evtchn_port_t); | ||
147 | bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); | ||
148 | } else { | ||
149 | bytes1 = (p - c) * sizeof(evtchn_port_t); | ||
150 | bytes2 = 0; | ||
151 | } | ||
152 | |||
153 | /* Truncate chunks according to caller's maximum byte count. */ | ||
154 | if (bytes1 > count) { | ||
155 | bytes1 = count; | ||
156 | bytes2 = 0; | ||
157 | } else if ((bytes1 + bytes2) > count) { | ||
158 | bytes2 = count - bytes1; | ||
159 | } | ||
160 | |||
161 | rc = -EFAULT; | ||
162 | rmb(); /* Ensure that we see the port before we copy it. */ | ||
163 | if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || | ||
164 | ((bytes2 != 0) && | ||
165 | copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) | ||
166 | goto unlock_out; | ||
167 | |||
168 | u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); | ||
169 | rc = bytes1 + bytes2; | ||
170 | |||
171 | unlock_out: | ||
172 | mutex_unlock(&u->ring_cons_mutex); | ||
173 | return rc; | ||
174 | } | ||
175 | |||
176 | static ssize_t evtchn_write(struct file *file, const char __user *buf, | ||
177 | size_t count, loff_t *ppos) | ||
178 | { | ||
179 | int rc, i; | ||
180 | evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL); | ||
181 | struct per_user_data *u = file->private_data; | ||
182 | |||
183 | if (kbuf == NULL) | ||
184 | return -ENOMEM; | ||
185 | |||
186 | /* Whole number of ports. */ | ||
187 | count &= ~(sizeof(evtchn_port_t)-1); | ||
188 | |||
189 | rc = 0; | ||
190 | if (count == 0) | ||
191 | goto out; | ||
192 | |||
193 | if (count > PAGE_SIZE) | ||
194 | count = PAGE_SIZE; | ||
195 | |||
196 | rc = -EFAULT; | ||
197 | if (copy_from_user(kbuf, buf, count) != 0) | ||
198 | goto out; | ||
199 | |||
200 | spin_lock_irq(&port_user_lock); | ||
201 | for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) | ||
202 | if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) | ||
203 | enable_irq(irq_from_evtchn(kbuf[i])); | ||
204 | spin_unlock_irq(&port_user_lock); | ||
205 | |||
206 | rc = count; | ||
207 | |||
208 | out: | ||
209 | free_page((unsigned long)kbuf); | ||
210 | return rc; | ||
211 | } | ||
212 | |||
213 | static int evtchn_bind_to_user(struct per_user_data *u, int port) | ||
214 | { | ||
215 | int rc = 0; | ||
216 | |||
217 | /* | ||
218 | * Ports are never reused, so every caller should pass in a | ||
219 | * unique port. | ||
220 | * | ||
221 | * (Locking not necessary because we haven't registered the | ||
222 | * interrupt handler yet, and our caller has already | ||
223 | * serialized bind operations.) | ||
224 | */ | ||
225 | BUG_ON(port_user[port] != NULL); | ||
226 | port_user[port] = u; | ||
227 | |||
228 | rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, | ||
229 | u->name, (void *)(unsigned long)port); | ||
230 | if (rc >= 0) | ||
231 | rc = 0; | ||
232 | |||
233 | return rc; | ||
234 | } | ||
235 | |||
236 | static void evtchn_unbind_from_user(struct per_user_data *u, int port) | ||
237 | { | ||
238 | int irq = irq_from_evtchn(port); | ||
239 | |||
240 | unbind_from_irqhandler(irq, (void *)(unsigned long)port); | ||
241 | |||
242 | /* make sure we unbind the irq handler before clearing the port */ | ||
243 | barrier(); | ||
244 | |||
245 | port_user[port] = NULL; | ||
246 | } | ||
247 | |||
248 | static long evtchn_ioctl(struct file *file, | ||
249 | unsigned int cmd, unsigned long arg) | ||
250 | { | ||
251 | int rc; | ||
252 | struct per_user_data *u = file->private_data; | ||
253 | void __user *uarg = (void __user *) arg; | ||
254 | |||
255 | /* Prevent bind from racing with unbind */ | ||
256 | mutex_lock(&u->bind_mutex); | ||
257 | |||
258 | switch (cmd) { | ||
259 | case IOCTL_EVTCHN_BIND_VIRQ: { | ||
260 | struct ioctl_evtchn_bind_virq bind; | ||
261 | struct evtchn_bind_virq bind_virq; | ||
262 | |||
263 | rc = -EFAULT; | ||
264 | if (copy_from_user(&bind, uarg, sizeof(bind))) | ||
265 | break; | ||
266 | |||
267 | bind_virq.virq = bind.virq; | ||
268 | bind_virq.vcpu = 0; | ||
269 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | ||
270 | &bind_virq); | ||
271 | if (rc != 0) | ||
272 | break; | ||
273 | |||
274 | rc = evtchn_bind_to_user(u, bind_virq.port); | ||
275 | if (rc == 0) | ||
276 | rc = bind_virq.port; | ||
277 | break; | ||
278 | } | ||
279 | |||
280 | case IOCTL_EVTCHN_BIND_INTERDOMAIN: { | ||
281 | struct ioctl_evtchn_bind_interdomain bind; | ||
282 | struct evtchn_bind_interdomain bind_interdomain; | ||
283 | |||
284 | rc = -EFAULT; | ||
285 | if (copy_from_user(&bind, uarg, sizeof(bind))) | ||
286 | break; | ||
287 | |||
288 | bind_interdomain.remote_dom = bind.remote_domain; | ||
289 | bind_interdomain.remote_port = bind.remote_port; | ||
290 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, | ||
291 | &bind_interdomain); | ||
292 | if (rc != 0) | ||
293 | break; | ||
294 | |||
295 | rc = evtchn_bind_to_user(u, bind_interdomain.local_port); | ||
296 | if (rc == 0) | ||
297 | rc = bind_interdomain.local_port; | ||
298 | break; | ||
299 | } | ||
300 | |||
301 | case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { | ||
302 | struct ioctl_evtchn_bind_unbound_port bind; | ||
303 | struct evtchn_alloc_unbound alloc_unbound; | ||
304 | |||
305 | rc = -EFAULT; | ||
306 | if (copy_from_user(&bind, uarg, sizeof(bind))) | ||
307 | break; | ||
308 | |||
309 | alloc_unbound.dom = DOMID_SELF; | ||
310 | alloc_unbound.remote_dom = bind.remote_domain; | ||
311 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, | ||
312 | &alloc_unbound); | ||
313 | if (rc != 0) | ||
314 | break; | ||
315 | |||
316 | rc = evtchn_bind_to_user(u, alloc_unbound.port); | ||
317 | if (rc == 0) | ||
318 | rc = alloc_unbound.port; | ||
319 | break; | ||
320 | } | ||
321 | |||
322 | case IOCTL_EVTCHN_UNBIND: { | ||
323 | struct ioctl_evtchn_unbind unbind; | ||
324 | |||
325 | rc = -EFAULT; | ||
326 | if (copy_from_user(&unbind, uarg, sizeof(unbind))) | ||
327 | break; | ||
328 | |||
329 | rc = -EINVAL; | ||
330 | if (unbind.port >= NR_EVENT_CHANNELS) | ||
331 | break; | ||
332 | |||
333 | spin_lock_irq(&port_user_lock); | ||
334 | |||
335 | rc = -ENOTCONN; | ||
336 | if (port_user[unbind.port] != u) { | ||
337 | spin_unlock_irq(&port_user_lock); | ||
338 | break; | ||
339 | } | ||
340 | |||
341 | evtchn_unbind_from_user(u, unbind.port); | ||
342 | |||
343 | spin_unlock_irq(&port_user_lock); | ||
344 | |||
345 | rc = 0; | ||
346 | break; | ||
347 | } | ||
348 | |||
349 | case IOCTL_EVTCHN_NOTIFY: { | ||
350 | struct ioctl_evtchn_notify notify; | ||
351 | |||
352 | rc = -EFAULT; | ||
353 | if (copy_from_user(¬ify, uarg, sizeof(notify))) | ||
354 | break; | ||
355 | |||
356 | if (notify.port >= NR_EVENT_CHANNELS) { | ||
357 | rc = -EINVAL; | ||
358 | } else if (port_user[notify.port] != u) { | ||
359 | rc = -ENOTCONN; | ||
360 | } else { | ||
361 | notify_remote_via_evtchn(notify.port); | ||
362 | rc = 0; | ||
363 | } | ||
364 | break; | ||
365 | } | ||
366 | |||
367 | case IOCTL_EVTCHN_RESET: { | ||
368 | /* Initialise the ring to empty. Clear errors. */ | ||
369 | mutex_lock(&u->ring_cons_mutex); | ||
370 | spin_lock_irq(&port_user_lock); | ||
371 | u->ring_cons = u->ring_prod = u->ring_overflow = 0; | ||
372 | spin_unlock_irq(&port_user_lock); | ||
373 | mutex_unlock(&u->ring_cons_mutex); | ||
374 | rc = 0; | ||
375 | break; | ||
376 | } | ||
377 | |||
378 | default: | ||
379 | rc = -ENOSYS; | ||
380 | break; | ||
381 | } | ||
382 | mutex_unlock(&u->bind_mutex); | ||
383 | |||
384 | return rc; | ||
385 | } | ||
386 | |||
387 | static unsigned int evtchn_poll(struct file *file, poll_table *wait) | ||
388 | { | ||
389 | unsigned int mask = POLLOUT | POLLWRNORM; | ||
390 | struct per_user_data *u = file->private_data; | ||
391 | |||
392 | poll_wait(file, &u->evtchn_wait, wait); | ||
393 | if (u->ring_cons != u->ring_prod) | ||
394 | mask |= POLLIN | POLLRDNORM; | ||
395 | if (u->ring_overflow) | ||
396 | mask = POLLERR; | ||
397 | return mask; | ||
398 | } | ||
399 | |||
/*
 * fasync handler: forwards to fasync_helper() using the async queue
 * stored in this file's per_user_data, and returns its result.
 */
400 | static int evtchn_fasync(int fd, struct file *filp, int on) | ||
401 | { | ||
402 | struct per_user_data *u = filp->private_data; | ||
403 | return fasync_helper(fd, filp, on, &u->evtchn_async_queue); | ||
404 | } | ||
405 | |||
406 | static int evtchn_open(struct inode *inode, struct file *filp) | ||
407 | { | ||
408 | struct per_user_data *u; | ||
409 | |||
410 | u = kzalloc(sizeof(*u), GFP_KERNEL); | ||
411 | if (u == NULL) | ||
412 | return -ENOMEM; | ||
413 | |||
414 | u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm); | ||
415 | if (u->name == NULL) { | ||
416 | kfree(u); | ||
417 | return -ENOMEM; | ||
418 | } | ||
419 | |||
420 | init_waitqueue_head(&u->evtchn_wait); | ||
421 | |||
422 | u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); | ||
423 | if (u->ring == NULL) { | ||
424 | kfree(u->name); | ||
425 | kfree(u); | ||
426 | return -ENOMEM; | ||
427 | } | ||
428 | |||
429 | mutex_init(&u->bind_mutex); | ||
430 | mutex_init(&u->ring_cons_mutex); | ||
431 | |||
432 | filp->private_data = u; | ||
433 | |||
434 | return 0; | ||
435 | } | ||
436 | |||
437 | static int evtchn_release(struct inode *inode, struct file *filp) | ||
438 | { | ||
439 | int i; | ||
440 | struct per_user_data *u = filp->private_data; | ||
441 | |||
442 | spin_lock_irq(&port_user_lock); | ||
443 | |||
444 | free_page((unsigned long)u->ring); | ||
445 | |||
446 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { | ||
447 | if (port_user[i] != u) | ||
448 | continue; | ||
449 | |||
450 | evtchn_unbind_from_user(port_user[i], i); | ||
451 | } | ||
452 | |||
453 | spin_unlock_irq(&port_user_lock); | ||
454 | |||
455 | kfree(u->name); | ||
456 | kfree(u); | ||
457 | |||
458 | return 0; | ||
459 | } | ||
460 | |||
/*
 * File operations of the event-channel device; each entry points at the
 * correspondingly named evtchn_* handler in this file.
 */
461 | static const struct file_operations evtchn_fops = { | ||
462 | .owner = THIS_MODULE, | ||
463 | .read = evtchn_read, | ||
464 | .write = evtchn_write, | ||
465 | .unlocked_ioctl = evtchn_ioctl, | ||
466 | .poll = evtchn_poll, | ||
467 | .fasync = evtchn_fasync, | ||
468 | .open = evtchn_open, | ||
469 | .release = evtchn_release, | ||
470 | }; | ||
471 | |||
/*
 * Misc-device descriptor for "evtchn": dynamically assigned minor,
 * served by evtchn_fops.  Registered in evtchn_init().
 */
472 | static struct miscdevice evtchn_miscdev = { | ||
473 | .minor = MISC_DYNAMIC_MINOR, | ||
474 | .name = "evtchn", | ||
475 | .fops = &evtchn_fops, | ||
476 | }; | ||
477 | static int __init evtchn_init(void) | ||
478 | { | ||
479 | int err; | ||
480 | |||
481 | if (!xen_domain()) | ||
482 | return -ENODEV; | ||
483 | |||
484 | spin_lock_init(&port_user_lock); | ||
485 | memset(port_user, 0, sizeof(port_user)); | ||
486 | |||
487 | /* Create '/dev/misc/evtchn'. */ | ||
488 | err = misc_register(&evtchn_miscdev); | ||
489 | if (err != 0) { | ||
490 | printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); | ||
491 | return err; | ||
492 | } | ||
493 | |||
494 | printk(KERN_INFO "Event-channel device installed.\n"); | ||
495 | |||
496 | return 0; | ||
497 | } | ||
498 | |||
/* Module exit: unregisters the "evtchn" misc device. */
499 | static void __exit evtchn_cleanup(void) | ||
500 | { | ||
501 | misc_deregister(&evtchn_miscdev); | ||
502 | } | ||
503 | |||
504 | module_init(evtchn_init); | ||
505 | module_exit(evtchn_cleanup); | ||
506 | |||
507 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 0d61db1e7b49..fddc2025dece 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c | |||
@@ -62,14 +62,15 @@ static int xen_suspend(void *data) | |||
62 | gnttab_resume(); | 62 | gnttab_resume(); |
63 | xen_mm_unpin_all(); | 63 | xen_mm_unpin_all(); |
64 | 64 | ||
65 | sysdev_resume(); | ||
66 | |||
67 | if (!*cancelled) { | 65 | if (!*cancelled) { |
68 | xen_irq_resume(); | 66 | xen_irq_resume(); |
69 | xen_console_resume(); | 67 | xen_console_resume(); |
70 | xen_timer_resume(); | 68 | xen_timer_resume(); |
71 | } | 69 | } |
72 | 70 | ||
71 | sysdev_resume(); | ||
72 | device_power_up(PMSG_RESUME); | ||
73 | |||
73 | return 0; | 74 | return 0; |
74 | } | 75 | } |
75 | 76 | ||
@@ -97,9 +98,8 @@ static void do_suspend(void) | |||
97 | goto out; | 98 | goto out; |
98 | } | 99 | } |
99 | 100 | ||
100 | printk("suspending xenbus...\n"); | 101 | printk(KERN_DEBUG "suspending xenstore...\n"); |
101 | /* XXX use normal device tree? */ | 102 | xs_suspend(); |
102 | xenbus_suspend(); | ||
103 | 103 | ||
104 | err = device_power_down(PMSG_SUSPEND); | 104 | err = device_power_down(PMSG_SUSPEND); |
105 | if (err) { | 105 | if (err) { |
@@ -115,9 +115,9 @@ static void do_suspend(void) | |||
115 | 115 | ||
116 | if (!cancelled) { | 116 | if (!cancelled) { |
117 | xen_arch_resume(); | 117 | xen_arch_resume(); |
118 | xenbus_resume(); | 118 | xs_resume(); |
119 | } else | 119 | } else |
120 | xenbus_suspend_cancel(); | 120 | xs_suspend_cancel(); |
121 | 121 | ||
122 | device_power_up(PMSG_RESUME); | 122 | device_power_up(PMSG_RESUME); |
123 | 123 | ||
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c new file mode 100644 index 000000000000..88a60e03ccf0 --- /dev/null +++ b/drivers/xen/sys-hypervisor.c | |||
@@ -0,0 +1,445 @@ | |||
1 | /* | ||
2 | * copyright (c) 2006 IBM Corporation | ||
3 | * Authored by: Mike D. Day <ncmike@us.ibm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/kobject.h> | ||
13 | |||
14 | #include <asm/xen/hypervisor.h> | ||
15 | #include <asm/xen/hypercall.h> | ||
16 | |||
17 | #include <xen/xenbus.h> | ||
18 | #include <xen/interface/xen.h> | ||
19 | #include <xen/interface/version.h> | ||
20 | |||
/*
 * Define a static struct hyp_sysfs_attr named <_name>_attr.
 * The _RO form uses __ATTR_RO(_name); the _RW form uses mode 0644 and
 * wires up <_name>_show and <_name>_store.
 */
21 | #define HYPERVISOR_ATTR_RO(_name) \ | ||
22 | static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name) | ||
23 | |||
24 | #define HYPERVISOR_ATTR_RW(_name) \ | ||
25 | static struct hyp_sysfs_attr _name##_attr = \ | ||
26 | __ATTR(_name, 0644, _name##_show, _name##_store) | ||
27 | |||
/*
 * A sysfs attribute with typed show/store callbacks.  hyp_sysfs_show()
 * and hyp_sysfs_store() recover this structure from the embedded
 * struct attribute via container_of() and dispatch to the callbacks.
 * hyp_attr_data is not referenced by the code in this file.
 */
28 | struct hyp_sysfs_attr { | ||
29 | struct attribute attr; | ||
30 | ssize_t (*show)(struct hyp_sysfs_attr *, char *); | ||
31 | ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t); | ||
32 | void *hyp_attr_data; | ||
33 | }; | ||
34 | |||
/* "type" attribute: writes the fixed string "xen\n" and returns its length. */
35 | static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
36 | { | ||
37 | return sprintf(buffer, "xen\n"); | ||
38 | } | ||
39 | |||
40 | HYPERVISOR_ATTR_RO(type); | ||
41 | |||
/* Create the "type" file under hypervisor_kobj; returns sysfs_create_file()'s result. */
42 | static int __init xen_sysfs_type_init(void) | ||
43 | { | ||
44 | return sysfs_create_file(hypervisor_kobj, &type_attr.attr); | ||
45 | } | ||
46 | |||
/* Remove the "type" file again. */
47 | static void xen_sysfs_type_destroy(void) | ||
48 | { | ||
49 | sysfs_remove_file(hypervisor_kobj, &type_attr.attr); | ||
50 | } | ||
51 | |||
52 | /* xen version attributes */ | ||
53 | static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
54 | { | ||
55 | int version = HYPERVISOR_xen_version(XENVER_version, NULL); | ||
56 | if (version) | ||
57 | return sprintf(buffer, "%d\n", version >> 16); | ||
58 | return -ENODEV; | ||
59 | } | ||
60 | |||
61 | HYPERVISOR_ATTR_RO(major); | ||
62 | |||
63 | static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
64 | { | ||
65 | int version = HYPERVISOR_xen_version(XENVER_version, NULL); | ||
66 | if (version) | ||
67 | return sprintf(buffer, "%d\n", version & 0xff); | ||
68 | return -ENODEV; | ||
69 | } | ||
70 | |||
71 | HYPERVISOR_ATTR_RO(minor); | ||
72 | |||
73 | static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
74 | { | ||
75 | int ret = -ENOMEM; | ||
76 | char *extra; | ||
77 | |||
78 | extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL); | ||
79 | if (extra) { | ||
80 | ret = HYPERVISOR_xen_version(XENVER_extraversion, extra); | ||
81 | if (!ret) | ||
82 | ret = sprintf(buffer, "%s\n", extra); | ||
83 | kfree(extra); | ||
84 | } | ||
85 | |||
86 | return ret; | ||
87 | } | ||
88 | |||
89 | HYPERVISOR_ATTR_RO(extra); | ||
90 | |||
/* NULL-terminated list of the attributes in the "version" group. */
91 | static struct attribute *version_attrs[] = { | ||
92 | &major_attr.attr, | ||
93 | &minor_attr.attr, | ||
94 | &extra_attr.attr, | ||
95 | NULL | ||
96 | }; | ||
97 | |||
/* Attribute group named "version" holding major, minor and extra. */
98 | static struct attribute_group version_group = { | ||
99 | .name = "version", | ||
100 | .attrs = version_attrs, | ||
101 | }; | ||
102 | |||
/* Create the "version" group under hypervisor_kobj; returns sysfs_create_group()'s result. */
103 | static int __init xen_sysfs_version_init(void) | ||
104 | { | ||
105 | return sysfs_create_group(hypervisor_kobj, &version_group); | ||
106 | } | ||
107 | |||
/* Remove the "version" group again. */
108 | static void xen_sysfs_version_destroy(void) | ||
109 | { | ||
110 | sysfs_remove_group(hypervisor_kobj, &version_group); | ||
111 | } | ||
112 | |||
113 | /* UUID */ | ||
114 | |||
115 | static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
116 | { | ||
117 | char *vm, *val; | ||
118 | int ret; | ||
119 | extern int xenstored_ready; | ||
120 | |||
121 | if (!xenstored_ready) | ||
122 | return -EBUSY; | ||
123 | |||
124 | vm = xenbus_read(XBT_NIL, "vm", "", NULL); | ||
125 | if (IS_ERR(vm)) | ||
126 | return PTR_ERR(vm); | ||
127 | val = xenbus_read(XBT_NIL, vm, "uuid", NULL); | ||
128 | kfree(vm); | ||
129 | if (IS_ERR(val)) | ||
130 | return PTR_ERR(val); | ||
131 | ret = sprintf(buffer, "%s\n", val); | ||
132 | kfree(val); | ||
133 | return ret; | ||
134 | } | ||
135 | |||
136 | HYPERVISOR_ATTR_RO(uuid); | ||
137 | |||
/* Create the "uuid" file under hypervisor_kobj; returns sysfs_create_file()'s result. */
138 | static int __init xen_sysfs_uuid_init(void) | ||
139 | { | ||
140 | return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr); | ||
141 | } | ||
142 | |||
/* Remove the "uuid" file again. */
143 | static void xen_sysfs_uuid_destroy(void) | ||
144 | { | ||
145 | sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr); | ||
146 | } | ||
147 | |||
148 | /* xen compilation attributes */ | ||
149 | |||
150 | static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
151 | { | ||
152 | int ret = -ENOMEM; | ||
153 | struct xen_compile_info *info; | ||
154 | |||
155 | info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); | ||
156 | if (info) { | ||
157 | ret = HYPERVISOR_xen_version(XENVER_compile_info, info); | ||
158 | if (!ret) | ||
159 | ret = sprintf(buffer, "%s\n", info->compiler); | ||
160 | kfree(info); | ||
161 | } | ||
162 | |||
163 | return ret; | ||
164 | } | ||
165 | |||
166 | HYPERVISOR_ATTR_RO(compiler); | ||
167 | |||
168 | static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
169 | { | ||
170 | int ret = -ENOMEM; | ||
171 | struct xen_compile_info *info; | ||
172 | |||
173 | info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); | ||
174 | if (info) { | ||
175 | ret = HYPERVISOR_xen_version(XENVER_compile_info, info); | ||
176 | if (!ret) | ||
177 | ret = sprintf(buffer, "%s\n", info->compile_by); | ||
178 | kfree(info); | ||
179 | } | ||
180 | |||
181 | return ret; | ||
182 | } | ||
183 | |||
184 | HYPERVISOR_ATTR_RO(compiled_by); | ||
185 | |||
186 | static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
187 | { | ||
188 | int ret = -ENOMEM; | ||
189 | struct xen_compile_info *info; | ||
190 | |||
191 | info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); | ||
192 | if (info) { | ||
193 | ret = HYPERVISOR_xen_version(XENVER_compile_info, info); | ||
194 | if (!ret) | ||
195 | ret = sprintf(buffer, "%s\n", info->compile_date); | ||
196 | kfree(info); | ||
197 | } | ||
198 | |||
199 | return ret; | ||
200 | } | ||
201 | |||
202 | HYPERVISOR_ATTR_RO(compile_date); | ||
203 | |||
/* NULL-terminated list of the attributes in the "compilation" group. */
204 | static struct attribute *xen_compile_attrs[] = { | ||
205 | &compiler_attr.attr, | ||
206 | &compiled_by_attr.attr, | ||
207 | &compile_date_attr.attr, | ||
208 | NULL | ||
209 | }; | ||
210 | |||
/* Attribute group named "compilation". */
211 | static struct attribute_group xen_compilation_group = { | ||
212 | .name = "compilation", | ||
213 | .attrs = xen_compile_attrs, | ||
214 | }; | ||
215 | |||
216 | int __init static xen_compilation_init(void) | ||
217 | { | ||
218 | return sysfs_create_group(hypervisor_kobj, &xen_compilation_group); | ||
219 | } | ||
220 | |||
/* Remove the "compilation" group again. */
221 | static void xen_compilation_destroy(void) | ||
222 | { | ||
223 | sysfs_remove_group(hypervisor_kobj, &xen_compilation_group); | ||
224 | } | ||
225 | |||
226 | /* xen properties info */ | ||
227 | |||
228 | static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
229 | { | ||
230 | int ret = -ENOMEM; | ||
231 | char *caps; | ||
232 | |||
233 | caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL); | ||
234 | if (caps) { | ||
235 | ret = HYPERVISOR_xen_version(XENVER_capabilities, caps); | ||
236 | if (!ret) | ||
237 | ret = sprintf(buffer, "%s\n", caps); | ||
238 | kfree(caps); | ||
239 | } | ||
240 | |||
241 | return ret; | ||
242 | } | ||
243 | |||
244 | HYPERVISOR_ATTR_RO(capabilities); | ||
245 | |||
246 | static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
247 | { | ||
248 | int ret = -ENOMEM; | ||
249 | char *cset; | ||
250 | |||
251 | cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL); | ||
252 | if (cset) { | ||
253 | ret = HYPERVISOR_xen_version(XENVER_changeset, cset); | ||
254 | if (!ret) | ||
255 | ret = sprintf(buffer, "%s\n", cset); | ||
256 | kfree(cset); | ||
257 | } | ||
258 | |||
259 | return ret; | ||
260 | } | ||
261 | |||
262 | HYPERVISOR_ATTR_RO(changeset); | ||
263 | |||
264 | static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
265 | { | ||
266 | int ret = -ENOMEM; | ||
267 | struct xen_platform_parameters *parms; | ||
268 | |||
269 | parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL); | ||
270 | if (parms) { | ||
271 | ret = HYPERVISOR_xen_version(XENVER_platform_parameters, | ||
272 | parms); | ||
273 | if (!ret) | ||
274 | ret = sprintf(buffer, "%lx\n", parms->virt_start); | ||
275 | kfree(parms); | ||
276 | } | ||
277 | |||
278 | return ret; | ||
279 | } | ||
280 | |||
281 | HYPERVISOR_ATTR_RO(virtual_start); | ||
282 | |||
283 | static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
284 | { | ||
285 | int ret; | ||
286 | |||
287 | ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL); | ||
288 | if (ret > 0) | ||
289 | ret = sprintf(buffer, "%x\n", ret); | ||
290 | |||
291 | return ret; | ||
292 | } | ||
293 | |||
294 | HYPERVISOR_ATTR_RO(pagesize); | ||
295 | |||
296 | static ssize_t xen_feature_show(int index, char *buffer) | ||
297 | { | ||
298 | ssize_t ret; | ||
299 | struct xen_feature_info info; | ||
300 | |||
301 | info.submap_idx = index; | ||
302 | ret = HYPERVISOR_xen_version(XENVER_get_features, &info); | ||
303 | if (!ret) | ||
304 | ret = sprintf(buffer, "%08x", info.submap); | ||
305 | |||
306 | return ret; | ||
307 | } | ||
308 | |||
309 | static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
310 | { | ||
311 | ssize_t len; | ||
312 | int i; | ||
313 | |||
314 | len = 0; | ||
315 | for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) { | ||
316 | int ret = xen_feature_show(i, buffer + len); | ||
317 | if (ret < 0) { | ||
318 | if (len == 0) | ||
319 | len = ret; | ||
320 | break; | ||
321 | } | ||
322 | len += ret; | ||
323 | } | ||
324 | if (len > 0) | ||
325 | buffer[len++] = '\n'; | ||
326 | |||
327 | return len; | ||
328 | } | ||
329 | |||
330 | HYPERVISOR_ATTR_RO(features); | ||
331 | |||
/* NULL-terminated list of the attributes in the "properties" group. */
332 | static struct attribute *xen_properties_attrs[] = { | ||
333 | &capabilities_attr.attr, | ||
334 | &changeset_attr.attr, | ||
335 | &virtual_start_attr.attr, | ||
336 | &pagesize_attr.attr, | ||
337 | &features_attr.attr, | ||
338 | NULL | ||
339 | }; | ||
340 | |||
/* Attribute group named "properties". */
341 | static struct attribute_group xen_properties_group = { | ||
342 | .name = "properties", | ||
343 | .attrs = xen_properties_attrs, | ||
344 | }; | ||
345 | |||
/* Create the "properties" group under hypervisor_kobj; returns sysfs_create_group()'s result. */
346 | static int __init xen_properties_init(void) | ||
347 | { | ||
348 | return sysfs_create_group(hypervisor_kobj, &xen_properties_group); | ||
349 | } | ||
350 | |||
/* Remove the "properties" group again. */
351 | static void xen_properties_destroy(void) | ||
352 | { | ||
353 | sysfs_remove_group(hypervisor_kobj, &xen_properties_group); | ||
354 | } | ||
355 | |||
356 | static int __init hyper_sysfs_init(void) | ||
357 | { | ||
358 | int ret; | ||
359 | |||
360 | if (!xen_domain()) | ||
361 | return -ENODEV; | ||
362 | |||
363 | ret = xen_sysfs_type_init(); | ||
364 | if (ret) | ||
365 | goto out; | ||
366 | ret = xen_sysfs_version_init(); | ||
367 | if (ret) | ||
368 | goto version_out; | ||
369 | ret = xen_compilation_init(); | ||
370 | if (ret) | ||
371 | goto comp_out; | ||
372 | ret = xen_sysfs_uuid_init(); | ||
373 | if (ret) | ||
374 | goto uuid_out; | ||
375 | ret = xen_properties_init(); | ||
376 | if (ret) | ||
377 | goto prop_out; | ||
378 | |||
379 | goto out; | ||
380 | |||
381 | prop_out: | ||
382 | xen_sysfs_uuid_destroy(); | ||
383 | uuid_out: | ||
384 | xen_compilation_destroy(); | ||
385 | comp_out: | ||
386 | xen_sysfs_version_destroy(); | ||
387 | version_out: | ||
388 | xen_sysfs_type_destroy(); | ||
389 | out: | ||
390 | return ret; | ||
391 | } | ||
392 | |||
/*
 * Module exit: removes every sysfs entry created by hyper_sysfs_init().
 * The order is not the exact reverse of creation (compilation is removed
 * before uuid); each call names a separate file or group.
 */
393 | static void __exit hyper_sysfs_exit(void) | ||
394 | { | ||
395 | xen_properties_destroy(); | ||
396 | xen_compilation_destroy(); | ||
397 | xen_sysfs_uuid_destroy(); | ||
398 | xen_sysfs_version_destroy(); | ||
399 | xen_sysfs_type_destroy(); | ||
400 | |||
401 | } | ||
402 | module_init(hyper_sysfs_init); | ||
403 | module_exit(hyper_sysfs_exit); | ||
404 | |||
405 | static ssize_t hyp_sysfs_show(struct kobject *kobj, | ||
406 | struct attribute *attr, | ||
407 | char *buffer) | ||
408 | { | ||
409 | struct hyp_sysfs_attr *hyp_attr; | ||
410 | hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); | ||
411 | if (hyp_attr->show) | ||
412 | return hyp_attr->show(hyp_attr, buffer); | ||
413 | return 0; | ||
414 | } | ||
415 | |||
416 | static ssize_t hyp_sysfs_store(struct kobject *kobj, | ||
417 | struct attribute *attr, | ||
418 | const char *buffer, | ||
419 | size_t len) | ||
420 | { | ||
421 | struct hyp_sysfs_attr *hyp_attr; | ||
422 | hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); | ||
423 | if (hyp_attr->store) | ||
424 | return hyp_attr->store(hyp_attr, buffer, len); | ||
425 | return 0; | ||
426 | } | ||
427 | |||
/* sysfs operations that route show/store to the hyp_sysfs_attr callbacks. */
428 | static struct sysfs_ops hyp_sysfs_ops = { | ||
429 | .show = hyp_sysfs_show, | ||
430 | .store = hyp_sysfs_store, | ||
431 | }; | ||
432 | |||
/* kobject type carrying hyp_sysfs_ops; installed by hypervisor_subsys_init(). */
433 | static struct kobj_type hyp_sysfs_kobj_type = { | ||
434 | .sysfs_ops = &hyp_sysfs_ops, | ||
435 | }; | ||
436 | |||
/*
 * On a Xen domain, set hypervisor_kobj's ktype to hyp_sysfs_kobj_type so
 * that attribute reads and writes go through hyp_sysfs_show/store.
 * Returns 0 on success and -ENODEV when not running on Xen.
 */
437 | static int __init hypervisor_subsys_init(void) | ||
438 | { | ||
439 | if (!xen_domain()) | ||
440 | return -ENODEV; | ||
441 | |||
442 | hypervisor_kobj->ktype = &hyp_sysfs_kobj_type; | ||
443 | return 0; | ||
444 | } | ||
445 | device_initcall(hypervisor_subsys_init); | ||
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 773d1cf23283..d42e25d5968d 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c | |||
@@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name); | |||
71 | 71 | ||
72 | static void xenbus_dev_shutdown(struct device *_dev); | 72 | static void xenbus_dev_shutdown(struct device *_dev); |
73 | 73 | ||
74 | static int xenbus_dev_suspend(struct device *dev, pm_message_t state); | ||
75 | static int xenbus_dev_resume(struct device *dev); | ||
76 | |||
74 | /* If something in array of ids matches this device, return it. */ | 77 | /* If something in array of ids matches this device, return it. */ |
75 | static const struct xenbus_device_id * | 78 | static const struct xenbus_device_id * |
76 | match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) | 79 | match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) |
@@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = { | |||
188 | .remove = xenbus_dev_remove, | 191 | .remove = xenbus_dev_remove, |
189 | .shutdown = xenbus_dev_shutdown, | 192 | .shutdown = xenbus_dev_shutdown, |
190 | .dev_attrs = xenbus_dev_attrs, | 193 | .dev_attrs = xenbus_dev_attrs, |
194 | |||
195 | .suspend = xenbus_dev_suspend, | ||
196 | .resume = xenbus_dev_resume, | ||
191 | }, | 197 | }, |
192 | }; | 198 | }; |
193 | 199 | ||
@@ -654,6 +660,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) | |||
654 | 660 | ||
655 | kfree(root); | 661 | kfree(root); |
656 | } | 662 | } |
663 | EXPORT_SYMBOL_GPL(xenbus_dev_changed); | ||
657 | 664 | ||
658 | static void frontend_changed(struct xenbus_watch *watch, | 665 | static void frontend_changed(struct xenbus_watch *watch, |
659 | const char **vec, unsigned int len) | 666 | const char **vec, unsigned int len) |
@@ -669,7 +676,7 @@ static struct xenbus_watch fe_watch = { | |||
669 | .callback = frontend_changed, | 676 | .callback = frontend_changed, |
670 | }; | 677 | }; |
671 | 678 | ||
672 | static int suspend_dev(struct device *dev, void *data) | 679 | static int xenbus_dev_suspend(struct device *dev, pm_message_t state) |
673 | { | 680 | { |
674 | int err = 0; | 681 | int err = 0; |
675 | struct xenbus_driver *drv; | 682 | struct xenbus_driver *drv; |
@@ -682,35 +689,14 @@ static int suspend_dev(struct device *dev, void *data) | |||
682 | drv = to_xenbus_driver(dev->driver); | 689 | drv = to_xenbus_driver(dev->driver); |
683 | xdev = container_of(dev, struct xenbus_device, dev); | 690 | xdev = container_of(dev, struct xenbus_device, dev); |
684 | if (drv->suspend) | 691 | if (drv->suspend) |
685 | err = drv->suspend(xdev); | 692 | err = drv->suspend(xdev, state); |
686 | if (err) | 693 | if (err) |
687 | printk(KERN_WARNING | 694 | printk(KERN_WARNING |
688 | "xenbus: suspend %s failed: %i\n", dev_name(dev), err); | 695 | "xenbus: suspend %s failed: %i\n", dev_name(dev), err); |
689 | return 0; | 696 | return 0; |
690 | } | 697 | } |
691 | 698 | ||
692 | static int suspend_cancel_dev(struct device *dev, void *data) | 699 | static int xenbus_dev_resume(struct device *dev) |
693 | { | ||
694 | int err = 0; | ||
695 | struct xenbus_driver *drv; | ||
696 | struct xenbus_device *xdev; | ||
697 | |||
698 | DPRINTK(""); | ||
699 | |||
700 | if (dev->driver == NULL) | ||
701 | return 0; | ||
702 | drv = to_xenbus_driver(dev->driver); | ||
703 | xdev = container_of(dev, struct xenbus_device, dev); | ||
704 | if (drv->suspend_cancel) | ||
705 | err = drv->suspend_cancel(xdev); | ||
706 | if (err) | ||
707 | printk(KERN_WARNING | ||
708 | "xenbus: suspend_cancel %s failed: %i\n", | ||
709 | dev_name(dev), err); | ||
710 | return 0; | ||
711 | } | ||
712 | |||
713 | static int resume_dev(struct device *dev, void *data) | ||
714 | { | 700 | { |
715 | int err; | 701 | int err; |
716 | struct xenbus_driver *drv; | 702 | struct xenbus_driver *drv; |
@@ -755,33 +741,6 @@ static int resume_dev(struct device *dev, void *data) | |||
755 | return 0; | 741 | return 0; |
756 | } | 742 | } |
757 | 743 | ||
758 | void xenbus_suspend(void) | ||
759 | { | ||
760 | DPRINTK(""); | ||
761 | |||
762 | bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); | ||
763 | xenbus_backend_suspend(suspend_dev); | ||
764 | xs_suspend(); | ||
765 | } | ||
766 | EXPORT_SYMBOL_GPL(xenbus_suspend); | ||
767 | |||
768 | void xenbus_resume(void) | ||
769 | { | ||
770 | xb_init_comms(); | ||
771 | xs_resume(); | ||
772 | bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); | ||
773 | xenbus_backend_resume(resume_dev); | ||
774 | } | ||
775 | EXPORT_SYMBOL_GPL(xenbus_resume); | ||
776 | |||
777 | void xenbus_suspend_cancel(void) | ||
778 | { | ||
779 | xs_suspend_cancel(); | ||
780 | bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); | ||
781 | xenbus_backend_resume(suspend_cancel_dev); | ||
782 | } | ||
783 | EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); | ||
784 | |||
785 | /* A flag to determine if xenstored is 'ready' (i.e. has started) */ | 744 | /* A flag to determine if xenstored is 'ready' (i.e. has started) */ |
786 | int xenstored_ready = 0; | 745 | int xenstored_ready = 0; |
787 | 746 | ||
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index e325eab4724d..eab33f1dbdf7 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c | |||
@@ -673,6 +673,8 @@ void xs_resume(void) | |||
673 | struct xenbus_watch *watch; | 673 | struct xenbus_watch *watch; |
674 | char token[sizeof(watch) * 2 + 1]; | 674 | char token[sizeof(watch) * 2 + 1]; |
675 | 675 | ||
676 | xb_init_comms(); | ||
677 | |||
676 | mutex_unlock(&xs_state.response_mutex); | 678 | mutex_unlock(&xs_state.response_mutex); |
677 | mutex_unlock(&xs_state.request_mutex); | 679 | mutex_unlock(&xs_state.request_mutex); |
678 | up_write(&xs_state.transaction_mutex); | 680 | up_write(&xs_state.transaction_mutex); |
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 515741a8e6b8..6559e0c752ce 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c | |||
@@ -20,10 +20,27 @@ | |||
20 | MODULE_DESCRIPTION("Xen filesystem"); | 20 | MODULE_DESCRIPTION("Xen filesystem"); |
21 | MODULE_LICENSE("GPL"); | 21 | MODULE_LICENSE("GPL"); |
22 | 22 | ||
23 | static ssize_t capabilities_read(struct file *file, char __user *buf, | ||
24 | size_t size, loff_t *off) | ||
25 | { | ||
26 | char *tmp = ""; | ||
27 | |||
28 | if (xen_initial_domain()) | ||
29 | tmp = "control_d\n"; | ||
30 | |||
31 | return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp)); | ||
32 | } | ||
33 | |||
/* File operations of the "capabilities" file: read only. */
34 | static const struct file_operations capabilities_file_ops = { | ||
35 | .read = capabilities_read, | ||
36 | }; | ||
37 | |||
23 | static int xenfs_fill_super(struct super_block *sb, void *data, int silent) | 38 | static int xenfs_fill_super(struct super_block *sb, void *data, int silent) |
24 | { | 39 | { |
25 | static struct tree_descr xenfs_files[] = { | 40 | static struct tree_descr xenfs_files[] = { |
26 | [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR}, | 41 | [1] = {}, |
42 | { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, | ||
43 | { "capabilities", &capabilities_file_ops, S_IRUGO }, | ||
27 | {""}, | 44 | {""}, |
28 | }; | 45 | }; |
29 | 46 | ||
diff --git a/include/Kbuild b/include/Kbuild index d8c3e3cbf416..fe36accd4328 100644 --- a/include/Kbuild +++ b/include/Kbuild | |||
@@ -8,3 +8,4 @@ header-y += mtd/ | |||
8 | header-y += rdma/ | 8 | header-y += rdma/ |
9 | header-y += video/ | 9 | header-y += video/ |
10 | header-y += drm/ | 10 | header-y += drm/ |
11 | header-y += xen/ | ||
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index e16fdb1f4f4f..09887045d03f 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h | |||
@@ -73,8 +73,8 @@ static inline int pte_file(pte_t pte) { return 0; } | |||
73 | #define pgtable_cache_init() do {} while (0) | 73 | #define pgtable_cache_init() do {} while (0) |
74 | #define arch_enter_lazy_mmu_mode() do {} while (0) | 74 | #define arch_enter_lazy_mmu_mode() do {} while (0) |
75 | #define arch_leave_lazy_mmu_mode() do {} while (0) | 75 | #define arch_leave_lazy_mmu_mode() do {} while (0) |
76 | #define arch_enter_lazy_cpu_mode() do {} while (0) | 76 | |
77 | #define arch_leave_lazy_cpu_mode() do {} while (0) | 77 | #define arch_start_context_switch(prev) do {} while (0) |
78 | 78 | ||
79 | #else /* !CONFIG_MMU */ | 79 | #else /* !CONFIG_MMU */ |
80 | /*****************************************************************************/ | 80 | /*****************************************************************************/ |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 8e6d0ca70aba..e410f602cab1 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, | |||
280 | #endif | 280 | #endif |
281 | 281 | ||
282 | /* | 282 | /* |
283 | * A facility to provide batching of the reload of page tables with the | 283 | * A facility to provide batching of the reload of page tables and |
284 | * actual context switch code for paravirtualized guests. By convention, | 284 | * other process state with the actual context switch code for |
285 | * only one of the lazy modes (CPU, MMU) should be active at any given | 285 | * paravirtualized guests. By convention, only one of the batched |
286 | * time, entry should never be nested, and entry and exits should always | 286 | * update (lazy) modes (CPU, MMU) should be active at any given time, |
287 | * be paired. This is for sanity of maintaining and reasoning about the | 287 | * entry should never be nested, and entry and exits should always be |
288 | * kernel code. | 288 | * paired. This is for sanity of maintaining and reasoning about the |
289 | * kernel code. In this case, the exit (end of the context switch) is | ||
290 | * in architecture-specific code, and so doesn't need a generic | ||
291 | * definition. | ||
289 | */ | 292 | */ |
290 | #ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE | 293 | #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH |
291 | #define arch_enter_lazy_cpu_mode() do {} while (0) | 294 | #define arch_start_context_switch(prev) do {} while (0) |
292 | #define arch_leave_lazy_cpu_mode() do {} while (0) | ||
293 | #define arch_flush_lazy_cpu_mode() do {} while (0) | ||
294 | #endif | 295 | #endif |
295 | 296 | ||
296 | #ifndef __HAVE_PFNMAP_TRACKING | 297 | #ifndef __HAVE_PFNMAP_TRACKING |
diff --git a/include/xen/Kbuild b/include/xen/Kbuild new file mode 100644 index 000000000000..4e65c16a445b --- /dev/null +++ b/include/xen/Kbuild | |||
@@ -0,0 +1 @@ | |||
header-y += evtchn.h | |||
diff --git a/include/xen/events.h b/include/xen/events.h index 0d5f1adc0363..e68d59a90ca8 100644 --- a/include/xen/events.h +++ b/include/xen/events.h | |||
@@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq); | |||
53 | irq will be disabled so it won't deliver an interrupt. */ | 53 | irq will be disabled so it won't deliver an interrupt. */ |
54 | void xen_poll_irq(int irq); | 54 | void xen_poll_irq(int irq); |
55 | 55 | ||
56 | /* Determine the IRQ which is bound to an event channel */ | ||
57 | unsigned irq_from_evtchn(unsigned int evtchn); | ||
58 | |||
56 | #endif /* _XEN_EVENTS_H */ | 59 | #endif /* _XEN_EVENTS_H */ |
diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h new file mode 100644 index 000000000000..14e833ee4e0b --- /dev/null +++ b/include/xen/evtchn.h | |||
@@ -0,0 +1,88 @@ | |||
1 | /****************************************************************************** | ||
2 | * evtchn.h | ||
3 | * | ||
4 | * Interface to /dev/xen/evtchn. | ||
5 | * | ||
6 | * Copyright (c) 2003-2005, K A Fraser | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version 2 | ||
10 | * as published by the Free Software Foundation; or, when distributed | ||
11 | * separately from the Linux kernel or incorporated into other | ||
12 | * software packages, subject to the following license: | ||
13 | * | ||
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
15 | * of this source file (the "Software"), to deal in the Software without | ||
16 | * restriction, including without limitation the rights to use, copy, modify, | ||
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
18 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
19 | * the following conditions: | ||
20 | * | ||
21 | * The above copyright notice and this permission notice shall be included in | ||
22 | * all copies or substantial portions of the Software. | ||
23 | * | ||
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
30 | * IN THE SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef __LINUX_PUBLIC_EVTCHN_H__ | ||
34 | #define __LINUX_PUBLIC_EVTCHN_H__ | ||
35 | |||
36 | /* | ||
37 | * Bind a fresh port to VIRQ @virq. | ||
38 | * Return allocated port. | ||
39 | */ | ||
40 | #define IOCTL_EVTCHN_BIND_VIRQ \ | ||
41 | _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) | ||
42 | struct ioctl_evtchn_bind_virq { | ||
43 | unsigned int virq; | ||
44 | }; | ||
45 | |||
46 | /* | ||
47 | * Bind a fresh port to remote <@remote_domain, @remote_port>. | ||
48 | * Return allocated port. | ||
49 | */ | ||
50 | #define IOCTL_EVTCHN_BIND_INTERDOMAIN \ | ||
51 | _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) | ||
52 | struct ioctl_evtchn_bind_interdomain { | ||
53 | unsigned int remote_domain, remote_port; | ||
54 | }; | ||
55 | |||
56 | /* | ||
57 | * Allocate a fresh port for binding to @remote_domain. | ||
58 | * Return allocated port. | ||
59 | */ | ||
60 | #define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ | ||
61 | _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) | ||
62 | struct ioctl_evtchn_bind_unbound_port { | ||
63 | unsigned int remote_domain; | ||
64 | }; | ||
65 | |||
66 | /* | ||
67 | * Unbind previously allocated @port. | ||
68 | */ | ||
69 | #define IOCTL_EVTCHN_UNBIND \ | ||
70 | _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) | ||
71 | struct ioctl_evtchn_unbind { | ||
72 | unsigned int port; | ||
73 | }; | ||
74 | |||
75 | /* | ||
76 | * Notify previously allocated @port. | ||
77 | */ | ||
78 | #define IOCTL_EVTCHN_NOTIFY \ | ||
79 | _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) | ||
80 | struct ioctl_evtchn_notify { | ||
81 | unsigned int port; | ||
82 | }; | ||
83 | |||
84 | /* Clear and reinitialise the event buffer. Clear error condition. */ | ||
85 | #define IOCTL_EVTCHN_RESET \ | ||
86 | _IOC(_IOC_NONE, 'E', 5, 0) | ||
87 | |||
88 | #endif /* __LINUX_PUBLIC_EVTCHN_H__ */ | ||
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 453235e923f0..e8b6519d47e9 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h | |||
@@ -57,4 +57,7 @@ struct xen_feature_info { | |||
57 | /* Declares the features reported by XENVER_get_features. */ | 57 | /* Declares the features reported by XENVER_get_features. */ |
58 | #include "features.h" | 58 | #include "features.h" |
59 | 59 | ||
60 | /* arg == NULL; returns host memory page size. */ | ||
61 | #define XENVER_pagesize 7 | ||
62 | |||
60 | #endif /* __XEN_PUBLIC_VERSION_H__ */ | 63 | #endif /* __XEN_PUBLIC_VERSION_H__ */ |
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index f87f9614844d..b9763badbd77 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h | |||
@@ -91,8 +91,7 @@ struct xenbus_driver { | |||
91 | void (*otherend_changed)(struct xenbus_device *dev, | 91 | void (*otherend_changed)(struct xenbus_device *dev, |
92 | enum xenbus_state backend_state); | 92 | enum xenbus_state backend_state); |
93 | int (*remove)(struct xenbus_device *dev); | 93 | int (*remove)(struct xenbus_device *dev); |
94 | int (*suspend)(struct xenbus_device *dev); | 94 | int (*suspend)(struct xenbus_device *dev, pm_message_t state); |
95 | int (*suspend_cancel)(struct xenbus_device *dev); | ||
96 | int (*resume)(struct xenbus_device *dev); | 95 | int (*resume)(struct xenbus_device *dev); |
97 | int (*uevent)(struct xenbus_device *, char **, int, char *, int); | 96 | int (*uevent)(struct xenbus_device *, char **, int, char *, int); |
98 | struct device_driver driver; | 97 | struct device_driver driver; |
diff --git a/kernel/sched.c b/kernel/sched.c index 6cc1fd5d5072..b38bd96098f6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2754,7 +2754,7 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2754 | * combine the page table reload and the switch backend into | 2754 | * combine the page table reload and the switch backend into |
2755 | * one hypercall. | 2755 | * one hypercall. |
2756 | */ | 2756 | */ |
2757 | arch_enter_lazy_cpu_mode(); | 2757 | arch_start_context_switch(prev); |
2758 | 2758 | ||
2759 | if (unlikely(!mm)) { | 2759 | if (unlikely(!mm)) { |
2760 | next->active_mm = oldmm; | 2760 | next->active_mm = oldmm; |