diff options
37 files changed, 1281 insertions, 174 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index a53da004e08e..4fb37c8a0832 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -56,6 +56,7 @@ struct desc_ptr; | |||
56 | struct tss_struct; | 56 | struct tss_struct; |
57 | struct mm_struct; | 57 | struct mm_struct; |
58 | struct desc_struct; | 58 | struct desc_struct; |
59 | struct task_struct; | ||
59 | 60 | ||
60 | /* | 61 | /* |
61 | * Wrapper type for pointers to code which uses the non-standard | 62 | * Wrapper type for pointers to code which uses the non-standard |
@@ -203,7 +204,8 @@ struct pv_cpu_ops { | |||
203 | 204 | ||
204 | void (*swapgs)(void); | 205 | void (*swapgs)(void); |
205 | 206 | ||
206 | struct pv_lazy_ops lazy_mode; | 207 | void (*start_context_switch)(struct task_struct *prev); |
208 | void (*end_context_switch)(struct task_struct *next); | ||
207 | }; | 209 | }; |
208 | 210 | ||
209 | struct pv_irq_ops { | 211 | struct pv_irq_ops { |
@@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode { | |||
1399 | }; | 1401 | }; |
1400 | 1402 | ||
1401 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); | 1403 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); |
1402 | void paravirt_enter_lazy_cpu(void); | 1404 | void paravirt_start_context_switch(struct task_struct *prev); |
1403 | void paravirt_leave_lazy_cpu(void); | 1405 | void paravirt_end_context_switch(struct task_struct *next); |
1406 | |||
1404 | void paravirt_enter_lazy_mmu(void); | 1407 | void paravirt_enter_lazy_mmu(void); |
1405 | void paravirt_leave_lazy_mmu(void); | 1408 | void paravirt_leave_lazy_mmu(void); |
1406 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode); | ||
1407 | 1409 | ||
1408 | #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE | 1410 | #define __HAVE_ARCH_START_CONTEXT_SWITCH |
1409 | static inline void arch_enter_lazy_cpu_mode(void) | 1411 | static inline void arch_start_context_switch(struct task_struct *prev) |
1410 | { | 1412 | { |
1411 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); | 1413 | PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); |
1412 | } | 1414 | } |
1413 | 1415 | ||
1414 | static inline void arch_leave_lazy_cpu_mode(void) | 1416 | static inline void arch_end_context_switch(struct task_struct *next) |
1415 | { | 1417 | { |
1416 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); | 1418 | PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); |
1417 | } | 1419 | } |
1418 | 1420 | ||
1419 | void arch_flush_lazy_cpu_mode(void); | ||
1420 | |||
1421 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE | 1421 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE |
1422 | static inline void arch_enter_lazy_mmu_mode(void) | 1422 | static inline void arch_enter_lazy_mmu_mode(void) |
1423 | { | 1423 | { |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3f8d09d94eb3..18ef7ebf2631 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) | |||
81 | #define pte_val(x) native_pte_val(x) | 81 | #define pte_val(x) native_pte_val(x) |
82 | #define __pte(x) native_make_pte(x) | 82 | #define __pte(x) native_make_pte(x) |
83 | 83 | ||
84 | #define arch_end_context_switch(next) do {} while (0) | ||
85 | |||
84 | #endif /* CONFIG_PARAVIRT */ | 86 | #endif /* CONFIG_PARAVIRT */ |
85 | 87 | ||
86 | /* | 88 | /* |
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index a4737dddfd58..64cf2d24fad1 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h | |||
@@ -48,9 +48,15 @@ | |||
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | #ifdef CONFIG_X86_64 | 50 | #ifdef CONFIG_X86_64 |
51 | #ifdef CONFIG_PARAVIRT | ||
52 | /* Paravirtualized systems may not have PSE or PGE available */ | ||
51 | #define NEED_PSE 0 | 53 | #define NEED_PSE 0 |
52 | #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) | ||
53 | #define NEED_PGE 0 | 54 | #define NEED_PGE 0 |
55 | #else | ||
56 | #define NEED_PSE (1<<(X86_FEATURE_PSE & 31)) | ||
57 | #define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) | ||
58 | #endif | ||
59 | #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) | ||
54 | #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) | 60 | #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) |
55 | #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) | 61 | #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) |
56 | #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) | 62 | #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8820a73ae090..602c769fc98c 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -94,7 +94,8 @@ struct thread_info { | |||
94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ |
96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | 96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ |
97 | #define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */ | 97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
98 | #define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */ | ||
98 | 99 | ||
99 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | 100 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) |
100 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | 101 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
@@ -116,6 +117,7 @@ struct thread_info { | |||
116 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 117 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
117 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) |
118 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | 119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) |
120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | ||
119 | #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) | 121 | #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) |
120 | 122 | ||
121 | /* work to do in syscall_trace_enter() */ | 123 | /* work to do in syscall_trace_enter() */ |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index cbfdc26b1460..bfd74c032fca 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -14,6 +14,9 @@ asmlinkage void divide_error(void); | |||
14 | asmlinkage void debug(void); | 14 | asmlinkage void debug(void); |
15 | asmlinkage void nmi(void); | 15 | asmlinkage void nmi(void); |
16 | asmlinkage void int3(void); | 16 | asmlinkage void int3(void); |
17 | asmlinkage void xen_debug(void); | ||
18 | asmlinkage void xen_int3(void); | ||
19 | asmlinkage void xen_stack_segment(void); | ||
17 | asmlinkage void overflow(void); | 20 | asmlinkage void overflow(void); |
18 | asmlinkage void bounds(void); | 21 | asmlinkage void bounds(void); |
19 | asmlinkage void invalid_op(void); | 22 | asmlinkage void invalid_op(void); |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 38946c6e8433..bb01ce080b80 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1379,6 +1379,11 @@ END(xen_failsafe_callback) | |||
1379 | paranoidzeroentry_ist debug do_debug DEBUG_STACK | 1379 | paranoidzeroentry_ist debug do_debug DEBUG_STACK |
1380 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK | 1380 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK |
1381 | paranoiderrorentry stack_segment do_stack_segment | 1381 | paranoiderrorentry stack_segment do_stack_segment |
1382 | #ifdef CONFIG_XEN | ||
1383 | zeroentry xen_debug do_debug | ||
1384 | zeroentry xen_int3 do_int3 | ||
1385 | errorentry xen_stack_segment do_stack_segment | ||
1386 | #endif | ||
1382 | errorentry general_protection do_general_protection | 1387 | errorentry general_protection do_general_protection |
1383 | errorentry page_fault do_page_fault | 1388 | errorentry page_fault do_page_fault |
1384 | #ifdef CONFIG_X86_MCE | 1389 | #ifdef CONFIG_X86_MCE |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 33019ddb56b4..6551dedee20c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -195,7 +195,7 @@ static void kvm_leave_lazy_mmu(void) | |||
195 | struct kvm_para_state *state = kvm_para_state(); | 195 | struct kvm_para_state *state = kvm_para_state(); |
196 | 196 | ||
197 | mmu_queue_flush(state); | 197 | mmu_queue_flush(state); |
198 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | 198 | paravirt_leave_lazy_mmu(); |
199 | state->mode = paravirt_get_lazy_mode(); | 199 | state->mode = paravirt_get_lazy_mode(); |
200 | } | 200 | } |
201 | 201 | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 9faf43bea336..70ec9b951d76 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -248,18 +248,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA | |||
248 | 248 | ||
249 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | 249 | static inline void enter_lazy(enum paravirt_lazy_mode mode) |
250 | { | 250 | { |
251 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); | 251 | BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); |
252 | BUG_ON(preemptible()); | ||
253 | 252 | ||
254 | __get_cpu_var(paravirt_lazy_mode) = mode; | 253 | percpu_write(paravirt_lazy_mode, mode); |
255 | } | 254 | } |
256 | 255 | ||
257 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) | 256 | static void leave_lazy(enum paravirt_lazy_mode mode) |
258 | { | 257 | { |
259 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); | 258 | BUG_ON(percpu_read(paravirt_lazy_mode) != mode); |
260 | BUG_ON(preemptible()); | ||
261 | 259 | ||
262 | __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; | 260 | percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); |
263 | } | 261 | } |
264 | 262 | ||
265 | void paravirt_enter_lazy_mmu(void) | 263 | void paravirt_enter_lazy_mmu(void) |
@@ -269,22 +267,36 @@ void paravirt_enter_lazy_mmu(void) | |||
269 | 267 | ||
270 | void paravirt_leave_lazy_mmu(void) | 268 | void paravirt_leave_lazy_mmu(void) |
271 | { | 269 | { |
272 | paravirt_leave_lazy(PARAVIRT_LAZY_MMU); | 270 | leave_lazy(PARAVIRT_LAZY_MMU); |
273 | } | 271 | } |
274 | 272 | ||
275 | void paravirt_enter_lazy_cpu(void) | 273 | void paravirt_start_context_switch(struct task_struct *prev) |
276 | { | 274 | { |
275 | BUG_ON(preemptible()); | ||
276 | |||
277 | if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { | ||
278 | arch_leave_lazy_mmu_mode(); | ||
279 | set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); | ||
280 | } | ||
277 | enter_lazy(PARAVIRT_LAZY_CPU); | 281 | enter_lazy(PARAVIRT_LAZY_CPU); |
278 | } | 282 | } |
279 | 283 | ||
280 | void paravirt_leave_lazy_cpu(void) | 284 | void paravirt_end_context_switch(struct task_struct *next) |
281 | { | 285 | { |
282 | paravirt_leave_lazy(PARAVIRT_LAZY_CPU); | 286 | BUG_ON(preemptible()); |
287 | |||
288 | leave_lazy(PARAVIRT_LAZY_CPU); | ||
289 | |||
290 | if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) | ||
291 | arch_enter_lazy_mmu_mode(); | ||
283 | } | 292 | } |
284 | 293 | ||
285 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | 294 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) |
286 | { | 295 | { |
287 | return __get_cpu_var(paravirt_lazy_mode); | 296 | if (in_interrupt()) |
297 | return PARAVIRT_LAZY_NONE; | ||
298 | |||
299 | return percpu_read(paravirt_lazy_mode); | ||
288 | } | 300 | } |
289 | 301 | ||
290 | void arch_flush_lazy_mmu_mode(void) | 302 | void arch_flush_lazy_mmu_mode(void) |
@@ -292,7 +304,6 @@ void arch_flush_lazy_mmu_mode(void) | |||
292 | preempt_disable(); | 304 | preempt_disable(); |
293 | 305 | ||
294 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | 306 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { |
295 | WARN_ON(preempt_count() == 1); | ||
296 | arch_leave_lazy_mmu_mode(); | 307 | arch_leave_lazy_mmu_mode(); |
297 | arch_enter_lazy_mmu_mode(); | 308 | arch_enter_lazy_mmu_mode(); |
298 | } | 309 | } |
@@ -300,19 +311,6 @@ void arch_flush_lazy_mmu_mode(void) | |||
300 | preempt_enable(); | 311 | preempt_enable(); |
301 | } | 312 | } |
302 | 313 | ||
303 | void arch_flush_lazy_cpu_mode(void) | ||
304 | { | ||
305 | preempt_disable(); | ||
306 | |||
307 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { | ||
308 | WARN_ON(preempt_count() == 1); | ||
309 | arch_leave_lazy_cpu_mode(); | ||
310 | arch_enter_lazy_cpu_mode(); | ||
311 | } | ||
312 | |||
313 | preempt_enable(); | ||
314 | } | ||
315 | |||
316 | struct pv_info pv_info = { | 314 | struct pv_info pv_info = { |
317 | .name = "bare hardware", | 315 | .name = "bare hardware", |
318 | .paravirt_enabled = 0, | 316 | .paravirt_enabled = 0, |
@@ -404,10 +402,8 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
404 | .set_iopl_mask = native_set_iopl_mask, | 402 | .set_iopl_mask = native_set_iopl_mask, |
405 | .io_delay = native_io_delay, | 403 | .io_delay = native_io_delay, |
406 | 404 | ||
407 | .lazy_mode = { | 405 | .start_context_switch = paravirt_nop, |
408 | .enter = paravirt_nop, | 406 | .end_context_switch = paravirt_nop, |
409 | .leave = paravirt_nop, | ||
410 | }, | ||
411 | }; | 407 | }; |
412 | 408 | ||
413 | struct pv_apic_ops pv_apic_ops = { | 409 | struct pv_apic_ops pv_apic_ops = { |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 56d50b7d71df..c60924b5d123 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -404,7 +404,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
404 | * done before math_state_restore, so the TS bit is up | 404 | * done before math_state_restore, so the TS bit is up |
405 | * to date. | 405 | * to date. |
406 | */ | 406 | */ |
407 | arch_leave_lazy_cpu_mode(); | 407 | arch_end_context_switch(next_p); |
408 | 408 | ||
409 | /* If the task has used fpu the last 5 timeslices, just do a full | 409 | /* If the task has used fpu the last 5 timeslices, just do a full |
410 | * restore of the math state immediately to avoid the trap; the | 410 | * restore of the math state immediately to avoid the trap; the |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9d6b20e6cd80..45f010fb2e20 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -425,7 +425,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
425 | * done before math_state_restore, so the TS bit is up | 425 | * done before math_state_restore, so the TS bit is up |
426 | * to date. | 426 | * to date. |
427 | */ | 427 | */ |
428 | arch_leave_lazy_cpu_mode(); | 428 | arch_end_context_switch(next_p); |
429 | 429 | ||
430 | /* | 430 | /* |
431 | * Switch FS and GS. | 431 | * Switch FS and GS. |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 95deb9f2211e..b263423fbe2a 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -462,22 +462,28 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | |||
462 | } | 462 | } |
463 | #endif | 463 | #endif |
464 | 464 | ||
465 | static void vmi_enter_lazy_cpu(void) | 465 | static void vmi_start_context_switch(struct task_struct *prev) |
466 | { | 466 | { |
467 | paravirt_enter_lazy_cpu(); | 467 | paravirt_start_context_switch(prev); |
468 | vmi_ops.set_lazy_mode(2); | 468 | vmi_ops.set_lazy_mode(2); |
469 | } | 469 | } |
470 | 470 | ||
471 | static void vmi_end_context_switch(struct task_struct *next) | ||
472 | { | ||
473 | vmi_ops.set_lazy_mode(0); | ||
474 | paravirt_end_context_switch(next); | ||
475 | } | ||
476 | |||
471 | static void vmi_enter_lazy_mmu(void) | 477 | static void vmi_enter_lazy_mmu(void) |
472 | { | 478 | { |
473 | paravirt_enter_lazy_mmu(); | 479 | paravirt_enter_lazy_mmu(); |
474 | vmi_ops.set_lazy_mode(1); | 480 | vmi_ops.set_lazy_mode(1); |
475 | } | 481 | } |
476 | 482 | ||
477 | static void vmi_leave_lazy(void) | 483 | static void vmi_leave_lazy_mmu(void) |
478 | { | 484 | { |
479 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
480 | vmi_ops.set_lazy_mode(0); | 485 | vmi_ops.set_lazy_mode(0); |
486 | paravirt_leave_lazy_mmu(); | ||
481 | } | 487 | } |
482 | 488 | ||
483 | static inline int __init check_vmi_rom(struct vrom_header *rom) | 489 | static inline int __init check_vmi_rom(struct vrom_header *rom) |
@@ -711,14 +717,14 @@ static inline int __init activate_vmi(void) | |||
711 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); | 717 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); |
712 | para_fill(pv_cpu_ops.io_delay, IODelay); | 718 | para_fill(pv_cpu_ops.io_delay, IODelay); |
713 | 719 | ||
714 | para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, | 720 | para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, |
715 | set_lazy_mode, SetLazyMode); | 721 | set_lazy_mode, SetLazyMode); |
716 | para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, | 722 | para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, |
717 | set_lazy_mode, SetLazyMode); | 723 | set_lazy_mode, SetLazyMode); |
718 | 724 | ||
719 | para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, | 725 | para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, |
720 | set_lazy_mode, SetLazyMode); | 726 | set_lazy_mode, SetLazyMode); |
721 | para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, | 727 | para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu, |
722 | set_lazy_mode, SetLazyMode); | 728 | set_lazy_mode, SetLazyMode); |
723 | 729 | ||
724 | /* user and kernel flush are just handled with different flags to FlushTLB */ | 730 | /* user and kernel flush are just handled with different flags to FlushTLB */ |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ef4205c1a7a5..4e0c26559395 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -167,10 +167,16 @@ static void lazy_hcall3(unsigned long call, | |||
167 | 167 | ||
168 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then | 168 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then |
169 | * issue the do-nothing hypercall to flush any stored calls. */ | 169 | * issue the do-nothing hypercall to flush any stored calls. */ |
170 | static void lguest_leave_lazy_mode(void) | 170 | static void lguest_leave_lazy_mmu_mode(void) |
171 | { | 171 | { |
172 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
173 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); | 172 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); |
173 | paravirt_leave_lazy_mmu(); | ||
174 | } | ||
175 | |||
176 | static void lguest_end_context_switch(struct task_struct *next) | ||
177 | { | ||
178 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); | ||
179 | paravirt_end_context_switch(next); | ||
174 | } | 180 | } |
175 | 181 | ||
176 | /*G:033 | 182 | /*G:033 |
@@ -1054,8 +1060,8 @@ __init void lguest_init(void) | |||
1054 | pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; | 1060 | pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; |
1055 | pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; | 1061 | pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; |
1056 | pv_cpu_ops.wbinvd = lguest_wbinvd; | 1062 | pv_cpu_ops.wbinvd = lguest_wbinvd; |
1057 | pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; | 1063 | pv_cpu_ops.start_context_switch = paravirt_start_context_switch; |
1058 | pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; | 1064 | pv_cpu_ops.end_context_switch = lguest_end_context_switch; |
1059 | 1065 | ||
1060 | /* pagetable management */ | 1066 | /* pagetable management */ |
1061 | pv_mmu_ops.write_cr3 = lguest_write_cr3; | 1067 | pv_mmu_ops.write_cr3 = lguest_write_cr3; |
@@ -1068,7 +1074,7 @@ __init void lguest_init(void) | |||
1068 | pv_mmu_ops.read_cr2 = lguest_read_cr2; | 1074 | pv_mmu_ops.read_cr2 = lguest_read_cr2; |
1069 | pv_mmu_ops.read_cr3 = lguest_read_cr3; | 1075 | pv_mmu_ops.read_cr3 = lguest_read_cr3; |
1070 | pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; | 1076 | pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; |
1071 | pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; | 1077 | pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; |
1072 | pv_mmu_ops.pte_update = lguest_pte_update; | 1078 | pv_mmu_ops.pte_update = lguest_pte_update; |
1073 | pv_mmu_ops.pte_update_defer = lguest_pte_update; | 1079 | pv_mmu_ops.pte_update_defer = lguest_pte_update; |
1074 | 1080 | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b9ca6d767dbb..5ec7ae366615 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -201,12 +201,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | |||
201 | if (!pmd_present(*pmd_k)) | 201 | if (!pmd_present(*pmd_k)) |
202 | return NULL; | 202 | return NULL; |
203 | 203 | ||
204 | if (!pmd_present(*pmd)) { | 204 | if (!pmd_present(*pmd)) |
205 | set_pmd(pmd, *pmd_k); | 205 | set_pmd(pmd, *pmd_k); |
206 | arch_flush_lazy_mmu_mode(); | 206 | else |
207 | } else { | ||
208 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); | 207 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); |
209 | } | ||
210 | 208 | ||
211 | return pmd_k; | 209 | return pmd_k; |
212 | } | 210 | } |
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 8126e8d1a2a4..58f621e81919 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c | |||
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) | |||
44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | 44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
45 | BUG_ON(!pte_none(*(kmap_pte-idx))); | 45 | BUG_ON(!pte_none(*(kmap_pte-idx))); |
46 | set_pte(kmap_pte-idx, mk_pte(page, prot)); | 46 | set_pte(kmap_pte-idx, mk_pte(page, prot)); |
47 | arch_flush_lazy_mmu_mode(); | ||
48 | 47 | ||
49 | return (void *)vaddr; | 48 | return (void *)vaddr; |
50 | } | 49 | } |
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type) | |||
74 | #endif | 73 | #endif |
75 | } | 74 | } |
76 | 75 | ||
77 | arch_flush_lazy_mmu_mode(); | ||
78 | pagefault_enable(); | 76 | pagefault_enable(); |
79 | } | 77 | } |
80 | 78 | ||
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 8056545e2d39..fe6f84ca121e 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c | |||
@@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type) | |||
82 | if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) | 82 | if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) |
83 | kpte_clear_flush(kmap_pte-idx, vaddr); | 83 | kpte_clear_flush(kmap_pte-idx, vaddr); |
84 | 84 | ||
85 | arch_flush_lazy_mmu_mode(); | ||
86 | pagefault_enable(); | 85 | pagefault_enable(); |
87 | } | 86 | } |
88 | EXPORT_SYMBOL_GPL(iounmap_atomic); | 87 | EXPORT_SYMBOL_GPL(iounmap_atomic); |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e17efed088c5..6ce9518fe2ac 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -839,13 +839,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
839 | 839 | ||
840 | vm_unmap_aliases(); | 840 | vm_unmap_aliases(); |
841 | 841 | ||
842 | /* | ||
843 | * If we're called with lazy mmu updates enabled, the | ||
844 | * in-memory pte state may be stale. Flush pending updates to | ||
845 | * bring them up to date. | ||
846 | */ | ||
847 | arch_flush_lazy_mmu_mode(); | ||
848 | |||
849 | cpa.vaddr = addr; | 842 | cpa.vaddr = addr; |
850 | cpa.pages = pages; | 843 | cpa.pages = pages; |
851 | cpa.numpages = numpages; | 844 | cpa.numpages = numpages; |
@@ -890,13 +883,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
890 | } else | 883 | } else |
891 | cpa_flush_all(cache); | 884 | cpa_flush_all(cache); |
892 | 885 | ||
893 | /* | ||
894 | * If we've been called with lazy mmu updates enabled, then | ||
895 | * make sure that everything gets flushed out before we | ||
896 | * return. | ||
897 | */ | ||
898 | arch_flush_lazy_mmu_mode(); | ||
899 | |||
900 | out: | 886 | out: |
901 | return ret; | 887 | return ret; |
902 | } | 888 | } |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f09e8c36ee80..0a1700a2be9c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
21 | #include <linux/start_kernel.h> | 21 | #include <linux/start_kernel.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/kprobes.h> | ||
23 | #include <linux/bootmem.h> | 24 | #include <linux/bootmem.h> |
24 | #include <linux/module.h> | 25 | #include <linux/module.h> |
25 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
@@ -44,6 +45,7 @@ | |||
44 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
45 | #include <asm/proto.h> | 46 | #include <asm/proto.h> |
46 | #include <asm/msr-index.h> | 47 | #include <asm/msr-index.h> |
48 | #include <asm/traps.h> | ||
47 | #include <asm/setup.h> | 49 | #include <asm/setup.h> |
48 | #include <asm/desc.h> | 50 | #include <asm/desc.h> |
49 | #include <asm/pgtable.h> | 51 | #include <asm/pgtable.h> |
@@ -240,10 +242,10 @@ static unsigned long xen_get_debugreg(int reg) | |||
240 | return HYPERVISOR_get_debugreg(reg); | 242 | return HYPERVISOR_get_debugreg(reg); |
241 | } | 243 | } |
242 | 244 | ||
243 | void xen_leave_lazy(void) | 245 | static void xen_end_context_switch(struct task_struct *next) |
244 | { | 246 | { |
245 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
246 | xen_mc_flush(); | 247 | xen_mc_flush(); |
248 | paravirt_end_context_switch(next); | ||
247 | } | 249 | } |
248 | 250 | ||
249 | static unsigned long xen_store_tr(void) | 251 | static unsigned long xen_store_tr(void) |
@@ -428,11 +430,44 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | |||
428 | static int cvt_gate_to_trap(int vector, const gate_desc *val, | 430 | static int cvt_gate_to_trap(int vector, const gate_desc *val, |
429 | struct trap_info *info) | 431 | struct trap_info *info) |
430 | { | 432 | { |
433 | unsigned long addr; | ||
434 | |||
431 | if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) | 435 | if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) |
432 | return 0; | 436 | return 0; |
433 | 437 | ||
434 | info->vector = vector; | 438 | info->vector = vector; |
435 | info->address = gate_offset(*val); | 439 | |
440 | addr = gate_offset(*val); | ||
441 | #ifdef CONFIG_X86_64 | ||
442 | /* | ||
443 | * Look for known traps using IST, and substitute them | ||
444 | * appropriately. The debugger ones are the only ones we care | ||
445 | * about. Xen will handle faults like double_fault and | ||
446 | * machine_check, so we should never see them. Warn if | ||
447 | * there's an unexpected IST-using fault handler. | ||
448 | */ | ||
449 | if (addr == (unsigned long)debug) | ||
450 | addr = (unsigned long)xen_debug; | ||
451 | else if (addr == (unsigned long)int3) | ||
452 | addr = (unsigned long)xen_int3; | ||
453 | else if (addr == (unsigned long)stack_segment) | ||
454 | addr = (unsigned long)xen_stack_segment; | ||
455 | else if (addr == (unsigned long)double_fault || | ||
456 | addr == (unsigned long)nmi) { | ||
457 | /* Don't need to handle these */ | ||
458 | return 0; | ||
459 | #ifdef CONFIG_X86_MCE | ||
460 | } else if (addr == (unsigned long)machine_check) { | ||
461 | return 0; | ||
462 | #endif | ||
463 | } else { | ||
464 | /* Some other trap using IST? */ | ||
465 | if (WARN_ON(val->ist != 0)) | ||
466 | return 0; | ||
467 | } | ||
468 | #endif /* CONFIG_X86_64 */ | ||
469 | info->address = addr; | ||
470 | |||
436 | info->cs = gate_segment(*val); | 471 | info->cs = gate_segment(*val); |
437 | info->flags = val->dpl; | 472 | info->flags = val->dpl; |
438 | /* interrupt gates clear IF */ | 473 | /* interrupt gates clear IF */ |
@@ -623,10 +658,26 @@ static void xen_clts(void) | |||
623 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 658 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
624 | } | 659 | } |
625 | 660 | ||
661 | static DEFINE_PER_CPU(unsigned long, xen_cr0_value); | ||
662 | |||
663 | static unsigned long xen_read_cr0(void) | ||
664 | { | ||
665 | unsigned long cr0 = percpu_read(xen_cr0_value); | ||
666 | |||
667 | if (unlikely(cr0 == 0)) { | ||
668 | cr0 = native_read_cr0(); | ||
669 | percpu_write(xen_cr0_value, cr0); | ||
670 | } | ||
671 | |||
672 | return cr0; | ||
673 | } | ||
674 | |||
626 | static void xen_write_cr0(unsigned long cr0) | 675 | static void xen_write_cr0(unsigned long cr0) |
627 | { | 676 | { |
628 | struct multicall_space mcs; | 677 | struct multicall_space mcs; |
629 | 678 | ||
679 | percpu_write(xen_cr0_value, cr0); | ||
680 | |||
630 | /* Only pay attention to cr0.TS; everything else is | 681 | /* Only pay attention to cr0.TS; everything else is |
631 | ignored. */ | 682 | ignored. */ |
632 | mcs = xen_mc_entry(0); | 683 | mcs = xen_mc_entry(0); |
@@ -812,7 +863,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
812 | 863 | ||
813 | .clts = xen_clts, | 864 | .clts = xen_clts, |
814 | 865 | ||
815 | .read_cr0 = native_read_cr0, | 866 | .read_cr0 = xen_read_cr0, |
816 | .write_cr0 = xen_write_cr0, | 867 | .write_cr0 = xen_write_cr0, |
817 | 868 | ||
818 | .read_cr4 = native_read_cr4, | 869 | .read_cr4 = native_read_cr4, |
@@ -860,10 +911,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
860 | /* Xen takes care of %gs when switching to usermode for us */ | 911 | /* Xen takes care of %gs when switching to usermode for us */ |
861 | .swapgs = paravirt_nop, | 912 | .swapgs = paravirt_nop, |
862 | 913 | ||
863 | .lazy_mode = { | 914 | .start_context_switch = paravirt_start_context_switch, |
864 | .enter = paravirt_enter_lazy_cpu, | 915 | .end_context_switch = xen_end_context_switch, |
865 | .leave = xen_leave_lazy, | ||
866 | }, | ||
867 | }; | 916 | }; |
868 | 917 | ||
869 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 918 | static const struct pv_apic_ops xen_apic_ops __initdata = { |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index fba55b1a4021..4ceb28581652 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -452,10 +452,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) | |||
452 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 452 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
453 | pte_t *ptep, pte_t pteval) | 453 | pte_t *ptep, pte_t pteval) |
454 | { | 454 | { |
455 | /* updates to init_mm may be done without lock */ | ||
456 | if (mm == &init_mm) | ||
457 | preempt_disable(); | ||
458 | |||
459 | ADD_STATS(set_pte_at, 1); | 455 | ADD_STATS(set_pte_at, 1); |
460 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); | 456 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); |
461 | ADD_STATS(set_pte_at_current, mm == current->mm); | 457 | ADD_STATS(set_pte_at_current, mm == current->mm); |
@@ -476,9 +472,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
476 | } | 472 | } |
477 | xen_set_pte(ptep, pteval); | 473 | xen_set_pte(ptep, pteval); |
478 | 474 | ||
479 | out: | 475 | out: return; |
480 | if (mm == &init_mm) | ||
481 | preempt_enable(); | ||
482 | } | 476 | } |
483 | 477 | ||
484 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, | 478 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
@@ -1152,10 +1146,8 @@ static void drop_other_mm_ref(void *info) | |||
1152 | 1146 | ||
1153 | /* If this cpu still has a stale cr3 reference, then make sure | 1147 | /* If this cpu still has a stale cr3 reference, then make sure |
1154 | it has been flushed. */ | 1148 | it has been flushed. */ |
1155 | if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { | 1149 | if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) |
1156 | load_cr3(swapper_pg_dir); | 1150 | load_cr3(swapper_pg_dir); |
1157 | arch_flush_lazy_cpu_mode(); | ||
1158 | } | ||
1159 | } | 1151 | } |
1160 | 1152 | ||
1161 | static void xen_drop_mm_ref(struct mm_struct *mm) | 1153 | static void xen_drop_mm_ref(struct mm_struct *mm) |
@@ -1168,7 +1160,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1168 | load_cr3(swapper_pg_dir); | 1160 | load_cr3(swapper_pg_dir); |
1169 | else | 1161 | else |
1170 | leave_mm(smp_processor_id()); | 1162 | leave_mm(smp_processor_id()); |
1171 | arch_flush_lazy_cpu_mode(); | ||
1172 | } | 1163 | } |
1173 | 1164 | ||
1174 | /* Get the "official" set of cpus referring to our pagetable. */ | 1165 | /* Get the "official" set of cpus referring to our pagetable. */ |
@@ -1876,6 +1867,14 @@ __init void xen_post_allocator_init(void) | |||
1876 | xen_mark_init_mm_pinned(); | 1867 | xen_mark_init_mm_pinned(); |
1877 | } | 1868 | } |
1878 | 1869 | ||
1870 | static void xen_leave_lazy_mmu(void) | ||
1871 | { | ||
1872 | preempt_disable(); | ||
1873 | xen_mc_flush(); | ||
1874 | paravirt_leave_lazy_mmu(); | ||
1875 | preempt_enable(); | ||
1876 | } | ||
1877 | |||
1879 | const struct pv_mmu_ops xen_mmu_ops __initdata = { | 1878 | const struct pv_mmu_ops xen_mmu_ops __initdata = { |
1880 | .pagetable_setup_start = xen_pagetable_setup_start, | 1879 | .pagetable_setup_start = xen_pagetable_setup_start, |
1881 | .pagetable_setup_done = xen_pagetable_setup_done, | 1880 | .pagetable_setup_done = xen_pagetable_setup_done, |
@@ -1949,7 +1948,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1949 | 1948 | ||
1950 | .lazy_mode = { | 1949 | .lazy_mode = { |
1951 | .enter = paravirt_enter_lazy_mmu, | 1950 | .enter = paravirt_enter_lazy_mmu, |
1952 | .leave = xen_leave_lazy, | 1951 | .leave = xen_leave_lazy_mmu, |
1953 | }, | 1952 | }, |
1954 | 1953 | ||
1955 | .set_fixmap = xen_set_fixmap, | 1954 | .set_fixmap = xen_set_fixmap, |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 15c6c68db6a2..ad0047f47cd4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -61,9 +61,9 @@ char * __init xen_memory_setup(void) | |||
61 | * - xen_start_info | 61 | * - xen_start_info |
62 | * See comment above "struct start_info" in <xen/interface/xen.h> | 62 | * See comment above "struct start_info" in <xen/interface/xen.h> |
63 | */ | 63 | */ |
64 | e820_add_region(__pa(xen_start_info->mfn_list), | 64 | reserve_early(__pa(xen_start_info->mfn_list), |
65 | xen_start_info->pt_base - xen_start_info->mfn_list, | 65 | __pa(xen_start_info->pt_base), |
66 | E820_RESERVED); | 66 | "XEN START INFO"); |
67 | 67 | ||
68 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 68 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
69 | 69 | ||
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index ca6596b05d53..22494fd4c9b5 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | |||
30 | void xen_ident_map_ISA(void); | 30 | void xen_ident_map_ISA(void); |
31 | void xen_reserve_top(void); | 31 | void xen_reserve_top(void); |
32 | 32 | ||
33 | void xen_leave_lazy(void); | ||
34 | void xen_post_allocator_init(void); | 33 | void xen_post_allocator_init(void); |
35 | 34 | ||
36 | char * __init xen_memory_setup(void); | 35 | char * __init xen_memory_setup(void); |
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 8ac9cddac575..cab100acf983 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig | |||
@@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES | |||
18 | secure, but slightly less efficient. | 18 | secure, but slightly less efficient. |
19 | If in doubt, say yes. | 19 | If in doubt, say yes. |
20 | 20 | ||
21 | config XEN_DEV_EVTCHN | ||
22 | tristate "Xen /dev/xen/evtchn device" | ||
23 | depends on XEN | ||
24 | default y | ||
25 | help | ||
26 | The evtchn driver allows a userspace process to trigger event | ||
27 | channels and to receive notification of an event channel | ||
28 | firing. | ||
29 | If in doubt, say yes. | ||
30 | |||
21 | config XENFS | 31 | config XENFS |
22 | tristate "Xen filesystem" | 32 | tristate "Xen filesystem" |
23 | depends on XEN | 33 | depends on XEN |
@@ -41,3 +51,13 @@ config XEN_COMPAT_XENFS | |||
41 | a xen platform. | 51 | a xen platform. |
42 | If in doubt, say yes. | 52 | If in doubt, say yes. |
43 | 53 | ||
54 | config XEN_SYS_HYPERVISOR | ||
55 | bool "Create xen entries under /sys/hypervisor" | ||
56 | depends on XEN && SYSFS | ||
57 | select SYS_HYPERVISOR | ||
58 | default y | ||
59 | help | ||
60 | Create entries under /sys/hypervisor describing the Xen | ||
61 | hypervisor environment. When running native or in another | ||
62 | virtual environment, /sys/hypervisor will still be present, | ||
63 | but will have no xen contents. \ No newline at end of file | ||
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index ff8accc9e103..ec2a39b1e26f 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -4,4 +4,6 @@ obj-y += xenbus/ | |||
4 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | 4 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o |
5 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | 5 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o |
6 | obj-$(CONFIG_XEN_BALLOON) += balloon.o | 6 | obj-$(CONFIG_XEN_BALLOON) += balloon.o |
7 | obj-$(CONFIG_XENFS) += xenfs/ \ No newline at end of file | 7 | obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o |
8 | obj-$(CONFIG_XENFS) += xenfs/ | ||
9 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file | ||
diff --git a/drivers/xen/events.c b/drivers/xen/events.c index be437c2bc942..891d2e90753a 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c | |||
@@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq) | |||
151 | return info_for_irq(irq)->evtchn; | 151 | return info_for_irq(irq)->evtchn; |
152 | } | 152 | } |
153 | 153 | ||
154 | unsigned irq_from_evtchn(unsigned int evtchn) | ||
155 | { | ||
156 | return evtchn_to_irq[evtchn]; | ||
157 | } | ||
158 | EXPORT_SYMBOL_GPL(irq_from_evtchn); | ||
159 | |||
154 | static enum ipi_vector ipi_from_irq(unsigned irq) | 160 | static enum ipi_vector ipi_from_irq(unsigned irq) |
155 | { | 161 | { |
156 | struct irq_info *info = info_for_irq(irq); | 162 | struct irq_info *info = info_for_irq(irq); |
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c new file mode 100644 index 000000000000..af031950f9b1 --- /dev/null +++ b/drivers/xen/evtchn.c | |||
@@ -0,0 +1,507 @@ | |||
1 | /****************************************************************************** | ||
2 | * evtchn.c | ||
3 | * | ||
4 | * Driver for receiving and demuxing event-channel signals. | ||
5 | * | ||
6 | * Copyright (c) 2004-2005, K A Fraser | ||
7 | * Multi-process extensions Copyright (c) 2004, Steven Smith | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/module.h> | ||
35 | #include <linux/kernel.h> | ||
36 | #include <linux/sched.h> | ||
37 | #include <linux/slab.h> | ||
38 | #include <linux/string.h> | ||
39 | #include <linux/errno.h> | ||
40 | #include <linux/fs.h> | ||
41 | #include <linux/errno.h> | ||
42 | #include <linux/miscdevice.h> | ||
43 | #include <linux/major.h> | ||
44 | #include <linux/proc_fs.h> | ||
45 | #include <linux/stat.h> | ||
46 | #include <linux/poll.h> | ||
47 | #include <linux/irq.h> | ||
48 | #include <linux/init.h> | ||
49 | #include <linux/gfp.h> | ||
50 | #include <linux/mutex.h> | ||
51 | #include <linux/cpu.h> | ||
52 | #include <xen/events.h> | ||
53 | #include <xen/evtchn.h> | ||
54 | #include <asm/xen/hypervisor.h> | ||
55 | |||
56 | struct per_user_data { | ||
57 | struct mutex bind_mutex; /* serialize bind/unbind operations */ | ||
58 | |||
59 | /* Notification ring, accessed via /dev/xen/evtchn. */ | ||
60 | #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) | ||
61 | #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) | ||
62 | evtchn_port_t *ring; | ||
63 | unsigned int ring_cons, ring_prod, ring_overflow; | ||
64 | struct mutex ring_cons_mutex; /* protect against concurrent readers */ | ||
65 | |||
66 | /* Processes wait on this queue when ring is empty. */ | ||
67 | wait_queue_head_t evtchn_wait; | ||
68 | struct fasync_struct *evtchn_async_queue; | ||
69 | const char *name; | ||
70 | }; | ||
71 | |||
72 | /* Who's bound to each port? */ | ||
73 | static struct per_user_data *port_user[NR_EVENT_CHANNELS]; | ||
74 | static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ | ||
75 | |||
76 | irqreturn_t evtchn_interrupt(int irq, void *data) | ||
77 | { | ||
78 | unsigned int port = (unsigned long)data; | ||
79 | struct per_user_data *u; | ||
80 | |||
81 | spin_lock(&port_user_lock); | ||
82 | |||
83 | u = port_user[port]; | ||
84 | |||
85 | disable_irq_nosync(irq); | ||
86 | |||
87 | if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { | ||
88 | u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; | ||
89 | wmb(); /* Ensure ring contents visible */ | ||
90 | if (u->ring_cons == u->ring_prod++) { | ||
91 | wake_up_interruptible(&u->evtchn_wait); | ||
92 | kill_fasync(&u->evtchn_async_queue, | ||
93 | SIGIO, POLL_IN); | ||
94 | } | ||
95 | } else { | ||
96 | u->ring_overflow = 1; | ||
97 | } | ||
98 | |||
99 | spin_unlock(&port_user_lock); | ||
100 | |||
101 | return IRQ_HANDLED; | ||
102 | } | ||
103 | |||
104 | static ssize_t evtchn_read(struct file *file, char __user *buf, | ||
105 | size_t count, loff_t *ppos) | ||
106 | { | ||
107 | int rc; | ||
108 | unsigned int c, p, bytes1 = 0, bytes2 = 0; | ||
109 | struct per_user_data *u = file->private_data; | ||
110 | |||
111 | /* Whole number of ports. */ | ||
112 | count &= ~(sizeof(evtchn_port_t)-1); | ||
113 | |||
114 | if (count == 0) | ||
115 | return 0; | ||
116 | |||
117 | if (count > PAGE_SIZE) | ||
118 | count = PAGE_SIZE; | ||
119 | |||
120 | for (;;) { | ||
121 | mutex_lock(&u->ring_cons_mutex); | ||
122 | |||
123 | rc = -EFBIG; | ||
124 | if (u->ring_overflow) | ||
125 | goto unlock_out; | ||
126 | |||
127 | c = u->ring_cons; | ||
128 | p = u->ring_prod; | ||
129 | if (c != p) | ||
130 | break; | ||
131 | |||
132 | mutex_unlock(&u->ring_cons_mutex); | ||
133 | |||
134 | if (file->f_flags & O_NONBLOCK) | ||
135 | return -EAGAIN; | ||
136 | |||
137 | rc = wait_event_interruptible(u->evtchn_wait, | ||
138 | u->ring_cons != u->ring_prod); | ||
139 | if (rc) | ||
140 | return rc; | ||
141 | } | ||
142 | |||
143 | /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ | ||
144 | if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { | ||
145 | bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * | ||
146 | sizeof(evtchn_port_t); | ||
147 | bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); | ||
148 | } else { | ||
149 | bytes1 = (p - c) * sizeof(evtchn_port_t); | ||
150 | bytes2 = 0; | ||
151 | } | ||
152 | |||
153 | /* Truncate chunks according to caller's maximum byte count. */ | ||
154 | if (bytes1 > count) { | ||
155 | bytes1 = count; | ||
156 | bytes2 = 0; | ||
157 | } else if ((bytes1 + bytes2) > count) { | ||
158 | bytes2 = count - bytes1; | ||
159 | } | ||
160 | |||
161 | rc = -EFAULT; | ||
162 | rmb(); /* Ensure that we see the port before we copy it. */ | ||
163 | if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || | ||
164 | ((bytes2 != 0) && | ||
165 | copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) | ||
166 | goto unlock_out; | ||
167 | |||
168 | u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); | ||
169 | rc = bytes1 + bytes2; | ||
170 | |||
171 | unlock_out: | ||
172 | mutex_unlock(&u->ring_cons_mutex); | ||
173 | return rc; | ||
174 | } | ||
175 | |||
176 | static ssize_t evtchn_write(struct file *file, const char __user *buf, | ||
177 | size_t count, loff_t *ppos) | ||
178 | { | ||
179 | int rc, i; | ||
180 | evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL); | ||
181 | struct per_user_data *u = file->private_data; | ||
182 | |||
183 | if (kbuf == NULL) | ||
184 | return -ENOMEM; | ||
185 | |||
186 | /* Whole number of ports. */ | ||
187 | count &= ~(sizeof(evtchn_port_t)-1); | ||
188 | |||
189 | rc = 0; | ||
190 | if (count == 0) | ||
191 | goto out; | ||
192 | |||
193 | if (count > PAGE_SIZE) | ||
194 | count = PAGE_SIZE; | ||
195 | |||
196 | rc = -EFAULT; | ||
197 | if (copy_from_user(kbuf, buf, count) != 0) | ||
198 | goto out; | ||
199 | |||
200 | spin_lock_irq(&port_user_lock); | ||
201 | for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) | ||
202 | if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) | ||
203 | enable_irq(irq_from_evtchn(kbuf[i])); | ||
204 | spin_unlock_irq(&port_user_lock); | ||
205 | |||
206 | rc = count; | ||
207 | |||
208 | out: | ||
209 | free_page((unsigned long)kbuf); | ||
210 | return rc; | ||
211 | } | ||
212 | |||
213 | static int evtchn_bind_to_user(struct per_user_data *u, int port) | ||
214 | { | ||
215 | int rc = 0; | ||
216 | |||
217 | /* | ||
218 | * Ports are never reused, so every caller should pass in a | ||
219 | * unique port. | ||
220 | * | ||
221 | * (Locking not necessary because we haven't registered the | ||
222 | * interrupt handler yet, and our caller has already | ||
223 | * serialized bind operations.) | ||
224 | */ | ||
225 | BUG_ON(port_user[port] != NULL); | ||
226 | port_user[port] = u; | ||
227 | |||
228 | rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, | ||
229 | u->name, (void *)(unsigned long)port); | ||
230 | if (rc >= 0) | ||
231 | rc = 0; | ||
232 | |||
233 | return rc; | ||
234 | } | ||
235 | |||
236 | static void evtchn_unbind_from_user(struct per_user_data *u, int port) | ||
237 | { | ||
238 | int irq = irq_from_evtchn(port); | ||
239 | |||
240 | unbind_from_irqhandler(irq, (void *)(unsigned long)port); | ||
241 | |||
242 | /* make sure we unbind the irq handler before clearing the port */ | ||
243 | barrier(); | ||
244 | |||
245 | port_user[port] = NULL; | ||
246 | } | ||
247 | |||
248 | static long evtchn_ioctl(struct file *file, | ||
249 | unsigned int cmd, unsigned long arg) | ||
250 | { | ||
251 | int rc; | ||
252 | struct per_user_data *u = file->private_data; | ||
253 | void __user *uarg = (void __user *) arg; | ||
254 | |||
255 | /* Prevent bind from racing with unbind */ | ||
256 | mutex_lock(&u->bind_mutex); | ||
257 | |||
258 | switch (cmd) { | ||
259 | case IOCTL_EVTCHN_BIND_VIRQ: { | ||
260 | struct ioctl_evtchn_bind_virq bind; | ||
261 | struct evtchn_bind_virq bind_virq; | ||
262 | |||
263 | rc = -EFAULT; | ||
264 | if (copy_from_user(&bind, uarg, sizeof(bind))) | ||
265 | break; | ||
266 | |||
267 | bind_virq.virq = bind.virq; | ||
268 | bind_virq.vcpu = 0; | ||
269 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | ||
270 | &bind_virq); | ||
271 | if (rc != 0) | ||
272 | break; | ||
273 | |||
274 | rc = evtchn_bind_to_user(u, bind_virq.port); | ||
275 | if (rc == 0) | ||
276 | rc = bind_virq.port; | ||
277 | break; | ||
278 | } | ||
279 | |||
280 | case IOCTL_EVTCHN_BIND_INTERDOMAIN: { | ||
281 | struct ioctl_evtchn_bind_interdomain bind; | ||
282 | struct evtchn_bind_interdomain bind_interdomain; | ||
283 | |||
284 | rc = -EFAULT; | ||
285 | if (copy_from_user(&bind, uarg, sizeof(bind))) | ||
286 | break; | ||
287 | |||
288 | bind_interdomain.remote_dom = bind.remote_domain; | ||
289 | bind_interdomain.remote_port = bind.remote_port; | ||
290 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, | ||
291 | &bind_interdomain); | ||
292 | if (rc != 0) | ||
293 | break; | ||
294 | |||
295 | rc = evtchn_bind_to_user(u, bind_interdomain.local_port); | ||
296 | if (rc == 0) | ||
297 | rc = bind_interdomain.local_port; | ||
298 | break; | ||
299 | } | ||
300 | |||
301 | case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { | ||
302 | struct ioctl_evtchn_bind_unbound_port bind; | ||
303 | struct evtchn_alloc_unbound alloc_unbound; | ||
304 | |||
305 | rc = -EFAULT; | ||
306 | if (copy_from_user(&bind, uarg, sizeof(bind))) | ||
307 | break; | ||
308 | |||
309 | alloc_unbound.dom = DOMID_SELF; | ||
310 | alloc_unbound.remote_dom = bind.remote_domain; | ||
311 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, | ||
312 | &alloc_unbound); | ||
313 | if (rc != 0) | ||
314 | break; | ||
315 | |||
316 | rc = evtchn_bind_to_user(u, alloc_unbound.port); | ||
317 | if (rc == 0) | ||
318 | rc = alloc_unbound.port; | ||
319 | break; | ||
320 | } | ||
321 | |||
322 | case IOCTL_EVTCHN_UNBIND: { | ||
323 | struct ioctl_evtchn_unbind unbind; | ||
324 | |||
325 | rc = -EFAULT; | ||
326 | if (copy_from_user(&unbind, uarg, sizeof(unbind))) | ||
327 | break; | ||
328 | |||
329 | rc = -EINVAL; | ||
330 | if (unbind.port >= NR_EVENT_CHANNELS) | ||
331 | break; | ||
332 | |||
333 | spin_lock_irq(&port_user_lock); | ||
334 | |||
335 | rc = -ENOTCONN; | ||
336 | if (port_user[unbind.port] != u) { | ||
337 | spin_unlock_irq(&port_user_lock); | ||
338 | break; | ||
339 | } | ||
340 | |||
341 | evtchn_unbind_from_user(u, unbind.port); | ||
342 | |||
343 | spin_unlock_irq(&port_user_lock); | ||
344 | |||
345 | rc = 0; | ||
346 | break; | ||
347 | } | ||
348 | |||
349 | case IOCTL_EVTCHN_NOTIFY: { | ||
350 | struct ioctl_evtchn_notify notify; | ||
351 | |||
352 | rc = -EFAULT; | ||
353 | if (copy_from_user(&notify, uarg, sizeof(notify))) | ||
354 | break; | ||
355 | |||
356 | if (notify.port >= NR_EVENT_CHANNELS) { | ||
357 | rc = -EINVAL; | ||
358 | } else if (port_user[notify.port] != u) { | ||
359 | rc = -ENOTCONN; | ||
360 | } else { | ||
361 | notify_remote_via_evtchn(notify.port); | ||
362 | rc = 0; | ||
363 | } | ||
364 | break; | ||
365 | } | ||
366 | |||
367 | case IOCTL_EVTCHN_RESET: { | ||
368 | /* Initialise the ring to empty. Clear errors. */ | ||
369 | mutex_lock(&u->ring_cons_mutex); | ||
370 | spin_lock_irq(&port_user_lock); | ||
371 | u->ring_cons = u->ring_prod = u->ring_overflow = 0; | ||
372 | spin_unlock_irq(&port_user_lock); | ||
373 | mutex_unlock(&u->ring_cons_mutex); | ||
374 | rc = 0; | ||
375 | break; | ||
376 | } | ||
377 | |||
378 | default: | ||
379 | rc = -ENOSYS; | ||
380 | break; | ||
381 | } | ||
382 | mutex_unlock(&u->bind_mutex); | ||
383 | |||
384 | return rc; | ||
385 | } | ||
386 | |||
387 | static unsigned int evtchn_poll(struct file *file, poll_table *wait) | ||
388 | { | ||
389 | unsigned int mask = POLLOUT | POLLWRNORM; | ||
390 | struct per_user_data *u = file->private_data; | ||
391 | |||
392 | poll_wait(file, &u->evtchn_wait, wait); | ||
393 | if (u->ring_cons != u->ring_prod) | ||
394 | mask |= POLLIN | POLLRDNORM; | ||
395 | if (u->ring_overflow) | ||
396 | mask = POLLERR; | ||
397 | return mask; | ||
398 | } | ||
399 | |||
400 | static int evtchn_fasync(int fd, struct file *filp, int on) | ||
401 | { | ||
402 | struct per_user_data *u = filp->private_data; | ||
403 | return fasync_helper(fd, filp, on, &u->evtchn_async_queue); | ||
404 | } | ||
405 | |||
406 | static int evtchn_open(struct inode *inode, struct file *filp) | ||
407 | { | ||
408 | struct per_user_data *u; | ||
409 | |||
410 | u = kzalloc(sizeof(*u), GFP_KERNEL); | ||
411 | if (u == NULL) | ||
412 | return -ENOMEM; | ||
413 | |||
414 | u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm); | ||
415 | if (u->name == NULL) { | ||
416 | kfree(u); | ||
417 | return -ENOMEM; | ||
418 | } | ||
419 | |||
420 | init_waitqueue_head(&u->evtchn_wait); | ||
421 | |||
422 | u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); | ||
423 | if (u->ring == NULL) { | ||
424 | kfree(u->name); | ||
425 | kfree(u); | ||
426 | return -ENOMEM; | ||
427 | } | ||
428 | |||
429 | mutex_init(&u->bind_mutex); | ||
430 | mutex_init(&u->ring_cons_mutex); | ||
431 | |||
432 | filp->private_data = u; | ||
433 | |||
434 | return 0; | ||
435 | } | ||
436 | |||
437 | static int evtchn_release(struct inode *inode, struct file *filp) | ||
438 | { | ||
439 | int i; | ||
440 | struct per_user_data *u = filp->private_data; | ||
441 | |||
442 | spin_lock_irq(&port_user_lock); | ||
443 | |||
444 | free_page((unsigned long)u->ring); | ||
445 | |||
446 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { | ||
447 | if (port_user[i] != u) | ||
448 | continue; | ||
449 | |||
450 | evtchn_unbind_from_user(port_user[i], i); | ||
451 | } | ||
452 | |||
453 | spin_unlock_irq(&port_user_lock); | ||
454 | |||
455 | kfree(u->name); | ||
456 | kfree(u); | ||
457 | |||
458 | return 0; | ||
459 | } | ||
460 | |||
461 | static const struct file_operations evtchn_fops = { | ||
462 | .owner = THIS_MODULE, | ||
463 | .read = evtchn_read, | ||
464 | .write = evtchn_write, | ||
465 | .unlocked_ioctl = evtchn_ioctl, | ||
466 | .poll = evtchn_poll, | ||
467 | .fasync = evtchn_fasync, | ||
468 | .open = evtchn_open, | ||
469 | .release = evtchn_release, | ||
470 | }; | ||
471 | |||
472 | static struct miscdevice evtchn_miscdev = { | ||
473 | .minor = MISC_DYNAMIC_MINOR, | ||
474 | .name = "evtchn", | ||
475 | .fops = &evtchn_fops, | ||
476 | }; | ||
477 | static int __init evtchn_init(void) | ||
478 | { | ||
479 | int err; | ||
480 | |||
481 | if (!xen_domain()) | ||
482 | return -ENODEV; | ||
483 | |||
484 | spin_lock_init(&port_user_lock); | ||
485 | memset(port_user, 0, sizeof(port_user)); | ||
486 | |||
487 | /* Create '/dev/misc/evtchn'. */ | ||
488 | err = misc_register(&evtchn_miscdev); | ||
489 | if (err != 0) { | ||
490 | printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); | ||
491 | return err; | ||
492 | } | ||
493 | |||
494 | printk(KERN_INFO "Event-channel device installed.\n"); | ||
495 | |||
496 | return 0; | ||
497 | } | ||
498 | |||
499 | static void __exit evtchn_cleanup(void) | ||
500 | { | ||
501 | misc_deregister(&evtchn_miscdev); | ||
502 | } | ||
503 | |||
504 | module_init(evtchn_init); | ||
505 | module_exit(evtchn_cleanup); | ||
506 | |||
507 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 4b5b84837ee1..fddc2025dece 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c | |||
@@ -98,9 +98,8 @@ static void do_suspend(void) | |||
98 | goto out; | 98 | goto out; |
99 | } | 99 | } |
100 | 100 | ||
101 | printk("suspending xenbus...\n"); | 101 | printk(KERN_DEBUG "suspending xenstore...\n"); |
102 | /* XXX use normal device tree? */ | 102 | xs_suspend(); |
103 | xenbus_suspend(); | ||
104 | 103 | ||
105 | err = device_power_down(PMSG_SUSPEND); | 104 | err = device_power_down(PMSG_SUSPEND); |
106 | if (err) { | 105 | if (err) { |
@@ -116,9 +115,9 @@ static void do_suspend(void) | |||
116 | 115 | ||
117 | if (!cancelled) { | 116 | if (!cancelled) { |
118 | xen_arch_resume(); | 117 | xen_arch_resume(); |
119 | xenbus_resume(); | 118 | xs_resume(); |
120 | } else | 119 | } else |
121 | xenbus_suspend_cancel(); | 120 | xs_suspend_cancel(); |
122 | 121 | ||
123 | device_power_up(PMSG_RESUME); | 122 | device_power_up(PMSG_RESUME); |
124 | 123 | ||
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c new file mode 100644 index 000000000000..88a60e03ccf0 --- /dev/null +++ b/drivers/xen/sys-hypervisor.c | |||
@@ -0,0 +1,445 @@ | |||
1 | /* | ||
2 | * copyright (c) 2006 IBM Corporation | ||
3 | * Authored by: Mike D. Day <ncmike@us.ibm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/kobject.h> | ||
13 | |||
14 | #include <asm/xen/hypervisor.h> | ||
15 | #include <asm/xen/hypercall.h> | ||
16 | |||
17 | #include <xen/xenbus.h> | ||
18 | #include <xen/interface/xen.h> | ||
19 | #include <xen/interface/version.h> | ||
20 | |||
21 | #define HYPERVISOR_ATTR_RO(_name) \ | ||
22 | static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name) | ||
23 | |||
24 | #define HYPERVISOR_ATTR_RW(_name) \ | ||
25 | static struct hyp_sysfs_attr _name##_attr = \ | ||
26 | __ATTR(_name, 0644, _name##_show, _name##_store) | ||
27 | |||
28 | struct hyp_sysfs_attr { | ||
29 | struct attribute attr; | ||
30 | ssize_t (*show)(struct hyp_sysfs_attr *, char *); | ||
31 | ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t); | ||
32 | void *hyp_attr_data; | ||
33 | }; | ||
34 | |||
35 | static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
36 | { | ||
37 | return sprintf(buffer, "xen\n"); | ||
38 | } | ||
39 | |||
40 | HYPERVISOR_ATTR_RO(type); | ||
41 | |||
42 | static int __init xen_sysfs_type_init(void) | ||
43 | { | ||
44 | return sysfs_create_file(hypervisor_kobj, &type_attr.attr); | ||
45 | } | ||
46 | |||
47 | static void xen_sysfs_type_destroy(void) | ||
48 | { | ||
49 | sysfs_remove_file(hypervisor_kobj, &type_attr.attr); | ||
50 | } | ||
51 | |||
52 | /* xen version attributes */ | ||
53 | static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
54 | { | ||
55 | int version = HYPERVISOR_xen_version(XENVER_version, NULL); | ||
56 | if (version) | ||
57 | return sprintf(buffer, "%d\n", version >> 16); | ||
58 | return -ENODEV; | ||
59 | } | ||
60 | |||
61 | HYPERVISOR_ATTR_RO(major); | ||
62 | |||
63 | static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
64 | { | ||
65 | int version = HYPERVISOR_xen_version(XENVER_version, NULL); | ||
66 | if (version) | ||
67 | return sprintf(buffer, "%d\n", version & 0xff); | ||
68 | return -ENODEV; | ||
69 | } | ||
70 | |||
71 | HYPERVISOR_ATTR_RO(minor); | ||
72 | |||
73 | static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
74 | { | ||
75 | int ret = -ENOMEM; | ||
76 | char *extra; | ||
77 | |||
78 | extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL); | ||
79 | if (extra) { | ||
80 | ret = HYPERVISOR_xen_version(XENVER_extraversion, extra); | ||
81 | if (!ret) | ||
82 | ret = sprintf(buffer, "%s\n", extra); | ||
83 | kfree(extra); | ||
84 | } | ||
85 | |||
86 | return ret; | ||
87 | } | ||
88 | |||
89 | HYPERVISOR_ATTR_RO(extra); | ||
90 | |||
91 | static struct attribute *version_attrs[] = { | ||
92 | &major_attr.attr, | ||
93 | &minor_attr.attr, | ||
94 | &extra_attr.attr, | ||
95 | NULL | ||
96 | }; | ||
97 | |||
98 | static struct attribute_group version_group = { | ||
99 | .name = "version", | ||
100 | .attrs = version_attrs, | ||
101 | }; | ||
102 | |||
103 | static int __init xen_sysfs_version_init(void) | ||
104 | { | ||
105 | return sysfs_create_group(hypervisor_kobj, &version_group); | ||
106 | } | ||
107 | |||
108 | static void xen_sysfs_version_destroy(void) | ||
109 | { | ||
110 | sysfs_remove_group(hypervisor_kobj, &version_group); | ||
111 | } | ||
112 | |||
113 | /* UUID */ | ||
114 | |||
115 | static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
116 | { | ||
117 | char *vm, *val; | ||
118 | int ret; | ||
119 | extern int xenstored_ready; | ||
120 | |||
121 | if (!xenstored_ready) | ||
122 | return -EBUSY; | ||
123 | |||
124 | vm = xenbus_read(XBT_NIL, "vm", "", NULL); | ||
125 | if (IS_ERR(vm)) | ||
126 | return PTR_ERR(vm); | ||
127 | val = xenbus_read(XBT_NIL, vm, "uuid", NULL); | ||
128 | kfree(vm); | ||
129 | if (IS_ERR(val)) | ||
130 | return PTR_ERR(val); | ||
131 | ret = sprintf(buffer, "%s\n", val); | ||
132 | kfree(val); | ||
133 | return ret; | ||
134 | } | ||
135 | |||
136 | HYPERVISOR_ATTR_RO(uuid); | ||
137 | |||
138 | static int __init xen_sysfs_uuid_init(void) | ||
139 | { | ||
140 | return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr); | ||
141 | } | ||
142 | |||
143 | static void xen_sysfs_uuid_destroy(void) | ||
144 | { | ||
145 | sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr); | ||
146 | } | ||
147 | |||
148 | /* xen compilation attributes */ | ||
149 | |||
150 | static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
151 | { | ||
152 | int ret = -ENOMEM; | ||
153 | struct xen_compile_info *info; | ||
154 | |||
155 | info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); | ||
156 | if (info) { | ||
157 | ret = HYPERVISOR_xen_version(XENVER_compile_info, info); | ||
158 | if (!ret) | ||
159 | ret = sprintf(buffer, "%s\n", info->compiler); | ||
160 | kfree(info); | ||
161 | } | ||
162 | |||
163 | return ret; | ||
164 | } | ||
165 | |||
166 | HYPERVISOR_ATTR_RO(compiler); | ||
167 | |||
168 | static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
169 | { | ||
170 | int ret = -ENOMEM; | ||
171 | struct xen_compile_info *info; | ||
172 | |||
173 | info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); | ||
174 | if (info) { | ||
175 | ret = HYPERVISOR_xen_version(XENVER_compile_info, info); | ||
176 | if (!ret) | ||
177 | ret = sprintf(buffer, "%s\n", info->compile_by); | ||
178 | kfree(info); | ||
179 | } | ||
180 | |||
181 | return ret; | ||
182 | } | ||
183 | |||
184 | HYPERVISOR_ATTR_RO(compiled_by); | ||
185 | |||
186 | static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
187 | { | ||
188 | int ret = -ENOMEM; | ||
189 | struct xen_compile_info *info; | ||
190 | |||
191 | info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); | ||
192 | if (info) { | ||
193 | ret = HYPERVISOR_xen_version(XENVER_compile_info, info); | ||
194 | if (!ret) | ||
195 | ret = sprintf(buffer, "%s\n", info->compile_date); | ||
196 | kfree(info); | ||
197 | } | ||
198 | |||
199 | return ret; | ||
200 | } | ||
201 | |||
202 | HYPERVISOR_ATTR_RO(compile_date); | ||
203 | |||
204 | static struct attribute *xen_compile_attrs[] = { | ||
205 | &compiler_attr.attr, | ||
206 | &compiled_by_attr.attr, | ||
207 | &compile_date_attr.attr, | ||
208 | NULL | ||
209 | }; | ||
210 | |||
211 | static struct attribute_group xen_compilation_group = { | ||
212 | .name = "compilation", | ||
213 | .attrs = xen_compile_attrs, | ||
214 | }; | ||
215 | |||
216 | int __init static xen_compilation_init(void) | ||
217 | { | ||
218 | return sysfs_create_group(hypervisor_kobj, &xen_compilation_group); | ||
219 | } | ||
220 | |||
221 | static void xen_compilation_destroy(void) | ||
222 | { | ||
223 | sysfs_remove_group(hypervisor_kobj, &xen_compilation_group); | ||
224 | } | ||
225 | |||
226 | /* xen properties info */ | ||
227 | |||
228 | static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
229 | { | ||
230 | int ret = -ENOMEM; | ||
231 | char *caps; | ||
232 | |||
233 | caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL); | ||
234 | if (caps) { | ||
235 | ret = HYPERVISOR_xen_version(XENVER_capabilities, caps); | ||
236 | if (!ret) | ||
237 | ret = sprintf(buffer, "%s\n", caps); | ||
238 | kfree(caps); | ||
239 | } | ||
240 | |||
241 | return ret; | ||
242 | } | ||
243 | |||
244 | HYPERVISOR_ATTR_RO(capabilities); | ||
245 | |||
246 | static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
247 | { | ||
248 | int ret = -ENOMEM; | ||
249 | char *cset; | ||
250 | |||
251 | cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL); | ||
252 | if (cset) { | ||
253 | ret = HYPERVISOR_xen_version(XENVER_changeset, cset); | ||
254 | if (!ret) | ||
255 | ret = sprintf(buffer, "%s\n", cset); | ||
256 | kfree(cset); | ||
257 | } | ||
258 | |||
259 | return ret; | ||
260 | } | ||
261 | |||
262 | HYPERVISOR_ATTR_RO(changeset); | ||
263 | |||
264 | static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
265 | { | ||
266 | int ret = -ENOMEM; | ||
267 | struct xen_platform_parameters *parms; | ||
268 | |||
269 | parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL); | ||
270 | if (parms) { | ||
271 | ret = HYPERVISOR_xen_version(XENVER_platform_parameters, | ||
272 | parms); | ||
273 | if (!ret) | ||
274 | ret = sprintf(buffer, "%lx\n", parms->virt_start); | ||
275 | kfree(parms); | ||
276 | } | ||
277 | |||
278 | return ret; | ||
279 | } | ||
280 | |||
281 | HYPERVISOR_ATTR_RO(virtual_start); | ||
282 | |||
283 | static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
284 | { | ||
285 | int ret; | ||
286 | |||
287 | ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL); | ||
288 | if (ret > 0) | ||
289 | ret = sprintf(buffer, "%x\n", ret); | ||
290 | |||
291 | return ret; | ||
292 | } | ||
293 | |||
294 | HYPERVISOR_ATTR_RO(pagesize); | ||
295 | |||
296 | static ssize_t xen_feature_show(int index, char *buffer) | ||
297 | { | ||
298 | ssize_t ret; | ||
299 | struct xen_feature_info info; | ||
300 | |||
301 | info.submap_idx = index; | ||
302 | ret = HYPERVISOR_xen_version(XENVER_get_features, &info); | ||
303 | if (!ret) | ||
304 | ret = sprintf(buffer, "%08x", info.submap); | ||
305 | |||
306 | return ret; | ||
307 | } | ||
308 | |||
309 | static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer) | ||
310 | { | ||
311 | ssize_t len; | ||
312 | int i; | ||
313 | |||
314 | len = 0; | ||
315 | for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) { | ||
316 | int ret = xen_feature_show(i, buffer + len); | ||
317 | if (ret < 0) { | ||
318 | if (len == 0) | ||
319 | len = ret; | ||
320 | break; | ||
321 | } | ||
322 | len += ret; | ||
323 | } | ||
324 | if (len > 0) | ||
325 | buffer[len++] = '\n'; | ||
326 | |||
327 | return len; | ||
328 | } | ||
329 | |||
330 | HYPERVISOR_ATTR_RO(features); | ||
331 | |||
332 | static struct attribute *xen_properties_attrs[] = { | ||
333 | &capabilities_attr.attr, | ||
334 | &changeset_attr.attr, | ||
335 | &virtual_start_attr.attr, | ||
336 | &pagesize_attr.attr, | ||
337 | &features_attr.attr, | ||
338 | NULL | ||
339 | }; | ||
340 | |||
341 | static struct attribute_group xen_properties_group = { | ||
342 | .name = "properties", | ||
343 | .attrs = xen_properties_attrs, | ||
344 | }; | ||
345 | |||
346 | static int __init xen_properties_init(void) | ||
347 | { | ||
348 | return sysfs_create_group(hypervisor_kobj, &xen_properties_group); | ||
349 | } | ||
350 | |||
351 | static void xen_properties_destroy(void) | ||
352 | { | ||
353 | sysfs_remove_group(hypervisor_kobj, &xen_properties_group); | ||
354 | } | ||
355 | |||
356 | static int __init hyper_sysfs_init(void) | ||
357 | { | ||
358 | int ret; | ||
359 | |||
360 | if (!xen_domain()) | ||
361 | return -ENODEV; | ||
362 | |||
363 | ret = xen_sysfs_type_init(); | ||
364 | if (ret) | ||
365 | goto out; | ||
366 | ret = xen_sysfs_version_init(); | ||
367 | if (ret) | ||
368 | goto version_out; | ||
369 | ret = xen_compilation_init(); | ||
370 | if (ret) | ||
371 | goto comp_out; | ||
372 | ret = xen_sysfs_uuid_init(); | ||
373 | if (ret) | ||
374 | goto uuid_out; | ||
375 | ret = xen_properties_init(); | ||
376 | if (ret) | ||
377 | goto prop_out; | ||
378 | |||
379 | goto out; | ||
380 | |||
381 | prop_out: | ||
382 | xen_sysfs_uuid_destroy(); | ||
383 | uuid_out: | ||
384 | xen_compilation_destroy(); | ||
385 | comp_out: | ||
386 | xen_sysfs_version_destroy(); | ||
387 | version_out: | ||
388 | xen_sysfs_type_destroy(); | ||
389 | out: | ||
390 | return ret; | ||
391 | } | ||
392 | |||
393 | static void __exit hyper_sysfs_exit(void) | ||
394 | { | ||
395 | xen_properties_destroy(); | ||
396 | xen_compilation_destroy(); | ||
397 | xen_sysfs_uuid_destroy(); | ||
398 | xen_sysfs_version_destroy(); | ||
399 | xen_sysfs_type_destroy(); | ||
400 | |||
401 | } | ||
402 | module_init(hyper_sysfs_init); | ||
403 | module_exit(hyper_sysfs_exit); | ||
404 | |||
405 | static ssize_t hyp_sysfs_show(struct kobject *kobj, | ||
406 | struct attribute *attr, | ||
407 | char *buffer) | ||
408 | { | ||
409 | struct hyp_sysfs_attr *hyp_attr; | ||
410 | hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); | ||
411 | if (hyp_attr->show) | ||
412 | return hyp_attr->show(hyp_attr, buffer); | ||
413 | return 0; | ||
414 | } | ||
415 | |||
416 | static ssize_t hyp_sysfs_store(struct kobject *kobj, | ||
417 | struct attribute *attr, | ||
418 | const char *buffer, | ||
419 | size_t len) | ||
420 | { | ||
421 | struct hyp_sysfs_attr *hyp_attr; | ||
422 | hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); | ||
423 | if (hyp_attr->store) | ||
424 | return hyp_attr->store(hyp_attr, buffer, len); | ||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | static struct sysfs_ops hyp_sysfs_ops = { | ||
429 | .show = hyp_sysfs_show, | ||
430 | .store = hyp_sysfs_store, | ||
431 | }; | ||
432 | |||
433 | static struct kobj_type hyp_sysfs_kobj_type = { | ||
434 | .sysfs_ops = &hyp_sysfs_ops, | ||
435 | }; | ||
436 | |||
437 | static int __init hypervisor_subsys_init(void) | ||
438 | { | ||
439 | if (!xen_domain()) | ||
440 | return -ENODEV; | ||
441 | |||
442 | hypervisor_kobj->ktype = &hyp_sysfs_kobj_type; | ||
443 | return 0; | ||
444 | } | ||
445 | device_initcall(hypervisor_subsys_init); | ||
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 773d1cf23283..d42e25d5968d 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c | |||
@@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name); | |||
71 | 71 | ||
72 | static void xenbus_dev_shutdown(struct device *_dev); | 72 | static void xenbus_dev_shutdown(struct device *_dev); |
73 | 73 | ||
74 | static int xenbus_dev_suspend(struct device *dev, pm_message_t state); | ||
75 | static int xenbus_dev_resume(struct device *dev); | ||
76 | |||
74 | /* If something in array of ids matches this device, return it. */ | 77 | /* If something in array of ids matches this device, return it. */ |
75 | static const struct xenbus_device_id * | 78 | static const struct xenbus_device_id * |
76 | match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) | 79 | match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) |
@@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = { | |||
188 | .remove = xenbus_dev_remove, | 191 | .remove = xenbus_dev_remove, |
189 | .shutdown = xenbus_dev_shutdown, | 192 | .shutdown = xenbus_dev_shutdown, |
190 | .dev_attrs = xenbus_dev_attrs, | 193 | .dev_attrs = xenbus_dev_attrs, |
194 | |||
195 | .suspend = xenbus_dev_suspend, | ||
196 | .resume = xenbus_dev_resume, | ||
191 | }, | 197 | }, |
192 | }; | 198 | }; |
193 | 199 | ||
@@ -654,6 +660,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) | |||
654 | 660 | ||
655 | kfree(root); | 661 | kfree(root); |
656 | } | 662 | } |
663 | EXPORT_SYMBOL_GPL(xenbus_dev_changed); | ||
657 | 664 | ||
658 | static void frontend_changed(struct xenbus_watch *watch, | 665 | static void frontend_changed(struct xenbus_watch *watch, |
659 | const char **vec, unsigned int len) | 666 | const char **vec, unsigned int len) |
@@ -669,7 +676,7 @@ static struct xenbus_watch fe_watch = { | |||
669 | .callback = frontend_changed, | 676 | .callback = frontend_changed, |
670 | }; | 677 | }; |
671 | 678 | ||
672 | static int suspend_dev(struct device *dev, void *data) | 679 | static int xenbus_dev_suspend(struct device *dev, pm_message_t state) |
673 | { | 680 | { |
674 | int err = 0; | 681 | int err = 0; |
675 | struct xenbus_driver *drv; | 682 | struct xenbus_driver *drv; |
@@ -682,35 +689,14 @@ static int suspend_dev(struct device *dev, void *data) | |||
682 | drv = to_xenbus_driver(dev->driver); | 689 | drv = to_xenbus_driver(dev->driver); |
683 | xdev = container_of(dev, struct xenbus_device, dev); | 690 | xdev = container_of(dev, struct xenbus_device, dev); |
684 | if (drv->suspend) | 691 | if (drv->suspend) |
685 | err = drv->suspend(xdev); | 692 | err = drv->suspend(xdev, state); |
686 | if (err) | 693 | if (err) |
687 | printk(KERN_WARNING | 694 | printk(KERN_WARNING |
688 | "xenbus: suspend %s failed: %i\n", dev_name(dev), err); | 695 | "xenbus: suspend %s failed: %i\n", dev_name(dev), err); |
689 | return 0; | 696 | return 0; |
690 | } | 697 | } |
691 | 698 | ||
692 | static int suspend_cancel_dev(struct device *dev, void *data) | 699 | static int xenbus_dev_resume(struct device *dev) |
693 | { | ||
694 | int err = 0; | ||
695 | struct xenbus_driver *drv; | ||
696 | struct xenbus_device *xdev; | ||
697 | |||
698 | DPRINTK(""); | ||
699 | |||
700 | if (dev->driver == NULL) | ||
701 | return 0; | ||
702 | drv = to_xenbus_driver(dev->driver); | ||
703 | xdev = container_of(dev, struct xenbus_device, dev); | ||
704 | if (drv->suspend_cancel) | ||
705 | err = drv->suspend_cancel(xdev); | ||
706 | if (err) | ||
707 | printk(KERN_WARNING | ||
708 | "xenbus: suspend_cancel %s failed: %i\n", | ||
709 | dev_name(dev), err); | ||
710 | return 0; | ||
711 | } | ||
712 | |||
713 | static int resume_dev(struct device *dev, void *data) | ||
714 | { | 700 | { |
715 | int err; | 701 | int err; |
716 | struct xenbus_driver *drv; | 702 | struct xenbus_driver *drv; |
@@ -755,33 +741,6 @@ static int resume_dev(struct device *dev, void *data) | |||
755 | return 0; | 741 | return 0; |
756 | } | 742 | } |
757 | 743 | ||
758 | void xenbus_suspend(void) | ||
759 | { | ||
760 | DPRINTK(""); | ||
761 | |||
762 | bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); | ||
763 | xenbus_backend_suspend(suspend_dev); | ||
764 | xs_suspend(); | ||
765 | } | ||
766 | EXPORT_SYMBOL_GPL(xenbus_suspend); | ||
767 | |||
768 | void xenbus_resume(void) | ||
769 | { | ||
770 | xb_init_comms(); | ||
771 | xs_resume(); | ||
772 | bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); | ||
773 | xenbus_backend_resume(resume_dev); | ||
774 | } | ||
775 | EXPORT_SYMBOL_GPL(xenbus_resume); | ||
776 | |||
777 | void xenbus_suspend_cancel(void) | ||
778 | { | ||
779 | xs_suspend_cancel(); | ||
780 | bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); | ||
781 | xenbus_backend_resume(suspend_cancel_dev); | ||
782 | } | ||
783 | EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); | ||
784 | |||
785 | /* A flag to determine if xenstored is 'ready' (i.e. has started) */ | 744 | /* A flag to determine if xenstored is 'ready' (i.e. has started) */ |
786 | int xenstored_ready = 0; | 745 | int xenstored_ready = 0; |
787 | 746 | ||
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index e325eab4724d..eab33f1dbdf7 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c | |||
@@ -673,6 +673,8 @@ void xs_resume(void) | |||
673 | struct xenbus_watch *watch; | 673 | struct xenbus_watch *watch; |
674 | char token[sizeof(watch) * 2 + 1]; | 674 | char token[sizeof(watch) * 2 + 1]; |
675 | 675 | ||
676 | xb_init_comms(); | ||
677 | |||
676 | mutex_unlock(&xs_state.response_mutex); | 678 | mutex_unlock(&xs_state.response_mutex); |
677 | mutex_unlock(&xs_state.request_mutex); | 679 | mutex_unlock(&xs_state.request_mutex); |
678 | up_write(&xs_state.transaction_mutex); | 680 | up_write(&xs_state.transaction_mutex); |
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 515741a8e6b8..6559e0c752ce 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c | |||
@@ -20,10 +20,27 @@ | |||
20 | MODULE_DESCRIPTION("Xen filesystem"); | 20 | MODULE_DESCRIPTION("Xen filesystem"); |
21 | MODULE_LICENSE("GPL"); | 21 | MODULE_LICENSE("GPL"); |
22 | 22 | ||
23 | static ssize_t capabilities_read(struct file *file, char __user *buf, | ||
24 | size_t size, loff_t *off) | ||
25 | { | ||
26 | char *tmp = ""; | ||
27 | |||
28 | if (xen_initial_domain()) | ||
29 | tmp = "control_d\n"; | ||
30 | |||
31 | return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp)); | ||
32 | } | ||
33 | |||
34 | static const struct file_operations capabilities_file_ops = { | ||
35 | .read = capabilities_read, | ||
36 | }; | ||
37 | |||
23 | static int xenfs_fill_super(struct super_block *sb, void *data, int silent) | 38 | static int xenfs_fill_super(struct super_block *sb, void *data, int silent) |
24 | { | 39 | { |
25 | static struct tree_descr xenfs_files[] = { | 40 | static struct tree_descr xenfs_files[] = { |
26 | [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR}, | 41 | [1] = {}, |
42 | { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, | ||
43 | { "capabilities", &capabilities_file_ops, S_IRUGO }, | ||
27 | {""}, | 44 | {""}, |
28 | }; | 45 | }; |
29 | 46 | ||
diff --git a/include/Kbuild b/include/Kbuild index d8c3e3cbf416..fe36accd4328 100644 --- a/include/Kbuild +++ b/include/Kbuild | |||
@@ -8,3 +8,4 @@ header-y += mtd/ | |||
8 | header-y += rdma/ | 8 | header-y += rdma/ |
9 | header-y += video/ | 9 | header-y += video/ |
10 | header-y += drm/ | 10 | header-y += drm/ |
11 | header-y += xen/ | ||
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 8e6d0ca70aba..e410f602cab1 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, | |||
280 | #endif | 280 | #endif |
281 | 281 | ||
282 | /* | 282 | /* |
283 | * A facility to provide batching of the reload of page tables with the | 283 | * A facility to provide batching of the reload of page tables and |
284 | * actual context switch code for paravirtualized guests. By convention, | 284 | * other process state with the actual context switch code for |
285 | * only one of the lazy modes (CPU, MMU) should be active at any given | 285 | * paravirtualized guests. By convention, only one of the batched |
286 | * time, entry should never be nested, and entry and exits should always | 286 | * update (lazy) modes (CPU, MMU) should be active at any given time, |
287 | * be paired. This is for sanity of maintaining and reasoning about the | 287 | * entry should never be nested, and entry and exits should always be |
288 | * kernel code. | 288 | * paired. This is for sanity of maintaining and reasoning about the |
289 | * kernel code. In this case, the exit (end of the context switch) is | ||
290 | * in architecture-specific code, and so doesn't need a generic | ||
291 | * definition. | ||
289 | */ | 292 | */ |
290 | #ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE | 293 | #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH |
291 | #define arch_enter_lazy_cpu_mode() do {} while (0) | 294 | #define arch_start_context_switch(prev) do {} while (0) |
292 | #define arch_leave_lazy_cpu_mode() do {} while (0) | ||
293 | #define arch_flush_lazy_cpu_mode() do {} while (0) | ||
294 | #endif | 295 | #endif |
295 | 296 | ||
296 | #ifndef __HAVE_PFNMAP_TRACKING | 297 | #ifndef __HAVE_PFNMAP_TRACKING |
diff --git a/include/xen/Kbuild b/include/xen/Kbuild new file mode 100644 index 000000000000..4e65c16a445b --- /dev/null +++ b/include/xen/Kbuild | |||
@@ -0,0 +1 @@ | |||
header-y += evtchn.h | |||
diff --git a/include/xen/events.h b/include/xen/events.h index 0d5f1adc0363..e68d59a90ca8 100644 --- a/include/xen/events.h +++ b/include/xen/events.h | |||
@@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq); | |||
53 | irq will be disabled so it won't deliver an interrupt. */ | 53 | irq will be disabled so it won't deliver an interrupt. */ |
54 | void xen_poll_irq(int irq); | 54 | void xen_poll_irq(int irq); |
55 | 55 | ||
56 | /* Determine the IRQ which is bound to an event channel */ | ||
57 | unsigned irq_from_evtchn(unsigned int evtchn); | ||
58 | |||
56 | #endif /* _XEN_EVENTS_H */ | 59 | #endif /* _XEN_EVENTS_H */ |
diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h new file mode 100644 index 000000000000..14e833ee4e0b --- /dev/null +++ b/include/xen/evtchn.h | |||
@@ -0,0 +1,88 @@ | |||
1 | /****************************************************************************** | ||
2 | * evtchn.h | ||
3 | * | ||
4 | * Interface to /dev/xen/evtchn. | ||
5 | * | ||
6 | * Copyright (c) 2003-2005, K A Fraser | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version 2 | ||
10 | * as published by the Free Software Foundation; or, when distributed | ||
11 | * separately from the Linux kernel or incorporated into other | ||
12 | * software packages, subject to the following license: | ||
13 | * | ||
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
15 | * of this source file (the "Software"), to deal in the Software without | ||
16 | * restriction, including without limitation the rights to use, copy, modify, | ||
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
18 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
19 | * the following conditions: | ||
20 | * | ||
21 | * The above copyright notice and this permission notice shall be included in | ||
22 | * all copies or substantial portions of the Software. | ||
23 | * | ||
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
30 | * IN THE SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef __LINUX_PUBLIC_EVTCHN_H__ | ||
34 | #define __LINUX_PUBLIC_EVTCHN_H__ | ||
35 | |||
36 | /* | ||
37 | * Bind a fresh port to VIRQ @virq. | ||
38 | * Return allocated port. | ||
39 | */ | ||
40 | #define IOCTL_EVTCHN_BIND_VIRQ \ | ||
41 | _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) | ||
42 | struct ioctl_evtchn_bind_virq { | ||
43 | unsigned int virq; | ||
44 | }; | ||
45 | |||
46 | /* | ||
47 | * Bind a fresh port to remote <@remote_domain, @remote_port>. | ||
48 | * Return allocated port. | ||
49 | */ | ||
50 | #define IOCTL_EVTCHN_BIND_INTERDOMAIN \ | ||
51 | _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) | ||
52 | struct ioctl_evtchn_bind_interdomain { | ||
53 | unsigned int remote_domain, remote_port; | ||
54 | }; | ||
55 | |||
56 | /* | ||
57 | * Allocate a fresh port for binding to @remote_domain. | ||
58 | * Return allocated port. | ||
59 | */ | ||
60 | #define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ | ||
61 | _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) | ||
62 | struct ioctl_evtchn_bind_unbound_port { | ||
63 | unsigned int remote_domain; | ||
64 | }; | ||
65 | |||
66 | /* | ||
67 | * Unbind previously allocated @port. | ||
68 | */ | ||
69 | #define IOCTL_EVTCHN_UNBIND \ | ||
70 | _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) | ||
71 | struct ioctl_evtchn_unbind { | ||
72 | unsigned int port; | ||
73 | }; | ||
74 | |||
75 | /* | ||
76 | * Send a notification on previously allocated @port. | ||
77 | */ | ||
78 | #define IOCTL_EVTCHN_NOTIFY \ | ||
79 | _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) | ||
80 | struct ioctl_evtchn_notify { | ||
81 | unsigned int port; | ||
82 | }; | ||
83 | |||
84 | /* Clear and reinitialise the event buffer. Clear error condition. */ | ||
85 | #define IOCTL_EVTCHN_RESET \ | ||
86 | _IOC(_IOC_NONE, 'E', 5, 0) | ||
87 | |||
88 | #endif /* __LINUX_PUBLIC_EVTCHN_H__ */ | ||
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 453235e923f0..e8b6519d47e9 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h | |||
@@ -57,4 +57,7 @@ struct xen_feature_info { | |||
57 | /* Declares the features reported by XENVER_get_features. */ | 57 | /* Declares the features reported by XENVER_get_features. */ |
58 | #include "features.h" | 58 | #include "features.h" |
59 | 59 | ||
60 | /* arg == NULL; returns host memory page size. */ | ||
61 | #define XENVER_pagesize 7 | ||
62 | |||
60 | #endif /* __XEN_PUBLIC_VERSION_H__ */ | 63 | #endif /* __XEN_PUBLIC_VERSION_H__ */ |
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index f87f9614844d..b9763badbd77 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h | |||
@@ -91,8 +91,7 @@ struct xenbus_driver { | |||
91 | void (*otherend_changed)(struct xenbus_device *dev, | 91 | void (*otherend_changed)(struct xenbus_device *dev, |
92 | enum xenbus_state backend_state); | 92 | enum xenbus_state backend_state); |
93 | int (*remove)(struct xenbus_device *dev); | 93 | int (*remove)(struct xenbus_device *dev); |
94 | int (*suspend)(struct xenbus_device *dev); | 94 | int (*suspend)(struct xenbus_device *dev, pm_message_t state); |
95 | int (*suspend_cancel)(struct xenbus_device *dev); | ||
96 | int (*resume)(struct xenbus_device *dev); | 95 | int (*resume)(struct xenbus_device *dev); |
97 | int (*uevent)(struct xenbus_device *, char **, int, char *, int); | 96 | int (*uevent)(struct xenbus_device *, char **, int, char *, int); |
98 | struct device_driver driver; | 97 | struct device_driver driver; |
diff --git a/kernel/sched.c b/kernel/sched.c index c3c04e256560..076e403b9c88 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2783,7 +2783,7 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2783 | * combine the page table reload and the switch backend into | 2783 | * combine the page table reload and the switch backend into |
2784 | * one hypercall. | 2784 | * one hypercall. |
2785 | */ | 2785 | */ |
2786 | arch_enter_lazy_cpu_mode(); | 2786 | arch_start_context_switch(prev); |
2787 | 2787 | ||
2788 | if (unlikely(!mm)) { | 2788 | if (unlikely(!mm)) { |
2789 | next->active_mm = oldmm; | 2789 | next->active_mm = oldmm; |