diff options
author | Jeremy Fitzhardinge <jeremy@xensource.com> | 2007-07-17 21:37:06 -0400 |
---|---|---|
committer | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-07-18 11:47:44 -0400 |
commit | f120f13ea0dbb0b0d6675683d5f6faea71277e65 (patch) | |
tree | 6b525ab73bedfa78e43dee303ac991099377e9c5 | |
parent | f87e4cac4f4e940b328d3deb5b53e642e3881f43 (diff) |
xen: Add support for preemption
Add Xen support for preemption. This is mostly a cleanup of existing
preempt_enable/disable calls, or just comments to explain the current
usage.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
-rw-r--r-- | arch/i386/xen/Kconfig | 2 | ||||
-rw-r--r-- | arch/i386/xen/enlighten.c | 80 | ||||
-rw-r--r-- | arch/i386/xen/mmu.c | 3 | ||||
-rw-r--r-- | arch/i386/xen/multicalls.c | 11 | ||||
-rw-r--r-- | arch/i386/xen/time.c | 22 |
5 files changed, 76 insertions, 42 deletions
diff --git a/arch/i386/xen/Kconfig b/arch/i386/xen/Kconfig index b7697ff22361..9df99e1885a4 100644 --- a/arch/i386/xen/Kconfig +++ b/arch/i386/xen/Kconfig | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | config XEN | 5 | config XEN |
6 | bool "Enable support for Xen hypervisor" | 6 | bool "Enable support for Xen hypervisor" |
7 | depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !(PREEMPT || NEED_MULTIPLE_NODES) | 7 | depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES |
8 | help | 8 | help |
9 | This is the Linux Xen port. Enabling this will allow the | 9 | This is the Linux Xen port. Enabling this will allow the |
10 | kernel to boot in a paravirtualized environment under the | 10 | kernel to boot in a paravirtualized environment under the |
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c index de62d66e0893..a1124b7f1d14 100644 --- a/arch/i386/xen/enlighten.c +++ b/arch/i386/xen/enlighten.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/smp.h> | 16 | #include <linux/smp.h> |
17 | #include <linux/preempt.h> | 17 | #include <linux/preempt.h> |
18 | #include <linux/hardirq.h> | ||
18 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
19 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
20 | #include <linux/start_kernel.h> | 21 | #include <linux/start_kernel.h> |
@@ -108,11 +109,10 @@ static unsigned long xen_save_fl(void) | |||
108 | struct vcpu_info *vcpu; | 109 | struct vcpu_info *vcpu; |
109 | unsigned long flags; | 110 | unsigned long flags; |
110 | 111 | ||
111 | preempt_disable(); | ||
112 | vcpu = x86_read_percpu(xen_vcpu); | 112 | vcpu = x86_read_percpu(xen_vcpu); |
113 | |||
113 | /* flag has opposite sense of mask */ | 114 | /* flag has opposite sense of mask */ |
114 | flags = !vcpu->evtchn_upcall_mask; | 115 | flags = !vcpu->evtchn_upcall_mask; |
115 | preempt_enable(); | ||
116 | 116 | ||
117 | /* convert to IF type flag | 117 | /* convert to IF type flag |
118 | -0 -> 0x00000000 | 118 | -0 -> 0x00000000 |
@@ -125,32 +125,35 @@ static void xen_restore_fl(unsigned long flags) | |||
125 | { | 125 | { |
126 | struct vcpu_info *vcpu; | 126 | struct vcpu_info *vcpu; |
127 | 127 | ||
128 | preempt_disable(); | ||
129 | |||
130 | /* convert from IF type flag */ | 128 | /* convert from IF type flag */ |
131 | flags = !(flags & X86_EFLAGS_IF); | 129 | flags = !(flags & X86_EFLAGS_IF); |
130 | |||
131 | /* There's a one instruction preempt window here. We need to | ||
132 | make sure we don't switch CPUs between getting the vcpu | ||
133 | pointer and updating the mask. */ | ||
134 | preempt_disable(); | ||
132 | vcpu = x86_read_percpu(xen_vcpu); | 135 | vcpu = x86_read_percpu(xen_vcpu); |
133 | vcpu->evtchn_upcall_mask = flags; | 136 | vcpu->evtchn_upcall_mask = flags; |
137 | preempt_enable_no_resched(); | ||
134 | 138 | ||
135 | if (flags == 0) { | 139 | /* Doesn't matter if we get preempted here, because any |
136 | /* Unmask then check (avoid races). We're only protecting | 140 | pending event will get dealt with anyway. */ |
137 | against updates by this CPU, so there's no need for | ||
138 | anything stronger. */ | ||
139 | barrier(); | ||
140 | 141 | ||
142 | if (flags == 0) { | ||
143 | preempt_check_resched(); | ||
144 | barrier(); /* unmask then check (avoid races) */ | ||
141 | if (unlikely(vcpu->evtchn_upcall_pending)) | 145 | if (unlikely(vcpu->evtchn_upcall_pending)) |
142 | force_evtchn_callback(); | 146 | force_evtchn_callback(); |
143 | preempt_enable(); | 147 | } |
144 | } else | ||
145 | preempt_enable_no_resched(); | ||
146 | } | 148 | } |
147 | 149 | ||
148 | static void xen_irq_disable(void) | 150 | static void xen_irq_disable(void) |
149 | { | 151 | { |
150 | struct vcpu_info *vcpu; | 152 | /* There's a one instruction preempt window here. We need to |
153 | make sure we don't switch CPUs between getting the vcpu | ||
154 | pointer and updating the mask. */ | ||
151 | preempt_disable(); | 155 | preempt_disable(); |
152 | vcpu = x86_read_percpu(xen_vcpu); | 156 | x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; |
153 | vcpu->evtchn_upcall_mask = 1; | ||
154 | preempt_enable_no_resched(); | 157 | preempt_enable_no_resched(); |
155 | } | 158 | } |
156 | 159 | ||
@@ -158,18 +161,20 @@ static void xen_irq_enable(void) | |||
158 | { | 161 | { |
159 | struct vcpu_info *vcpu; | 162 | struct vcpu_info *vcpu; |
160 | 163 | ||
164 | /* There's a one instruction preempt window here. We need to | ||
165 | make sure we don't switch CPUs between getting the vcpu | ||
166 | pointer and updating the mask. */ | ||
161 | preempt_disable(); | 167 | preempt_disable(); |
162 | vcpu = x86_read_percpu(xen_vcpu); | 168 | vcpu = x86_read_percpu(xen_vcpu); |
163 | vcpu->evtchn_upcall_mask = 0; | 169 | vcpu->evtchn_upcall_mask = 0; |
170 | preempt_enable_no_resched(); | ||
164 | 171 | ||
165 | /* Unmask then check (avoid races). We're only protecting | 172 | /* Doesn't matter if we get preempted here, because any |
166 | against updates by this CPU, so there's no need for | 173 | pending event will get dealt with anyway. */ |
167 | anything stronger. */ | ||
168 | barrier(); | ||
169 | 174 | ||
175 | barrier(); /* unmask then check (avoid races) */ | ||
170 | if (unlikely(vcpu->evtchn_upcall_pending)) | 176 | if (unlikely(vcpu->evtchn_upcall_pending)) |
171 | force_evtchn_callback(); | 177 | force_evtchn_callback(); |
172 | preempt_enable(); | ||
173 | } | 178 | } |
174 | 179 | ||
175 | static void xen_safe_halt(void) | 180 | static void xen_safe_halt(void) |
@@ -189,6 +194,8 @@ static void xen_halt(void) | |||
189 | 194 | ||
190 | static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) | 195 | static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) |
191 | { | 196 | { |
197 | BUG_ON(preemptible()); | ||
198 | |||
192 | switch (mode) { | 199 | switch (mode) { |
193 | case PARAVIRT_LAZY_NONE: | 200 | case PARAVIRT_LAZY_NONE: |
194 | BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE); | 201 | BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE); |
@@ -293,9 +300,13 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | |||
293 | xmaddr_t mach_lp = virt_to_machine(lp); | 300 | xmaddr_t mach_lp = virt_to_machine(lp); |
294 | u64 entry = (u64)high << 32 | low; | 301 | u64 entry = (u64)high << 32 | low; |
295 | 302 | ||
303 | preempt_disable(); | ||
304 | |||
296 | xen_mc_flush(); | 305 | xen_mc_flush(); |
297 | if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry)) | 306 | if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry)) |
298 | BUG(); | 307 | BUG(); |
308 | |||
309 | preempt_enable(); | ||
299 | } | 310 | } |
300 | 311 | ||
301 | static int cvt_gate_to_trap(int vector, u32 low, u32 high, | 312 | static int cvt_gate_to_trap(int vector, u32 low, u32 high, |
@@ -328,11 +339,13 @@ static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc); | |||
328 | static void xen_write_idt_entry(struct desc_struct *dt, int entrynum, | 339 | static void xen_write_idt_entry(struct desc_struct *dt, int entrynum, |
329 | u32 low, u32 high) | 340 | u32 low, u32 high) |
330 | { | 341 | { |
331 | |||
332 | int cpu = smp_processor_id(); | ||
333 | unsigned long p = (unsigned long)&dt[entrynum]; | 342 | unsigned long p = (unsigned long)&dt[entrynum]; |
334 | unsigned long start = per_cpu(idt_desc, cpu).address; | 343 | unsigned long start, end; |
335 | unsigned long end = start + per_cpu(idt_desc, cpu).size + 1; | 344 | |
345 | preempt_disable(); | ||
346 | |||
347 | start = __get_cpu_var(idt_desc).address; | ||
348 | end = start + __get_cpu_var(idt_desc).size + 1; | ||
336 | 349 | ||
337 | xen_mc_flush(); | 350 | xen_mc_flush(); |
338 | 351 | ||
@@ -347,6 +360,8 @@ static void xen_write_idt_entry(struct desc_struct *dt, int entrynum, | |||
347 | if (HYPERVISOR_set_trap_table(info)) | 360 | if (HYPERVISOR_set_trap_table(info)) |
348 | BUG(); | 361 | BUG(); |
349 | } | 362 | } |
363 | |||
364 | preempt_enable(); | ||
350 | } | 365 | } |
351 | 366 | ||
352 | static void xen_convert_trap_info(const struct Xgt_desc_struct *desc, | 367 | static void xen_convert_trap_info(const struct Xgt_desc_struct *desc, |
@@ -368,11 +383,9 @@ static void xen_convert_trap_info(const struct Xgt_desc_struct *desc, | |||
368 | 383 | ||
369 | void xen_copy_trap_info(struct trap_info *traps) | 384 | void xen_copy_trap_info(struct trap_info *traps) |
370 | { | 385 | { |
371 | const struct Xgt_desc_struct *desc = &get_cpu_var(idt_desc); | 386 | const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc); |
372 | 387 | ||
373 | xen_convert_trap_info(desc, traps); | 388 | xen_convert_trap_info(desc, traps); |
374 | |||
375 | put_cpu_var(idt_desc); | ||
376 | } | 389 | } |
377 | 390 | ||
378 | /* Load a new IDT into Xen. In principle this can be per-CPU, so we | 391 | /* Load a new IDT into Xen. In principle this can be per-CPU, so we |
@@ -382,12 +395,11 @@ static void xen_load_idt(const struct Xgt_desc_struct *desc) | |||
382 | { | 395 | { |
383 | static DEFINE_SPINLOCK(lock); | 396 | static DEFINE_SPINLOCK(lock); |
384 | static struct trap_info traps[257]; | 397 | static struct trap_info traps[257]; |
385 | int cpu = smp_processor_id(); | ||
386 | |||
387 | per_cpu(idt_desc, cpu) = *desc; | ||
388 | 398 | ||
389 | spin_lock(&lock); | 399 | spin_lock(&lock); |
390 | 400 | ||
401 | __get_cpu_var(idt_desc) = *desc; | ||
402 | |||
391 | xen_convert_trap_info(desc, traps); | 403 | xen_convert_trap_info(desc, traps); |
392 | 404 | ||
393 | xen_mc_flush(); | 405 | xen_mc_flush(); |
@@ -402,6 +414,8 @@ static void xen_load_idt(const struct Xgt_desc_struct *desc) | |||
402 | static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | 414 | static void xen_write_gdt_entry(struct desc_struct *dt, int entry, |
403 | u32 low, u32 high) | 415 | u32 low, u32 high) |
404 | { | 416 | { |
417 | preempt_disable(); | ||
418 | |||
405 | switch ((high >> 8) & 0xff) { | 419 | switch ((high >> 8) & 0xff) { |
406 | case DESCTYPE_LDT: | 420 | case DESCTYPE_LDT: |
407 | case DESCTYPE_TSS: | 421 | case DESCTYPE_TSS: |
@@ -418,10 +432,12 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | |||
418 | } | 432 | } |
419 | 433 | ||
420 | } | 434 | } |
435 | |||
436 | preempt_enable(); | ||
421 | } | 437 | } |
422 | 438 | ||
423 | static void xen_load_esp0(struct tss_struct *tss, | 439 | static void xen_load_esp0(struct tss_struct *tss, |
424 | struct thread_struct *thread) | 440 | struct thread_struct *thread) |
425 | { | 441 | { |
426 | struct multicall_space mcs = xen_mc_entry(0); | 442 | struct multicall_space mcs = xen_mc_entry(0); |
427 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0); | 443 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0); |
@@ -525,6 +541,8 @@ static unsigned long xen_read_cr3(void) | |||
525 | 541 | ||
526 | static void xen_write_cr3(unsigned long cr3) | 542 | static void xen_write_cr3(unsigned long cr3) |
527 | { | 543 | { |
544 | BUG_ON(preemptible()); | ||
545 | |||
528 | if (cr3 == x86_read_percpu(xen_cr3)) { | 546 | if (cr3 == x86_read_percpu(xen_cr3)) { |
529 | /* just a simple tlb flush */ | 547 | /* just a simple tlb flush */ |
530 | xen_flush_tlb(); | 548 | xen_flush_tlb(); |
diff --git a/arch/i386/xen/mmu.c b/arch/i386/xen/mmu.c index bc49ef846203..f431cf14e644 100644 --- a/arch/i386/xen/mmu.c +++ b/arch/i386/xen/mmu.c | |||
@@ -38,6 +38,7 @@ | |||
38 | * | 38 | * |
39 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | 39 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 |
40 | */ | 40 | */ |
41 | #include <linux/sched.h> | ||
41 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
42 | #include <linux/bug.h> | 43 | #include <linux/bug.h> |
43 | #include <linux/sched.h> | 44 | #include <linux/sched.h> |
@@ -531,5 +532,7 @@ void xen_exit_mmap(struct mm_struct *mm) | |||
531 | drop_mm_ref(mm); | 532 | drop_mm_ref(mm); |
532 | put_cpu(); | 533 | put_cpu(); |
533 | 534 | ||
535 | spin_lock(&mm->page_table_lock); | ||
534 | xen_pgd_unpin(mm->pgd); | 536 | xen_pgd_unpin(mm->pgd); |
537 | spin_unlock(&mm->page_table_lock); | ||
535 | } | 538 | } |
diff --git a/arch/i386/xen/multicalls.c b/arch/i386/xen/multicalls.c index 869f9833f08f..d4015a9ed46c 100644 --- a/arch/i386/xen/multicalls.c +++ b/arch/i386/xen/multicalls.c | |||
@@ -20,6 +20,7 @@ | |||
20 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | 20 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 |
21 | */ | 21 | */ |
22 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
23 | #include <linux/hardirq.h> | ||
23 | 24 | ||
24 | #include <asm/xen/hypercall.h> | 25 | #include <asm/xen/hypercall.h> |
25 | 26 | ||
@@ -39,10 +40,12 @@ DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); | |||
39 | 40 | ||
40 | void xen_mc_flush(void) | 41 | void xen_mc_flush(void) |
41 | { | 42 | { |
42 | struct mc_buffer *b = &get_cpu_var(mc_buffer); | 43 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
43 | int ret = 0; | 44 | int ret = 0; |
44 | unsigned long flags; | 45 | unsigned long flags; |
45 | 46 | ||
47 | BUG_ON(preemptible()); | ||
48 | |||
46 | /* Disable interrupts in case someone comes in and queues | 49 | /* Disable interrupts in case someone comes in and queues |
47 | something in the middle */ | 50 | something in the middle */ |
48 | local_irq_save(flags); | 51 | local_irq_save(flags); |
@@ -60,7 +63,6 @@ void xen_mc_flush(void) | |||
60 | } else | 63 | } else |
61 | BUG_ON(b->argidx != 0); | 64 | BUG_ON(b->argidx != 0); |
62 | 65 | ||
63 | put_cpu_var(mc_buffer); | ||
64 | local_irq_restore(flags); | 66 | local_irq_restore(flags); |
65 | 67 | ||
66 | BUG_ON(ret); | 68 | BUG_ON(ret); |
@@ -68,10 +70,11 @@ void xen_mc_flush(void) | |||
68 | 70 | ||
69 | struct multicall_space __xen_mc_entry(size_t args) | 71 | struct multicall_space __xen_mc_entry(size_t args) |
70 | { | 72 | { |
71 | struct mc_buffer *b = &get_cpu_var(mc_buffer); | 73 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
72 | struct multicall_space ret; | 74 | struct multicall_space ret; |
73 | unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); | 75 | unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); |
74 | 76 | ||
77 | BUG_ON(preemptible()); | ||
75 | BUG_ON(argspace > MC_ARGS); | 78 | BUG_ON(argspace > MC_ARGS); |
76 | 79 | ||
77 | if (b->mcidx == MC_BATCH || | 80 | if (b->mcidx == MC_BATCH || |
@@ -83,7 +86,5 @@ struct multicall_space __xen_mc_entry(size_t args) | |||
83 | ret.args = &b->args[b->argidx]; | 86 | ret.args = &b->args[b->argidx]; |
84 | b->argidx += argspace; | 87 | b->argidx += argspace; |
85 | 88 | ||
86 | put_cpu_var(mc_buffer); | ||
87 | |||
88 | return ret; | 89 | return ret; |
89 | } | 90 | } |
diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c index aeb04cf5dbf1..51fdabf1fd4d 100644 --- a/arch/i386/xen/time.c +++ b/arch/i386/xen/time.c | |||
@@ -88,7 +88,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) | |||
88 | u64 state_time; | 88 | u64 state_time; |
89 | struct vcpu_runstate_info *state; | 89 | struct vcpu_runstate_info *state; |
90 | 90 | ||
91 | preempt_disable(); | 91 | BUG_ON(preemptible()); |
92 | 92 | ||
93 | state = &__get_cpu_var(runstate); | 93 | state = &__get_cpu_var(runstate); |
94 | 94 | ||
@@ -103,8 +103,6 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) | |||
103 | *res = *state; | 103 | *res = *state; |
104 | barrier(); | 104 | barrier(); |
105 | } while (get64(&state->state_entry_time) != state_time); | 105 | } while (get64(&state->state_entry_time) != state_time); |
106 | |||
107 | preempt_enable(); | ||
108 | } | 106 | } |
109 | 107 | ||
110 | static void setup_runstate_info(int cpu) | 108 | static void setup_runstate_info(int cpu) |
@@ -179,9 +177,19 @@ static void do_stolen_accounting(void) | |||
179 | unsigned long long xen_sched_clock(void) | 177 | unsigned long long xen_sched_clock(void) |
180 | { | 178 | { |
181 | struct vcpu_runstate_info state; | 179 | struct vcpu_runstate_info state; |
182 | cycle_t now = xen_clocksource_read(); | 180 | cycle_t now; |
181 | u64 ret; | ||
183 | s64 offset; | 182 | s64 offset; |
184 | 183 | ||
184 | /* | ||
185 | * Ideally sched_clock should be called on a per-cpu basis | ||
186 | * anyway, so preempt should already be disabled, but that's | ||
187 | * not current practice at the moment. | ||
188 | */ | ||
189 | preempt_disable(); | ||
190 | |||
191 | now = xen_clocksource_read(); | ||
192 | |||
185 | get_runstate_snapshot(&state); | 193 | get_runstate_snapshot(&state); |
186 | 194 | ||
187 | WARN_ON(state.state != RUNSTATE_running); | 195 | WARN_ON(state.state != RUNSTATE_running); |
@@ -190,9 +198,13 @@ unsigned long long xen_sched_clock(void) | |||
190 | if (offset < 0) | 198 | if (offset < 0) |
191 | offset = 0; | 199 | offset = 0; |
192 | 200 | ||
193 | return state.time[RUNSTATE_blocked] + | 201 | ret = state.time[RUNSTATE_blocked] + |
194 | state.time[RUNSTATE_running] + | 202 | state.time[RUNSTATE_running] + |
195 | offset; | 203 | offset; |
204 | |||
205 | preempt_enable(); | ||
206 | |||
207 | return ret; | ||
196 | } | 208 | } |
197 | 209 | ||
198 | 210 | ||