diff options
50 files changed, 1696 insertions, 846 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c9be69fedb70..7b655b5bb9ab 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE | |||
10 | # Do not profile debug and lowlevel utilities | 10 | # Do not profile debug and lowlevel utilities |
11 | CFLAGS_REMOVE_tsc.o = -pg | 11 | CFLAGS_REMOVE_tsc.o = -pg |
12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
13 | CFLAGS_REMOVE_paravirt.o = -pg | 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg |
14 | endif | 14 | endif |
15 | 15 | ||
16 | # | 16 | # |
@@ -90,7 +90,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | |||
90 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | 90 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o |
91 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 91 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
92 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 92 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
93 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 93 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o |
94 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | 94 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o |
95 | 95 | ||
96 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o | 96 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7581b62df184..fb789dd9e691 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1121,16 +1121,5 @@ void __cpuinit cpu_init(void) | |||
1121 | xsave_init(); | 1121 | xsave_init(); |
1122 | } | 1122 | } |
1123 | 1123 | ||
1124 | #ifdef CONFIG_HOTPLUG_CPU | ||
1125 | void __cpuinit cpu_uninit(void) | ||
1126 | { | ||
1127 | int cpu = raw_smp_processor_id(); | ||
1128 | cpu_clear(cpu, cpu_initialized); | ||
1129 | |||
1130 | /* lazy TLB state */ | ||
1131 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
1132 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
1133 | } | ||
1134 | #endif | ||
1135 | 1124 | ||
1136 | #endif | 1125 | #endif |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 0ed5f939b905..eee32b43fee3 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -52,6 +52,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
52 | memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, | 52 | memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, |
53 | (mincount - oldsize) * LDT_ENTRY_SIZE); | 53 | (mincount - oldsize) * LDT_ENTRY_SIZE); |
54 | 54 | ||
55 | paravirt_alloc_ldt(newldt, mincount); | ||
56 | |||
55 | #ifdef CONFIG_X86_64 | 57 | #ifdef CONFIG_X86_64 |
56 | /* CHECKME: Do we really need this ? */ | 58 | /* CHECKME: Do we really need this ? */ |
57 | wmb(); | 59 | wmb(); |
@@ -74,6 +76,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
74 | #endif | 76 | #endif |
75 | } | 77 | } |
76 | if (oldsize) { | 78 | if (oldsize) { |
79 | paravirt_free_ldt(oldldt, oldsize); | ||
77 | if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) | 80 | if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) |
78 | vfree(oldldt); | 81 | vfree(oldldt); |
79 | else | 82 | else |
@@ -85,10 +88,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
85 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | 88 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) |
86 | { | 89 | { |
87 | int err = alloc_ldt(new, old->size, 0); | 90 | int err = alloc_ldt(new, old->size, 0); |
91 | int i; | ||
88 | 92 | ||
89 | if (err < 0) | 93 | if (err < 0) |
90 | return err; | 94 | return err; |
91 | memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); | 95 | |
96 | for(i = 0; i < old->size; i++) | ||
97 | write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); | ||
92 | return 0; | 98 | return 0; |
93 | } | 99 | } |
94 | 100 | ||
@@ -125,6 +131,7 @@ void destroy_context(struct mm_struct *mm) | |||
125 | if (mm == current->active_mm) | 131 | if (mm == current->active_mm) |
126 | clear_LDT(); | 132 | clear_LDT(); |
127 | #endif | 133 | #endif |
134 | paravirt_free_ldt(mm->context.ldt, mm->context.size); | ||
128 | if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) | 135 | if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) |
129 | vfree(mm->context.ldt); | 136 | vfree(mm->context.ldt); |
130 | else | 137 | else |
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c new file mode 100644 index 000000000000..0e9f1982b1dd --- /dev/null +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * Split spinlock implementation out into its own file, so it can be | ||
3 | * compiled in a FTRACE-compatible way. | ||
4 | */ | ||
5 | #include <linux/spinlock.h> | ||
6 | #include <linux/module.h> | ||
7 | |||
8 | #include <asm/paravirt.h> | ||
9 | |||
10 | static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) | ||
11 | { | ||
12 | __raw_spin_lock(lock); | ||
13 | } | ||
14 | |||
15 | struct pv_lock_ops pv_lock_ops = { | ||
16 | #ifdef CONFIG_SMP | ||
17 | .spin_is_locked = __ticket_spin_is_locked, | ||
18 | .spin_is_contended = __ticket_spin_is_contended, | ||
19 | |||
20 | .spin_lock = __ticket_spin_lock, | ||
21 | .spin_lock_flags = default_spin_lock_flags, | ||
22 | .spin_trylock = __ticket_spin_trylock, | ||
23 | .spin_unlock = __ticket_spin_unlock, | ||
24 | #endif | ||
25 | }; | ||
26 | EXPORT_SYMBOL(pv_lock_ops); | ||
27 | |||
28 | void __init paravirt_use_bytelocks(void) | ||
29 | { | ||
30 | #ifdef CONFIG_SMP | ||
31 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
32 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
33 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
34 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
35 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
36 | #endif | ||
37 | } | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 6b0bb73998dd..e4c8fb608873 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |||
268 | return __get_cpu_var(paravirt_lazy_mode); | 268 | return __get_cpu_var(paravirt_lazy_mode); |
269 | } | 269 | } |
270 | 270 | ||
271 | void __init paravirt_use_bytelocks(void) | ||
272 | { | ||
273 | #ifdef CONFIG_SMP | ||
274 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
275 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
276 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
277 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
278 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
279 | #endif | ||
280 | } | ||
281 | |||
282 | struct pv_info pv_info = { | 271 | struct pv_info pv_info = { |
283 | .name = "bare hardware", | 272 | .name = "bare hardware", |
284 | .paravirt_enabled = 0, | 273 | .paravirt_enabled = 0, |
@@ -349,6 +338,10 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
349 | .write_ldt_entry = native_write_ldt_entry, | 338 | .write_ldt_entry = native_write_ldt_entry, |
350 | .write_gdt_entry = native_write_gdt_entry, | 339 | .write_gdt_entry = native_write_gdt_entry, |
351 | .write_idt_entry = native_write_idt_entry, | 340 | .write_idt_entry = native_write_idt_entry, |
341 | |||
342 | .alloc_ldt = paravirt_nop, | ||
343 | .free_ldt = paravirt_nop, | ||
344 | |||
352 | .load_sp0 = native_load_sp0, | 345 | .load_sp0 = native_load_sp0, |
353 | 346 | ||
354 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) | 347 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
@@ -460,18 +453,6 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
460 | .set_fixmap = native_set_fixmap, | 453 | .set_fixmap = native_set_fixmap, |
461 | }; | 454 | }; |
462 | 455 | ||
463 | struct pv_lock_ops pv_lock_ops = { | ||
464 | #ifdef CONFIG_SMP | ||
465 | .spin_is_locked = __ticket_spin_is_locked, | ||
466 | .spin_is_contended = __ticket_spin_is_contended, | ||
467 | |||
468 | .spin_lock = __ticket_spin_lock, | ||
469 | .spin_trylock = __ticket_spin_trylock, | ||
470 | .spin_unlock = __ticket_spin_unlock, | ||
471 | #endif | ||
472 | }; | ||
473 | EXPORT_SYMBOL(pv_lock_ops); | ||
474 | |||
475 | EXPORT_SYMBOL_GPL(pv_time_ops); | 456 | EXPORT_SYMBOL_GPL(pv_time_ops); |
476 | EXPORT_SYMBOL (pv_cpu_ops); | 457 | EXPORT_SYMBOL (pv_cpu_ops); |
477 | EXPORT_SYMBOL (pv_mmu_ops); | 458 | EXPORT_SYMBOL (pv_mmu_ops); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 205188db9626..922c14058f97 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -76,47 +76,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk) | |||
76 | return ((unsigned long *)tsk->thread.sp)[3]; | 76 | return ((unsigned long *)tsk->thread.sp)[3]; |
77 | } | 77 | } |
78 | 78 | ||
79 | #ifdef CONFIG_HOTPLUG_CPU | 79 | #ifndef CONFIG_SMP |
80 | #include <asm/nmi.h> | ||
81 | |||
82 | static void cpu_exit_clear(void) | ||
83 | { | ||
84 | int cpu = raw_smp_processor_id(); | ||
85 | |||
86 | idle_task_exit(); | ||
87 | |||
88 | cpu_uninit(); | ||
89 | irq_ctx_exit(cpu); | ||
90 | |||
91 | cpu_clear(cpu, cpu_callout_map); | ||
92 | cpu_clear(cpu, cpu_callin_map); | ||
93 | |||
94 | numa_remove_cpu(cpu); | ||
95 | c1e_remove_cpu(cpu); | ||
96 | } | ||
97 | |||
98 | /* We don't actually take CPU down, just spin without interrupts. */ | ||
99 | static inline void play_dead(void) | ||
100 | { | ||
101 | /* This must be done before dead CPU ack */ | ||
102 | cpu_exit_clear(); | ||
103 | mb(); | ||
104 | /* Ack it */ | ||
105 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
106 | |||
107 | /* | ||
108 | * With physical CPU hotplug, we should halt the cpu | ||
109 | */ | ||
110 | local_irq_disable(); | ||
111 | /* mask all interrupts, flush any and all caches, and halt */ | ||
112 | wbinvd_halt(); | ||
113 | } | ||
114 | #else | ||
115 | static inline void play_dead(void) | 80 | static inline void play_dead(void) |
116 | { | 81 | { |
117 | BUG(); | 82 | BUG(); |
118 | } | 83 | } |
119 | #endif /* CONFIG_HOTPLUG_CPU */ | 84 | #endif |
120 | 85 | ||
121 | /* | 86 | /* |
122 | * The idle thread. There's no useful work to be | 87 | * The idle thread. There's no useful work to be |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b6b508ea7110..ca80394ef5b8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -86,30 +86,12 @@ void exit_idle(void) | |||
86 | __exit_idle(); | 86 | __exit_idle(); |
87 | } | 87 | } |
88 | 88 | ||
89 | #ifdef CONFIG_HOTPLUG_CPU | 89 | #ifndef CONFIG_SMP |
90 | DECLARE_PER_CPU(int, cpu_state); | ||
91 | |||
92 | #include <linux/nmi.h> | ||
93 | /* We halt the CPU with physical CPU hotplug */ | ||
94 | static inline void play_dead(void) | ||
95 | { | ||
96 | idle_task_exit(); | ||
97 | c1e_remove_cpu(raw_smp_processor_id()); | ||
98 | |||
99 | mb(); | ||
100 | /* Ack it */ | ||
101 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
102 | |||
103 | local_irq_disable(); | ||
104 | /* mask all interrupts, flush any and all caches, and halt */ | ||
105 | wbinvd_halt(); | ||
106 | } | ||
107 | #else | ||
108 | static inline void play_dead(void) | 90 | static inline void play_dead(void) |
109 | { | 91 | { |
110 | BUG(); | 92 | BUG(); |
111 | } | 93 | } |
112 | #endif /* CONFIG_HOTPLUG_CPU */ | 94 | #endif |
113 | 95 | ||
114 | /* | 96 | /* |
115 | * The idle thread. There's no useful work to be | 97 | * The idle thread. There's no useful work to be |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 361b7a4c640c..18f9b19f5f8f 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
214 | struct smp_ops smp_ops = { | 214 | struct smp_ops smp_ops = { |
215 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 215 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
216 | .smp_prepare_cpus = native_smp_prepare_cpus, | 216 | .smp_prepare_cpus = native_smp_prepare_cpus, |
217 | .cpu_up = native_cpu_up, | ||
218 | .smp_cpus_done = native_smp_cpus_done, | 217 | .smp_cpus_done = native_smp_cpus_done, |
219 | 218 | ||
220 | .smp_send_stop = native_smp_send_stop, | 219 | .smp_send_stop = native_smp_send_stop, |
221 | .smp_send_reschedule = native_smp_send_reschedule, | 220 | .smp_send_reschedule = native_smp_send_reschedule, |
222 | 221 | ||
222 | .cpu_up = native_cpu_up, | ||
223 | .cpu_die = native_cpu_die, | ||
224 | .cpu_disable = native_cpu_disable, | ||
225 | .play_dead = native_play_dead, | ||
226 | |||
223 | .send_call_func_ipi = native_send_call_func_ipi, | 227 | .send_call_func_ipi = native_send_call_func_ipi, |
224 | .send_call_func_single_ipi = native_send_call_func_single_ipi, | 228 | .send_call_func_single_ipi = native_send_call_func_single_ipi, |
225 | }; | 229 | }; |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9056f7e272c0..76b6f50978f7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <asm/desc.h> | 52 | #include <asm/desc.h> |
53 | #include <asm/nmi.h> | 53 | #include <asm/nmi.h> |
54 | #include <asm/irq.h> | 54 | #include <asm/irq.h> |
55 | #include <asm/idle.h> | ||
55 | #include <asm/smp.h> | 56 | #include <asm/smp.h> |
56 | #include <asm/trampoline.h> | 57 | #include <asm/trampoline.h> |
57 | #include <asm/cpu.h> | 58 | #include <asm/cpu.h> |
@@ -1344,25 +1345,9 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1344 | numa_remove_cpu(cpu); | 1345 | numa_remove_cpu(cpu); |
1345 | } | 1346 | } |
1346 | 1347 | ||
1347 | int __cpu_disable(void) | 1348 | void cpu_disable_common(void) |
1348 | { | 1349 | { |
1349 | int cpu = smp_processor_id(); | 1350 | int cpu = smp_processor_id(); |
1350 | |||
1351 | /* | ||
1352 | * Perhaps use cpufreq to drop frequency, but that could go | ||
1353 | * into generic code. | ||
1354 | * | ||
1355 | * We won't take down the boot processor on i386 due to some | ||
1356 | * interrupts only being able to be serviced by the BSP. | ||
1357 | * Especially so if we're not using an IOAPIC -zwane | ||
1358 | */ | ||
1359 | if (cpu == 0) | ||
1360 | return -EBUSY; | ||
1361 | |||
1362 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1363 | stop_apic_nmi_watchdog(NULL); | ||
1364 | clear_local_APIC(); | ||
1365 | |||
1366 | /* | 1351 | /* |
1367 | * HACK: | 1352 | * HACK: |
1368 | * Allow any queued timer interrupts to get serviced | 1353 | * Allow any queued timer interrupts to get serviced |
@@ -1380,10 +1365,32 @@ int __cpu_disable(void) | |||
1380 | remove_cpu_from_maps(cpu); | 1365 | remove_cpu_from_maps(cpu); |
1381 | unlock_vector_lock(); | 1366 | unlock_vector_lock(); |
1382 | fixup_irqs(cpu_online_map); | 1367 | fixup_irqs(cpu_online_map); |
1368 | } | ||
1369 | |||
1370 | int native_cpu_disable(void) | ||
1371 | { | ||
1372 | int cpu = smp_processor_id(); | ||
1373 | |||
1374 | /* | ||
1375 | * Perhaps use cpufreq to drop frequency, but that could go | ||
1376 | * into generic code. | ||
1377 | * | ||
1378 | * We won't take down the boot processor on i386 due to some | ||
1379 | * interrupts only being able to be serviced by the BSP. | ||
1380 | * Especially so if we're not using an IOAPIC -zwane | ||
1381 | */ | ||
1382 | if (cpu == 0) | ||
1383 | return -EBUSY; | ||
1384 | |||
1385 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1386 | stop_apic_nmi_watchdog(NULL); | ||
1387 | clear_local_APIC(); | ||
1388 | |||
1389 | cpu_disable_common(); | ||
1383 | return 0; | 1390 | return 0; |
1384 | } | 1391 | } |
1385 | 1392 | ||
1386 | void __cpu_die(unsigned int cpu) | 1393 | void native_cpu_die(unsigned int cpu) |
1387 | { | 1394 | { |
1388 | /* We don't do anything here: idle task is faking death itself. */ | 1395 | /* We don't do anything here: idle task is faking death itself. */ |
1389 | unsigned int i; | 1396 | unsigned int i; |
@@ -1400,15 +1407,45 @@ void __cpu_die(unsigned int cpu) | |||
1400 | } | 1407 | } |
1401 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1408 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
1402 | } | 1409 | } |
1410 | |||
1411 | void play_dead_common(void) | ||
1412 | { | ||
1413 | idle_task_exit(); | ||
1414 | reset_lazy_tlbstate(); | ||
1415 | irq_ctx_exit(raw_smp_processor_id()); | ||
1416 | c1e_remove_cpu(raw_smp_processor_id()); | ||
1417 | |||
1418 | mb(); | ||
1419 | /* Ack it */ | ||
1420 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
1421 | |||
1422 | /* | ||
1423 | * With physical CPU hotplug, we should halt the cpu | ||
1424 | */ | ||
1425 | local_irq_disable(); | ||
1426 | } | ||
1427 | |||
1428 | void native_play_dead(void) | ||
1429 | { | ||
1430 | play_dead_common(); | ||
1431 | wbinvd_halt(); | ||
1432 | } | ||
1433 | |||
1403 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1434 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
1404 | int __cpu_disable(void) | 1435 | int native_cpu_disable(void) |
1405 | { | 1436 | { |
1406 | return -ENOSYS; | 1437 | return -ENOSYS; |
1407 | } | 1438 | } |
1408 | 1439 | ||
1409 | void __cpu_die(unsigned int cpu) | 1440 | void native_cpu_die(unsigned int cpu) |
1410 | { | 1441 | { |
1411 | /* We said "no" in __cpu_disable */ | 1442 | /* We said "no" in __cpu_disable */ |
1412 | BUG(); | 1443 | BUG(); |
1413 | } | 1444 | } |
1445 | |||
1446 | void native_play_dead(void) | ||
1447 | { | ||
1448 | BUG(); | ||
1449 | } | ||
1450 | |||
1414 | #endif | 1451 | #endif |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index fec1ecedc9b7..e00534b33534 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c | |||
@@ -241,3 +241,11 @@ void flush_tlb_all(void) | |||
241 | on_each_cpu(do_flush_tlb_all, NULL, 1); | 241 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
242 | } | 242 | } |
243 | 243 | ||
244 | void reset_lazy_tlbstate(void) | ||
245 | { | ||
246 | int cpu = raw_smp_processor_id(); | ||
247 | |||
248 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
249 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
250 | } | ||
251 | |||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8f92cac4e6db..a742d753d5b0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -914,15 +914,15 @@ LIST_HEAD(pgd_list); | |||
914 | 914 | ||
915 | void vmalloc_sync_all(void) | 915 | void vmalloc_sync_all(void) |
916 | { | 916 | { |
917 | #ifdef CONFIG_X86_32 | ||
918 | unsigned long start = VMALLOC_START & PGDIR_MASK; | ||
919 | unsigned long address; | 917 | unsigned long address; |
920 | 918 | ||
919 | #ifdef CONFIG_X86_32 | ||
921 | if (SHARED_KERNEL_PMD) | 920 | if (SHARED_KERNEL_PMD) |
922 | return; | 921 | return; |
923 | 922 | ||
924 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); | 923 | for (address = VMALLOC_START & PMD_MASK; |
925 | for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { | 924 | address >= TASK_SIZE && address < FIXADDR_TOP; |
925 | address += PMD_SIZE) { | ||
926 | unsigned long flags; | 926 | unsigned long flags; |
927 | struct page *page; | 927 | struct page *page; |
928 | 928 | ||
@@ -935,10 +935,8 @@ void vmalloc_sync_all(void) | |||
935 | spin_unlock_irqrestore(&pgd_lock, flags); | 935 | spin_unlock_irqrestore(&pgd_lock, flags); |
936 | } | 936 | } |
937 | #else /* CONFIG_X86_64 */ | 937 | #else /* CONFIG_X86_64 */ |
938 | unsigned long start = VMALLOC_START & PGDIR_MASK; | 938 | for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; |
939 | unsigned long address; | 939 | address += PGDIR_SIZE) { |
940 | |||
941 | for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { | ||
942 | const pgd_t *pgd_ref = pgd_offset_k(address); | 940 | const pgd_t *pgd_ref = pgd_offset_k(address); |
943 | unsigned long flags; | 941 | unsigned long flags; |
944 | struct page *page; | 942 | struct page *page; |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 3815e425f470..87b9ab166423 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -26,5 +26,13 @@ config XEN_MAX_DOMAIN_MEMORY | |||
26 | 26 | ||
27 | config XEN_SAVE_RESTORE | 27 | config XEN_SAVE_RESTORE |
28 | bool | 28 | bool |
29 | depends on PM | 29 | depends on XEN && PM |
30 | default y \ No newline at end of file | 30 | default y |
31 | |||
32 | config XEN_DEBUG_FS | ||
33 | bool "Enable Xen debug and tuning parameters in debugfs" | ||
34 | depends on XEN && DEBUG_FS | ||
35 | default n | ||
36 | help | ||
37 | Enable statistics output and various tuning options in debugfs. | ||
38 | Enabling this option may incur a significant performance overhead. | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 59c1e539aed2..313947940a1a 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -1,4 +1,12 @@ | |||
1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ | 1 | ifdef CONFIG_FTRACE |
2 | # Do not profile debug and lowlevel utilities | ||
3 | CFLAGS_REMOVE_spinlock.o = -pg | ||
4 | CFLAGS_REMOVE_time.o = -pg | ||
5 | CFLAGS_REMOVE_irq.o = -pg | ||
6 | endif | ||
7 | |||
8 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | ||
2 | time.o xen-asm_$(BITS).o grant-table.o suspend.o | 9 | time.o xen-asm_$(BITS).o grant-table.o suspend.o |
3 | 10 | ||
4 | obj-$(CONFIG_SMP) += smp.o | 11 | obj-$(CONFIG_SMP) += smp.o spinlock.o |
12 | obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o \ No newline at end of file | ||
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c new file mode 100644 index 000000000000..b53225d2cac3 --- /dev/null +++ b/arch/x86/xen/debugfs.c | |||
@@ -0,0 +1,123 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/debugfs.h> | ||
3 | #include <linux/module.h> | ||
4 | |||
5 | #include "debugfs.h" | ||
6 | |||
7 | static struct dentry *d_xen_debug; | ||
8 | |||
9 | struct dentry * __init xen_init_debugfs(void) | ||
10 | { | ||
11 | if (!d_xen_debug) { | ||
12 | d_xen_debug = debugfs_create_dir("xen", NULL); | ||
13 | |||
14 | if (!d_xen_debug) | ||
15 | pr_warning("Could not create 'xen' debugfs directory\n"); | ||
16 | } | ||
17 | |||
18 | return d_xen_debug; | ||
19 | } | ||
20 | |||
21 | struct array_data | ||
22 | { | ||
23 | void *array; | ||
24 | unsigned elements; | ||
25 | }; | ||
26 | |||
27 | static int u32_array_open(struct inode *inode, struct file *file) | ||
28 | { | ||
29 | file->private_data = NULL; | ||
30 | return nonseekable_open(inode, file); | ||
31 | } | ||
32 | |||
33 | static size_t format_array(char *buf, size_t bufsize, const char *fmt, | ||
34 | u32 *array, unsigned array_size) | ||
35 | { | ||
36 | size_t ret = 0; | ||
37 | unsigned i; | ||
38 | |||
39 | for(i = 0; i < array_size; i++) { | ||
40 | size_t len; | ||
41 | |||
42 | len = snprintf(buf, bufsize, fmt, array[i]); | ||
43 | len++; /* ' ' or '\n' */ | ||
44 | ret += len; | ||
45 | |||
46 | if (buf) { | ||
47 | buf += len; | ||
48 | bufsize -= len; | ||
49 | buf[-1] = (i == array_size-1) ? '\n' : ' '; | ||
50 | } | ||
51 | } | ||
52 | |||
53 | ret++; /* \0 */ | ||
54 | if (buf) | ||
55 | *buf = '\0'; | ||
56 | |||
57 | return ret; | ||
58 | } | ||
59 | |||
60 | static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size) | ||
61 | { | ||
62 | size_t len = format_array(NULL, 0, fmt, array, array_size); | ||
63 | char *ret; | ||
64 | |||
65 | ret = kmalloc(len, GFP_KERNEL); | ||
66 | if (ret == NULL) | ||
67 | return NULL; | ||
68 | |||
69 | format_array(ret, len, fmt, array, array_size); | ||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, | ||
74 | loff_t *ppos) | ||
75 | { | ||
76 | struct inode *inode = file->f_path.dentry->d_inode; | ||
77 | struct array_data *data = inode->i_private; | ||
78 | size_t size; | ||
79 | |||
80 | if (*ppos == 0) { | ||
81 | if (file->private_data) { | ||
82 | kfree(file->private_data); | ||
83 | file->private_data = NULL; | ||
84 | } | ||
85 | |||
86 | file->private_data = format_array_alloc("%u", data->array, data->elements); | ||
87 | } | ||
88 | |||
89 | size = 0; | ||
90 | if (file->private_data) | ||
91 | size = strlen(file->private_data); | ||
92 | |||
93 | return simple_read_from_buffer(buf, len, ppos, file->private_data, size); | ||
94 | } | ||
95 | |||
96 | static int xen_array_release(struct inode *inode, struct file *file) | ||
97 | { | ||
98 | kfree(file->private_data); | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static struct file_operations u32_array_fops = { | ||
104 | .owner = THIS_MODULE, | ||
105 | .open = u32_array_open, | ||
106 | .release= xen_array_release, | ||
107 | .read = u32_array_read, | ||
108 | }; | ||
109 | |||
110 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | ||
111 | struct dentry *parent, | ||
112 | u32 *array, unsigned elements) | ||
113 | { | ||
114 | struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); | ||
115 | |||
116 | if (data == NULL) | ||
117 | return NULL; | ||
118 | |||
119 | data->array = array; | ||
120 | data->elements = elements; | ||
121 | |||
122 | return debugfs_create_file(name, mode, parent, data, &u32_array_fops); | ||
123 | } | ||
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h new file mode 100644 index 000000000000..e28132084832 --- /dev/null +++ b/arch/x86/xen/debugfs.h | |||
@@ -0,0 +1,10 @@ | |||
1 | #ifndef _XEN_DEBUGFS_H | ||
2 | #define _XEN_DEBUGFS_H | ||
3 | |||
4 | struct dentry * __init xen_init_debugfs(void); | ||
5 | |||
6 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | ||
7 | struct dentry *parent, | ||
8 | u32 *array, unsigned elements); | ||
9 | |||
10 | #endif /* _XEN_DEBUGFS_H */ | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a27d562a9744..0013a729b41d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <xen/interface/xen.h> | 30 | #include <xen/interface/xen.h> |
31 | #include <xen/interface/physdev.h> | 31 | #include <xen/interface/physdev.h> |
32 | #include <xen/interface/vcpu.h> | 32 | #include <xen/interface/vcpu.h> |
33 | #include <xen/interface/sched.h> | ||
34 | #include <xen/features.h> | 33 | #include <xen/features.h> |
35 | #include <xen/page.h> | 34 | #include <xen/page.h> |
36 | #include <xen/hvc-console.h> | 35 | #include <xen/hvc-console.h> |
@@ -58,6 +57,9 @@ EXPORT_SYMBOL_GPL(hypercall_page); | |||
58 | DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); | 57 | DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); |
59 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); | 58 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); |
60 | 59 | ||
60 | enum xen_domain_type xen_domain_type = XEN_NATIVE; | ||
61 | EXPORT_SYMBOL_GPL(xen_domain_type); | ||
62 | |||
61 | /* | 63 | /* |
62 | * Identity map, in addition to plain kernel map. This needs to be | 64 | * Identity map, in addition to plain kernel map. This needs to be |
63 | * large enough to allocate page table pages to allocate the rest. | 65 | * large enough to allocate page table pages to allocate the rest. |
@@ -111,7 +113,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | |||
111 | * | 113 | * |
112 | * 0: not available, 1: available | 114 | * 0: not available, 1: available |
113 | */ | 115 | */ |
114 | static int have_vcpu_info_placement = 1; | 116 | static int have_vcpu_info_placement = |
117 | #ifdef CONFIG_X86_32 | ||
118 | 1 | ||
119 | #else | ||
120 | 0 | ||
121 | #endif | ||
122 | ; | ||
123 | |||
115 | 124 | ||
116 | static void xen_vcpu_setup(int cpu) | 125 | static void xen_vcpu_setup(int cpu) |
117 | { | 126 | { |
@@ -227,103 +236,68 @@ static unsigned long xen_get_debugreg(int reg) | |||
227 | return HYPERVISOR_get_debugreg(reg); | 236 | return HYPERVISOR_get_debugreg(reg); |
228 | } | 237 | } |
229 | 238 | ||
230 | static unsigned long xen_save_fl(void) | 239 | static void xen_leave_lazy(void) |
231 | { | 240 | { |
232 | struct vcpu_info *vcpu; | 241 | paravirt_leave_lazy(paravirt_get_lazy_mode()); |
233 | unsigned long flags; | 242 | xen_mc_flush(); |
234 | |||
235 | vcpu = x86_read_percpu(xen_vcpu); | ||
236 | |||
237 | /* flag has opposite sense of mask */ | ||
238 | flags = !vcpu->evtchn_upcall_mask; | ||
239 | |||
240 | /* convert to IF type flag | ||
241 | -0 -> 0x00000000 | ||
242 | -1 -> 0xffffffff | ||
243 | */ | ||
244 | return (-flags) & X86_EFLAGS_IF; | ||
245 | } | 243 | } |
246 | 244 | ||
247 | static void xen_restore_fl(unsigned long flags) | 245 | static unsigned long xen_store_tr(void) |
248 | { | 246 | { |
249 | struct vcpu_info *vcpu; | 247 | return 0; |
250 | |||
251 | /* convert from IF type flag */ | ||
252 | flags = !(flags & X86_EFLAGS_IF); | ||
253 | |||
254 | /* There's a one instruction preempt window here. We need to | ||
255 | make sure we're don't switch CPUs between getting the vcpu | ||
256 | pointer and updating the mask. */ | ||
257 | preempt_disable(); | ||
258 | vcpu = x86_read_percpu(xen_vcpu); | ||
259 | vcpu->evtchn_upcall_mask = flags; | ||
260 | preempt_enable_no_resched(); | ||
261 | |||
262 | /* Doesn't matter if we get preempted here, because any | ||
263 | pending event will get dealt with anyway. */ | ||
264 | |||
265 | if (flags == 0) { | ||
266 | preempt_check_resched(); | ||
267 | barrier(); /* unmask then check (avoid races) */ | ||
268 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
269 | force_evtchn_callback(); | ||
270 | } | ||
271 | } | 248 | } |
272 | 249 | ||
273 | static void xen_irq_disable(void) | 250 | /* |
251 | * Set the page permissions for a particular virtual address. If the | ||
252 | * address is a vmalloc mapping (or other non-linear mapping), then | ||
253 | * find the linear mapping of the page and also set its protections to | ||
254 | * match. | ||
255 | */ | ||
256 | static void set_aliased_prot(void *v, pgprot_t prot) | ||
274 | { | 257 | { |
275 | /* There's a one instruction preempt window here. We need to | 258 | int level; |
276 | make sure we're don't switch CPUs between getting the vcpu | 259 | pte_t *ptep; |
277 | pointer and updating the mask. */ | 260 | pte_t pte; |
278 | preempt_disable(); | 261 | unsigned long pfn; |
279 | x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; | 262 | struct page *page; |
280 | preempt_enable_no_resched(); | ||
281 | } | ||
282 | 263 | ||
283 | static void xen_irq_enable(void) | 264 | ptep = lookup_address((unsigned long)v, &level); |
284 | { | 265 | BUG_ON(ptep == NULL); |
285 | struct vcpu_info *vcpu; | ||
286 | 266 | ||
287 | /* We don't need to worry about being preempted here, since | 267 | pfn = pte_pfn(*ptep); |
288 | either a) interrupts are disabled, so no preemption, or b) | 268 | page = pfn_to_page(pfn); |
289 | the caller is confused and is trying to re-enable interrupts | ||
290 | on an indeterminate processor. */ | ||
291 | 269 | ||
292 | vcpu = x86_read_percpu(xen_vcpu); | 270 | pte = pfn_pte(pfn, prot); |
293 | vcpu->evtchn_upcall_mask = 0; | ||
294 | 271 | ||
295 | /* Doesn't matter if we get preempted here, because any | 272 | if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) |
296 | pending event will get dealt with anyway. */ | 273 | BUG(); |
297 | 274 | ||
298 | barrier(); /* unmask then check (avoid races) */ | 275 | if (!PageHighMem(page)) { |
299 | if (unlikely(vcpu->evtchn_upcall_pending)) | 276 | void *av = __va(PFN_PHYS(pfn)); |
300 | force_evtchn_callback(); | ||
301 | } | ||
302 | 277 | ||
303 | static void xen_safe_halt(void) | 278 | if (av != v) |
304 | { | 279 | if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) |
305 | /* Blocking includes an implicit local_irq_enable(). */ | 280 | BUG(); |
306 | if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) | 281 | } else |
307 | BUG(); | 282 | kmap_flush_unused(); |
308 | } | 283 | } |
309 | 284 | ||
310 | static void xen_halt(void) | 285 | static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) |
311 | { | 286 | { |
312 | if (irqs_disabled()) | 287 | const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; |
313 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | 288 | int i; |
314 | else | ||
315 | xen_safe_halt(); | ||
316 | } | ||
317 | 289 | ||
318 | static void xen_leave_lazy(void) | 290 | for(i = 0; i < entries; i += entries_per_page) |
319 | { | 291 | set_aliased_prot(ldt + i, PAGE_KERNEL_RO); |
320 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
321 | xen_mc_flush(); | ||
322 | } | 292 | } |
323 | 293 | ||
324 | static unsigned long xen_store_tr(void) | 294 | static void xen_free_ldt(struct desc_struct *ldt, unsigned entries) |
325 | { | 295 | { |
326 | return 0; | 296 | const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; |
297 | int i; | ||
298 | |||
299 | for(i = 0; i < entries; i += entries_per_page) | ||
300 | set_aliased_prot(ldt + i, PAGE_KERNEL); | ||
327 | } | 301 | } |
328 | 302 | ||
329 | static void xen_set_ldt(const void *addr, unsigned entries) | 303 | static void xen_set_ldt(const void *addr, unsigned entries) |
@@ -426,8 +400,7 @@ static void xen_load_gs_index(unsigned int idx) | |||
426 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | 400 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, |
427 | const void *ptr) | 401 | const void *ptr) |
428 | { | 402 | { |
429 | unsigned long lp = (unsigned long)&dt[entrynum]; | 403 | xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); |
430 | xmaddr_t mach_lp = virt_to_machine(lp); | ||
431 | u64 entry = *(u64 *)ptr; | 404 | u64 entry = *(u64 *)ptr; |
432 | 405 | ||
433 | preempt_disable(); | 406 | preempt_disable(); |
@@ -560,7 +533,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | |||
560 | } | 533 | } |
561 | 534 | ||
562 | static void xen_load_sp0(struct tss_struct *tss, | 535 | static void xen_load_sp0(struct tss_struct *tss, |
563 | struct thread_struct *thread) | 536 | struct thread_struct *thread) |
564 | { | 537 | { |
565 | struct multicall_space mcs = xen_mc_entry(0); | 538 | struct multicall_space mcs = xen_mc_entry(0); |
566 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); | 539 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); |
@@ -835,6 +808,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | |||
835 | ret = -EFAULT; | 808 | ret = -EFAULT; |
836 | break; | 809 | break; |
837 | #endif | 810 | #endif |
811 | |||
812 | case MSR_STAR: | ||
813 | case MSR_CSTAR: | ||
814 | case MSR_LSTAR: | ||
815 | case MSR_SYSCALL_MASK: | ||
816 | case MSR_IA32_SYSENTER_CS: | ||
817 | case MSR_IA32_SYSENTER_ESP: | ||
818 | case MSR_IA32_SYSENTER_EIP: | ||
819 | /* Fast syscall setup is all done in hypercalls, so | ||
820 | these are all ignored. Stub them out here to stop | ||
821 | Xen console noise. */ | ||
822 | break; | ||
823 | |||
838 | default: | 824 | default: |
839 | ret = native_write_msr_safe(msr, low, high); | 825 | ret = native_write_msr_safe(msr, low, high); |
840 | } | 826 | } |
@@ -878,8 +864,8 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l | |||
878 | SetPagePinned(page); | 864 | SetPagePinned(page); |
879 | 865 | ||
880 | if (!PageHighMem(page)) { | 866 | if (!PageHighMem(page)) { |
881 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | 867 | make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); |
882 | if (level == PT_PTE) | 868 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) |
883 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); | 869 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); |
884 | } else | 870 | } else |
885 | /* make sure there are no stray mappings of | 871 | /* make sure there are no stray mappings of |
@@ -947,7 +933,7 @@ static void xen_release_ptpage(unsigned long pfn, unsigned level) | |||
947 | 933 | ||
948 | if (PagePinned(page)) { | 934 | if (PagePinned(page)) { |
949 | if (!PageHighMem(page)) { | 935 | if (!PageHighMem(page)) { |
950 | if (level == PT_PTE) | 936 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) |
951 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | 937 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); |
952 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 938 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
953 | } | 939 | } |
@@ -994,6 +980,7 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) | |||
994 | } | 980 | } |
995 | #endif | 981 | #endif |
996 | 982 | ||
983 | #ifdef CONFIG_X86_32 | ||
997 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | 984 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) |
998 | { | 985 | { |
999 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ | 986 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ |
@@ -1012,6 +999,7 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | |||
1012 | 999 | ||
1013 | xen_set_pte(ptep, pte); | 1000 | xen_set_pte(ptep, pte); |
1014 | } | 1001 | } |
1002 | #endif | ||
1015 | 1003 | ||
1016 | static __init void xen_pagetable_setup_start(pgd_t *base) | 1004 | static __init void xen_pagetable_setup_start(pgd_t *base) |
1017 | { | 1005 | { |
@@ -1078,7 +1066,6 @@ void xen_setup_vcpu_info_placement(void) | |||
1078 | 1066 | ||
1079 | /* xen_vcpu_setup managed to place the vcpu_info within the | 1067 | /* xen_vcpu_setup managed to place the vcpu_info within the |
1080 | percpu area for all cpus, so make use of it */ | 1068 | percpu area for all cpus, so make use of it */ |
1081 | #ifdef CONFIG_X86_32 | ||
1082 | if (have_vcpu_info_placement) { | 1069 | if (have_vcpu_info_placement) { |
1083 | printk(KERN_INFO "Xen: using vcpu_info placement\n"); | 1070 | printk(KERN_INFO "Xen: using vcpu_info placement\n"); |
1084 | 1071 | ||
@@ -1088,7 +1075,6 @@ void xen_setup_vcpu_info_placement(void) | |||
1088 | pv_irq_ops.irq_enable = xen_irq_enable_direct; | 1075 | pv_irq_ops.irq_enable = xen_irq_enable_direct; |
1089 | pv_mmu_ops.read_cr2 = xen_read_cr2_direct; | 1076 | pv_mmu_ops.read_cr2 = xen_read_cr2_direct; |
1090 | } | 1077 | } |
1091 | #endif | ||
1092 | } | 1078 | } |
1093 | 1079 | ||
1094 | static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, | 1080 | static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, |
@@ -1109,12 +1095,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, | |||
1109 | goto patch_site | 1095 | goto patch_site |
1110 | 1096 | ||
1111 | switch (type) { | 1097 | switch (type) { |
1112 | #ifdef CONFIG_X86_32 | ||
1113 | SITE(pv_irq_ops, irq_enable); | 1098 | SITE(pv_irq_ops, irq_enable); |
1114 | SITE(pv_irq_ops, irq_disable); | 1099 | SITE(pv_irq_ops, irq_disable); |
1115 | SITE(pv_irq_ops, save_fl); | 1100 | SITE(pv_irq_ops, save_fl); |
1116 | SITE(pv_irq_ops, restore_fl); | 1101 | SITE(pv_irq_ops, restore_fl); |
1117 | #endif /* CONFIG_X86_32 */ | ||
1118 | #undef SITE | 1102 | #undef SITE |
1119 | 1103 | ||
1120 | patch_site: | 1104 | patch_site: |
@@ -1252,6 +1236,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1252 | .load_gs_index = xen_load_gs_index, | 1236 | .load_gs_index = xen_load_gs_index, |
1253 | #endif | 1237 | #endif |
1254 | 1238 | ||
1239 | .alloc_ldt = xen_alloc_ldt, | ||
1240 | .free_ldt = xen_free_ldt, | ||
1241 | |||
1255 | .store_gdt = native_store_gdt, | 1242 | .store_gdt = native_store_gdt, |
1256 | .store_idt = native_store_idt, | 1243 | .store_idt = native_store_idt, |
1257 | .store_tr = xen_store_tr, | 1244 | .store_tr = xen_store_tr, |
@@ -1273,36 +1260,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1273 | }, | 1260 | }, |
1274 | }; | 1261 | }; |
1275 | 1262 | ||
1276 | static void __init __xen_init_IRQ(void) | ||
1277 | { | ||
1278 | #ifdef CONFIG_X86_64 | ||
1279 | int i; | ||
1280 | |||
1281 | /* Create identity vector->irq map */ | ||
1282 | for(i = 0; i < NR_VECTORS; i++) { | ||
1283 | int cpu; | ||
1284 | |||
1285 | for_each_possible_cpu(cpu) | ||
1286 | per_cpu(vector_irq, cpu)[i] = i; | ||
1287 | } | ||
1288 | #endif /* CONFIG_X86_64 */ | ||
1289 | |||
1290 | xen_init_IRQ(); | ||
1291 | } | ||
1292 | |||
1293 | static const struct pv_irq_ops xen_irq_ops __initdata = { | ||
1294 | .init_IRQ = __xen_init_IRQ, | ||
1295 | .save_fl = xen_save_fl, | ||
1296 | .restore_fl = xen_restore_fl, | ||
1297 | .irq_disable = xen_irq_disable, | ||
1298 | .irq_enable = xen_irq_enable, | ||
1299 | .safe_halt = xen_safe_halt, | ||
1300 | .halt = xen_halt, | ||
1301 | #ifdef CONFIG_X86_64 | ||
1302 | .adjust_exception_frame = xen_adjust_exception_frame, | ||
1303 | #endif | ||
1304 | }; | ||
1305 | |||
1306 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 1263 | static const struct pv_apic_ops xen_apic_ops __initdata = { |
1307 | #ifdef CONFIG_X86_LOCAL_APIC | 1264 | #ifdef CONFIG_X86_LOCAL_APIC |
1308 | .setup_boot_clock = paravirt_nop, | 1265 | .setup_boot_clock = paravirt_nop, |
@@ -1443,7 +1400,7 @@ static void __init xen_reserve_top(void) | |||
1443 | if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) | 1400 | if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) |
1444 | top = pp.virt_start; | 1401 | top = pp.virt_start; |
1445 | 1402 | ||
1446 | reserve_top_address(-top + 2 * PAGE_SIZE); | 1403 | reserve_top_address(-top); |
1447 | #endif /* CONFIG_X86_32 */ | 1404 | #endif /* CONFIG_X86_32 */ |
1448 | } | 1405 | } |
1449 | 1406 | ||
@@ -1477,48 +1434,11 @@ static void *m2v(phys_addr_t maddr) | |||
1477 | return __ka(m2p(maddr)); | 1434 | return __ka(m2p(maddr)); |
1478 | } | 1435 | } |
1479 | 1436 | ||
1480 | #ifdef CONFIG_X86_64 | ||
1481 | static void walk(pgd_t *pgd, unsigned long addr) | ||
1482 | { | ||
1483 | unsigned l4idx = pgd_index(addr); | ||
1484 | unsigned l3idx = pud_index(addr); | ||
1485 | unsigned l2idx = pmd_index(addr); | ||
1486 | unsigned l1idx = pte_index(addr); | ||
1487 | pgd_t l4; | ||
1488 | pud_t l3; | ||
1489 | pmd_t l2; | ||
1490 | pte_t l1; | ||
1491 | |||
1492 | xen_raw_printk("walk %p, %lx -> %d %d %d %d\n", | ||
1493 | pgd, addr, l4idx, l3idx, l2idx, l1idx); | ||
1494 | |||
1495 | l4 = pgd[l4idx]; | ||
1496 | xen_raw_printk(" l4: %016lx\n", l4.pgd); | ||
1497 | xen_raw_printk(" %016lx\n", pgd_val(l4)); | ||
1498 | |||
1499 | l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx]; | ||
1500 | xen_raw_printk(" l3: %016lx\n", l3.pud); | ||
1501 | xen_raw_printk(" %016lx\n", pud_val(l3)); | ||
1502 | |||
1503 | l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx]; | ||
1504 | xen_raw_printk(" l2: %016lx\n", l2.pmd); | ||
1505 | xen_raw_printk(" %016lx\n", pmd_val(l2)); | ||
1506 | |||
1507 | l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx]; | ||
1508 | xen_raw_printk(" l1: %016lx\n", l1.pte); | ||
1509 | xen_raw_printk(" %016lx\n", pte_val(l1)); | ||
1510 | } | ||
1511 | #endif | ||
1512 | |||
1513 | static void set_page_prot(void *addr, pgprot_t prot) | 1437 | static void set_page_prot(void *addr, pgprot_t prot) |
1514 | { | 1438 | { |
1515 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; | 1439 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; |
1516 | pte_t pte = pfn_pte(pfn, prot); | 1440 | pte_t pte = pfn_pte(pfn, prot); |
1517 | 1441 | ||
1518 | xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n", | ||
1519 | addr, pfn, get_phys_to_machine(pfn), | ||
1520 | pgprot_val(prot), pte.pte); | ||
1521 | |||
1522 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) | 1442 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) |
1523 | BUG(); | 1443 | BUG(); |
1524 | } | 1444 | } |
@@ -1694,6 +1614,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1694 | if (!xen_start_info) | 1614 | if (!xen_start_info) |
1695 | return; | 1615 | return; |
1696 | 1616 | ||
1617 | xen_domain_type = XEN_PV_DOMAIN; | ||
1618 | |||
1697 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); | 1619 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); |
1698 | 1620 | ||
1699 | xen_setup_features(); | 1621 | xen_setup_features(); |
@@ -1703,10 +1625,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1703 | pv_init_ops = xen_init_ops; | 1625 | pv_init_ops = xen_init_ops; |
1704 | pv_time_ops = xen_time_ops; | 1626 | pv_time_ops = xen_time_ops; |
1705 | pv_cpu_ops = xen_cpu_ops; | 1627 | pv_cpu_ops = xen_cpu_ops; |
1706 | pv_irq_ops = xen_irq_ops; | ||
1707 | pv_apic_ops = xen_apic_ops; | 1628 | pv_apic_ops = xen_apic_ops; |
1708 | pv_mmu_ops = xen_mmu_ops; | 1629 | pv_mmu_ops = xen_mmu_ops; |
1709 | 1630 | ||
1631 | xen_init_irq_ops(); | ||
1632 | |||
1710 | #ifdef CONFIG_X86_LOCAL_APIC | 1633 | #ifdef CONFIG_X86_LOCAL_APIC |
1711 | /* | 1634 | /* |
1712 | * set up the basic apic ops. | 1635 | * set up the basic apic ops. |
@@ -1737,7 +1660,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1737 | 1660 | ||
1738 | /* Prevent unwanted bits from being set in PTEs. */ | 1661 | /* Prevent unwanted bits from being set in PTEs. */ |
1739 | __supported_pte_mask &= ~_PAGE_GLOBAL; | 1662 | __supported_pte_mask &= ~_PAGE_GLOBAL; |
1740 | if (!is_initial_xendomain()) | 1663 | if (!xen_initial_domain()) |
1741 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | 1664 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); |
1742 | 1665 | ||
1743 | /* Don't do the full vcpu_info placement stuff until we have a | 1666 | /* Don't do the full vcpu_info placement stuff until we have a |
@@ -1772,7 +1695,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1772 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; | 1695 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; |
1773 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); | 1696 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); |
1774 | 1697 | ||
1775 | if (!is_initial_xendomain()) { | 1698 | if (!xen_initial_domain()) { |
1776 | add_preferred_console("xenboot", 0, NULL); | 1699 | add_preferred_console("xenboot", 0, NULL); |
1777 | add_preferred_console("tty", 0, NULL); | 1700 | add_preferred_console("tty", 0, NULL); |
1778 | add_preferred_console("hvc", 0, NULL); | 1701 | add_preferred_console("hvc", 0, NULL); |
@@ -1780,15 +1703,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1780 | 1703 | ||
1781 | xen_raw_console_write("about to get started...\n"); | 1704 | xen_raw_console_write("about to get started...\n"); |
1782 | 1705 | ||
1783 | #if 0 | ||
1784 | xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n", | ||
1785 | &boot_params, __pa_symbol(&boot_params), | ||
1786 | __va(__pa_symbol(&boot_params))); | ||
1787 | |||
1788 | walk(pgd, &boot_params); | ||
1789 | walk(pgd, __va(__pa(&boot_params))); | ||
1790 | #endif | ||
1791 | |||
1792 | /* Start the world */ | 1706 | /* Start the world */ |
1793 | #ifdef CONFIG_X86_32 | 1707 | #ifdef CONFIG_X86_32 |
1794 | i386_start_kernel(); | 1708 | i386_start_kernel(); |
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c new file mode 100644 index 000000000000..28b85ab8422e --- /dev/null +++ b/arch/x86/xen/irq.c | |||
@@ -0,0 +1,143 @@ | |||
1 | #include <linux/hardirq.h> | ||
2 | |||
3 | #include <xen/interface/xen.h> | ||
4 | #include <xen/interface/sched.h> | ||
5 | #include <xen/interface/vcpu.h> | ||
6 | |||
7 | #include <asm/xen/hypercall.h> | ||
8 | #include <asm/xen/hypervisor.h> | ||
9 | |||
10 | #include "xen-ops.h" | ||
11 | |||
12 | /* | ||
13 | * Force a proper event-channel callback from Xen after clearing the | ||
14 | * callback mask. We do this in a very simple manner, by making a call | ||
15 | * down into Xen. The pending flag will be checked by Xen on return. | ||
16 | */ | ||
17 | void xen_force_evtchn_callback(void) | ||
18 | { | ||
19 | (void)HYPERVISOR_xen_version(0, NULL); | ||
20 | } | ||
21 | |||
22 | static void __init __xen_init_IRQ(void) | ||
23 | { | ||
24 | #ifdef CONFIG_X86_64 | ||
25 | int i; | ||
26 | |||
27 | /* Create identity vector->irq map */ | ||
28 | for(i = 0; i < NR_VECTORS; i++) { | ||
29 | int cpu; | ||
30 | |||
31 | for_each_possible_cpu(cpu) | ||
32 | per_cpu(vector_irq, cpu)[i] = i; | ||
33 | } | ||
34 | #endif /* CONFIG_X86_64 */ | ||
35 | |||
36 | xen_init_IRQ(); | ||
37 | } | ||
38 | |||
39 | static unsigned long xen_save_fl(void) | ||
40 | { | ||
41 | struct vcpu_info *vcpu; | ||
42 | unsigned long flags; | ||
43 | |||
44 | vcpu = x86_read_percpu(xen_vcpu); | ||
45 | |||
46 | /* flag has opposite sense of mask */ | ||
47 | flags = !vcpu->evtchn_upcall_mask; | ||
48 | |||
49 | /* convert to IF type flag | ||
50 | -0 -> 0x00000000 | ||
51 | -1 -> 0xffffffff | ||
52 | */ | ||
53 | return (-flags) & X86_EFLAGS_IF; | ||
54 | } | ||
55 | |||
56 | static void xen_restore_fl(unsigned long flags) | ||
57 | { | ||
58 | struct vcpu_info *vcpu; | ||
59 | |||
60 | /* convert from IF type flag */ | ||
61 | flags = !(flags & X86_EFLAGS_IF); | ||
62 | |||
63 | /* There's a one instruction preempt window here. We need to | ||
64 | make sure we don't switch CPUs between getting the vcpu | |
65 | pointer and updating the mask. */ | ||
66 | preempt_disable(); | ||
67 | vcpu = x86_read_percpu(xen_vcpu); | ||
68 | vcpu->evtchn_upcall_mask = flags; | ||
69 | preempt_enable_no_resched(); | ||
70 | |||
71 | /* Doesn't matter if we get preempted here, because any | ||
72 | pending event will get dealt with anyway. */ | ||
73 | |||
74 | if (flags == 0) { | ||
75 | preempt_check_resched(); | ||
76 | barrier(); /* unmask then check (avoid races) */ | ||
77 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
78 | xen_force_evtchn_callback(); | ||
79 | } | ||
80 | } | ||
81 | |||
82 | static void xen_irq_disable(void) | ||
83 | { | ||
84 | /* There's a one instruction preempt window here. We need to | ||
85 | make sure we don't switch CPUs between getting the vcpu | |
86 | pointer and updating the mask. */ | ||
87 | preempt_disable(); | ||
88 | x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; | ||
89 | preempt_enable_no_resched(); | ||
90 | } | ||
91 | |||
92 | static void xen_irq_enable(void) | ||
93 | { | ||
94 | struct vcpu_info *vcpu; | ||
95 | |||
96 | /* We don't need to worry about being preempted here, since | ||
97 | either a) interrupts are disabled, so no preemption, or b) | ||
98 | the caller is confused and is trying to re-enable interrupts | ||
99 | on an indeterminate processor. */ | ||
100 | |||
101 | vcpu = x86_read_percpu(xen_vcpu); | ||
102 | vcpu->evtchn_upcall_mask = 0; | ||
103 | |||
104 | /* Doesn't matter if we get preempted here, because any | ||
105 | pending event will get dealt with anyway. */ | ||
106 | |||
107 | barrier(); /* unmask then check (avoid races) */ | ||
108 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
109 | xen_force_evtchn_callback(); | ||
110 | } | ||
111 | |||
112 | static void xen_safe_halt(void) | ||
113 | { | ||
114 | /* Blocking includes an implicit local_irq_enable(). */ | ||
115 | if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) | ||
116 | BUG(); | ||
117 | } | ||
118 | |||
119 | static void xen_halt(void) | ||
120 | { | ||
121 | if (irqs_disabled()) | ||
122 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | ||
123 | else | ||
124 | xen_safe_halt(); | ||
125 | } | ||
126 | |||
127 | static const struct pv_irq_ops xen_irq_ops __initdata = { | ||
128 | .init_IRQ = __xen_init_IRQ, | ||
129 | .save_fl = xen_save_fl, | ||
130 | .restore_fl = xen_restore_fl, | ||
131 | .irq_disable = xen_irq_disable, | ||
132 | .irq_enable = xen_irq_enable, | ||
133 | .safe_halt = xen_safe_halt, | ||
134 | .halt = xen_halt, | ||
135 | #ifdef CONFIG_X86_64 | ||
136 | .adjust_exception_frame = xen_adjust_exception_frame, | ||
137 | #endif | ||
138 | }; | ||
139 | |||
140 | void __init xen_init_irq_ops() | ||
141 | { | ||
142 | pv_irq_ops = xen_irq_ops; | ||
143 | } | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index aa37469da696..ae173f6edd8b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -40,6 +40,7 @@ | |||
40 | */ | 40 | */ |
41 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | #include <linux/debugfs.h> | ||
43 | #include <linux/bug.h> | 44 | #include <linux/bug.h> |
44 | 45 | ||
45 | #include <asm/pgtable.h> | 46 | #include <asm/pgtable.h> |
@@ -57,6 +58,61 @@ | |||
57 | 58 | ||
58 | #include "multicalls.h" | 59 | #include "multicalls.h" |
59 | #include "mmu.h" | 60 | #include "mmu.h" |
61 | #include "debugfs.h" | ||
62 | |||
63 | #define MMU_UPDATE_HISTO 30 | ||
64 | |||
65 | #ifdef CONFIG_XEN_DEBUG_FS | ||
66 | |||
67 | static struct { | ||
68 | u32 pgd_update; | ||
69 | u32 pgd_update_pinned; | ||
70 | u32 pgd_update_batched; | ||
71 | |||
72 | u32 pud_update; | ||
73 | u32 pud_update_pinned; | ||
74 | u32 pud_update_batched; | ||
75 | |||
76 | u32 pmd_update; | ||
77 | u32 pmd_update_pinned; | ||
78 | u32 pmd_update_batched; | ||
79 | |||
80 | u32 pte_update; | ||
81 | u32 pte_update_pinned; | ||
82 | u32 pte_update_batched; | ||
83 | |||
84 | u32 mmu_update; | ||
85 | u32 mmu_update_extended; | ||
86 | u32 mmu_update_histo[MMU_UPDATE_HISTO]; | ||
87 | |||
88 | u32 prot_commit; | ||
89 | u32 prot_commit_batched; | ||
90 | |||
91 | u32 set_pte_at; | ||
92 | u32 set_pte_at_batched; | ||
93 | u32 set_pte_at_pinned; | ||
94 | u32 set_pte_at_current; | ||
95 | u32 set_pte_at_kernel; | ||
96 | } mmu_stats; | ||
97 | |||
98 | static u8 zero_stats; | ||
99 | |||
100 | static inline void check_zero(void) | ||
101 | { | ||
102 | if (unlikely(zero_stats)) { | ||
103 | memset(&mmu_stats, 0, sizeof(mmu_stats)); | ||
104 | zero_stats = 0; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | #define ADD_STATS(elem, val) \ | ||
109 | do { check_zero(); mmu_stats.elem += (val); } while(0) | ||
110 | |||
111 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
112 | |||
113 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | ||
114 | |||
115 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
60 | 116 | ||
61 | /* | 117 | /* |
62 | * Just beyond the highest usermode address. STACK_TOP_MAX has a | 118 | * Just beyond the highest usermode address. STACK_TOP_MAX has a |
@@ -229,25 +285,35 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
229 | } | 285 | } |
230 | 286 | ||
231 | 287 | ||
232 | static bool page_pinned(void *ptr) | 288 | static bool xen_page_pinned(void *ptr) |
233 | { | 289 | { |
234 | struct page *page = virt_to_page(ptr); | 290 | struct page *page = virt_to_page(ptr); |
235 | 291 | ||
236 | return PagePinned(page); | 292 | return PagePinned(page); |
237 | } | 293 | } |
238 | 294 | ||
239 | static void extend_mmu_update(const struct mmu_update *update) | 295 | static void xen_extend_mmu_update(const struct mmu_update *update) |
240 | { | 296 | { |
241 | struct multicall_space mcs; | 297 | struct multicall_space mcs; |
242 | struct mmu_update *u; | 298 | struct mmu_update *u; |
243 | 299 | ||
244 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); | 300 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); |
245 | 301 | ||
246 | if (mcs.mc != NULL) | 302 | if (mcs.mc != NULL) { |
303 | ADD_STATS(mmu_update_extended, 1); | ||
304 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1); | ||
305 | |||
247 | mcs.mc->args[1]++; | 306 | mcs.mc->args[1]++; |
248 | else { | 307 | |
308 | if (mcs.mc->args[1] < MMU_UPDATE_HISTO) | ||
309 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1); | ||
310 | else | ||
311 | ADD_STATS(mmu_update_histo[0], 1); | ||
312 | } else { | ||
313 | ADD_STATS(mmu_update, 1); | ||
249 | mcs = __xen_mc_entry(sizeof(*u)); | 314 | mcs = __xen_mc_entry(sizeof(*u)); |
250 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | 315 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); |
316 | ADD_STATS(mmu_update_histo[1], 1); | ||
251 | } | 317 | } |
252 | 318 | ||
253 | u = mcs.args; | 319 | u = mcs.args; |
@@ -265,7 +331,9 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
265 | /* ptr may be ioremapped for 64-bit pagetable setup */ | 331 | /* ptr may be ioremapped for 64-bit pagetable setup */ |
266 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; | 332 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; |
267 | u.val = pmd_val_ma(val); | 333 | u.val = pmd_val_ma(val); |
268 | extend_mmu_update(&u); | 334 | xen_extend_mmu_update(&u); |
335 | |||
336 | ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
269 | 337 | ||
270 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 338 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
271 | 339 | ||
@@ -274,13 +342,17 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
274 | 342 | ||
275 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | 343 | void xen_set_pmd(pmd_t *ptr, pmd_t val) |
276 | { | 344 | { |
345 | ADD_STATS(pmd_update, 1); | ||
346 | |||
277 | /* If page is not pinned, we can just update the entry | 347 | /* If page is not pinned, we can just update the entry |
278 | directly */ | 348 | directly */ |
279 | if (!page_pinned(ptr)) { | 349 | if (!xen_page_pinned(ptr)) { |
280 | *ptr = val; | 350 | *ptr = val; |
281 | return; | 351 | return; |
282 | } | 352 | } |
283 | 353 | ||
354 | ADD_STATS(pmd_update_pinned, 1); | ||
355 | |||
284 | xen_set_pmd_hyper(ptr, val); | 356 | xen_set_pmd_hyper(ptr, val); |
285 | } | 357 | } |
286 | 358 | ||
@@ -300,12 +372,18 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
300 | if (mm == &init_mm) | 372 | if (mm == &init_mm) |
301 | preempt_disable(); | 373 | preempt_disable(); |
302 | 374 | ||
375 | ADD_STATS(set_pte_at, 1); | ||
376 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); | ||
377 | ADD_STATS(set_pte_at_current, mm == current->mm); | ||
378 | ADD_STATS(set_pte_at_kernel, mm == &init_mm); | ||
379 | |||
303 | if (mm == current->mm || mm == &init_mm) { | 380 | if (mm == current->mm || mm == &init_mm) { |
304 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | 381 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { |
305 | struct multicall_space mcs; | 382 | struct multicall_space mcs; |
306 | mcs = xen_mc_entry(0); | 383 | mcs = xen_mc_entry(0); |
307 | 384 | ||
308 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); | 385 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); |
386 | ADD_STATS(set_pte_at_batched, 1); | ||
309 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 387 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
310 | goto out; | 388 | goto out; |
311 | } else | 389 | } else |
@@ -334,7 +412,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
334 | 412 | ||
335 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; | 413 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
336 | u.val = pte_val_ma(pte); | 414 | u.val = pte_val_ma(pte); |
337 | extend_mmu_update(&u); | 415 | xen_extend_mmu_update(&u); |
416 | |||
417 | ADD_STATS(prot_commit, 1); | ||
418 | ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
338 | 419 | ||
339 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 420 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
340 | } | 421 | } |
@@ -400,7 +481,9 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
400 | /* ptr may be ioremapped for 64-bit pagetable setup */ | 481 | /* ptr may be ioremapped for 64-bit pagetable setup */ |
401 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; | 482 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; |
402 | u.val = pud_val_ma(val); | 483 | u.val = pud_val_ma(val); |
403 | extend_mmu_update(&u); | 484 | xen_extend_mmu_update(&u); |
485 | |||
486 | ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
404 | 487 | ||
405 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 488 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
406 | 489 | ||
@@ -409,18 +492,26 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
409 | 492 | ||
410 | void xen_set_pud(pud_t *ptr, pud_t val) | 493 | void xen_set_pud(pud_t *ptr, pud_t val) |
411 | { | 494 | { |
495 | ADD_STATS(pud_update, 1); | ||
496 | |||
412 | /* If page is not pinned, we can just update the entry | 497 | /* If page is not pinned, we can just update the entry |
413 | directly */ | 498 | directly */ |
414 | if (!page_pinned(ptr)) { | 499 | if (!xen_page_pinned(ptr)) { |
415 | *ptr = val; | 500 | *ptr = val; |
416 | return; | 501 | return; |
417 | } | 502 | } |
418 | 503 | ||
504 | ADD_STATS(pud_update_pinned, 1); | ||
505 | |||
419 | xen_set_pud_hyper(ptr, val); | 506 | xen_set_pud_hyper(ptr, val); |
420 | } | 507 | } |
421 | 508 | ||
422 | void xen_set_pte(pte_t *ptep, pte_t pte) | 509 | void xen_set_pte(pte_t *ptep, pte_t pte) |
423 | { | 510 | { |
511 | ADD_STATS(pte_update, 1); | ||
512 | // ADD_STATS(pte_update_pinned, xen_page_pinned(ptep)); | ||
513 | ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
514 | |||
424 | #ifdef CONFIG_X86_PAE | 515 | #ifdef CONFIG_X86_PAE |
425 | ptep->pte_high = pte.pte_high; | 516 | ptep->pte_high = pte.pte_high; |
426 | smp_wmb(); | 517 | smp_wmb(); |
@@ -490,7 +581,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | |||
490 | 581 | ||
491 | u.ptr = virt_to_machine(ptr).maddr; | 582 | u.ptr = virt_to_machine(ptr).maddr; |
492 | u.val = pgd_val_ma(val); | 583 | u.val = pgd_val_ma(val); |
493 | extend_mmu_update(&u); | 584 | xen_extend_mmu_update(&u); |
494 | } | 585 | } |
495 | 586 | ||
496 | /* | 587 | /* |
@@ -517,17 +608,22 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
517 | { | 608 | { |
518 | pgd_t *user_ptr = xen_get_user_pgd(ptr); | 609 | pgd_t *user_ptr = xen_get_user_pgd(ptr); |
519 | 610 | ||
611 | ADD_STATS(pgd_update, 1); | ||
612 | |||
520 | /* If page is not pinned, we can just update the entry | 613 | /* If page is not pinned, we can just update the entry |
521 | directly */ | 614 | directly */ |
522 | if (!page_pinned(ptr)) { | 615 | if (!xen_page_pinned(ptr)) { |
523 | *ptr = val; | 616 | *ptr = val; |
524 | if (user_ptr) { | 617 | if (user_ptr) { |
525 | WARN_ON(page_pinned(user_ptr)); | 618 | WARN_ON(xen_page_pinned(user_ptr)); |
526 | *user_ptr = val; | 619 | *user_ptr = val; |
527 | } | 620 | } |
528 | return; | 621 | return; |
529 | } | 622 | } |
530 | 623 | ||
624 | ADD_STATS(pgd_update_pinned, 1); | ||
625 | ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
626 | |||
531 | /* If it's pinned, then we can at least batch the kernel and | 627 | /* If it's pinned, then we can at least batch the kernel and |
532 | user updates together. */ | 628 | user updates together. */ |
533 | xen_mc_batch(); | 629 | xen_mc_batch(); |
@@ -555,9 +651,12 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
555 | * For 64-bit, we must skip the Xen hole in the middle of the address | 651 | * For 64-bit, we must skip the Xen hole in the middle of the address |
556 | * space, just after the big x86-64 virtual hole. | 652 | * space, just after the big x86-64 virtual hole. |
557 | */ | 653 | */ |
558 | static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | 654 | static int xen_pgd_walk(struct mm_struct *mm, |
559 | unsigned long limit) | 655 | int (*func)(struct mm_struct *mm, struct page *, |
656 | enum pt_level), | ||
657 | unsigned long limit) | ||
560 | { | 658 | { |
659 | pgd_t *pgd = mm->pgd; | ||
561 | int flush = 0; | 660 | int flush = 0; |
562 | unsigned hole_low, hole_high; | 661 | unsigned hole_low, hole_high; |
563 | unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; | 662 | unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; |
@@ -590,8 +689,6 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | |||
590 | pmdidx_limit = 0; | 689 | pmdidx_limit = 0; |
591 | #endif | 690 | #endif |
592 | 691 | ||
593 | flush |= (*func)(virt_to_page(pgd), PT_PGD); | ||
594 | |||
595 | for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { | 692 | for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { |
596 | pud_t *pud; | 693 | pud_t *pud; |
597 | 694 | ||
@@ -604,7 +701,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | |||
604 | pud = pud_offset(&pgd[pgdidx], 0); | 701 | pud = pud_offset(&pgd[pgdidx], 0); |
605 | 702 | ||
606 | if (PTRS_PER_PUD > 1) /* not folded */ | 703 | if (PTRS_PER_PUD > 1) /* not folded */ |
607 | flush |= (*func)(virt_to_page(pud), PT_PUD); | 704 | flush |= (*func)(mm, virt_to_page(pud), PT_PUD); |
608 | 705 | ||
609 | for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) { | 706 | for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) { |
610 | pmd_t *pmd; | 707 | pmd_t *pmd; |
@@ -619,7 +716,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | |||
619 | pmd = pmd_offset(&pud[pudidx], 0); | 716 | pmd = pmd_offset(&pud[pudidx], 0); |
620 | 717 | ||
621 | if (PTRS_PER_PMD > 1) /* not folded */ | 718 | if (PTRS_PER_PMD > 1) /* not folded */ |
622 | flush |= (*func)(virt_to_page(pmd), PT_PMD); | 719 | flush |= (*func)(mm, virt_to_page(pmd), PT_PMD); |
623 | 720 | ||
624 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) { | 721 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) { |
625 | struct page *pte; | 722 | struct page *pte; |
@@ -633,28 +730,34 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | |||
633 | continue; | 730 | continue; |
634 | 731 | ||
635 | pte = pmd_page(pmd[pmdidx]); | 732 | pte = pmd_page(pmd[pmdidx]); |
636 | flush |= (*func)(pte, PT_PTE); | 733 | flush |= (*func)(mm, pte, PT_PTE); |
637 | } | 734 | } |
638 | } | 735 | } |
639 | } | 736 | } |
737 | |||
640 | out: | 738 | out: |
739 | /* Do the top level last, so that the callbacks can use it as | ||
740 | a cue to do final things like tlb flushes. */ | ||
741 | flush |= (*func)(mm, virt_to_page(pgd), PT_PGD); | ||
641 | 742 | ||
642 | return flush; | 743 | return flush; |
643 | } | 744 | } |
644 | 745 | ||
645 | static spinlock_t *lock_pte(struct page *page) | 746 | /* If we're using split pte locks, then take the page's lock and |
747 | return a pointer to it. Otherwise return NULL. */ | ||
748 | static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) | ||
646 | { | 749 | { |
647 | spinlock_t *ptl = NULL; | 750 | spinlock_t *ptl = NULL; |
648 | 751 | ||
649 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | 752 | #if USE_SPLIT_PTLOCKS |
650 | ptl = __pte_lockptr(page); | 753 | ptl = __pte_lockptr(page); |
651 | spin_lock(ptl); | 754 | spin_lock_nest_lock(ptl, &mm->page_table_lock); |
652 | #endif | 755 | #endif |
653 | 756 | ||
654 | return ptl; | 757 | return ptl; |
655 | } | 758 | } |
656 | 759 | ||
657 | static void do_unlock(void *v) | 760 | static void xen_pte_unlock(void *v) |
658 | { | 761 | { |
659 | spinlock_t *ptl = v; | 762 | spinlock_t *ptl = v; |
660 | spin_unlock(ptl); | 763 | spin_unlock(ptl); |
@@ -672,7 +775,8 @@ static void xen_do_pin(unsigned level, unsigned long pfn) | |||
672 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | 775 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); |
673 | } | 776 | } |
674 | 777 | ||
675 | static int pin_page(struct page *page, enum pt_level level) | 778 | static int xen_pin_page(struct mm_struct *mm, struct page *page, |
779 | enum pt_level level) | ||
676 | { | 780 | { |
677 | unsigned pgfl = TestSetPagePinned(page); | 781 | unsigned pgfl = TestSetPagePinned(page); |
678 | int flush; | 782 | int flush; |
@@ -691,21 +795,40 @@ static int pin_page(struct page *page, enum pt_level level) | |||
691 | 795 | ||
692 | flush = 0; | 796 | flush = 0; |
693 | 797 | ||
798 | /* | ||
799 | * We need to hold the pagetable lock between the time | ||
800 | * we make the pagetable RO and when we actually pin | ||
801 | * it. If we don't, then other users may come in and | ||
802 | * attempt to update the pagetable by writing it, | ||
803 | * which will fail because the memory is RO but not | ||
804 | * pinned, so Xen won't do the trap'n'emulate. | ||
805 | * | ||
806 | * If we're using split pte locks, we can't hold the | ||
807 | * entire pagetable's worth of locks during the | ||
808 | * traverse, because we may wrap the preempt count (8 | ||
809 | * bits). The solution is to mark RO and pin each PTE | ||
810 | * page while holding the lock. This means the number | ||
811 | * of locks we end up holding is never more than a | ||
812 | * batch size (~32 entries, at present). | ||
813 | * | ||
814 | * If we're not using split pte locks, we needn't pin | ||
815 | * the PTE pages independently, because we're | ||
816 | * protected by the overall pagetable lock. | ||
817 | */ | ||
694 | ptl = NULL; | 818 | ptl = NULL; |
695 | if (level == PT_PTE) | 819 | if (level == PT_PTE) |
696 | ptl = lock_pte(page); | 820 | ptl = xen_pte_lock(page, mm); |
697 | 821 | ||
698 | MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, | 822 | MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, |
699 | pfn_pte(pfn, PAGE_KERNEL_RO), | 823 | pfn_pte(pfn, PAGE_KERNEL_RO), |
700 | level == PT_PGD ? UVMF_TLB_FLUSH : 0); | 824 | level == PT_PGD ? UVMF_TLB_FLUSH : 0); |
701 | 825 | ||
702 | if (level == PT_PTE) | 826 | if (ptl) { |
703 | xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); | 827 | xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); |
704 | 828 | ||
705 | if (ptl) { | ||
706 | /* Queue a deferred unlock for when this batch | 829 | /* Queue a deferred unlock for when this batch |
707 | is completed. */ | 830 | is completed. */ |
708 | xen_mc_callback(do_unlock, ptl); | 831 | xen_mc_callback(xen_pte_unlock, ptl); |
709 | } | 832 | } |
710 | } | 833 | } |
711 | 834 | ||
@@ -715,11 +838,11 @@ static int pin_page(struct page *page, enum pt_level level) | |||
715 | /* This is called just after a mm has been created, but it has not | 838 | /* This is called just after a mm has been created, but it has not |
716 | been used yet. We need to make sure that its pagetable is all | 839 | been used yet. We need to make sure that its pagetable is all |
717 | read-only, and can be pinned. */ | 840 | read-only, and can be pinned. */ |
718 | void xen_pgd_pin(pgd_t *pgd) | 841 | static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) |
719 | { | 842 | { |
720 | xen_mc_batch(); | 843 | xen_mc_batch(); |
721 | 844 | ||
722 | if (pgd_walk(pgd, pin_page, USER_LIMIT)) { | 845 | if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { |
723 | /* re-enable interrupts for kmap_flush_unused */ | 846 | /* re-enable interrupts for kmap_flush_unused */ |
724 | xen_mc_issue(0); | 847 | xen_mc_issue(0); |
725 | kmap_flush_unused(); | 848 | kmap_flush_unused(); |
@@ -733,25 +856,35 @@ void xen_pgd_pin(pgd_t *pgd) | |||
733 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); | 856 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); |
734 | 857 | ||
735 | if (user_pgd) { | 858 | if (user_pgd) { |
736 | pin_page(virt_to_page(user_pgd), PT_PGD); | 859 | xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); |
737 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); | 860 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); |
738 | } | 861 | } |
739 | } | 862 | } |
740 | #else /* CONFIG_X86_32 */ | 863 | #else /* CONFIG_X86_32 */ |
741 | #ifdef CONFIG_X86_PAE | 864 | #ifdef CONFIG_X86_PAE |
742 | /* Need to make sure unshared kernel PMD is pinnable */ | 865 | /* Need to make sure unshared kernel PMD is pinnable */ |
743 | pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); | 866 | xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), |
867 | PT_PMD); | ||
744 | #endif | 868 | #endif |
745 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); | 869 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); |
746 | #endif /* CONFIG_X86_64 */ | 870 | #endif /* CONFIG_X86_64 */ |
747 | xen_mc_issue(0); | 871 | xen_mc_issue(0); |
748 | } | 872 | } |
749 | 873 | ||
874 | static void xen_pgd_pin(struct mm_struct *mm) | ||
875 | { | ||
876 | __xen_pgd_pin(mm, mm->pgd); | ||
877 | } | ||
878 | |||
750 | /* | 879 | /* |
751 | * On save, we need to pin all pagetables to make sure they get their | 880 | * On save, we need to pin all pagetables to make sure they get their |
752 | * mfns turned into pfns. Search the list for any unpinned pgds and pin | 881 | * mfns turned into pfns. Search the list for any unpinned pgds and pin |
753 | * them (unpinned pgds are not currently in use, probably because the | 882 | * them (unpinned pgds are not currently in use, probably because the |
754 | * process is under construction or destruction). | 883 | * process is under construction or destruction). |
884 | * | ||
885 | * Expected to be called in stop_machine() ("equivalent to taking | ||
886 | * every spinlock in the system"), so the locking doesn't really | ||
887 | * matter all that much. | ||
755 | */ | 888 | */ |
756 | void xen_mm_pin_all(void) | 889 | void xen_mm_pin_all(void) |
757 | { | 890 | { |
@@ -762,7 +895,7 @@ void xen_mm_pin_all(void) | |||
762 | 895 | ||
763 | list_for_each_entry(page, &pgd_list, lru) { | 896 | list_for_each_entry(page, &pgd_list, lru) { |
764 | if (!PagePinned(page)) { | 897 | if (!PagePinned(page)) { |
765 | xen_pgd_pin((pgd_t *)page_address(page)); | 898 | __xen_pgd_pin(&init_mm, (pgd_t *)page_address(page)); |
766 | SetPageSavePinned(page); | 899 | SetPageSavePinned(page); |
767 | } | 900 | } |
768 | } | 901 | } |
@@ -775,7 +908,8 @@ void xen_mm_pin_all(void) | |||
775 | * that's before we have page structures to store the bits. So do all | 908 | * that's before we have page structures to store the bits. So do all |
776 | * the book-keeping now. | 909 | * the book-keeping now. |
777 | */ | 910 | */ |
778 | static __init int mark_pinned(struct page *page, enum pt_level level) | 911 | static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, |
912 | enum pt_level level) | ||
779 | { | 913 | { |
780 | SetPagePinned(page); | 914 | SetPagePinned(page); |
781 | return 0; | 915 | return 0; |
@@ -783,10 +917,11 @@ static __init int mark_pinned(struct page *page, enum pt_level level) | |||
783 | 917 | ||
784 | void __init xen_mark_init_mm_pinned(void) | 918 | void __init xen_mark_init_mm_pinned(void) |
785 | { | 919 | { |
786 | pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP); | 920 | xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); |
787 | } | 921 | } |
788 | 922 | ||
789 | static int unpin_page(struct page *page, enum pt_level level) | 923 | static int xen_unpin_page(struct mm_struct *mm, struct page *page, |
924 | enum pt_level level) | ||
790 | { | 925 | { |
791 | unsigned pgfl = TestClearPagePinned(page); | 926 | unsigned pgfl = TestClearPagePinned(page); |
792 | 927 | ||
@@ -796,10 +931,18 @@ static int unpin_page(struct page *page, enum pt_level level) | |||
796 | spinlock_t *ptl = NULL; | 931 | spinlock_t *ptl = NULL; |
797 | struct multicall_space mcs; | 932 | struct multicall_space mcs; |
798 | 933 | ||
934 | /* | ||
935 | * Do the converse to pin_page. If we're using split | ||
936 | * pte locks, we must be holding the lock for while | ||
937 | * the pte page is unpinned but still RO to prevent | ||
938 | * concurrent updates from seeing it in this | ||
939 | * partially-pinned state. | ||
940 | */ | ||
799 | if (level == PT_PTE) { | 941 | if (level == PT_PTE) { |
800 | ptl = lock_pte(page); | 942 | ptl = xen_pte_lock(page, mm); |
801 | 943 | ||
802 | xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); | 944 | if (ptl) |
945 | xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); | ||
803 | } | 946 | } |
804 | 947 | ||
805 | mcs = __xen_mc_entry(0); | 948 | mcs = __xen_mc_entry(0); |
@@ -810,7 +953,7 @@ static int unpin_page(struct page *page, enum pt_level level) | |||
810 | 953 | ||
811 | if (ptl) { | 954 | if (ptl) { |
812 | /* unlock when batch completed */ | 955 | /* unlock when batch completed */ |
813 | xen_mc_callback(do_unlock, ptl); | 956 | xen_mc_callback(xen_pte_unlock, ptl); |
814 | } | 957 | } |
815 | } | 958 | } |
816 | 959 | ||
@@ -818,7 +961,7 @@ static int unpin_page(struct page *page, enum pt_level level) | |||
818 | } | 961 | } |
819 | 962 | ||
820 | /* Release a pagetables pages back as normal RW */ | 963 | /* Release a pagetables pages back as normal RW */ |
821 | static void xen_pgd_unpin(pgd_t *pgd) | 964 | static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) |
822 | { | 965 | { |
823 | xen_mc_batch(); | 966 | xen_mc_batch(); |
824 | 967 | ||
@@ -830,21 +973,27 @@ static void xen_pgd_unpin(pgd_t *pgd) | |||
830 | 973 | ||
831 | if (user_pgd) { | 974 | if (user_pgd) { |
832 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); | 975 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); |
833 | unpin_page(virt_to_page(user_pgd), PT_PGD); | 976 | xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); |
834 | } | 977 | } |
835 | } | 978 | } |
836 | #endif | 979 | #endif |
837 | 980 | ||
838 | #ifdef CONFIG_X86_PAE | 981 | #ifdef CONFIG_X86_PAE |
839 | /* Need to make sure unshared kernel PMD is unpinned */ | 982 | /* Need to make sure unshared kernel PMD is unpinned */ |
840 | pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); | 983 | xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), |
984 | PT_PMD); | ||
841 | #endif | 985 | #endif |
842 | 986 | ||
843 | pgd_walk(pgd, unpin_page, USER_LIMIT); | 987 | xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT); |
844 | 988 | ||
845 | xen_mc_issue(0); | 989 | xen_mc_issue(0); |
846 | } | 990 | } |
847 | 991 | ||
992 | static void xen_pgd_unpin(struct mm_struct *mm) | ||
993 | { | ||
994 | __xen_pgd_unpin(mm, mm->pgd); | ||
995 | } | ||
996 | |||
848 | /* | 997 | /* |
849 | * On resume, undo any pinning done at save, so that the rest of the | 998 | * On resume, undo any pinning done at save, so that the rest of the |
850 | * kernel doesn't see any unexpected pinned pagetables. | 999 | * kernel doesn't see any unexpected pinned pagetables. |
@@ -859,7 +1008,7 @@ void xen_mm_unpin_all(void) | |||
859 | list_for_each_entry(page, &pgd_list, lru) { | 1008 | list_for_each_entry(page, &pgd_list, lru) { |
860 | if (PageSavePinned(page)) { | 1009 | if (PageSavePinned(page)) { |
861 | BUG_ON(!PagePinned(page)); | 1010 | BUG_ON(!PagePinned(page)); |
862 | xen_pgd_unpin((pgd_t *)page_address(page)); | 1011 | __xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page)); |
863 | ClearPageSavePinned(page); | 1012 | ClearPageSavePinned(page); |
864 | } | 1013 | } |
865 | } | 1014 | } |
@@ -870,14 +1019,14 @@ void xen_mm_unpin_all(void) | |||
870 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) | 1019 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) |
871 | { | 1020 | { |
872 | spin_lock(&next->page_table_lock); | 1021 | spin_lock(&next->page_table_lock); |
873 | xen_pgd_pin(next->pgd); | 1022 | xen_pgd_pin(next); |
874 | spin_unlock(&next->page_table_lock); | 1023 | spin_unlock(&next->page_table_lock); |
875 | } | 1024 | } |
876 | 1025 | ||
877 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | 1026 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) |
878 | { | 1027 | { |
879 | spin_lock(&mm->page_table_lock); | 1028 | spin_lock(&mm->page_table_lock); |
880 | xen_pgd_pin(mm->pgd); | 1029 | xen_pgd_pin(mm); |
881 | spin_unlock(&mm->page_table_lock); | 1030 | spin_unlock(&mm->page_table_lock); |
882 | } | 1031 | } |
883 | 1032 | ||
@@ -907,7 +1056,7 @@ static void drop_other_mm_ref(void *info) | |||
907 | } | 1056 | } |
908 | } | 1057 | } |
909 | 1058 | ||
910 | static void drop_mm_ref(struct mm_struct *mm) | 1059 | static void xen_drop_mm_ref(struct mm_struct *mm) |
911 | { | 1060 | { |
912 | cpumask_t mask; | 1061 | cpumask_t mask; |
913 | unsigned cpu; | 1062 | unsigned cpu; |
@@ -937,7 +1086,7 @@ static void drop_mm_ref(struct mm_struct *mm) | |||
937 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); | 1086 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); |
938 | } | 1087 | } |
939 | #else | 1088 | #else |
940 | static void drop_mm_ref(struct mm_struct *mm) | 1089 | static void xen_drop_mm_ref(struct mm_struct *mm) |
941 | { | 1090 | { |
942 | if (current->active_mm == mm) | 1091 | if (current->active_mm == mm) |
943 | load_cr3(swapper_pg_dir); | 1092 | load_cr3(swapper_pg_dir); |
@@ -961,14 +1110,77 @@ static void drop_mm_ref(struct mm_struct *mm) | |||
961 | void xen_exit_mmap(struct mm_struct *mm) | 1110 | void xen_exit_mmap(struct mm_struct *mm) |
962 | { | 1111 | { |
963 | get_cpu(); /* make sure we don't move around */ | 1112 | get_cpu(); /* make sure we don't move around */ |
964 | drop_mm_ref(mm); | 1113 | xen_drop_mm_ref(mm); |
965 | put_cpu(); | 1114 | put_cpu(); |
966 | 1115 | ||
967 | spin_lock(&mm->page_table_lock); | 1116 | spin_lock(&mm->page_table_lock); |
968 | 1117 | ||
969 | /* pgd may not be pinned in the error exit path of execve */ | 1118 | /* pgd may not be pinned in the error exit path of execve */ |
970 | if (page_pinned(mm->pgd)) | 1119 | if (xen_page_pinned(mm->pgd)) |
971 | xen_pgd_unpin(mm->pgd); | 1120 | xen_pgd_unpin(mm); |
972 | 1121 | ||
973 | spin_unlock(&mm->page_table_lock); | 1122 | spin_unlock(&mm->page_table_lock); |
974 | } | 1123 | } |
1124 | |||
1125 | #ifdef CONFIG_XEN_DEBUG_FS | ||
1126 | |||
1127 | static struct dentry *d_mmu_debug; | ||
1128 | |||
1129 | static int __init xen_mmu_debugfs(void) | ||
1130 | { | ||
1131 | struct dentry *d_xen = xen_init_debugfs(); | ||
1132 | |||
1133 | if (d_xen == NULL) | ||
1134 | return -ENOMEM; | ||
1135 | |||
1136 | d_mmu_debug = debugfs_create_dir("mmu", d_xen); | ||
1137 | |||
1138 | debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats); | ||
1139 | |||
1140 | debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update); | ||
1141 | debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug, | ||
1142 | &mmu_stats.pgd_update_pinned); | ||
1143 | debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug, | ||
1144 | &mmu_stats.pgd_update_pinned); | ||
1145 | |||
1146 | debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update); | ||
1147 | debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug, | ||
1148 | &mmu_stats.pud_update_pinned); | ||
1149 | debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug, | ||
1150 | &mmu_stats.pud_update_pinned); | ||
1151 | |||
1152 | debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update); | ||
1153 | debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug, | ||
1154 | &mmu_stats.pmd_update_pinned); | ||
1155 | debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug, | ||
1156 | &mmu_stats.pmd_update_pinned); | ||
1157 | |||
1158 | debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update); | ||
1159 | // debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug, | ||
1160 | // &mmu_stats.pte_update_pinned); | ||
1161 | debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug, | ||
1162 | &mmu_stats.pte_update_pinned); | ||
1163 | |||
1164 | debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update); | ||
1165 | debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug, | ||
1166 | &mmu_stats.mmu_update_extended); | ||
1167 | xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug, | ||
1168 | mmu_stats.mmu_update_histo, 20); | ||
1169 | |||
1170 | debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at); | ||
1171 | debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug, | ||
1172 | &mmu_stats.set_pte_at_batched); | ||
1173 | debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug, | ||
1174 | &mmu_stats.set_pte_at_current); | ||
1175 | debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug, | ||
1176 | &mmu_stats.set_pte_at_kernel); | ||
1177 | |||
1178 | debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit); | ||
1179 | debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, | ||
1180 | &mmu_stats.prot_commit_batched); | ||
1181 | |||
1182 | return 0; | ||
1183 | } | ||
1184 | fs_initcall(xen_mmu_debugfs); | ||
1185 | |||
1186 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 0f59bd03f9e3..98d71659da5a 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -18,9 +18,6 @@ void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); | |||
18 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | 18 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); |
19 | void xen_exit_mmap(struct mm_struct *mm); | 19 | void xen_exit_mmap(struct mm_struct *mm); |
20 | 20 | ||
21 | void xen_pgd_pin(pgd_t *pgd); | ||
22 | //void xen_pgd_unpin(pgd_t *pgd); | ||
23 | |||
24 | pteval_t xen_pte_val(pte_t); | 21 | pteval_t xen_pte_val(pte_t); |
25 | pmdval_t xen_pmd_val(pmd_t); | 22 | pmdval_t xen_pmd_val(pmd_t); |
26 | pgdval_t xen_pgd_val(pgd_t); | 23 | pgdval_t xen_pgd_val(pgd_t); |
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 9efd1c6c9776..8ea8a0d0b0de 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -21,16 +21,20 @@ | |||
21 | */ | 21 | */ |
22 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
23 | #include <linux/hardirq.h> | 23 | #include <linux/hardirq.h> |
24 | #include <linux/debugfs.h> | ||
24 | 25 | ||
25 | #include <asm/xen/hypercall.h> | 26 | #include <asm/xen/hypercall.h> |
26 | 27 | ||
27 | #include "multicalls.h" | 28 | #include "multicalls.h" |
29 | #include "debugfs.h" | ||
30 | |||
31 | #define MC_BATCH 32 | ||
28 | 32 | ||
29 | #define MC_DEBUG 1 | 33 | #define MC_DEBUG 1 |
30 | 34 | ||
31 | #define MC_BATCH 32 | ||
32 | #define MC_ARGS (MC_BATCH * 16) | 35 | #define MC_ARGS (MC_BATCH * 16) |
33 | 36 | ||
37 | |||
34 | struct mc_buffer { | 38 | struct mc_buffer { |
35 | struct multicall_entry entries[MC_BATCH]; | 39 | struct multicall_entry entries[MC_BATCH]; |
36 | #if MC_DEBUG | 40 | #if MC_DEBUG |
@@ -47,6 +51,76 @@ struct mc_buffer { | |||
47 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); | 51 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); |
48 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); | 52 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); |
49 | 53 | ||
54 | /* flush reasons 0- slots, 1- args, 2- callbacks */ | ||
55 | enum flush_reasons | ||
56 | { | ||
57 | FL_SLOTS, | ||
58 | FL_ARGS, | ||
59 | FL_CALLBACKS, | ||
60 | |||
61 | FL_N_REASONS | ||
62 | }; | ||
63 | |||
64 | #ifdef CONFIG_XEN_DEBUG_FS | ||
65 | #define NHYPERCALLS 40 /* not really */ | ||
66 | |||
67 | static struct { | ||
68 | unsigned histo[MC_BATCH+1]; | ||
69 | |||
70 | unsigned issued; | ||
71 | unsigned arg_total; | ||
72 | unsigned hypercalls; | ||
73 | unsigned histo_hypercalls[NHYPERCALLS]; | ||
74 | |||
75 | unsigned flush[FL_N_REASONS]; | ||
76 | } mc_stats; | ||
77 | |||
78 | static u8 zero_stats; | ||
79 | |||
80 | static inline void check_zero(void) | ||
81 | { | ||
82 | if (unlikely(zero_stats)) { | ||
83 | memset(&mc_stats, 0, sizeof(mc_stats)); | ||
84 | zero_stats = 0; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | static void mc_add_stats(const struct mc_buffer *mc) | ||
89 | { | ||
90 | int i; | ||
91 | |||
92 | check_zero(); | ||
93 | |||
94 | mc_stats.issued++; | ||
95 | mc_stats.hypercalls += mc->mcidx; | ||
96 | mc_stats.arg_total += mc->argidx; | ||
97 | |||
98 | mc_stats.histo[mc->mcidx]++; | ||
99 | for(i = 0; i < mc->mcidx; i++) { | ||
100 | unsigned op = mc->entries[i].op; | ||
101 | if (op < NHYPERCALLS) | ||
102 | mc_stats.histo_hypercalls[op]++; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | static void mc_stats_flush(enum flush_reasons idx) | ||
107 | { | ||
108 | check_zero(); | ||
109 | |||
110 | mc_stats.flush[idx]++; | ||
111 | } | ||
112 | |||
113 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
114 | |||
115 | static inline void mc_add_stats(const struct mc_buffer *mc) | ||
116 | { | ||
117 | } | ||
118 | |||
119 | static inline void mc_stats_flush(enum flush_reasons idx) | ||
120 | { | ||
121 | } | ||
122 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
123 | |||
50 | void xen_mc_flush(void) | 124 | void xen_mc_flush(void) |
51 | { | 125 | { |
52 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 126 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
@@ -60,6 +134,8 @@ void xen_mc_flush(void) | |||
60 | something in the middle */ | 134 | something in the middle */ |
61 | local_irq_save(flags); | 135 | local_irq_save(flags); |
62 | 136 | ||
137 | mc_add_stats(b); | ||
138 | |||
63 | if (b->mcidx) { | 139 | if (b->mcidx) { |
64 | #if MC_DEBUG | 140 | #if MC_DEBUG |
65 | memcpy(b->debug, b->entries, | 141 | memcpy(b->debug, b->entries, |
@@ -115,6 +191,7 @@ struct multicall_space __xen_mc_entry(size_t args) | |||
115 | 191 | ||
116 | if (b->mcidx == MC_BATCH || | 192 | if (b->mcidx == MC_BATCH || |
117 | (argidx + args) > MC_ARGS) { | 193 | (argidx + args) > MC_ARGS) { |
194 | mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS); | ||
118 | xen_mc_flush(); | 195 | xen_mc_flush(); |
119 | argidx = roundup(b->argidx, sizeof(u64)); | 196 | argidx = roundup(b->argidx, sizeof(u64)); |
120 | } | 197 | } |
@@ -158,10 +235,44 @@ void xen_mc_callback(void (*fn)(void *), void *data) | |||
158 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 235 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
159 | struct callback *cb; | 236 | struct callback *cb; |
160 | 237 | ||
161 | if (b->cbidx == MC_BATCH) | 238 | if (b->cbidx == MC_BATCH) { |
239 | mc_stats_flush(FL_CALLBACKS); | ||
162 | xen_mc_flush(); | 240 | xen_mc_flush(); |
241 | } | ||
163 | 242 | ||
164 | cb = &b->callbacks[b->cbidx++]; | 243 | cb = &b->callbacks[b->cbidx++]; |
165 | cb->fn = fn; | 244 | cb->fn = fn; |
166 | cb->data = data; | 245 | cb->data = data; |
167 | } | 246 | } |
247 | |||
248 | #ifdef CONFIG_XEN_DEBUG_FS | ||
249 | |||
250 | static struct dentry *d_mc_debug; | ||
251 | |||
252 | static int __init xen_mc_debugfs(void) | ||
253 | { | ||
254 | struct dentry *d_xen = xen_init_debugfs(); | ||
255 | |||
256 | if (d_xen == NULL) | ||
257 | return -ENOMEM; | ||
258 | |||
259 | d_mc_debug = debugfs_create_dir("multicalls", d_xen); | ||
260 | |||
261 | debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats); | ||
262 | |||
263 | debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued); | ||
264 | debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls); | ||
265 | debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total); | ||
266 | |||
267 | xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug, | ||
268 | mc_stats.histo, MC_BATCH); | ||
269 | xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug, | ||
270 | mc_stats.histo_hypercalls, NHYPERCALLS); | ||
271 | xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug, | ||
272 | mc_stats.flush, FL_N_REASONS); | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | fs_initcall(xen_mc_debugfs); | ||
277 | |||
278 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d8faf79a0a1d..d77da613b1d2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -11,11 +11,8 @@ | |||
11 | * useful topology information for the kernel to make use of. As a | 11 | * useful topology information for the kernel to make use of. As a |
12 | * result, all CPUs are treated as if they're single-core and | 12 | * result, all CPUs are treated as if they're single-core and |
13 | * single-threaded. | 13 | * single-threaded. |
14 | * | ||
15 | * This does not handle HOTPLUG_CPU yet. | ||
16 | */ | 14 | */ |
17 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
18 | #include <linux/kernel_stat.h> | ||
19 | #include <linux/err.h> | 16 | #include <linux/err.h> |
20 | #include <linux/smp.h> | 17 | #include <linux/smp.h> |
21 | 18 | ||
@@ -36,8 +33,6 @@ | |||
36 | #include "xen-ops.h" | 33 | #include "xen-ops.h" |
37 | #include "mmu.h" | 34 | #include "mmu.h" |
38 | 35 | ||
39 | static void __cpuinit xen_init_lock_cpu(int cpu); | ||
40 | |||
41 | cpumask_t xen_cpu_initialized_map; | 36 | cpumask_t xen_cpu_initialized_map; |
42 | 37 | ||
43 | static DEFINE_PER_CPU(int, resched_irq); | 38 | static DEFINE_PER_CPU(int, resched_irq); |
@@ -64,11 +59,12 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | |||
64 | return IRQ_HANDLED; | 59 | return IRQ_HANDLED; |
65 | } | 60 | } |
66 | 61 | ||
67 | static __cpuinit void cpu_bringup_and_idle(void) | 62 | static __cpuinit void cpu_bringup(void) |
68 | { | 63 | { |
69 | int cpu = smp_processor_id(); | 64 | int cpu = smp_processor_id(); |
70 | 65 | ||
71 | cpu_init(); | 66 | cpu_init(); |
67 | touch_softlockup_watchdog(); | ||
72 | preempt_disable(); | 68 | preempt_disable(); |
73 | 69 | ||
74 | xen_enable_sysenter(); | 70 | xen_enable_sysenter(); |
@@ -89,6 +85,11 @@ static __cpuinit void cpu_bringup_and_idle(void) | |||
89 | local_irq_enable(); | 85 | local_irq_enable(); |
90 | 86 | ||
91 | wmb(); /* make sure everything is out */ | 87 | wmb(); /* make sure everything is out */ |
88 | } | ||
89 | |||
90 | static __cpuinit void cpu_bringup_and_idle(void) | ||
91 | { | ||
92 | cpu_bringup(); | ||
92 | cpu_idle(); | 93 | cpu_idle(); |
93 | } | 94 | } |
94 | 95 | ||
@@ -212,8 +213,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
212 | 213 | ||
213 | cpu_set(cpu, cpu_present_map); | 214 | cpu_set(cpu, cpu_present_map); |
214 | } | 215 | } |
215 | |||
216 | //init_xenbus_allowed_cpumask(); | ||
217 | } | 216 | } |
218 | 217 | ||
219 | static __cpuinit int | 218 | static __cpuinit int |
@@ -281,12 +280,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) | |||
281 | struct task_struct *idle = idle_task(cpu); | 280 | struct task_struct *idle = idle_task(cpu); |
282 | int rc; | 281 | int rc; |
283 | 282 | ||
284 | #if 0 | ||
285 | rc = cpu_up_check(cpu); | ||
286 | if (rc) | ||
287 | return rc; | ||
288 | #endif | ||
289 | |||
290 | #ifdef CONFIG_X86_64 | 283 | #ifdef CONFIG_X86_64 |
291 | /* Allocate node local memory for AP pdas */ | 284 | /* Allocate node local memory for AP pdas */ |
292 | WARN_ON(cpu == 0); | 285 | WARN_ON(cpu == 0); |
@@ -339,6 +332,60 @@ static void xen_smp_cpus_done(unsigned int max_cpus) | |||
339 | { | 332 | { |
340 | } | 333 | } |
341 | 334 | ||
335 | #ifdef CONFIG_HOTPLUG_CPU | ||
336 | static int xen_cpu_disable(void) | ||
337 | { | ||
338 | unsigned int cpu = smp_processor_id(); | ||
339 | if (cpu == 0) | ||
340 | return -EBUSY; | ||
341 | |||
342 | cpu_disable_common(); | ||
343 | |||
344 | load_cr3(swapper_pg_dir); | ||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | static void xen_cpu_die(unsigned int cpu) | ||
349 | { | ||
350 | while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { | ||
351 | current->state = TASK_UNINTERRUPTIBLE; | ||
352 | schedule_timeout(HZ/10); | ||
353 | } | ||
354 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | ||
355 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | ||
356 | unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); | ||
357 | unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); | ||
358 | xen_uninit_lock_cpu(cpu); | ||
359 | xen_teardown_timer(cpu); | ||
360 | |||
361 | if (num_online_cpus() == 1) | ||
362 | alternatives_smp_switch(0); | ||
363 | } | ||
364 | |||
365 | static void xen_play_dead(void) | ||
366 | { | ||
367 | play_dead_common(); | ||
368 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | ||
369 | cpu_bringup(); | ||
370 | } | ||
371 | |||
372 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
373 | static int xen_cpu_disable(void) | ||
374 | { | ||
375 | return -ENOSYS; | ||
376 | } | ||
377 | |||
378 | static void xen_cpu_die(unsigned int cpu) | ||
379 | { | ||
380 | BUG(); | ||
381 | } | ||
382 | |||
383 | static void xen_play_dead(void) | ||
384 | { | ||
385 | BUG(); | ||
386 | } | ||
387 | |||
388 | #endif | ||
342 | static void stop_self(void *v) | 389 | static void stop_self(void *v) |
343 | { | 390 | { |
344 | int cpu = smp_processor_id(); | 391 | int cpu = smp_processor_id(); |
@@ -419,176 +466,16 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) | |||
419 | return IRQ_HANDLED; | 466 | return IRQ_HANDLED; |
420 | } | 467 | } |
421 | 468 | ||
422 | struct xen_spinlock { | ||
423 | unsigned char lock; /* 0 -> free; 1 -> locked */ | ||
424 | unsigned short spinners; /* count of waiting cpus */ | ||
425 | }; | ||
426 | |||
427 | static int xen_spin_is_locked(struct raw_spinlock *lock) | ||
428 | { | ||
429 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
430 | |||
431 | return xl->lock != 0; | ||
432 | } | ||
433 | |||
434 | static int xen_spin_is_contended(struct raw_spinlock *lock) | ||
435 | { | ||
436 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
437 | |||
438 | /* Not strictly true; this is only the count of contended | ||
439 | lock-takers entering the slow path. */ | ||
440 | return xl->spinners != 0; | ||
441 | } | ||
442 | |||
443 | static int xen_spin_trylock(struct raw_spinlock *lock) | ||
444 | { | ||
445 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
446 | u8 old = 1; | ||
447 | |||
448 | asm("xchgb %b0,%1" | ||
449 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
450 | |||
451 | return old == 0; | ||
452 | } | ||
453 | |||
454 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | ||
455 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | ||
456 | |||
457 | static inline void spinning_lock(struct xen_spinlock *xl) | ||
458 | { | ||
459 | __get_cpu_var(lock_spinners) = xl; | ||
460 | wmb(); /* set lock of interest before count */ | ||
461 | asm(LOCK_PREFIX " incw %0" | ||
462 | : "+m" (xl->spinners) : : "memory"); | ||
463 | } | ||
464 | |||
465 | static inline void unspinning_lock(struct xen_spinlock *xl) | ||
466 | { | ||
467 | asm(LOCK_PREFIX " decw %0" | ||
468 | : "+m" (xl->spinners) : : "memory"); | ||
469 | wmb(); /* decrement count before clearing lock */ | ||
470 | __get_cpu_var(lock_spinners) = NULL; | ||
471 | } | ||
472 | |||
473 | static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) | ||
474 | { | ||
475 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
476 | int irq = __get_cpu_var(lock_kicker_irq); | ||
477 | int ret; | ||
478 | |||
479 | /* If kicker interrupts not initialized yet, just spin */ | ||
480 | if (irq == -1) | ||
481 | return 0; | ||
482 | |||
483 | /* announce we're spinning */ | ||
484 | spinning_lock(xl); | ||
485 | |||
486 | /* clear pending */ | ||
487 | xen_clear_irq_pending(irq); | ||
488 | |||
489 | /* check again make sure it didn't become free while | ||
490 | we weren't looking */ | ||
491 | ret = xen_spin_trylock(lock); | ||
492 | if (ret) | ||
493 | goto out; | ||
494 | |||
495 | /* block until irq becomes pending */ | ||
496 | xen_poll_irq(irq); | ||
497 | kstat_this_cpu.irqs[irq]++; | ||
498 | |||
499 | out: | ||
500 | unspinning_lock(xl); | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | static void xen_spin_lock(struct raw_spinlock *lock) | ||
505 | { | ||
506 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
507 | int timeout; | ||
508 | u8 oldval; | ||
509 | |||
510 | do { | ||
511 | timeout = 1 << 10; | ||
512 | |||
513 | asm("1: xchgb %1,%0\n" | ||
514 | " testb %1,%1\n" | ||
515 | " jz 3f\n" | ||
516 | "2: rep;nop\n" | ||
517 | " cmpb $0,%0\n" | ||
518 | " je 1b\n" | ||
519 | " dec %2\n" | ||
520 | " jnz 2b\n" | ||
521 | "3:\n" | ||
522 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
523 | : "1" (1) | ||
524 | : "memory"); | ||
525 | |||
526 | } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); | ||
527 | } | ||
528 | |||
529 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | ||
530 | { | ||
531 | int cpu; | ||
532 | |||
533 | for_each_online_cpu(cpu) { | ||
534 | /* XXX should mix up next cpu selection */ | ||
535 | if (per_cpu(lock_spinners, cpu) == xl) { | ||
536 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | ||
537 | break; | ||
538 | } | ||
539 | } | ||
540 | } | ||
541 | |||
542 | static void xen_spin_unlock(struct raw_spinlock *lock) | ||
543 | { | ||
544 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
545 | |||
546 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
547 | xl->lock = 0; /* release lock */ | ||
548 | |||
549 | /* make sure unlock happens before kick */ | ||
550 | barrier(); | ||
551 | |||
552 | if (unlikely(xl->spinners)) | ||
553 | xen_spin_unlock_slow(xl); | ||
554 | } | ||
555 | |||
556 | static __cpuinit void xen_init_lock_cpu(int cpu) | ||
557 | { | ||
558 | int irq; | ||
559 | const char *name; | ||
560 | |||
561 | name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); | ||
562 | irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, | ||
563 | cpu, | ||
564 | xen_reschedule_interrupt, | ||
565 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
566 | name, | ||
567 | NULL); | ||
568 | |||
569 | if (irq >= 0) { | ||
570 | disable_irq(irq); /* make sure it's never delivered */ | ||
571 | per_cpu(lock_kicker_irq, cpu) = irq; | ||
572 | } | ||
573 | |||
574 | printk("cpu %d spinlock event irq %d\n", cpu, irq); | ||
575 | } | ||
576 | |||
577 | static void __init xen_init_spinlocks(void) | ||
578 | { | ||
579 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | ||
580 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | ||
581 | pv_lock_ops.spin_lock = xen_spin_lock; | ||
582 | pv_lock_ops.spin_trylock = xen_spin_trylock; | ||
583 | pv_lock_ops.spin_unlock = xen_spin_unlock; | ||
584 | } | ||
585 | |||
586 | static const struct smp_ops xen_smp_ops __initdata = { | 469 | static const struct smp_ops xen_smp_ops __initdata = { |
587 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | 470 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, |
588 | .smp_prepare_cpus = xen_smp_prepare_cpus, | 471 | .smp_prepare_cpus = xen_smp_prepare_cpus, |
589 | .cpu_up = xen_cpu_up, | ||
590 | .smp_cpus_done = xen_smp_cpus_done, | 472 | .smp_cpus_done = xen_smp_cpus_done, |
591 | 473 | ||
474 | .cpu_up = xen_cpu_up, | ||
475 | .cpu_die = xen_cpu_die, | ||
476 | .cpu_disable = xen_cpu_disable, | ||
477 | .play_dead = xen_play_dead, | ||
478 | |||
592 | .smp_send_stop = xen_smp_send_stop, | 479 | .smp_send_stop = xen_smp_send_stop, |
593 | .smp_send_reschedule = xen_smp_send_reschedule, | 480 | .smp_send_reschedule = xen_smp_send_reschedule, |
594 | 481 | ||
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c new file mode 100644 index 000000000000..dd71e3a021cd --- /dev/null +++ b/arch/x86/xen/spinlock.c | |||
@@ -0,0 +1,428 @@ | |||
1 | /* | ||
2 | * Split spinlock implementation out into its own file, so it can be | ||
3 | * compiled in a FTRACE-compatible way. | ||
4 | */ | ||
5 | #include <linux/kernel_stat.h> | ||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/debugfs.h> | ||
8 | #include <linux/log2.h> | ||
9 | |||
10 | #include <asm/paravirt.h> | ||
11 | |||
12 | #include <xen/interface/xen.h> | ||
13 | #include <xen/events.h> | ||
14 | |||
15 | #include "xen-ops.h" | ||
16 | #include "debugfs.h" | ||
17 | |||
18 | #ifdef CONFIG_XEN_DEBUG_FS | ||
19 | static struct xen_spinlock_stats | ||
20 | { | ||
21 | u64 taken; | ||
22 | u32 taken_slow; | ||
23 | u32 taken_slow_nested; | ||
24 | u32 taken_slow_pickup; | ||
25 | u32 taken_slow_spurious; | ||
26 | u32 taken_slow_irqenable; | ||
27 | |||
28 | u64 released; | ||
29 | u32 released_slow; | ||
30 | u32 released_slow_kicked; | ||
31 | |||
32 | #define HISTO_BUCKETS 30 | ||
33 | u32 histo_spin_total[HISTO_BUCKETS+1]; | ||
34 | u32 histo_spin_spinning[HISTO_BUCKETS+1]; | ||
35 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | ||
36 | |||
37 | u64 time_total; | ||
38 | u64 time_spinning; | ||
39 | u64 time_blocked; | ||
40 | } spinlock_stats; | ||
41 | |||
42 | static u8 zero_stats; | ||
43 | |||
44 | static unsigned lock_timeout = 1 << 10; | ||
45 | #define TIMEOUT lock_timeout | ||
46 | |||
47 | static inline void check_zero(void) | ||
48 | { | ||
49 | if (unlikely(zero_stats)) { | ||
50 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
51 | zero_stats = 0; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | #define ADD_STATS(elem, val) \ | ||
56 | do { check_zero(); spinlock_stats.elem += (val); } while(0) | ||
57 | |||
58 | static inline u64 spin_time_start(void) | ||
59 | { | ||
60 | return xen_clocksource_read(); | ||
61 | } | ||
62 | |||
63 | static void __spin_time_accum(u64 delta, u32 *array) | ||
64 | { | ||
65 | unsigned index = ilog2(delta); | ||
66 | |||
67 | check_zero(); | ||
68 | |||
69 | if (index < HISTO_BUCKETS) | ||
70 | array[index]++; | ||
71 | else | ||
72 | array[HISTO_BUCKETS]++; | ||
73 | } | ||
74 | |||
75 | static inline void spin_time_accum_spinning(u64 start) | ||
76 | { | ||
77 | u32 delta = xen_clocksource_read() - start; | ||
78 | |||
79 | __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); | ||
80 | spinlock_stats.time_spinning += delta; | ||
81 | } | ||
82 | |||
83 | static inline void spin_time_accum_total(u64 start) | ||
84 | { | ||
85 | u32 delta = xen_clocksource_read() - start; | ||
86 | |||
87 | __spin_time_accum(delta, spinlock_stats.histo_spin_total); | ||
88 | spinlock_stats.time_total += delta; | ||
89 | } | ||
90 | |||
91 | static inline void spin_time_accum_blocked(u64 start) | ||
92 | { | ||
93 | u32 delta = xen_clocksource_read() - start; | ||
94 | |||
95 | __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); | ||
96 | spinlock_stats.time_blocked += delta; | ||
97 | } | ||
98 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
99 | #define TIMEOUT (1 << 10) | ||
100 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | ||
101 | |||
102 | static inline u64 spin_time_start(void) | ||
103 | { | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | static inline void spin_time_accum_total(u64 start) | ||
108 | { | ||
109 | } | ||
110 | static inline void spin_time_accum_spinning(u64 start) | ||
111 | { | ||
112 | } | ||
113 | static inline void spin_time_accum_blocked(u64 start) | ||
114 | { | ||
115 | } | ||
116 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
117 | |||
118 | struct xen_spinlock { | ||
119 | unsigned char lock; /* 0 -> free; 1 -> locked */ | ||
120 | unsigned short spinners; /* count of waiting cpus */ | ||
121 | }; | ||
122 | |||
123 | static int xen_spin_is_locked(struct raw_spinlock *lock) | ||
124 | { | ||
125 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
126 | |||
127 | return xl->lock != 0; | ||
128 | } | ||
129 | |||
130 | static int xen_spin_is_contended(struct raw_spinlock *lock) | ||
131 | { | ||
132 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
133 | |||
134 | /* Not strictly true; this is only the count of contended | ||
135 | lock-takers entering the slow path. */ | ||
136 | return xl->spinners != 0; | ||
137 | } | ||
138 | |||
139 | static int xen_spin_trylock(struct raw_spinlock *lock) | ||
140 | { | ||
141 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
142 | u8 old = 1; | ||
143 | |||
144 | asm("xchgb %b0,%1" | ||
145 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
146 | |||
147 | return old == 0; | ||
148 | } | ||
149 | |||
150 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | ||
151 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | ||
152 | |||
153 | /* | ||
154 | * Mark a cpu as interested in a lock. Returns the CPU's previous | ||
155 | * lock of interest, in case we got preempted by an interrupt. | ||
156 | */ | ||
157 | static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | ||
158 | { | ||
159 | struct xen_spinlock *prev; | ||
160 | |||
161 | prev = __get_cpu_var(lock_spinners); | ||
162 | __get_cpu_var(lock_spinners) = xl; | ||
163 | |||
164 | wmb(); /* set lock of interest before count */ | ||
165 | |||
166 | asm(LOCK_PREFIX " incw %0" | ||
167 | : "+m" (xl->spinners) : : "memory"); | ||
168 | |||
169 | return prev; | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Mark a cpu as no longer interested in a lock. Restores previous | ||
174 | * lock of interest (NULL for none). | ||
175 | */ | ||
176 | static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) | ||
177 | { | ||
178 | asm(LOCK_PREFIX " decw %0" | ||
179 | : "+m" (xl->spinners) : : "memory"); | ||
180 | wmb(); /* decrement count before restoring lock */ | ||
181 | __get_cpu_var(lock_spinners) = prev; | ||
182 | } | ||
183 | |||
184 | static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable) | ||
185 | { | ||
186 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
187 | struct xen_spinlock *prev; | ||
188 | int irq = __get_cpu_var(lock_kicker_irq); | ||
189 | int ret; | ||
190 | unsigned long flags; | ||
191 | u64 start; | ||
192 | |||
193 | /* If kicker interrupts not initialized yet, just spin */ | ||
194 | if (irq == -1) | ||
195 | return 0; | ||
196 | |||
197 | start = spin_time_start(); | ||
198 | |||
199 | /* announce we're spinning */ | ||
200 | prev = spinning_lock(xl); | ||
201 | |||
202 | flags = __raw_local_save_flags(); | ||
203 | if (irq_enable) { | ||
204 | ADD_STATS(taken_slow_irqenable, 1); | ||
205 | raw_local_irq_enable(); | ||
206 | } | ||
207 | |||
208 | ADD_STATS(taken_slow, 1); | ||
209 | ADD_STATS(taken_slow_nested, prev != NULL); | ||
210 | |||
211 | do { | ||
212 | /* clear pending */ | ||
213 | xen_clear_irq_pending(irq); | ||
214 | |||
215 | /* check again make sure it didn't become free while | ||
216 | we weren't looking */ | ||
217 | ret = xen_spin_trylock(lock); | ||
218 | if (ret) { | ||
219 | ADD_STATS(taken_slow_pickup, 1); | ||
220 | |||
221 | /* | ||
222 | * If we interrupted another spinlock while it | ||
223 | * was blocking, make sure it doesn't block | ||
224 | * without rechecking the lock. | ||
225 | */ | ||
226 | if (prev != NULL) | ||
227 | xen_set_irq_pending(irq); | ||
228 | goto out; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * Block until irq becomes pending. If we're | ||
233 | * interrupted at this point (after the trylock but | ||
234 | * before entering the block), then the nested lock | ||
235 | * handler guarantees that the irq will be left | ||
236 | * pending if there's any chance the lock became free; | ||
237 | * xen_poll_irq() returns immediately if the irq is | ||
238 | * pending. | ||
239 | */ | ||
240 | xen_poll_irq(irq); | ||
241 | ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); | ||
242 | } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ | ||
243 | |||
244 | kstat_this_cpu.irqs[irq]++; | ||
245 | |||
246 | out: | ||
247 | raw_local_irq_restore(flags); | ||
248 | unspinning_lock(xl, prev); | ||
249 | spin_time_accum_blocked(start); | ||
250 | |||
251 | return ret; | ||
252 | } | ||
253 | |||
254 | static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable) | ||
255 | { | ||
256 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
257 | unsigned timeout; | ||
258 | u8 oldval; | ||
259 | u64 start_spin; | ||
260 | |||
261 | ADD_STATS(taken, 1); | ||
262 | |||
263 | start_spin = spin_time_start(); | ||
264 | |||
265 | do { | ||
266 | u64 start_spin_fast = spin_time_start(); | ||
267 | |||
268 | timeout = TIMEOUT; | ||
269 | |||
270 | asm("1: xchgb %1,%0\n" | ||
271 | " testb %1,%1\n" | ||
272 | " jz 3f\n" | ||
273 | "2: rep;nop\n" | ||
274 | " cmpb $0,%0\n" | ||
275 | " je 1b\n" | ||
276 | " dec %2\n" | ||
277 | " jnz 2b\n" | ||
278 | "3:\n" | ||
279 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
280 | : "1" (1) | ||
281 | : "memory"); | ||
282 | |||
283 | spin_time_accum_spinning(start_spin_fast); | ||
284 | |||
285 | } while (unlikely(oldval != 0 && | ||
286 | (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); | ||
287 | |||
288 | spin_time_accum_total(start_spin); | ||
289 | } | ||
290 | |||
291 | static void xen_spin_lock(struct raw_spinlock *lock) | ||
292 | { | ||
293 | __xen_spin_lock(lock, false); | ||
294 | } | ||
295 | |||
296 | static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) | ||
297 | { | ||
298 | __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); | ||
299 | } | ||
300 | |||
301 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | ||
302 | { | ||
303 | int cpu; | ||
304 | |||
305 | ADD_STATS(released_slow, 1); | ||
306 | |||
307 | for_each_online_cpu(cpu) { | ||
308 | /* XXX should mix up next cpu selection */ | ||
309 | if (per_cpu(lock_spinners, cpu) == xl) { | ||
310 | ADD_STATS(released_slow_kicked, 1); | ||
311 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | ||
312 | break; | ||
313 | } | ||
314 | } | ||
315 | } | ||
316 | |||
317 | static void xen_spin_unlock(struct raw_spinlock *lock) | ||
318 | { | ||
319 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
320 | |||
321 | ADD_STATS(released, 1); | ||
322 | |||
323 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
324 | xl->lock = 0; /* release lock */ | ||
325 | |||
326 | /* make sure unlock happens before kick */ | ||
327 | barrier(); | ||
328 | |||
329 | if (unlikely(xl->spinners)) | ||
330 | xen_spin_unlock_slow(xl); | ||
331 | } | ||
332 | |||
333 | static irqreturn_t dummy_handler(int irq, void *dev_id) | ||
334 | { | ||
335 | BUG(); | ||
336 | return IRQ_HANDLED; | ||
337 | } | ||
338 | |||
339 | void __cpuinit xen_init_lock_cpu(int cpu) | ||
340 | { | ||
341 | int irq; | ||
342 | const char *name; | ||
343 | |||
344 | name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); | ||
345 | irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, | ||
346 | cpu, | ||
347 | dummy_handler, | ||
348 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
349 | name, | ||
350 | NULL); | ||
351 | |||
352 | if (irq >= 0) { | ||
353 | disable_irq(irq); /* make sure it's never delivered */ | ||
354 | per_cpu(lock_kicker_irq, cpu) = irq; | ||
355 | } | ||
356 | |||
357 | printk("cpu %d spinlock event irq %d\n", cpu, irq); | ||
358 | } | ||
359 | |||
360 | void xen_uninit_lock_cpu(int cpu) | ||
361 | { | ||
362 | unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); | ||
363 | } | ||
364 | |||
365 | void __init xen_init_spinlocks(void) | ||
366 | { | ||
367 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | ||
368 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | ||
369 | pv_lock_ops.spin_lock = xen_spin_lock; | ||
370 | pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; | ||
371 | pv_lock_ops.spin_trylock = xen_spin_trylock; | ||
372 | pv_lock_ops.spin_unlock = xen_spin_unlock; | ||
373 | } | ||
374 | |||
375 | #ifdef CONFIG_XEN_DEBUG_FS | ||
376 | |||
377 | static struct dentry *d_spin_debug; | ||
378 | |||
379 | static int __init xen_spinlock_debugfs(void) | ||
380 | { | ||
381 | struct dentry *d_xen = xen_init_debugfs(); | ||
382 | |||
383 | if (d_xen == NULL) | ||
384 | return -ENOMEM; | ||
385 | |||
386 | d_spin_debug = debugfs_create_dir("spinlocks", d_xen); | ||
387 | |||
388 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | ||
389 | |||
390 | debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout); | ||
391 | |||
392 | debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken); | ||
393 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | ||
394 | &spinlock_stats.taken_slow); | ||
395 | debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug, | ||
396 | &spinlock_stats.taken_slow_nested); | ||
397 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | ||
398 | &spinlock_stats.taken_slow_pickup); | ||
399 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, | ||
400 | &spinlock_stats.taken_slow_spurious); | ||
401 | debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug, | ||
402 | &spinlock_stats.taken_slow_irqenable); | ||
403 | |||
404 | debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released); | ||
405 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | ||
406 | &spinlock_stats.released_slow); | ||
407 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | ||
408 | &spinlock_stats.released_slow_kicked); | ||
409 | |||
410 | debugfs_create_u64("time_spinning", 0444, d_spin_debug, | ||
411 | &spinlock_stats.time_spinning); | ||
412 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | ||
413 | &spinlock_stats.time_blocked); | ||
414 | debugfs_create_u64("time_total", 0444, d_spin_debug, | ||
415 | &spinlock_stats.time_total); | ||
416 | |||
417 | xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug, | ||
418 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); | ||
419 | xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, | ||
420 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); | ||
421 | xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | ||
422 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | ||
423 | |||
424 | return 0; | ||
425 | } | ||
426 | fs_initcall(xen_spinlock_debugfs); | ||
427 | |||
428 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 685b77470fc3..004ba86326ae 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -30,8 +30,6 @@ | |||
30 | #define TIMER_SLOP 100000 | 30 | #define TIMER_SLOP 100000 |
31 | #define NS_PER_TICK (1000000000LL / HZ) | 31 | #define NS_PER_TICK (1000000000LL / HZ) |
32 | 32 | ||
33 | static cycle_t xen_clocksource_read(void); | ||
34 | |||
35 | /* runstate info updated by Xen */ | 33 | /* runstate info updated by Xen */ |
36 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); | 34 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); |
37 | 35 | ||
@@ -213,7 +211,7 @@ unsigned long xen_tsc_khz(void) | |||
213 | return xen_khz; | 211 | return xen_khz; |
214 | } | 212 | } |
215 | 213 | ||
216 | static cycle_t xen_clocksource_read(void) | 214 | cycle_t xen_clocksource_read(void) |
217 | { | 215 | { |
218 | struct pvclock_vcpu_time_info *src; | 216 | struct pvclock_vcpu_time_info *src; |
219 | cycle_t ret; | 217 | cycle_t ret; |
@@ -452,6 +450,14 @@ void xen_setup_timer(int cpu) | |||
452 | setup_runstate_info(cpu); | 450 | setup_runstate_info(cpu); |
453 | } | 451 | } |
454 | 452 | ||
453 | void xen_teardown_timer(int cpu) | ||
454 | { | ||
455 | struct clock_event_device *evt; | ||
456 | BUG_ON(cpu == 0); | ||
457 | evt = &per_cpu(xen_clock_events, cpu); | ||
458 | unbind_from_irqhandler(evt->irq, NULL); | ||
459 | } | ||
460 | |||
455 | void xen_setup_cpu_clockevents(void) | 461 | void xen_setup_cpu_clockevents(void) |
456 | { | 462 | { |
457 | BUG_ON(preemptible()); | 463 | BUG_ON(preemptible()); |
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 2497a30f41de..42786f59d9c0 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S | |||
@@ -298,7 +298,7 @@ check_events: | |||
298 | push %eax | 298 | push %eax |
299 | push %ecx | 299 | push %ecx |
300 | push %edx | 300 | push %edx |
301 | call force_evtchn_callback | 301 | call xen_force_evtchn_callback |
302 | pop %edx | 302 | pop %edx |
303 | pop %ecx | 303 | pop %ecx |
304 | pop %eax | 304 | pop %eax |
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 7f58304fafb3..05794c566e87 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S | |||
@@ -26,8 +26,15 @@ | |||
26 | /* Pseudo-flag used for virtual NMI, which we don't implement yet */ | 26 | /* Pseudo-flag used for virtual NMI, which we don't implement yet */ |
27 | #define XEN_EFLAGS_NMI 0x80000000 | 27 | #define XEN_EFLAGS_NMI 0x80000000 |
28 | 28 | ||
29 | #if 0 | 29 | #if 1 |
30 | #include <asm/percpu.h> | 30 | /* |
31 | x86-64 does not yet support direct access to percpu variables | ||
32 | via a segment override, so we just need to make sure this code | ||
33 | never gets used | ||
34 | */ | ||
35 | #define BUG ud2a | ||
36 | #define PER_CPU_VAR(var, off) 0xdeadbeef | ||
37 | #endif | ||
31 | 38 | ||
32 | /* | 39 | /* |
33 | Enable events. This clears the event mask and tests the pending | 40 | Enable events. This clears the event mask and tests the pending |
@@ -35,6 +42,8 @@ | |||
35 | events, then enter the hypervisor to get them handled. | 42 | events, then enter the hypervisor to get them handled. |
36 | */ | 43 | */ |
37 | ENTRY(xen_irq_enable_direct) | 44 | ENTRY(xen_irq_enable_direct) |
45 | BUG | ||
46 | |||
38 | /* Unmask events */ | 47 | /* Unmask events */ |
39 | movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) | 48 | movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) |
40 | 49 | ||
@@ -58,6 +67,8 @@ ENDPATCH(xen_irq_enable_direct) | |||
58 | non-zero. | 67 | non-zero. |
59 | */ | 68 | */ |
60 | ENTRY(xen_irq_disable_direct) | 69 | ENTRY(xen_irq_disable_direct) |
70 | BUG | ||
71 | |||
61 | movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) | 72 | movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) |
62 | ENDPATCH(xen_irq_disable_direct) | 73 | ENDPATCH(xen_irq_disable_direct) |
63 | ret | 74 | ret |
@@ -74,6 +85,8 @@ ENDPATCH(xen_irq_disable_direct) | |||
74 | Xen and x86 use opposite senses (mask vs enable). | 85 | Xen and x86 use opposite senses (mask vs enable). |
75 | */ | 86 | */ |
76 | ENTRY(xen_save_fl_direct) | 87 | ENTRY(xen_save_fl_direct) |
88 | BUG | ||
89 | |||
77 | testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) | 90 | testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) |
78 | setz %ah | 91 | setz %ah |
79 | addb %ah,%ah | 92 | addb %ah,%ah |
@@ -91,6 +104,8 @@ ENDPATCH(xen_save_fl_direct) | |||
91 | if so. | 104 | if so. |
92 | */ | 105 | */ |
93 | ENTRY(xen_restore_fl_direct) | 106 | ENTRY(xen_restore_fl_direct) |
107 | BUG | ||
108 | |||
94 | testb $X86_EFLAGS_IF>>8, %ah | 109 | testb $X86_EFLAGS_IF>>8, %ah |
95 | setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) | 110 | setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) |
96 | /* Preempt here doesn't matter because that will deal with | 111 | /* Preempt here doesn't matter because that will deal with |
@@ -122,7 +137,7 @@ check_events: | |||
122 | push %r9 | 137 | push %r9 |
123 | push %r10 | 138 | push %r10 |
124 | push %r11 | 139 | push %r11 |
125 | call force_evtchn_callback | 140 | call xen_force_evtchn_callback |
126 | pop %r11 | 141 | pop %r11 |
127 | pop %r10 | 142 | pop %r10 |
128 | pop %r9 | 143 | pop %r9 |
@@ -133,7 +148,6 @@ check_events: | |||
133 | pop %rcx | 148 | pop %rcx |
134 | pop %rax | 149 | pop %rax |
135 | ret | 150 | ret |
136 | #endif | ||
137 | 151 | ||
138 | ENTRY(xen_adjust_exception_frame) | 152 | ENTRY(xen_adjust_exception_frame) |
139 | mov 8+0(%rsp),%rcx | 153 | mov 8+0(%rsp),%rcx |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index dd3c23152a2e..d7422dc2a55c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define XEN_OPS_H | 2 | #define XEN_OPS_H |
3 | 3 | ||
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | #include <linux/clocksource.h> | ||
5 | #include <linux/irqreturn.h> | 6 | #include <linux/irqreturn.h> |
6 | #include <xen/xen-ops.h> | 7 | #include <xen/xen-ops.h> |
7 | 8 | ||
@@ -31,7 +32,10 @@ void xen_vcpu_restore(void); | |||
31 | 32 | ||
32 | void __init xen_build_dynamic_phys_to_machine(void); | 33 | void __init xen_build_dynamic_phys_to_machine(void); |
33 | 34 | ||
35 | void xen_init_irq_ops(void); | ||
34 | void xen_setup_timer(int cpu); | 36 | void xen_setup_timer(int cpu); |
37 | void xen_teardown_timer(int cpu); | ||
38 | cycle_t xen_clocksource_read(void); | ||
35 | void xen_setup_cpu_clockevents(void); | 39 | void xen_setup_cpu_clockevents(void); |
36 | unsigned long xen_tsc_khz(void); | 40 | unsigned long xen_tsc_khz(void); |
37 | void __init xen_time_init(void); | 41 | void __init xen_time_init(void); |
@@ -50,6 +54,10 @@ void __init xen_setup_vcpu_info_placement(void); | |||
50 | #ifdef CONFIG_SMP | 54 | #ifdef CONFIG_SMP |
51 | void xen_smp_init(void); | 55 | void xen_smp_init(void); |
52 | 56 | ||
57 | void __init xen_init_spinlocks(void); | ||
58 | __cpuinit void xen_init_lock_cpu(int cpu); | ||
59 | void xen_uninit_lock_cpu(int cpu); | ||
60 | |||
53 | extern cpumask_t xen_cpu_initialized_map; | 61 | extern cpumask_t xen_cpu_initialized_map; |
54 | #else | 62 | #else |
55 | static inline void xen_smp_init(void) {} | 63 | static inline void xen_smp_init(void) {} |
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index bff602ccccf3..1a50ae70f716 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -1066,7 +1066,7 @@ static struct xenbus_driver blkfront = { | |||
1066 | 1066 | ||
1067 | static int __init xlblk_init(void) | 1067 | static int __init xlblk_init(void) |
1068 | { | 1068 | { |
1069 | if (!is_running_on_xen()) | 1069 | if (!xen_domain()) |
1070 | return -ENODEV; | 1070 | return -ENODEV; |
1071 | 1071 | ||
1072 | if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { | 1072 | if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { |
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index 6b70aa66a587..538ceea5e7df 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c | |||
@@ -108,8 +108,8 @@ static int __init xen_init(void) | |||
108 | { | 108 | { |
109 | struct hvc_struct *hp; | 109 | struct hvc_struct *hp; |
110 | 110 | ||
111 | if (!is_running_on_xen() || | 111 | if (!xen_pv_domain() || |
112 | is_initial_xendomain() || | 112 | xen_initial_domain() || |
113 | !xen_start_info->console.domU.evtchn) | 113 | !xen_start_info->console.domU.evtchn) |
114 | return -ENODEV; | 114 | return -ENODEV; |
115 | 115 | ||
@@ -142,7 +142,7 @@ static void __exit xen_fini(void) | |||
142 | 142 | ||
143 | static int xen_cons_init(void) | 143 | static int xen_cons_init(void) |
144 | { | 144 | { |
145 | if (!is_running_on_xen()) | 145 | if (!xen_pv_domain()) |
146 | return 0; | 146 | return 0; |
147 | 147 | ||
148 | hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); | 148 | hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); |
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index 9ce3b3baf3a2..3ab6362f043c 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c | |||
@@ -335,11 +335,11 @@ static struct xenbus_driver xenkbd = { | |||
335 | 335 | ||
336 | static int __init xenkbd_init(void) | 336 | static int __init xenkbd_init(void) |
337 | { | 337 | { |
338 | if (!is_running_on_xen()) | 338 | if (!xen_domain()) |
339 | return -ENODEV; | 339 | return -ENODEV; |
340 | 340 | ||
341 | /* Nothing to do if running in dom0. */ | 341 | /* Nothing to do if running in dom0. */ |
342 | if (is_initial_xendomain()) | 342 | if (xen_initial_domain()) |
343 | return -ENODEV; | 343 | return -ENODEV; |
344 | 344 | ||
345 | return xenbus_register_frontend(&xenkbd); | 345 | return xenbus_register_frontend(&xenkbd); |
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index c749bdba214c..3c3dd403f5dd 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c | |||
@@ -1794,10 +1794,10 @@ static struct xenbus_driver netfront = { | |||
1794 | 1794 | ||
1795 | static int __init netif_init(void) | 1795 | static int __init netif_init(void) |
1796 | { | 1796 | { |
1797 | if (!is_running_on_xen()) | 1797 | if (!xen_domain()) |
1798 | return -ENODEV; | 1798 | return -ENODEV; |
1799 | 1799 | ||
1800 | if (is_initial_xendomain()) | 1800 | if (xen_initial_domain()) |
1801 | return 0; | 1801 | return 0; |
1802 | 1802 | ||
1803 | printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); | 1803 | printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); |
@@ -1809,7 +1809,7 @@ module_init(netif_init); | |||
1809 | 1809 | ||
1810 | static void __exit netif_exit(void) | 1810 | static void __exit netif_exit(void) |
1811 | { | 1811 | { |
1812 | if (is_initial_xendomain()) | 1812 | if (xen_initial_domain()) |
1813 | return; | 1813 | return; |
1814 | 1814 | ||
1815 | xenbus_unregister_driver(&netfront); | 1815 | xenbus_unregister_driver(&netfront); |
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 47ed39b52f9c..a463b3dd837b 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c | |||
@@ -680,11 +680,11 @@ static struct xenbus_driver xenfb = { | |||
680 | 680 | ||
681 | static int __init xenfb_init(void) | 681 | static int __init xenfb_init(void) |
682 | { | 682 | { |
683 | if (!is_running_on_xen()) | 683 | if (!xen_domain()) |
684 | return -ENODEV; | 684 | return -ENODEV; |
685 | 685 | ||
686 | /* Nothing to do if running in dom0. */ | 686 | /* Nothing to do if running in dom0. */ |
687 | if (is_initial_xendomain()) | 687 | if (xen_initial_domain()) |
688 | return -ENODEV; | 688 | return -ENODEV; |
689 | 689 | ||
690 | return xenbus_register_frontend(&xenfb); | 690 | return xenbus_register_frontend(&xenfb); |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 363286c54290..d2a8fdf0e191 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | obj-y += grant-table.o features.o events.o manage.o | 1 | obj-y += grant-table.o features.o events.o manage.o |
2 | obj-y += xenbus/ | 2 | obj-y += xenbus/ |
3 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | ||
3 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | 4 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o |
4 | obj-$(CONFIG_XEN_BALLOON) += balloon.o | 5 | obj-$(CONFIG_XEN_BALLOON) += balloon.o |
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 2e15da5459cf..8c83abc73400 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c | |||
@@ -53,7 +53,6 @@ | |||
53 | #include <asm/tlb.h> | 53 | #include <asm/tlb.h> |
54 | 54 | ||
55 | #include <xen/interface/memory.h> | 55 | #include <xen/interface/memory.h> |
56 | #include <xen/balloon.h> | ||
57 | #include <xen/xenbus.h> | 56 | #include <xen/xenbus.h> |
58 | #include <xen/features.h> | 57 | #include <xen/features.h> |
59 | #include <xen/page.h> | 58 | #include <xen/page.h> |
@@ -226,9 +225,8 @@ static int increase_reservation(unsigned long nr_pages) | |||
226 | } | 225 | } |
227 | 226 | ||
228 | set_xen_guest_handle(reservation.extent_start, frame_list); | 227 | set_xen_guest_handle(reservation.extent_start, frame_list); |
229 | reservation.nr_extents = nr_pages; | 228 | reservation.nr_extents = nr_pages; |
230 | rc = HYPERVISOR_memory_op( | 229 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); |
231 | XENMEM_populate_physmap, &reservation); | ||
232 | if (rc < nr_pages) { | 230 | if (rc < nr_pages) { |
233 | if (rc > 0) { | 231 | if (rc > 0) { |
234 | int ret; | 232 | int ret; |
@@ -236,7 +234,7 @@ static int increase_reservation(unsigned long nr_pages) | |||
236 | /* We hit the Xen hard limit: reprobe. */ | 234 | /* We hit the Xen hard limit: reprobe. */ |
237 | reservation.nr_extents = rc; | 235 | reservation.nr_extents = rc; |
238 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | 236 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, |
239 | &reservation); | 237 | &reservation); |
240 | BUG_ON(ret != rc); | 238 | BUG_ON(ret != rc); |
241 | } | 239 | } |
242 | if (rc >= 0) | 240 | if (rc >= 0) |
@@ -420,7 +418,7 @@ static int __init balloon_init(void) | |||
420 | unsigned long pfn; | 418 | unsigned long pfn; |
421 | struct page *page; | 419 | struct page *page; |
422 | 420 | ||
423 | if (!is_running_on_xen()) | 421 | if (!xen_pv_domain()) |
424 | return -ENODEV; | 422 | return -ENODEV; |
425 | 423 | ||
426 | pr_info("xen_balloon: Initialising balloon driver.\n"); | 424 | pr_info("xen_balloon: Initialising balloon driver.\n"); |
@@ -464,136 +462,13 @@ static void balloon_exit(void) | |||
464 | 462 | ||
465 | module_exit(balloon_exit); | 463 | module_exit(balloon_exit); |
466 | 464 | ||
467 | static void balloon_update_driver_allowance(long delta) | 465 | #define BALLOON_SHOW(name, format, args...) \ |
468 | { | 466 | static ssize_t show_##name(struct sys_device *dev, \ |
469 | unsigned long flags; | 467 | struct sysdev_attribute *attr, \ |
470 | 468 | char *buf) \ | |
471 | spin_lock_irqsave(&balloon_lock, flags); | 469 | { \ |
472 | balloon_stats.driver_pages += delta; | 470 | return sprintf(buf, format, ##args); \ |
473 | spin_unlock_irqrestore(&balloon_lock, flags); | 471 | } \ |
474 | } | ||
475 | |||
476 | static int dealloc_pte_fn( | ||
477 | pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | ||
478 | { | ||
479 | unsigned long mfn = pte_mfn(*pte); | ||
480 | int ret; | ||
481 | struct xen_memory_reservation reservation = { | ||
482 | .nr_extents = 1, | ||
483 | .extent_order = 0, | ||
484 | .domid = DOMID_SELF | ||
485 | }; | ||
486 | set_xen_guest_handle(reservation.extent_start, &mfn); | ||
487 | set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); | ||
488 | set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); | ||
489 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | ||
490 | BUG_ON(ret != 1); | ||
491 | return 0; | ||
492 | } | ||
493 | |||
494 | static struct page **alloc_empty_pages_and_pagevec(int nr_pages) | ||
495 | { | ||
496 | unsigned long vaddr, flags; | ||
497 | struct page *page, **pagevec; | ||
498 | int i, ret; | ||
499 | |||
500 | pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); | ||
501 | if (pagevec == NULL) | ||
502 | return NULL; | ||
503 | |||
504 | for (i = 0; i < nr_pages; i++) { | ||
505 | page = pagevec[i] = alloc_page(GFP_KERNEL); | ||
506 | if (page == NULL) | ||
507 | goto err; | ||
508 | |||
509 | vaddr = (unsigned long)page_address(page); | ||
510 | |||
511 | scrub_page(page); | ||
512 | |||
513 | spin_lock_irqsave(&balloon_lock, flags); | ||
514 | |||
515 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
516 | unsigned long gmfn = page_to_pfn(page); | ||
517 | struct xen_memory_reservation reservation = { | ||
518 | .nr_extents = 1, | ||
519 | .extent_order = 0, | ||
520 | .domid = DOMID_SELF | ||
521 | }; | ||
522 | set_xen_guest_handle(reservation.extent_start, &gmfn); | ||
523 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
524 | &reservation); | ||
525 | if (ret == 1) | ||
526 | ret = 0; /* success */ | ||
527 | } else { | ||
528 | ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, | ||
529 | dealloc_pte_fn, NULL); | ||
530 | } | ||
531 | |||
532 | if (ret != 0) { | ||
533 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
534 | __free_page(page); | ||
535 | goto err; | ||
536 | } | ||
537 | |||
538 | totalram_pages = --balloon_stats.current_pages; | ||
539 | |||
540 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
541 | } | ||
542 | |||
543 | out: | ||
544 | schedule_work(&balloon_worker); | ||
545 | flush_tlb_all(); | ||
546 | return pagevec; | ||
547 | |||
548 | err: | ||
549 | spin_lock_irqsave(&balloon_lock, flags); | ||
550 | while (--i >= 0) | ||
551 | balloon_append(pagevec[i]); | ||
552 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
553 | kfree(pagevec); | ||
554 | pagevec = NULL; | ||
555 | goto out; | ||
556 | } | ||
557 | |||
558 | static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) | ||
559 | { | ||
560 | unsigned long flags; | ||
561 | int i; | ||
562 | |||
563 | if (pagevec == NULL) | ||
564 | return; | ||
565 | |||
566 | spin_lock_irqsave(&balloon_lock, flags); | ||
567 | for (i = 0; i < nr_pages; i++) { | ||
568 | BUG_ON(page_count(pagevec[i]) != 1); | ||
569 | balloon_append(pagevec[i]); | ||
570 | } | ||
571 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
572 | |||
573 | kfree(pagevec); | ||
574 | |||
575 | schedule_work(&balloon_worker); | ||
576 | } | ||
577 | |||
578 | static void balloon_release_driver_page(struct page *page) | ||
579 | { | ||
580 | unsigned long flags; | ||
581 | |||
582 | spin_lock_irqsave(&balloon_lock, flags); | ||
583 | balloon_append(page); | ||
584 | balloon_stats.driver_pages--; | ||
585 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
586 | |||
587 | schedule_work(&balloon_worker); | ||
588 | } | ||
589 | |||
590 | |||
591 | #define BALLOON_SHOW(name, format, args...) \ | ||
592 | static ssize_t show_##name(struct sys_device *dev, \ | ||
593 | char *buf) \ | ||
594 | { \ | ||
595 | return sprintf(buf, format, ##args); \ | ||
596 | } \ | ||
597 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) | 472 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) |
598 | 473 | ||
599 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); | 474 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); |
@@ -604,7 +479,8 @@ BALLOON_SHOW(hard_limit_kb, | |||
604 | (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); | 479 | (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); |
605 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); | 480 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); |
606 | 481 | ||
607 | static ssize_t show_target_kb(struct sys_device *dev, char *buf) | 482 | static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, |
483 | char *buf) | ||
608 | { | 484 | { |
609 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | 485 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); |
610 | } | 486 | } |
@@ -614,19 +490,14 @@ static ssize_t store_target_kb(struct sys_device *dev, | |||
614 | const char *buf, | 490 | const char *buf, |
615 | size_t count) | 491 | size_t count) |
616 | { | 492 | { |
617 | char memstring[64], *endchar; | 493 | char *endchar; |
618 | unsigned long long target_bytes; | 494 | unsigned long long target_bytes; |
619 | 495 | ||
620 | if (!capable(CAP_SYS_ADMIN)) | 496 | if (!capable(CAP_SYS_ADMIN)) |
621 | return -EPERM; | 497 | return -EPERM; |
622 | 498 | ||
623 | if (count <= 1) | 499 | target_bytes = memparse(buf, &endchar); |
624 | return -EBADMSG; /* runt */ | ||
625 | if (count > sizeof(memstring)) | ||
626 | return -EFBIG; /* too long */ | ||
627 | strcpy(memstring, buf); | ||
628 | 500 | ||
629 | target_bytes = memparse(memstring, &endchar); | ||
630 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | 501 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); |
631 | 502 | ||
632 | return count; | 503 | return count; |
@@ -694,20 +565,4 @@ static int register_balloon(struct sys_device *sysdev) | |||
694 | return error; | 565 | return error; |
695 | } | 566 | } |
696 | 567 | ||
697 | static void unregister_balloon(struct sys_device *sysdev) | ||
698 | { | ||
699 | int i; | ||
700 | |||
701 | sysfs_remove_group(&sysdev->kobj, &balloon_info_group); | ||
702 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) | ||
703 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
704 | sysdev_unregister(sysdev); | ||
705 | sysdev_class_unregister(&balloon_sysdev_class); | ||
706 | } | ||
707 | |||
708 | static void balloon_sysfs_exit(void) | ||
709 | { | ||
710 | unregister_balloon(&balloon_sysdev); | ||
711 | } | ||
712 | |||
713 | MODULE_LICENSE("GPL"); | 568 | MODULE_LICENSE("GPL"); |
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c new file mode 100644 index 000000000000..565280ec1c6a --- /dev/null +++ b/drivers/xen/cpu_hotplug.c | |||
@@ -0,0 +1,90 @@ | |||
1 | #include <linux/notifier.h> | ||
2 | |||
3 | #include <xen/xenbus.h> | ||
4 | |||
5 | #include <asm-x86/xen/hypervisor.h> | ||
6 | #include <asm/cpu.h> | ||
7 | |||
8 | static void enable_hotplug_cpu(int cpu) | ||
9 | { | ||
10 | if (!cpu_present(cpu)) | ||
11 | arch_register_cpu(cpu); | ||
12 | |||
13 | cpu_set(cpu, cpu_present_map); | ||
14 | } | ||
15 | |||
16 | static void disable_hotplug_cpu(int cpu) | ||
17 | { | ||
18 | if (cpu_present(cpu)) | ||
19 | arch_unregister_cpu(cpu); | ||
20 | |||
21 | cpu_clear(cpu, cpu_present_map); | ||
22 | } | ||
23 | |||
24 | static void vcpu_hotplug(unsigned int cpu) | ||
25 | { | ||
26 | int err; | ||
27 | char dir[32], state[32]; | ||
28 | |||
29 | if (!cpu_possible(cpu)) | ||
30 | return; | ||
31 | |||
32 | sprintf(dir, "cpu/%u", cpu); | ||
33 | err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); | ||
34 | if (err != 1) { | ||
35 | printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); | ||
36 | return; | ||
37 | } | ||
38 | |||
39 | if (strcmp(state, "online") == 0) { | ||
40 | enable_hotplug_cpu(cpu); | ||
41 | } else if (strcmp(state, "offline") == 0) { | ||
42 | (void)cpu_down(cpu); | ||
43 | disable_hotplug_cpu(cpu); | ||
44 | } else { | ||
45 | printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", | ||
46 | state, cpu); | ||
47 | } | ||
48 | } | ||
49 | |||
50 | static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, | ||
51 | const char **vec, unsigned int len) | ||
52 | { | ||
53 | unsigned int cpu; | ||
54 | char *cpustr; | ||
55 | const char *node = vec[XS_WATCH_PATH]; | ||
56 | |||
57 | cpustr = strstr(node, "cpu/"); | ||
58 | if (cpustr != NULL) { | ||
59 | sscanf(cpustr, "cpu/%u", &cpu); | ||
60 | vcpu_hotplug(cpu); | ||
61 | } | ||
62 | } | ||
63 | |||
64 | static int setup_cpu_watcher(struct notifier_block *notifier, | ||
65 | unsigned long event, void *data) | ||
66 | { | ||
67 | static struct xenbus_watch cpu_watch = { | ||
68 | .node = "cpu", | ||
69 | .callback = handle_vcpu_hotplug_event}; | ||
70 | |||
71 | (void)register_xenbus_watch(&cpu_watch); | ||
72 | |||
73 | return NOTIFY_DONE; | ||
74 | } | ||
75 | |||
76 | static int __init setup_vcpu_hotplug_event(void) | ||
77 | { | ||
78 | static struct notifier_block xsn_cpu = { | ||
79 | .notifier_call = setup_cpu_watcher }; | ||
80 | |||
81 | if (!xen_pv_domain()) | ||
82 | return -ENODEV; | ||
83 | |||
84 | register_xenstore_notifier(&xsn_cpu); | ||
85 | |||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | arch_initcall(setup_vcpu_hotplug_event); | ||
90 | |||
diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 0e0c28574af8..c3290bc186a0 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c | |||
@@ -84,17 +84,6 @@ static int irq_bindcount[NR_IRQS]; | |||
84 | /* Xen will never allocate port zero for any purpose. */ | 84 | /* Xen will never allocate port zero for any purpose. */ |
85 | #define VALID_EVTCHN(chn) ((chn) != 0) | 85 | #define VALID_EVTCHN(chn) ((chn) != 0) |
86 | 86 | ||
87 | /* | ||
88 | * Force a proper event-channel callback from Xen after clearing the | ||
89 | * callback mask. We do this in a very simple manner, by making a call | ||
90 | * down into Xen. The pending flag will be checked by Xen on return. | ||
91 | */ | ||
92 | void force_evtchn_callback(void) | ||
93 | { | ||
94 | (void)HYPERVISOR_xen_version(0, NULL); | ||
95 | } | ||
96 | EXPORT_SYMBOL_GPL(force_evtchn_callback); | ||
97 | |||
98 | static struct irq_chip xen_dynamic_chip; | 87 | static struct irq_chip xen_dynamic_chip; |
99 | 88 | ||
100 | /* Constructor for packed IRQ information. */ | 89 | /* Constructor for packed IRQ information. */ |
@@ -175,6 +164,12 @@ static inline void set_evtchn(int port) | |||
175 | sync_set_bit(port, &s->evtchn_pending[0]); | 164 | sync_set_bit(port, &s->evtchn_pending[0]); |
176 | } | 165 | } |
177 | 166 | ||
167 | static inline int test_evtchn(int port) | ||
168 | { | ||
169 | struct shared_info *s = HYPERVISOR_shared_info; | ||
170 | return sync_test_bit(port, &s->evtchn_pending[0]); | ||
171 | } | ||
172 | |||
178 | 173 | ||
179 | /** | 174 | /** |
180 | * notify_remote_via_irq - send event to remote end of event channel via irq | 175 | * notify_remote_via_irq - send event to remote end of event channel via irq |
@@ -365,6 +360,10 @@ static void unbind_from_irq(unsigned int irq) | |||
365 | per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) | 360 | per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) |
366 | [index_from_irq(irq)] = -1; | 361 | [index_from_irq(irq)] = -1; |
367 | break; | 362 | break; |
363 | case IRQT_IPI: | ||
364 | per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) | ||
365 | [index_from_irq(irq)] = -1; | ||
366 | break; | ||
368 | default: | 367 | default: |
369 | break; | 368 | break; |
370 | } | 369 | } |
@@ -743,6 +742,25 @@ void xen_clear_irq_pending(int irq) | |||
743 | clear_evtchn(evtchn); | 742 | clear_evtchn(evtchn); |
744 | } | 743 | } |
745 | 744 | ||
745 | void xen_set_irq_pending(int irq) | ||
746 | { | ||
747 | int evtchn = evtchn_from_irq(irq); | ||
748 | |||
749 | if (VALID_EVTCHN(evtchn)) | ||
750 | set_evtchn(evtchn); | ||
751 | } | ||
752 | |||
753 | bool xen_test_irq_pending(int irq) | ||
754 | { | ||
755 | int evtchn = evtchn_from_irq(irq); | ||
756 | bool ret = false; | ||
757 | |||
758 | if (VALID_EVTCHN(evtchn)) | ||
759 | ret = test_evtchn(evtchn); | ||
760 | |||
761 | return ret; | ||
762 | } | ||
763 | |||
746 | /* Poll waiting for an irq to become pending. In the usual case, the | 764 | /* Poll waiting for an irq to become pending. In the usual case, the |
747 | irq will be disabled so it won't deliver an interrupt. */ | 765 | irq will be disabled so it won't deliver an interrupt. */ |
748 | void xen_poll_irq(int irq) | 766 | void xen_poll_irq(int irq) |
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index e9e11168616a..06592b9da83c 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -508,7 +508,7 @@ static int __devinit gnttab_init(void) | |||
508 | unsigned int max_nr_glist_frames, nr_glist_frames; | 508 | unsigned int max_nr_glist_frames, nr_glist_frames; |
509 | unsigned int nr_init_grefs; | 509 | unsigned int nr_init_grefs; |
510 | 510 | ||
511 | if (!is_running_on_xen()) | 511 | if (!xen_domain()) |
512 | return -ENODEV; | 512 | return -ENODEV; |
513 | 513 | ||
514 | nr_grant_frames = 1; | 514 | nr_grant_frames = 1; |
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 57ceb5346b74..7f24a98a446f 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c | |||
@@ -814,7 +814,7 @@ static int __init xenbus_probe_init(void) | |||
814 | DPRINTK(""); | 814 | DPRINTK(""); |
815 | 815 | ||
816 | err = -ENODEV; | 816 | err = -ENODEV; |
817 | if (!is_running_on_xen()) | 817 | if (!xen_domain()) |
818 | goto out_error; | 818 | goto out_error; |
819 | 819 | ||
820 | /* Register ourselves with the kernel bus subsystem */ | 820 | /* Register ourselves with the kernel bus subsystem */ |
@@ -829,7 +829,7 @@ static int __init xenbus_probe_init(void) | |||
829 | /* | 829 | /* |
830 | * Domain0 doesn't have a store_evtchn or store_mfn yet. | 830 | * Domain0 doesn't have a store_evtchn or store_mfn yet. |
831 | */ | 831 | */ |
832 | if (is_initial_xendomain()) { | 832 | if (xen_initial_domain()) { |
833 | /* dom0 not yet supported */ | 833 | /* dom0 not yet supported */ |
834 | } else { | 834 | } else { |
835 | xenstored_ready = 1; | 835 | xenstored_ready = 1; |
@@ -846,7 +846,7 @@ static int __init xenbus_probe_init(void) | |||
846 | goto out_unreg_back; | 846 | goto out_unreg_back; |
847 | } | 847 | } |
848 | 848 | ||
849 | if (!is_initial_xendomain()) | 849 | if (!xen_initial_domain()) |
850 | xenbus_probe(NULL); | 850 | xenbus_probe(NULL); |
851 | 851 | ||
852 | return 0; | 852 | return 0; |
@@ -937,7 +937,7 @@ static void wait_for_devices(struct xenbus_driver *xendrv) | |||
937 | unsigned long timeout = jiffies + 10*HZ; | 937 | unsigned long timeout = jiffies + 10*HZ; |
938 | struct device_driver *drv = xendrv ? &xendrv->driver : NULL; | 938 | struct device_driver *drv = xendrv ? &xendrv->driver : NULL; |
939 | 939 | ||
940 | if (!ready_to_wait_for_devices || !is_running_on_xen()) | 940 | if (!ready_to_wait_for_devices || !xen_domain()) |
941 | return; | 941 | return; |
942 | 942 | ||
943 | while (exists_disconnected_device(drv)) { | 943 | while (exists_disconnected_device(drv)) { |
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index b73fea54def2..ebc307817e98 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h | |||
@@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_struct *desc, | |||
24 | desc->d = info->seg_32bit; | 24 | desc->d = info->seg_32bit; |
25 | desc->g = info->limit_in_pages; | 25 | desc->g = info->limit_in_pages; |
26 | desc->base2 = (info->base_addr & 0xff000000) >> 24; | 26 | desc->base2 = (info->base_addr & 0xff000000) >> 24; |
27 | /* | ||
28 | * Don't allow setting of the lm bit. It is useless anyway | ||
29 | * because 64bit system calls require __USER_CS: | ||
30 | */ | ||
31 | desc->l = 0; | ||
27 | } | 32 | } |
28 | 33 | ||
29 | extern struct desc_ptr idt_descr; | 34 | extern struct desc_ptr idt_descr; |
@@ -97,7 +102,15 @@ static inline int desc_empty(const void *ptr) | |||
97 | native_write_gdt_entry(dt, entry, desc, type) | 102 | native_write_gdt_entry(dt, entry, desc, type) |
98 | #define write_idt_entry(dt, entry, g) \ | 103 | #define write_idt_entry(dt, entry, g) \ |
99 | native_write_idt_entry(dt, entry, g) | 104 | native_write_idt_entry(dt, entry, g) |
100 | #endif | 105 | |
106 | static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) | ||
107 | { | ||
108 | } | ||
109 | |||
110 | static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) | ||
111 | { | ||
112 | } | ||
113 | #endif /* CONFIG_PARAVIRT */ | ||
101 | 114 | ||
102 | static inline void native_write_idt_entry(gate_desc *idt, int entry, | 115 | static inline void native_write_idt_entry(gate_desc *idt, int entry, |
103 | const gate_desc *gate) | 116 | const gate_desc *gate) |
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index d7d358a43996..8d6ae2f760d0 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h | |||
@@ -124,6 +124,9 @@ struct pv_cpu_ops { | |||
124 | int entrynum, const void *desc, int size); | 124 | int entrynum, const void *desc, int size); |
125 | void (*write_idt_entry)(gate_desc *, | 125 | void (*write_idt_entry)(gate_desc *, |
126 | int entrynum, const gate_desc *gate); | 126 | int entrynum, const gate_desc *gate); |
127 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | ||
128 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | ||
129 | |||
127 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | 130 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); |
128 | 131 | ||
129 | void (*set_iopl_mask)(unsigned mask); | 132 | void (*set_iopl_mask)(unsigned mask); |
@@ -325,6 +328,7 @@ struct pv_lock_ops { | |||
325 | int (*spin_is_locked)(struct raw_spinlock *lock); | 328 | int (*spin_is_locked)(struct raw_spinlock *lock); |
326 | int (*spin_is_contended)(struct raw_spinlock *lock); | 329 | int (*spin_is_contended)(struct raw_spinlock *lock); |
327 | void (*spin_lock)(struct raw_spinlock *lock); | 330 | void (*spin_lock)(struct raw_spinlock *lock); |
331 | void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); | ||
328 | int (*spin_trylock)(struct raw_spinlock *lock); | 332 | int (*spin_trylock)(struct raw_spinlock *lock); |
329 | void (*spin_unlock)(struct raw_spinlock *lock); | 333 | void (*spin_unlock)(struct raw_spinlock *lock); |
330 | }; | 334 | }; |
@@ -830,6 +834,16 @@ do { \ | |||
830 | (aux) = __aux; \ | 834 | (aux) = __aux; \ |
831 | } while (0) | 835 | } while (0) |
832 | 836 | ||
837 | static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) | ||
838 | { | ||
839 | PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); | ||
840 | } | ||
841 | |||
842 | static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) | ||
843 | { | ||
844 | PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries); | ||
845 | } | ||
846 | |||
833 | static inline void load_TR_desc(void) | 847 | static inline void load_TR_desc(void) |
834 | { | 848 | { |
835 | PVOP_VCALL0(pv_cpu_ops.load_tr_desc); | 849 | PVOP_VCALL0(pv_cpu_ops.load_tr_desc); |
@@ -1394,6 +1408,12 @@ static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) | |||
1394 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); | 1408 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); |
1395 | } | 1409 | } |
1396 | 1410 | ||
1411 | static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock, | ||
1412 | unsigned long flags) | ||
1413 | { | ||
1414 | PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); | ||
1415 | } | ||
1416 | |||
1397 | static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) | 1417 | static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) |
1398 | { | 1418 | { |
1399 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); | 1419 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); |
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 29324c103341..6df2615f9138 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h | |||
@@ -50,12 +50,16 @@ extern struct { | |||
50 | struct smp_ops { | 50 | struct smp_ops { |
51 | void (*smp_prepare_boot_cpu)(void); | 51 | void (*smp_prepare_boot_cpu)(void); |
52 | void (*smp_prepare_cpus)(unsigned max_cpus); | 52 | void (*smp_prepare_cpus)(unsigned max_cpus); |
53 | int (*cpu_up)(unsigned cpu); | ||
54 | void (*smp_cpus_done)(unsigned max_cpus); | 53 | void (*smp_cpus_done)(unsigned max_cpus); |
55 | 54 | ||
56 | void (*smp_send_stop)(void); | 55 | void (*smp_send_stop)(void); |
57 | void (*smp_send_reschedule)(int cpu); | 56 | void (*smp_send_reschedule)(int cpu); |
58 | 57 | ||
58 | int (*cpu_up)(unsigned cpu); | ||
59 | int (*cpu_disable)(void); | ||
60 | void (*cpu_die)(unsigned int cpu); | ||
61 | void (*play_dead)(void); | ||
62 | |||
59 | void (*send_call_func_ipi)(cpumask_t mask); | 63 | void (*send_call_func_ipi)(cpumask_t mask); |
60 | void (*send_call_func_single_ipi)(int cpu); | 64 | void (*send_call_func_single_ipi)(int cpu); |
61 | }; | 65 | }; |
@@ -94,6 +98,21 @@ static inline int __cpu_up(unsigned int cpu) | |||
94 | return smp_ops.cpu_up(cpu); | 98 | return smp_ops.cpu_up(cpu); |
95 | } | 99 | } |
96 | 100 | ||
101 | static inline int __cpu_disable(void) | ||
102 | { | ||
103 | return smp_ops.cpu_disable(); | ||
104 | } | ||
105 | |||
106 | static inline void __cpu_die(unsigned int cpu) | ||
107 | { | ||
108 | smp_ops.cpu_die(cpu); | ||
109 | } | ||
110 | |||
111 | static inline void play_dead(void) | ||
112 | { | ||
113 | smp_ops.play_dead(); | ||
114 | } | ||
115 | |||
97 | static inline void smp_send_reschedule(int cpu) | 116 | static inline void smp_send_reschedule(int cpu) |
98 | { | 117 | { |
99 | smp_ops.smp_send_reschedule(cpu); | 118 | smp_ops.smp_send_reschedule(cpu); |
@@ -109,16 +128,19 @@ static inline void arch_send_call_function_ipi(cpumask_t mask) | |||
109 | smp_ops.send_call_func_ipi(mask); | 128 | smp_ops.send_call_func_ipi(mask); |
110 | } | 129 | } |
111 | 130 | ||
131 | void cpu_disable_common(void); | ||
112 | void native_smp_prepare_boot_cpu(void); | 132 | void native_smp_prepare_boot_cpu(void); |
113 | void native_smp_prepare_cpus(unsigned int max_cpus); | 133 | void native_smp_prepare_cpus(unsigned int max_cpus); |
114 | void native_smp_cpus_done(unsigned int max_cpus); | 134 | void native_smp_cpus_done(unsigned int max_cpus); |
115 | int native_cpu_up(unsigned int cpunum); | 135 | int native_cpu_up(unsigned int cpunum); |
136 | int native_cpu_disable(void); | ||
137 | void native_cpu_die(unsigned int cpu); | ||
138 | void native_play_dead(void); | ||
139 | void play_dead_common(void); | ||
140 | |||
116 | void native_send_call_func_ipi(cpumask_t mask); | 141 | void native_send_call_func_ipi(cpumask_t mask); |
117 | void native_send_call_func_single_ipi(int cpu); | 142 | void native_send_call_func_single_ipi(int cpu); |
118 | 143 | ||
119 | extern int __cpu_disable(void); | ||
120 | extern void __cpu_die(unsigned int cpu); | ||
121 | |||
122 | void smp_store_cpu_info(int id); | 144 | void smp_store_cpu_info(int id); |
123 | #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) | 145 | #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) |
124 | 146 | ||
@@ -205,9 +227,5 @@ static inline int hard_smp_processor_id(void) | |||
205 | 227 | ||
206 | #endif /* CONFIG_X86_LOCAL_APIC */ | 228 | #endif /* CONFIG_X86_LOCAL_APIC */ |
207 | 229 | ||
208 | #ifdef CONFIG_HOTPLUG_CPU | ||
209 | extern void cpu_uninit(void); | ||
210 | #endif | ||
211 | |||
212 | #endif /* __ASSEMBLY__ */ | 230 | #endif /* __ASSEMBLY__ */ |
213 | #endif /* ASM_X86__SMP_H */ | 231 | #endif /* ASM_X86__SMP_H */ |
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index 93adae338ac6..157ff7fab97a 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h | |||
@@ -21,8 +21,10 @@ | |||
21 | 21 | ||
22 | #ifdef CONFIG_X86_32 | 22 | #ifdef CONFIG_X86_32 |
23 | # define LOCK_PTR_REG "a" | 23 | # define LOCK_PTR_REG "a" |
24 | # define REG_PTR_MODE "k" | ||
24 | #else | 25 | #else |
25 | # define LOCK_PTR_REG "D" | 26 | # define LOCK_PTR_REG "D" |
27 | # define REG_PTR_MODE "q" | ||
26 | #endif | 28 | #endif |
27 | 29 | ||
28 | #if defined(CONFIG_X86_32) && \ | 30 | #if defined(CONFIG_X86_32) && \ |
@@ -54,19 +56,7 @@ | |||
54 | * much between them in performance though, especially as locks are out of line. | 56 | * much between them in performance though, especially as locks are out of line. |
55 | */ | 57 | */ |
56 | #if (NR_CPUS < 256) | 58 | #if (NR_CPUS < 256) |
57 | static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) | 59 | #define TICKET_SHIFT 8 |
58 | { | ||
59 | int tmp = ACCESS_ONCE(lock->slock); | ||
60 | |||
61 | return (((tmp >> 8) & 0xff) != (tmp & 0xff)); | ||
62 | } | ||
63 | |||
64 | static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) | ||
65 | { | ||
66 | int tmp = ACCESS_ONCE(lock->slock); | ||
67 | |||
68 | return (((tmp >> 8) - tmp) & 0xff) > 1; | ||
69 | } | ||
70 | 60 | ||
71 | static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) | 61 | static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) |
72 | { | 62 | { |
@@ -89,19 +79,17 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) | |||
89 | 79 | ||
90 | static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) | 80 | static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) |
91 | { | 81 | { |
92 | int tmp; | 82 | int tmp, new; |
93 | short new; | ||
94 | 83 | ||
95 | asm volatile("movw %2,%w0\n\t" | 84 | asm volatile("movzwl %2, %0\n\t" |
96 | "cmpb %h0,%b0\n\t" | 85 | "cmpb %h0,%b0\n\t" |
86 | "leal 0x100(%" REG_PTR_MODE "0), %1\n\t" | ||
97 | "jne 1f\n\t" | 87 | "jne 1f\n\t" |
98 | "movw %w0,%w1\n\t" | ||
99 | "incb %h1\n\t" | ||
100 | LOCK_PREFIX "cmpxchgw %w1,%2\n\t" | 88 | LOCK_PREFIX "cmpxchgw %w1,%2\n\t" |
101 | "1:" | 89 | "1:" |
102 | "sete %b1\n\t" | 90 | "sete %b1\n\t" |
103 | "movzbl %b1,%0\n\t" | 91 | "movzbl %b1,%0\n\t" |
104 | : "=&a" (tmp), "=Q" (new), "+m" (lock->slock) | 92 | : "=&a" (tmp), "=&q" (new), "+m" (lock->slock) |
105 | : | 93 | : |
106 | : "memory", "cc"); | 94 | : "memory", "cc"); |
107 | 95 | ||
@@ -116,19 +104,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) | |||
116 | : "memory", "cc"); | 104 | : "memory", "cc"); |
117 | } | 105 | } |
118 | #else | 106 | #else |
119 | static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) | 107 | #define TICKET_SHIFT 16 |
120 | { | ||
121 | int tmp = ACCESS_ONCE(lock->slock); | ||
122 | |||
123 | return (((tmp >> 16) & 0xffff) != (tmp & 0xffff)); | ||
124 | } | ||
125 | |||
126 | static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) | ||
127 | { | ||
128 | int tmp = ACCESS_ONCE(lock->slock); | ||
129 | |||
130 | return (((tmp >> 16) - tmp) & 0xffff) > 1; | ||
131 | } | ||
132 | 108 | ||
133 | static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) | 109 | static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) |
134 | { | 110 | { |
@@ -146,7 +122,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) | |||
146 | /* don't need lfence here, because loads are in-order */ | 122 | /* don't need lfence here, because loads are in-order */ |
147 | "jmp 1b\n" | 123 | "jmp 1b\n" |
148 | "2:" | 124 | "2:" |
149 | : "+Q" (inc), "+m" (lock->slock), "=r" (tmp) | 125 | : "+r" (inc), "+m" (lock->slock), "=&r" (tmp) |
150 | : | 126 | : |
151 | : "memory", "cc"); | 127 | : "memory", "cc"); |
152 | } | 128 | } |
@@ -160,13 +136,13 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) | |||
160 | "movl %0,%1\n\t" | 136 | "movl %0,%1\n\t" |
161 | "roll $16, %0\n\t" | 137 | "roll $16, %0\n\t" |
162 | "cmpl %0,%1\n\t" | 138 | "cmpl %0,%1\n\t" |
139 | "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t" | ||
163 | "jne 1f\n\t" | 140 | "jne 1f\n\t" |
164 | "addl $0x00010000, %1\n\t" | ||
165 | LOCK_PREFIX "cmpxchgl %1,%2\n\t" | 141 | LOCK_PREFIX "cmpxchgl %1,%2\n\t" |
166 | "1:" | 142 | "1:" |
167 | "sete %b1\n\t" | 143 | "sete %b1\n\t" |
168 | "movzbl %b1,%0\n\t" | 144 | "movzbl %b1,%0\n\t" |
169 | : "=&a" (tmp), "=r" (new), "+m" (lock->slock) | 145 | : "=&a" (tmp), "=&q" (new), "+m" (lock->slock) |
170 | : | 146 | : |
171 | : "memory", "cc"); | 147 | : "memory", "cc"); |
172 | 148 | ||
@@ -182,7 +158,19 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) | |||
182 | } | 158 | } |
183 | #endif | 159 | #endif |
184 | 160 | ||
185 | #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) | 161 | static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) |
162 | { | ||
163 | int tmp = ACCESS_ONCE(lock->slock); | ||
164 | |||
165 | return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); | ||
166 | } | ||
167 | |||
168 | static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) | ||
169 | { | ||
170 | int tmp = ACCESS_ONCE(lock->slock); | ||
171 | |||
172 | return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; | ||
173 | } | ||
186 | 174 | ||
187 | #ifdef CONFIG_PARAVIRT | 175 | #ifdef CONFIG_PARAVIRT |
188 | /* | 176 | /* |
@@ -272,6 +260,13 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) | |||
272 | { | 260 | { |
273 | __ticket_spin_unlock(lock); | 261 | __ticket_spin_unlock(lock); |
274 | } | 262 | } |
263 | |||
264 | static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, | ||
265 | unsigned long flags) | ||
266 | { | ||
267 | __raw_spin_lock(lock); | ||
268 | } | ||
269 | |||
275 | #endif /* CONFIG_PARAVIRT */ | 270 | #endif /* CONFIG_PARAVIRT */ |
276 | 271 | ||
277 | static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) | 272 | static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) |
diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h index ef68b76dc3c5..3cdd08b5bdb7 100644 --- a/include/asm-x86/tlbflush.h +++ b/include/asm-x86/tlbflush.h | |||
@@ -119,6 +119,10 @@ static inline void native_flush_tlb_others(const cpumask_t *cpumask, | |||
119 | { | 119 | { |
120 | } | 120 | } |
121 | 121 | ||
122 | static inline void reset_lazy_tlbstate(void) | ||
123 | { | ||
124 | } | ||
125 | |||
122 | #else /* SMP */ | 126 | #else /* SMP */ |
123 | 127 | ||
124 | #include <asm/smp.h> | 128 | #include <asm/smp.h> |
@@ -151,6 +155,12 @@ struct tlb_state { | |||
151 | char __cacheline_padding[L1_CACHE_BYTES-8]; | 155 | char __cacheline_padding[L1_CACHE_BYTES-8]; |
152 | }; | 156 | }; |
153 | DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); | 157 | DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); |
158 | |||
159 | void reset_lazy_tlbstate(void); | ||
160 | #else | ||
161 | static inline void reset_lazy_tlbstate(void) | ||
162 | { | ||
163 | } | ||
154 | #endif | 164 | #endif |
155 | 165 | ||
156 | #endif /* SMP */ | 166 | #endif /* SMP */ |
diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h index 0ef3a88b869d..445a24759560 100644 --- a/include/asm-x86/xen/hypervisor.h +++ b/include/asm-x86/xen/hypervisor.h | |||
@@ -54,7 +54,6 @@ | |||
54 | /* arch/i386/kernel/setup.c */ | 54 | /* arch/i386/kernel/setup.c */ |
55 | extern struct shared_info *HYPERVISOR_shared_info; | 55 | extern struct shared_info *HYPERVISOR_shared_info; |
56 | extern struct start_info *xen_start_info; | 56 | extern struct start_info *xen_start_info; |
57 | #define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN) | ||
58 | 57 | ||
59 | /* arch/i386/mach-xen/evtchn.c */ | 58 | /* arch/i386/mach-xen/evtchn.c */ |
60 | /* Force a proper event-channel callback from Xen. */ | 59 | /* Force a proper event-channel callback from Xen. */ |
@@ -67,6 +66,17 @@ u64 jiffies_to_st(unsigned long jiffies); | |||
67 | #define MULTI_UVMFLAGS_INDEX 3 | 66 | #define MULTI_UVMFLAGS_INDEX 3 |
68 | #define MULTI_UVMDOMID_INDEX 4 | 67 | #define MULTI_UVMDOMID_INDEX 4 |
69 | 68 | ||
70 | #define is_running_on_xen() (xen_start_info ? 1 : 0) | 69 | enum xen_domain_type { |
70 | XEN_NATIVE, | ||
71 | XEN_PV_DOMAIN, | ||
72 | XEN_HVM_DOMAIN, | ||
73 | }; | ||
74 | |||
75 | extern enum xen_domain_type xen_domain_type; | ||
76 | |||
77 | #define xen_domain() (xen_domain_type != XEN_NATIVE) | ||
78 | #define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) | ||
79 | #define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) | ||
80 | #define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) | ||
71 | 81 | ||
72 | #endif /* ASM_X86__XEN__HYPERVISOR_H */ | 82 | #endif /* ASM_X86__XEN__HYPERVISOR_H */ |
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6d..75d81f157d2e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -182,7 +182,7 @@ extern int vsscanf(const char *, const char *, va_list) | |||
182 | 182 | ||
183 | extern int get_option(char **str, int *pint); | 183 | extern int get_option(char **str, int *pint); |
184 | extern char *get_options(const char *str, int nints, int *ints); | 184 | extern char *get_options(const char *str, int nints, int *ints); |
185 | extern unsigned long long memparse(char *ptr, char **retptr); | 185 | extern unsigned long long memparse(const char *ptr, char **retptr); |
186 | 186 | ||
187 | extern int core_kernel_text(unsigned long addr); | 187 | extern int core_kernel_text(unsigned long addr); |
188 | extern int __kernel_text_address(unsigned long addr); | 188 | extern int __kernel_text_address(unsigned long addr); |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 72a15dc26bbf..4194bf8e4f6c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -919,7 +919,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a | |||
919 | } | 919 | } |
920 | #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ | 920 | #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ |
921 | 921 | ||
922 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | 922 | #if USE_SPLIT_PTLOCKS |
923 | /* | 923 | /* |
924 | * We tuck a spinlock to guard each pagetable page into its struct page, | 924 | * We tuck a spinlock to guard each pagetable page into its struct page, |
925 | * at page->private, with BUILD_BUG_ON to make sure that this will not | 925 | * at page->private, with BUILD_BUG_ON to make sure that this will not |
@@ -932,14 +932,14 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a | |||
932 | } while (0) | 932 | } while (0) |
933 | #define pte_lock_deinit(page) ((page)->mapping = NULL) | 933 | #define pte_lock_deinit(page) ((page)->mapping = NULL) |
934 | #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) | 934 | #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) |
935 | #else | 935 | #else /* !USE_SPLIT_PTLOCKS */ |
936 | /* | 936 | /* |
937 | * We use mm->page_table_lock to guard all pagetable pages of the mm. | 937 | * We use mm->page_table_lock to guard all pagetable pages of the mm. |
938 | */ | 938 | */ |
939 | #define pte_lock_init(page) do {} while (0) | 939 | #define pte_lock_init(page) do {} while (0) |
940 | #define pte_lock_deinit(page) do {} while (0) | 940 | #define pte_lock_deinit(page) do {} while (0) |
941 | #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) | 941 | #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) |
942 | #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ | 942 | #endif /* USE_SPLIT_PTLOCKS */ |
943 | 943 | ||
944 | static inline void pgtable_page_ctor(struct page *page) | 944 | static inline void pgtable_page_ctor(struct page *page) |
945 | { | 945 | { |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bf334138c7c1..9d49fa36bbef 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -21,11 +21,13 @@ | |||
21 | 21 | ||
22 | struct address_space; | 22 | struct address_space; |
23 | 23 | ||
24 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | 24 | #define USE_SPLIT_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) |
25 | |||
26 | #if USE_SPLIT_PTLOCKS | ||
25 | typedef atomic_long_t mm_counter_t; | 27 | typedef atomic_long_t mm_counter_t; |
26 | #else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ | 28 | #else /* !USE_SPLIT_PTLOCKS */ |
27 | typedef unsigned long mm_counter_t; | 29 | typedef unsigned long mm_counter_t; |
28 | #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ | 30 | #endif /* !USE_SPLIT_PTLOCKS */ |
29 | 31 | ||
30 | /* | 32 | /* |
31 | * Each physical page in the system has a struct page associated with | 33 | * Each physical page in the system has a struct page associated with |
@@ -65,7 +67,7 @@ struct page { | |||
65 | * see PAGE_MAPPING_ANON below. | 67 | * see PAGE_MAPPING_ANON below. |
66 | */ | 68 | */ |
67 | }; | 69 | }; |
68 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | 70 | #if USE_SPLIT_PTLOCKS |
69 | spinlock_t ptl; | 71 | spinlock_t ptl; |
70 | #endif | 72 | #endif |
71 | struct kmem_cache *slab; /* SLUB: Pointer to slab */ | 73 | struct kmem_cache *slab; /* SLUB: Pointer to slab */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 5d0819ee442a..c226c7b82946 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -352,7 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, | |||
352 | extern void arch_unmap_area(struct mm_struct *, unsigned long); | 352 | extern void arch_unmap_area(struct mm_struct *, unsigned long); |
353 | extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); | 353 | extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); |
354 | 354 | ||
355 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | 355 | #if USE_SPLIT_PTLOCKS |
356 | /* | 356 | /* |
357 | * The mm counters are not protected by its page_table_lock, | 357 | * The mm counters are not protected by its page_table_lock, |
358 | * so must be incremented atomically. | 358 | * so must be incremented atomically. |
@@ -363,7 +363,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); | |||
363 | #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) | 363 | #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) |
364 | #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) | 364 | #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) |
365 | 365 | ||
366 | #else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ | 366 | #else /* !USE_SPLIT_PTLOCKS */ |
367 | /* | 367 | /* |
368 | * The mm counters are protected by its page_table_lock, | 368 | * The mm counters are protected by its page_table_lock, |
369 | * so can be incremented directly. | 369 | * so can be incremented directly. |
@@ -374,7 +374,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); | |||
374 | #define inc_mm_counter(mm, member) (mm)->_##member++ | 374 | #define inc_mm_counter(mm, member) (mm)->_##member++ |
375 | #define dec_mm_counter(mm, member) (mm)->_##member-- | 375 | #define dec_mm_counter(mm, member) (mm)->_##member-- |
376 | 376 | ||
377 | #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ | 377 | #endif /* !USE_SPLIT_PTLOCKS */ |
378 | 378 | ||
379 | #define get_mm_rss(mm) \ | 379 | #define get_mm_rss(mm) \ |
380 | (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) | 380 | (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) |
diff --git a/include/xen/balloon.h b/include/xen/balloon.h deleted file mode 100644 index fe43b0f3c86a..000000000000 --- a/include/xen/balloon.h +++ /dev/null | |||
@@ -1,61 +0,0 @@ | |||
1 | /****************************************************************************** | ||
2 | * balloon.h | ||
3 | * | ||
4 | * Xen balloon driver - enables returning/claiming memory to/from Xen. | ||
5 | * | ||
6 | * Copyright (c) 2003, B Dragovic | ||
7 | * Copyright (c) 2003-2004, M Williamson, K Fraser | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #ifndef __XEN_BALLOON_H__ | ||
35 | #define __XEN_BALLOON_H__ | ||
36 | |||
37 | #include <linux/spinlock.h> | ||
38 | |||
39 | #if 0 | ||
40 | /* | ||
41 | * Inform the balloon driver that it should allow some slop for device-driver | ||
42 | * memory activities. | ||
43 | */ | ||
44 | void balloon_update_driver_allowance(long delta); | ||
45 | |||
46 | /* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */ | ||
47 | struct page **alloc_empty_pages_and_pagevec(int nr_pages); | ||
48 | void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages); | ||
49 | |||
50 | void balloon_release_driver_page(struct page *page); | ||
51 | |||
52 | /* | ||
53 | * Prevent the balloon driver from changing the memory reservation during | ||
54 | * a driver critical region. | ||
55 | */ | ||
56 | extern spinlock_t balloon_lock; | ||
57 | #define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags) | ||
58 | #define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags) | ||
59 | #endif | ||
60 | |||
61 | #endif /* __XEN_BALLOON_H__ */ | ||
diff --git a/include/xen/events.h b/include/xen/events.h index 4680ff3fbc91..0d5f1adc0363 100644 --- a/include/xen/events.h +++ b/include/xen/events.h | |||
@@ -46,6 +46,8 @@ extern void xen_irq_resume(void); | |||
46 | 46 | ||
47 | /* Clear an irq's pending state, in preparation for polling on it */ | 47 | /* Clear an irq's pending state, in preparation for polling on it */ |
48 | void xen_clear_irq_pending(int irq); | 48 | void xen_clear_irq_pending(int irq); |
49 | void xen_set_irq_pending(int irq); | ||
50 | bool xen_test_irq_pending(int irq); | ||
49 | 51 | ||
50 | /* Poll waiting for an irq to become pending. In the usual case, the | 52 | /* Poll waiting for an irq to become pending. In the usual case, the |
51 | irq will be disabled so it won't deliver an interrupt. */ | 53 | irq will be disabled so it won't deliver an interrupt. */ |
diff --git a/lib/cmdline.c b/lib/cmdline.c index 5ba8a942a478..f5f3ad8b62ff 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c | |||
@@ -126,7 +126,7 @@ char *get_options(const char *str, int nints, int *ints) | |||
126 | * megabyte, or one gigabyte, respectively. | 126 | * megabyte, or one gigabyte, respectively. |
127 | */ | 127 | */ |
128 | 128 | ||
129 | unsigned long long memparse(char *ptr, char **retptr) | 129 | unsigned long long memparse(const char *ptr, char **retptr) |
130 | { | 130 | { |
131 | char *endptr; /* local pointer to end of parsed string */ | 131 | char *endptr; /* local pointer to end of parsed string */ |
132 | 132 | ||