45 files changed, 1593 insertions(+), 664 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..d679cb2c79b4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE | |||
10 | # Do not profile debug and lowlevel utilities | 10 | # Do not profile debug and lowlevel utilities |
11 | CFLAGS_REMOVE_tsc.o = -pg | 11 | CFLAGS_REMOVE_tsc.o = -pg |
12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
13 | CFLAGS_REMOVE_paravirt.o = -pg | 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg |
14 | endif | 14 | endif |
15 | 15 | ||
16 | # | 16 | # |
@@ -89,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | |||
89 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | 89 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o |
90 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 90 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
91 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 91 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
92 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 92 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o |
93 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | 93 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o |
94 | 94 | ||
95 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o | 95 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8aab8517642e..1c7d39f0e89e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -744,14 +744,3 @@ void __cpuinit cpu_init(void) | |||
744 | mxcsr_feature_mask_init(); | 744 | mxcsr_feature_mask_init(); |
745 | } | 745 | } |
746 | 746 | ||
747 | #ifdef CONFIG_HOTPLUG_CPU | ||
748 | void __cpuinit cpu_uninit(void) | ||
749 | { | ||
750 | int cpu = raw_smp_processor_id(); | ||
751 | cpu_clear(cpu, cpu_initialized); | ||
752 | |||
753 | /* lazy TLB state */ | ||
754 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
755 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
756 | } | ||
757 | #endif | ||
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f06f4f..6e388412a854 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -51,6 +51,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
51 | memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, | 51 | memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, |
52 | (mincount - oldsize) * LDT_ENTRY_SIZE); | 52 | (mincount - oldsize) * LDT_ENTRY_SIZE); |
53 | 53 | ||
54 | paravirt_alloc_ldt(newldt, mincount); | ||
55 | |||
54 | #ifdef CONFIG_X86_64 | 56 | #ifdef CONFIG_X86_64 |
55 | /* CHECKME: Do we really need this ? */ | 57 | /* CHECKME: Do we really need this ? */ |
56 | wmb(); | 58 | wmb(); |
@@ -73,6 +75,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
73 | #endif | 75 | #endif |
74 | } | 76 | } |
75 | if (oldsize) { | 77 | if (oldsize) { |
78 | paravirt_free_ldt(oldldt, oldsize); | ||
76 | if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) | 79 | if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) |
77 | vfree(oldldt); | 80 | vfree(oldldt); |
78 | else | 81 | else |
@@ -84,10 +87,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
84 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | 87 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) |
85 | { | 88 | { |
86 | int err = alloc_ldt(new, old->size, 0); | 89 | int err = alloc_ldt(new, old->size, 0); |
90 | int i; | ||
87 | 91 | ||
88 | if (err < 0) | 92 | if (err < 0) |
89 | return err; | 93 | return err; |
90 | memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); | 94 | |
95 | for(i = 0; i < old->size; i++) | ||
96 | write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); | ||
91 | return 0; | 97 | return 0; |
92 | } | 98 | } |
93 | 99 | ||
@@ -124,6 +130,7 @@ void destroy_context(struct mm_struct *mm) | |||
124 | if (mm == current->active_mm) | 130 | if (mm == current->active_mm) |
125 | clear_LDT(); | 131 | clear_LDT(); |
126 | #endif | 132 | #endif |
133 | paravirt_free_ldt(mm->context.ldt, mm->context.size); | ||
127 | if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) | 134 | if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) |
128 | vfree(mm->context.ldt); | 135 | vfree(mm->context.ldt); |
129 | else | 136 | else |
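
The paravirt_alloc_ldt()/paravirt_free_ldt() calls added above dispatch through the new alloc_ldt/free_ldt members of pv_cpu_ops (filled in by the paravirt.c and enlighten.c hunks below). A minimal sketch of what those wrappers amount to, assuming the usual <asm/paravirt.h> conventions; the in-tree definitions use the PVOP_VCALL* macros rather than plain indirect calls:

/* Sketch only: the real wrappers live in <asm/paravirt.h>. */
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
	pv_cpu_ops.alloc_ldt(ldt, entries);	/* paravirt_nop on bare hardware */
}

static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
	pv_cpu_ops.free_ldt(ldt, entries);	/* xen_free_ldt() under Xen */
}
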
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
new file mode 100644
index 000000000000..0e9f1982b1dd
--- /dev/null
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * Split spinlock implementation out into its own file, so it can be | ||
3 | * compiled in a FTRACE-compatible way. | ||
4 | */ | ||
5 | #include <linux/spinlock.h> | ||
6 | #include <linux/module.h> | ||
7 | |||
8 | #include <asm/paravirt.h> | ||
9 | |||
10 | static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) | ||
11 | { | ||
12 | __raw_spin_lock(lock); | ||
13 | } | ||
14 | |||
15 | struct pv_lock_ops pv_lock_ops = { | ||
16 | #ifdef CONFIG_SMP | ||
17 | .spin_is_locked = __ticket_spin_is_locked, | ||
18 | .spin_is_contended = __ticket_spin_is_contended, | ||
19 | |||
20 | .spin_lock = __ticket_spin_lock, | ||
21 | .spin_lock_flags = default_spin_lock_flags, | ||
22 | .spin_trylock = __ticket_spin_trylock, | ||
23 | .spin_unlock = __ticket_spin_unlock, | ||
24 | #endif | ||
25 | }; | ||
26 | EXPORT_SYMBOL(pv_lock_ops); | ||
27 | |||
28 | void __init paravirt_use_bytelocks(void) | ||
29 | { | ||
30 | #ifdef CONFIG_SMP | ||
31 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
32 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
33 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
34 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
35 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
36 | #endif | ||
37 | } | ||
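
On CONFIG_PARAVIRT builds the arch spinlock entry points are thin indirections through pv_lock_ops, which is why moving the structure into this separate, non-profiled file is enough to keep ftrace out of the lock path. A simplified sketch of those wrappers; the in-tree versions are generated with the PVOP_CALL/PVOP_VCALL macros in <asm/paravirt.h>:

static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
{
	return pv_lock_ops.spin_is_locked(lock);
}

static inline void __raw_spin_lock(struct raw_spinlock *lock)
{
	/* ticket locks by default, byte locks after paravirt_use_bytelocks() */
	pv_lock_ops.spin_lock(lock);
}

static inline void __raw_spin_unlock(struct raw_spinlock *lock)
{
	pv_lock_ops.spin_unlock(lock);
}
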
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 300da17e61cb..7faea1817d05 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |||
268 | return __get_cpu_var(paravirt_lazy_mode); | 268 | return __get_cpu_var(paravirt_lazy_mode); |
269 | } | 269 | } |
270 | 270 | ||
271 | void __init paravirt_use_bytelocks(void) | ||
272 | { | ||
273 | #ifdef CONFIG_SMP | ||
274 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
275 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
276 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
277 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
278 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
279 | #endif | ||
280 | } | ||
281 | |||
282 | struct pv_info pv_info = { | 271 | struct pv_info pv_info = { |
283 | .name = "bare hardware", | 272 | .name = "bare hardware", |
284 | .paravirt_enabled = 0, | 273 | .paravirt_enabled = 0, |
@@ -348,6 +337,10 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
348 | .write_ldt_entry = native_write_ldt_entry, | 337 | .write_ldt_entry = native_write_ldt_entry, |
349 | .write_gdt_entry = native_write_gdt_entry, | 338 | .write_gdt_entry = native_write_gdt_entry, |
350 | .write_idt_entry = native_write_idt_entry, | 339 | .write_idt_entry = native_write_idt_entry, |
340 | |||
341 | .alloc_ldt = paravirt_nop, | ||
342 | .free_ldt = paravirt_nop, | ||
343 | |||
351 | .load_sp0 = native_load_sp0, | 344 | .load_sp0 = native_load_sp0, |
352 | 345 | ||
353 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) | 346 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
@@ -461,18 +454,6 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
461 | .set_fixmap = native_set_fixmap, | 454 | .set_fixmap = native_set_fixmap, |
462 | }; | 455 | }; |
463 | 456 | ||
464 | struct pv_lock_ops pv_lock_ops = { | ||
465 | #ifdef CONFIG_SMP | ||
466 | .spin_is_locked = __ticket_spin_is_locked, | ||
467 | .spin_is_contended = __ticket_spin_is_contended, | ||
468 | |||
469 | .spin_lock = __ticket_spin_lock, | ||
470 | .spin_trylock = __ticket_spin_trylock, | ||
471 | .spin_unlock = __ticket_spin_unlock, | ||
472 | #endif | ||
473 | }; | ||
474 | EXPORT_SYMBOL(pv_lock_ops); | ||
475 | |||
476 | EXPORT_SYMBOL_GPL(pv_time_ops); | 457 | EXPORT_SYMBOL_GPL(pv_time_ops); |
477 | EXPORT_SYMBOL (pv_cpu_ops); | 458 | EXPORT_SYMBOL (pv_cpu_ops); |
478 | EXPORT_SYMBOL (pv_mmu_ops); | 459 | EXPORT_SYMBOL (pv_mmu_ops); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3b7a1ddcc0bc..b76b38ff962b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -72,46 +72,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk) | |||
72 | return ((unsigned long *)tsk->thread.sp)[3]; | 72 | return ((unsigned long *)tsk->thread.sp)[3]; |
73 | } | 73 | } |
74 | 74 | ||
75 | #ifdef CONFIG_HOTPLUG_CPU | 75 | #ifndef CONFIG_SMP |
76 | #include <asm/nmi.h> | ||
77 | |||
78 | static void cpu_exit_clear(void) | ||
79 | { | ||
80 | int cpu = raw_smp_processor_id(); | ||
81 | |||
82 | idle_task_exit(); | ||
83 | |||
84 | cpu_uninit(); | ||
85 | irq_ctx_exit(cpu); | ||
86 | |||
87 | cpu_clear(cpu, cpu_callout_map); | ||
88 | cpu_clear(cpu, cpu_callin_map); | ||
89 | |||
90 | numa_remove_cpu(cpu); | ||
91 | } | ||
92 | |||
93 | /* We don't actually take CPU down, just spin without interrupts. */ | ||
94 | static inline void play_dead(void) | ||
95 | { | ||
96 | /* This must be done before dead CPU ack */ | ||
97 | cpu_exit_clear(); | ||
98 | mb(); | ||
99 | /* Ack it */ | ||
100 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
101 | |||
102 | /* | ||
103 | * With physical CPU hotplug, we should halt the cpu | ||
104 | */ | ||
105 | local_irq_disable(); | ||
106 | /* mask all interrupts, flush any and all caches, and halt */ | ||
107 | wbinvd_halt(); | ||
108 | } | ||
109 | #else | ||
110 | static inline void play_dead(void) | 76 | static inline void play_dead(void) |
111 | { | 77 | { |
112 | BUG(); | 78 | BUG(); |
113 | } | 79 | } |
114 | #endif /* CONFIG_HOTPLUG_CPU */ | 80 | #endif |
115 | 81 | ||
116 | /* | 82 | /* |
117 | * The idle thread. There's no useful work to be | 83 | * The idle thread. There's no useful work to be |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71553b664e2a..ec27afa43d7e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -85,28 +85,12 @@ void exit_idle(void) | |||
85 | __exit_idle(); | 85 | __exit_idle(); |
86 | } | 86 | } |
87 | 87 | ||
88 | #ifdef CONFIG_HOTPLUG_CPU | 88 | #ifndef CONFIG_SMP |
89 | DECLARE_PER_CPU(int, cpu_state); | ||
90 | |||
91 | #include <asm/nmi.h> | ||
92 | /* We halt the CPU with physical CPU hotplug */ | ||
93 | static inline void play_dead(void) | ||
94 | { | ||
95 | idle_task_exit(); | ||
96 | mb(); | ||
97 | /* Ack it */ | ||
98 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
99 | |||
100 | local_irq_disable(); | ||
101 | /* mask all interrupts, flush any and all caches, and halt */ | ||
102 | wbinvd_halt(); | ||
103 | } | ||
104 | #else | ||
105 | static inline void play_dead(void) | 89 | static inline void play_dead(void) |
106 | { | 90 | { |
107 | BUG(); | 91 | BUG(); |
108 | } | 92 | } |
109 | #endif /* CONFIG_HOTPLUG_CPU */ | 93 | #endif |
110 | 94 | ||
111 | /* | 95 | /* |
112 | * The idle thread. There's no useful work to be | 96 | * The idle thread. There's no useful work to be |
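
With the hotplug variants of play_dead() removed from process_32.c and process_64.c, only the !CONFIG_SMP stub remains in these files; on SMP kernels the call is expected to reach the new smp_ops hooks instead. Roughly, assuming the wrapper lives in <asm/smp.h> outside the hunks shown here:

/* Sketch of the dispatch implied by this change. */
static inline void play_dead(void)
{
	smp_ops.play_dead();	/* native_play_dead(), or a hypervisor-specific handler */
}
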
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 361b7a4c640c..18f9b19f5f8f 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
214 | struct smp_ops smp_ops = { | 214 | struct smp_ops smp_ops = { |
215 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 215 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
216 | .smp_prepare_cpus = native_smp_prepare_cpus, | 216 | .smp_prepare_cpus = native_smp_prepare_cpus, |
217 | .cpu_up = native_cpu_up, | ||
218 | .smp_cpus_done = native_smp_cpus_done, | 217 | .smp_cpus_done = native_smp_cpus_done, |
219 | 218 | ||
220 | .smp_send_stop = native_smp_send_stop, | 219 | .smp_send_stop = native_smp_send_stop, |
221 | .smp_send_reschedule = native_smp_send_reschedule, | 220 | .smp_send_reschedule = native_smp_send_reschedule, |
222 | 221 | ||
222 | .cpu_up = native_cpu_up, | ||
223 | .cpu_die = native_cpu_die, | ||
224 | .cpu_disable = native_cpu_disable, | ||
225 | .play_dead = native_play_dead, | ||
226 | |||
223 | .send_call_func_ipi = native_send_call_func_ipi, | 227 | .send_call_func_ipi = native_send_call_func_ipi, |
224 | .send_call_func_single_ipi = native_send_call_func_single_ipi, | 228 | .send_call_func_single_ipi = native_send_call_func_single_ipi, |
225 | }; | 229 | }; |
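
Exposing cpu_disable/cpu_die/play_dead through smp_ops lets a paravirtualized guest substitute its own CPU-offline path without touching the generic hotplug code. A hypothetical sketch of such an override; the function names are illustrative and the actual Xen handlers are added elsewhere in this series:

/* Illustrative only -- not part of this patch. */
static void example_guest_play_dead(void)
{
	play_dead_common();	/* shared teardown, added in smpboot.c below */
	HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
}

static void __init example_guest_init_smp_ops(void)
{
	smp_ops.play_dead = example_guest_play_dead;
}
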
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b3f916..66b04e598817 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1346,25 +1346,9 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1346 | numa_remove_cpu(cpu); | 1346 | numa_remove_cpu(cpu); |
1347 | } | 1347 | } |
1348 | 1348 | ||
1349 | int __cpu_disable(void) | 1349 | void cpu_disable_common(void) |
1350 | { | 1350 | { |
1351 | int cpu = smp_processor_id(); | 1351 | int cpu = smp_processor_id(); |
1352 | |||
1353 | /* | ||
1354 | * Perhaps use cpufreq to drop frequency, but that could go | ||
1355 | * into generic code. | ||
1356 | * | ||
1357 | * We won't take down the boot processor on i386 due to some | ||
1358 | * interrupts only being able to be serviced by the BSP. | ||
1359 | * Especially so if we're not using an IOAPIC -zwane | ||
1360 | */ | ||
1361 | if (cpu == 0) | ||
1362 | return -EBUSY; | ||
1363 | |||
1364 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1365 | stop_apic_nmi_watchdog(NULL); | ||
1366 | clear_local_APIC(); | ||
1367 | |||
1368 | /* | 1352 | /* |
1369 | * HACK: | 1353 | * HACK: |
1370 | * Allow any queued timer interrupts to get serviced | 1354 | * Allow any queued timer interrupts to get serviced |
@@ -1382,10 +1366,32 @@ int __cpu_disable(void) | |||
1382 | remove_cpu_from_maps(cpu); | 1366 | remove_cpu_from_maps(cpu); |
1383 | unlock_vector_lock(); | 1367 | unlock_vector_lock(); |
1384 | fixup_irqs(cpu_online_map); | 1368 | fixup_irqs(cpu_online_map); |
1369 | } | ||
1370 | |||
1371 | int native_cpu_disable(void) | ||
1372 | { | ||
1373 | int cpu = smp_processor_id(); | ||
1374 | |||
1375 | /* | ||
1376 | * Perhaps use cpufreq to drop frequency, but that could go | ||
1377 | * into generic code. | ||
1378 | * | ||
1379 | * We won't take down the boot processor on i386 due to some | ||
1380 | * interrupts only being able to be serviced by the BSP. | ||
1381 | * Especially so if we're not using an IOAPIC -zwane | ||
1382 | */ | ||
1383 | if (cpu == 0) | ||
1384 | return -EBUSY; | ||
1385 | |||
1386 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1387 | stop_apic_nmi_watchdog(NULL); | ||
1388 | clear_local_APIC(); | ||
1389 | |||
1390 | cpu_disable_common(); | ||
1385 | return 0; | 1391 | return 0; |
1386 | } | 1392 | } |
1387 | 1393 | ||
1388 | void __cpu_die(unsigned int cpu) | 1394 | void native_cpu_die(unsigned int cpu) |
1389 | { | 1395 | { |
1390 | /* We don't do anything here: idle task is faking death itself. */ | 1396 | /* We don't do anything here: idle task is faking death itself. */ |
1391 | unsigned int i; | 1397 | unsigned int i; |
@@ -1402,15 +1408,44 @@ void __cpu_die(unsigned int cpu) | |||
1402 | } | 1408 | } |
1403 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1409 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
1404 | } | 1410 | } |
1411 | |||
1412 | void play_dead_common(void) | ||
1413 | { | ||
1414 | idle_task_exit(); | ||
1415 | reset_lazy_tlbstate(); | ||
1416 | irq_ctx_exit(raw_smp_processor_id()); | ||
1417 | |||
1418 | mb(); | ||
1419 | /* Ack it */ | ||
1420 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
1421 | |||
1422 | /* | ||
1423 | * With physical CPU hotplug, we should halt the cpu | ||
1424 | */ | ||
1425 | local_irq_disable(); | ||
1426 | } | ||
1427 | |||
1428 | void native_play_dead(void) | ||
1429 | { | ||
1430 | play_dead_common(); | ||
1431 | wbinvd_halt(); | ||
1432 | } | ||
1433 | |||
1405 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1434 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
1406 | int __cpu_disable(void) | 1435 | int native_cpu_disable(void) |
1407 | { | 1436 | { |
1408 | return -ENOSYS; | 1437 | return -ENOSYS; |
1409 | } | 1438 | } |
1410 | 1439 | ||
1411 | void __cpu_die(unsigned int cpu) | 1440 | void native_cpu_die(unsigned int cpu) |
1412 | { | 1441 | { |
1413 | /* We said "no" in __cpu_disable */ | 1442 | /* We said "no" in __cpu_disable */ |
1414 | BUG(); | 1443 | BUG(); |
1415 | } | 1444 | } |
1445 | |||
1446 | void native_play_dead(void) | ||
1447 | { | ||
1448 | BUG(); | ||
1449 | } | ||
1450 | |||
1416 | #endif | 1451 | #endif |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index fec1ecedc9b7..e00534b33534 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -241,3 +241,11 @@ void flush_tlb_all(void) | |||
241 | on_each_cpu(do_flush_tlb_all, NULL, 1); | 241 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
242 | } | 242 | } |
243 | 243 | ||
244 | void reset_lazy_tlbstate(void) | ||
245 | { | ||
246 | int cpu = raw_smp_processor_id(); | ||
247 | |||
248 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
249 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
250 | } | ||
251 | |||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b42..356ed2dec3a6 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -915,15 +915,15 @@ LIST_HEAD(pgd_list); | |||
915 | 915 | ||
916 | void vmalloc_sync_all(void) | 916 | void vmalloc_sync_all(void) |
917 | { | 917 | { |
918 | #ifdef CONFIG_X86_32 | ||
919 | unsigned long start = VMALLOC_START & PGDIR_MASK; | ||
920 | unsigned long address; | 918 | unsigned long address; |
921 | 919 | ||
920 | #ifdef CONFIG_X86_32 | ||
922 | if (SHARED_KERNEL_PMD) | 921 | if (SHARED_KERNEL_PMD) |
923 | return; | 922 | return; |
924 | 923 | ||
925 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); | 924 | for (address = VMALLOC_START & PMD_MASK; |
926 | for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { | 925 | address >= TASK_SIZE && address < FIXADDR_TOP; |
926 | address += PMD_SIZE) { | ||
927 | unsigned long flags; | 927 | unsigned long flags; |
928 | struct page *page; | 928 | struct page *page; |
929 | 929 | ||
@@ -936,10 +936,8 @@ void vmalloc_sync_all(void) | |||
936 | spin_unlock_irqrestore(&pgd_lock, flags); | 936 | spin_unlock_irqrestore(&pgd_lock, flags); |
937 | } | 937 | } |
938 | #else /* CONFIG_X86_64 */ | 938 | #else /* CONFIG_X86_64 */ |
939 | unsigned long start = VMALLOC_START & PGDIR_MASK; | 939 | for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; |
940 | unsigned long address; | 940 | address += PGDIR_SIZE) { |
941 | |||
942 | for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { | ||
943 | const pgd_t *pgd_ref = pgd_offset_k(address); | 941 | const pgd_t *pgd_ref = pgd_offset_k(address); |
944 | unsigned long flags; | 942 | unsigned long flags; |
945 | struct page *page; | 943 | struct page *page; |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 3815e425f470..d3e68465ace9 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -27,4 +27,12 @@ config XEN_MAX_DOMAIN_MEMORY | |||
27 | config XEN_SAVE_RESTORE | 27 | config XEN_SAVE_RESTORE |
28 | bool | 28 | bool |
29 | depends on PM | 29 | depends on PM |
30 | default y \ No newline at end of file | 30 | default y |
31 | |||
32 | config XEN_DEBUG_FS | ||
33 | bool "Enable Xen debug and tuning parameters in debugfs" | ||
34 | depends on XEN && DEBUG_FS | ||
35 | default n | ||
36 | help | ||
37 | Enable statistics output and various tuning options in debugfs. | ||
38 | Enabling this option may incur a significant performance overhead. \ No newline at end of file | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 59c1e539aed2..313947940a1a 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,12 @@ | |||
1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ | 1 | ifdef CONFIG_FTRACE |
2 | # Do not profile debug and lowlevel utilities | ||
3 | CFLAGS_REMOVE_spinlock.o = -pg | ||
4 | CFLAGS_REMOVE_time.o = -pg | ||
5 | CFLAGS_REMOVE_irq.o = -pg | ||
6 | endif | ||
7 | |||
8 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | ||
2 | time.o xen-asm_$(BITS).o grant-table.o suspend.o | 9 | time.o xen-asm_$(BITS).o grant-table.o suspend.o |
3 | 10 | ||
4 | obj-$(CONFIG_SMP) += smp.o | 11 | obj-$(CONFIG_SMP) += smp.o spinlock.o |
12 | obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o \ No newline at end of file | ||
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
new file mode 100644
index 000000000000..b53225d2cac3
--- /dev/null
+++ b/arch/x86/xen/debugfs.c
@@ -0,0 +1,123 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/debugfs.h> | ||
3 | #include <linux/module.h> | ||
4 | |||
5 | #include "debugfs.h" | ||
6 | |||
7 | static struct dentry *d_xen_debug; | ||
8 | |||
9 | struct dentry * __init xen_init_debugfs(void) | ||
10 | { | ||
11 | if (!d_xen_debug) { | ||
12 | d_xen_debug = debugfs_create_dir("xen", NULL); | ||
13 | |||
14 | if (!d_xen_debug) | ||
15 | pr_warning("Could not create 'xen' debugfs directory\n"); | ||
16 | } | ||
17 | |||
18 | return d_xen_debug; | ||
19 | } | ||
20 | |||
21 | struct array_data | ||
22 | { | ||
23 | void *array; | ||
24 | unsigned elements; | ||
25 | }; | ||
26 | |||
27 | static int u32_array_open(struct inode *inode, struct file *file) | ||
28 | { | ||
29 | file->private_data = NULL; | ||
30 | return nonseekable_open(inode, file); | ||
31 | } | ||
32 | |||
33 | static size_t format_array(char *buf, size_t bufsize, const char *fmt, | ||
34 | u32 *array, unsigned array_size) | ||
35 | { | ||
36 | size_t ret = 0; | ||
37 | unsigned i; | ||
38 | |||
39 | for(i = 0; i < array_size; i++) { | ||
40 | size_t len; | ||
41 | |||
42 | len = snprintf(buf, bufsize, fmt, array[i]); | ||
43 | len++; /* ' ' or '\n' */ | ||
44 | ret += len; | ||
45 | |||
46 | if (buf) { | ||
47 | buf += len; | ||
48 | bufsize -= len; | ||
49 | buf[-1] = (i == array_size-1) ? '\n' : ' '; | ||
50 | } | ||
51 | } | ||
52 | |||
53 | ret++; /* \0 */ | ||
54 | if (buf) | ||
55 | *buf = '\0'; | ||
56 | |||
57 | return ret; | ||
58 | } | ||
59 | |||
60 | static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size) | ||
61 | { | ||
62 | size_t len = format_array(NULL, 0, fmt, array, array_size); | ||
63 | char *ret; | ||
64 | |||
65 | ret = kmalloc(len, GFP_KERNEL); | ||
66 | if (ret == NULL) | ||
67 | return NULL; | ||
68 | |||
69 | format_array(ret, len, fmt, array, array_size); | ||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, | ||
74 | loff_t *ppos) | ||
75 | { | ||
76 | struct inode *inode = file->f_path.dentry->d_inode; | ||
77 | struct array_data *data = inode->i_private; | ||
78 | size_t size; | ||
79 | |||
80 | if (*ppos == 0) { | ||
81 | if (file->private_data) { | ||
82 | kfree(file->private_data); | ||
83 | file->private_data = NULL; | ||
84 | } | ||
85 | |||
86 | file->private_data = format_array_alloc("%u", data->array, data->elements); | ||
87 | } | ||
88 | |||
89 | size = 0; | ||
90 | if (file->private_data) | ||
91 | size = strlen(file->private_data); | ||
92 | |||
93 | return simple_read_from_buffer(buf, len, ppos, file->private_data, size); | ||
94 | } | ||
95 | |||
96 | static int xen_array_release(struct inode *inode, struct file *file) | ||
97 | { | ||
98 | kfree(file->private_data); | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static struct file_operations u32_array_fops = { | ||
104 | .owner = THIS_MODULE, | ||
105 | .open = u32_array_open, | ||
106 | .release= xen_array_release, | ||
107 | .read = u32_array_read, | ||
108 | }; | ||
109 | |||
110 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | ||
111 | struct dentry *parent, | ||
112 | u32 *array, unsigned elements) | ||
113 | { | ||
114 | struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); | ||
115 | |||
116 | if (data == NULL) | ||
117 | return NULL; | ||
118 | |||
119 | data->array = array; | ||
120 | data->elements = elements; | ||
121 | |||
122 | return debugfs_create_file(name, mode, parent, data, &u32_array_fops); | ||
123 | } | ||
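
xen_debugfs_create_u32_array() exposes a fixed-size u32 array as a single space-separated text file under debugfs. A hedged usage sketch; the array and file names below are made up for illustration, and the real callers are the spinlock and mmu statistics code added in this series:

/* Hypothetical caller, assuming the debugfs.h header added below. */
static u32 example_histogram[32];

static int __init example_debugfs_init(void)
{
	struct dentry *d_xen = xen_init_debugfs();

	if (d_xen == NULL)
		return -ENOMEM;

	xen_debugfs_create_u32_array("example_histo", 0444, d_xen,
				     example_histogram,
				     ARRAY_SIZE(example_histogram));
	return 0;
}
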
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h
new file mode 100644
index 000000000000..e28132084832
--- /dev/null
+++ b/arch/x86/xen/debugfs.h
@@ -0,0 +1,10 @@ | |||
1 | #ifndef _XEN_DEBUGFS_H | ||
2 | #define _XEN_DEBUGFS_H | ||
3 | |||
4 | struct dentry * __init xen_init_debugfs(void); | ||
5 | |||
6 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | ||
7 | struct dentry *parent, | ||
8 | u32 *array, unsigned elements); | ||
9 | |||
10 | #endif /* _XEN_DEBUGFS_H */ | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a4e201b47f64..b106e825d266 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -30,7 +30,6 @@ | |||
30 | #include <xen/interface/xen.h> | 30 | #include <xen/interface/xen.h> |
31 | #include <xen/interface/physdev.h> | 31 | #include <xen/interface/physdev.h> |
32 | #include <xen/interface/vcpu.h> | 32 | #include <xen/interface/vcpu.h> |
33 | #include <xen/interface/sched.h> | ||
34 | #include <xen/features.h> | 33 | #include <xen/features.h> |
35 | #include <xen/page.h> | 34 | #include <xen/page.h> |
36 | #include <xen/hvc-console.h> | 35 | #include <xen/hvc-console.h> |
@@ -57,6 +56,9 @@ EXPORT_SYMBOL_GPL(hypercall_page); | |||
57 | DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); | 56 | DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); |
58 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); | 57 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); |
59 | 58 | ||
59 | enum xen_domain_type xen_domain_type = XEN_NATIVE; | ||
60 | EXPORT_SYMBOL_GPL(xen_domain_type); | ||
61 | |||
60 | /* | 62 | /* |
61 | * Identity map, in addition to plain kernel map. This needs to be | 63 | * Identity map, in addition to plain kernel map. This needs to be |
62 | * large enough to allocate page table pages to allocate the rest. | 64 | * large enough to allocate page table pages to allocate the rest. |
@@ -226,103 +228,68 @@ static unsigned long xen_get_debugreg(int reg) | |||
226 | return HYPERVISOR_get_debugreg(reg); | 228 | return HYPERVISOR_get_debugreg(reg); |
227 | } | 229 | } |
228 | 230 | ||
229 | static unsigned long xen_save_fl(void) | 231 | static void xen_leave_lazy(void) |
230 | { | 232 | { |
231 | struct vcpu_info *vcpu; | 233 | paravirt_leave_lazy(paravirt_get_lazy_mode()); |
232 | unsigned long flags; | 234 | xen_mc_flush(); |
233 | |||
234 | vcpu = x86_read_percpu(xen_vcpu); | ||
235 | |||
236 | /* flag has opposite sense of mask */ | ||
237 | flags = !vcpu->evtchn_upcall_mask; | ||
238 | |||
239 | /* convert to IF type flag | ||
240 | -0 -> 0x00000000 | ||
241 | -1 -> 0xffffffff | ||
242 | */ | ||
243 | return (-flags) & X86_EFLAGS_IF; | ||
244 | } | 235 | } |
245 | 236 | ||
246 | static void xen_restore_fl(unsigned long flags) | 237 | static unsigned long xen_store_tr(void) |
247 | { | 238 | { |
248 | struct vcpu_info *vcpu; | 239 | return 0; |
249 | |||
250 | /* convert from IF type flag */ | ||
251 | flags = !(flags & X86_EFLAGS_IF); | ||
252 | |||
253 | /* There's a one instruction preempt window here. We need to | ||
254 | make sure we're don't switch CPUs between getting the vcpu | ||
255 | pointer and updating the mask. */ | ||
256 | preempt_disable(); | ||
257 | vcpu = x86_read_percpu(xen_vcpu); | ||
258 | vcpu->evtchn_upcall_mask = flags; | ||
259 | preempt_enable_no_resched(); | ||
260 | |||
261 | /* Doesn't matter if we get preempted here, because any | ||
262 | pending event will get dealt with anyway. */ | ||
263 | |||
264 | if (flags == 0) { | ||
265 | preempt_check_resched(); | ||
266 | barrier(); /* unmask then check (avoid races) */ | ||
267 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
268 | force_evtchn_callback(); | ||
269 | } | ||
270 | } | 240 | } |
271 | 241 | ||
272 | static void xen_irq_disable(void) | 242 | /* |
243 | * Set the page permissions for a particular virtual address. If the | ||
244 | * address is a vmalloc mapping (or other non-linear mapping), then | ||
245 | * find the linear mapping of the page and also set its protections to | ||
246 | * match. | ||
247 | */ | ||
248 | static void set_aliased_prot(void *v, pgprot_t prot) | ||
273 | { | 249 | { |
274 | /* There's a one instruction preempt window here. We need to | 250 | int level; |
275 | make sure we're don't switch CPUs between getting the vcpu | 251 | pte_t *ptep; |
276 | pointer and updating the mask. */ | 252 | pte_t pte; |
277 | preempt_disable(); | 253 | unsigned long pfn; |
278 | x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; | 254 | struct page *page; |
279 | preempt_enable_no_resched(); | ||
280 | } | ||
281 | 255 | ||
282 | static void xen_irq_enable(void) | 256 | ptep = lookup_address((unsigned long)v, &level); |
283 | { | 257 | BUG_ON(ptep == NULL); |
284 | struct vcpu_info *vcpu; | ||
285 | 258 | ||
286 | /* We don't need to worry about being preempted here, since | 259 | pfn = pte_pfn(*ptep); |
287 | either a) interrupts are disabled, so no preemption, or b) | 260 | page = pfn_to_page(pfn); |
288 | the caller is confused and is trying to re-enable interrupts | ||
289 | on an indeterminate processor. */ | ||
290 | 261 | ||
291 | vcpu = x86_read_percpu(xen_vcpu); | 262 | pte = pfn_pte(pfn, prot); |
292 | vcpu->evtchn_upcall_mask = 0; | ||
293 | 263 | ||
294 | /* Doesn't matter if we get preempted here, because any | 264 | if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) |
295 | pending event will get dealt with anyway. */ | 265 | BUG(); |
296 | 266 | ||
297 | barrier(); /* unmask then check (avoid races) */ | 267 | if (!PageHighMem(page)) { |
298 | if (unlikely(vcpu->evtchn_upcall_pending)) | 268 | void *av = __va(PFN_PHYS(pfn)); |
299 | force_evtchn_callback(); | ||
300 | } | ||
301 | 269 | ||
302 | static void xen_safe_halt(void) | 270 | if (av != v) |
303 | { | 271 | if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) |
304 | /* Blocking includes an implicit local_irq_enable(). */ | 272 | BUG(); |
305 | if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) | 273 | } else |
306 | BUG(); | 274 | kmap_flush_unused(); |
307 | } | 275 | } |
308 | 276 | ||
309 | static void xen_halt(void) | 277 | static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) |
310 | { | 278 | { |
311 | if (irqs_disabled()) | 279 | const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; |
312 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | 280 | int i; |
313 | else | ||
314 | xen_safe_halt(); | ||
315 | } | ||
316 | 281 | ||
317 | static void xen_leave_lazy(void) | 282 | for(i = 0; i < entries; i += entries_per_page) |
318 | { | 283 | set_aliased_prot(ldt + i, PAGE_KERNEL_RO); |
319 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
320 | xen_mc_flush(); | ||
321 | } | 284 | } |
322 | 285 | ||
323 | static unsigned long xen_store_tr(void) | 286 | static void xen_free_ldt(struct desc_struct *ldt, unsigned entries) |
324 | { | 287 | { |
325 | return 0; | 288 | const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; |
289 | int i; | ||
290 | |||
291 | for(i = 0; i < entries; i += entries_per_page) | ||
292 | set_aliased_prot(ldt + i, PAGE_KERNEL); | ||
326 | } | 293 | } |
327 | 294 | ||
328 | static void xen_set_ldt(const void *addr, unsigned entries) | 295 | static void xen_set_ldt(const void *addr, unsigned entries) |
@@ -425,8 +392,7 @@ static void xen_load_gs_index(unsigned int idx) | |||
425 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | 392 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, |
426 | const void *ptr) | 393 | const void *ptr) |
427 | { | 394 | { |
428 | unsigned long lp = (unsigned long)&dt[entrynum]; | 395 | xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); |
429 | xmaddr_t mach_lp = virt_to_machine(lp); | ||
430 | u64 entry = *(u64 *)ptr; | 396 | u64 entry = *(u64 *)ptr; |
431 | 397 | ||
432 | preempt_disable(); | 398 | preempt_disable(); |
@@ -559,7 +525,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | |||
559 | } | 525 | } |
560 | 526 | ||
561 | static void xen_load_sp0(struct tss_struct *tss, | 527 | static void xen_load_sp0(struct tss_struct *tss, |
562 | struct thread_struct *thread) | 528 | struct thread_struct *thread) |
563 | { | 529 | { |
564 | struct multicall_space mcs = xen_mc_entry(0); | 530 | struct multicall_space mcs = xen_mc_entry(0); |
565 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); | 531 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); |
@@ -803,6 +769,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | |||
803 | ret = -EFAULT; | 769 | ret = -EFAULT; |
804 | break; | 770 | break; |
805 | #endif | 771 | #endif |
772 | |||
773 | case MSR_STAR: | ||
774 | case MSR_CSTAR: | ||
775 | case MSR_LSTAR: | ||
776 | case MSR_SYSCALL_MASK: | ||
777 | case MSR_IA32_SYSENTER_CS: | ||
778 | case MSR_IA32_SYSENTER_ESP: | ||
779 | case MSR_IA32_SYSENTER_EIP: | ||
780 | /* Fast syscall setup is all done in hypercalls, so | ||
781 | these are all ignored. Stub them out here to stop | ||
782 | Xen console noise. */ | ||
783 | break; | ||
784 | |||
806 | default: | 785 | default: |
807 | ret = native_write_msr_safe(msr, low, high); | 786 | ret = native_write_msr_safe(msr, low, high); |
808 | } | 787 | } |
@@ -846,7 +825,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) | |||
846 | SetPagePinned(page); | 825 | SetPagePinned(page); |
847 | 826 | ||
848 | if (!PageHighMem(page)) { | 827 | if (!PageHighMem(page)) { |
849 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | 828 | make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); |
850 | if (level == PT_PTE) | 829 | if (level == PT_PTE) |
851 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); | 830 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); |
852 | } else | 831 | } else |
@@ -1220,6 +1199,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1220 | .load_gs_index = xen_load_gs_index, | 1199 | .load_gs_index = xen_load_gs_index, |
1221 | #endif | 1200 | #endif |
1222 | 1201 | ||
1202 | .alloc_ldt = xen_alloc_ldt, | ||
1203 | .free_ldt = xen_free_ldt, | ||
1204 | |||
1223 | .store_gdt = native_store_gdt, | 1205 | .store_gdt = native_store_gdt, |
1224 | .store_idt = native_store_idt, | 1206 | .store_idt = native_store_idt, |
1225 | .store_tr = xen_store_tr, | 1207 | .store_tr = xen_store_tr, |
@@ -1241,36 +1223,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1241 | }, | 1223 | }, |
1242 | }; | 1224 | }; |
1243 | 1225 | ||
1244 | static void __init __xen_init_IRQ(void) | ||
1245 | { | ||
1246 | #ifdef CONFIG_X86_64 | ||
1247 | int i; | ||
1248 | |||
1249 | /* Create identity vector->irq map */ | ||
1250 | for(i = 0; i < NR_VECTORS; i++) { | ||
1251 | int cpu; | ||
1252 | |||
1253 | for_each_possible_cpu(cpu) | ||
1254 | per_cpu(vector_irq, cpu)[i] = i; | ||
1255 | } | ||
1256 | #endif /* CONFIG_X86_64 */ | ||
1257 | |||
1258 | xen_init_IRQ(); | ||
1259 | } | ||
1260 | |||
1261 | static const struct pv_irq_ops xen_irq_ops __initdata = { | ||
1262 | .init_IRQ = __xen_init_IRQ, | ||
1263 | .save_fl = xen_save_fl, | ||
1264 | .restore_fl = xen_restore_fl, | ||
1265 | .irq_disable = xen_irq_disable, | ||
1266 | .irq_enable = xen_irq_enable, | ||
1267 | .safe_halt = xen_safe_halt, | ||
1268 | .halt = xen_halt, | ||
1269 | #ifdef CONFIG_X86_64 | ||
1270 | .adjust_exception_frame = xen_adjust_exception_frame, | ||
1271 | #endif | ||
1272 | }; | ||
1273 | |||
1274 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 1226 | static const struct pv_apic_ops xen_apic_ops __initdata = { |
1275 | #ifdef CONFIG_X86_LOCAL_APIC | 1227 | #ifdef CONFIG_X86_LOCAL_APIC |
1276 | .apic_write = xen_apic_write, | 1228 | .apic_write = xen_apic_write, |
@@ -1664,6 +1616,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1664 | if (!xen_start_info) | 1616 | if (!xen_start_info) |
1665 | return; | 1617 | return; |
1666 | 1618 | ||
1619 | xen_domain_type = XEN_PV_DOMAIN; | ||
1620 | |||
1667 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); | 1621 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); |
1668 | 1622 | ||
1669 | xen_setup_features(); | 1623 | xen_setup_features(); |
@@ -1673,10 +1627,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1673 | pv_init_ops = xen_init_ops; | 1627 | pv_init_ops = xen_init_ops; |
1674 | pv_time_ops = xen_time_ops; | 1628 | pv_time_ops = xen_time_ops; |
1675 | pv_cpu_ops = xen_cpu_ops; | 1629 | pv_cpu_ops = xen_cpu_ops; |
1676 | pv_irq_ops = xen_irq_ops; | ||
1677 | pv_apic_ops = xen_apic_ops; | 1630 | pv_apic_ops = xen_apic_ops; |
1678 | pv_mmu_ops = xen_mmu_ops; | 1631 | pv_mmu_ops = xen_mmu_ops; |
1679 | 1632 | ||
1633 | xen_init_irq_ops(); | ||
1634 | |||
1680 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { | 1635 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { |
1681 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; | 1636 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; |
1682 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; | 1637 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; |
@@ -1700,7 +1655,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1700 | 1655 | ||
1701 | /* Prevent unwanted bits from being set in PTEs. */ | 1656 | /* Prevent unwanted bits from being set in PTEs. */ |
1702 | __supported_pte_mask &= ~_PAGE_GLOBAL; | 1657 | __supported_pte_mask &= ~_PAGE_GLOBAL; |
1703 | if (!is_initial_xendomain()) | 1658 | if (!xen_initial_domain()) |
1704 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | 1659 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); |
1705 | 1660 | ||
1706 | /* Don't do the full vcpu_info placement stuff until we have a | 1661 | /* Don't do the full vcpu_info placement stuff until we have a |
@@ -1735,7 +1690,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1735 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; | 1690 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; |
1736 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); | 1691 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); |
1737 | 1692 | ||
1738 | if (!is_initial_xendomain()) { | 1693 | if (!xen_initial_domain()) { |
1739 | add_preferred_console("xenboot", 0, NULL); | 1694 | add_preferred_console("xenboot", 0, NULL); |
1740 | add_preferred_console("tty", 0, NULL); | 1695 | add_preferred_console("tty", 0, NULL); |
1741 | add_preferred_console("hvc", 0, NULL); | 1696 | add_preferred_console("hvc", 0, NULL); |
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
new file mode 100644
index 000000000000..28b85ab8422e
--- /dev/null
+++ b/arch/x86/xen/irq.c
@@ -0,0 +1,143 @@ | |||
1 | #include <linux/hardirq.h> | ||
2 | |||
3 | #include <xen/interface/xen.h> | ||
4 | #include <xen/interface/sched.h> | ||
5 | #include <xen/interface/vcpu.h> | ||
6 | |||
7 | #include <asm/xen/hypercall.h> | ||
8 | #include <asm/xen/hypervisor.h> | ||
9 | |||
10 | #include "xen-ops.h" | ||
11 | |||
12 | /* | ||
13 | * Force a proper event-channel callback from Xen after clearing the | ||
14 | * callback mask. We do this in a very simple manner, by making a call | ||
15 | * down into Xen. The pending flag will be checked by Xen on return. | ||
16 | */ | ||
17 | void xen_force_evtchn_callback(void) | ||
18 | { | ||
19 | (void)HYPERVISOR_xen_version(0, NULL); | ||
20 | } | ||
21 | |||
22 | static void __init __xen_init_IRQ(void) | ||
23 | { | ||
24 | #ifdef CONFIG_X86_64 | ||
25 | int i; | ||
26 | |||
27 | /* Create identity vector->irq map */ | ||
28 | for(i = 0; i < NR_VECTORS; i++) { | ||
29 | int cpu; | ||
30 | |||
31 | for_each_possible_cpu(cpu) | ||
32 | per_cpu(vector_irq, cpu)[i] = i; | ||
33 | } | ||
34 | #endif /* CONFIG_X86_64 */ | ||
35 | |||
36 | xen_init_IRQ(); | ||
37 | } | ||
38 | |||
39 | static unsigned long xen_save_fl(void) | ||
40 | { | ||
41 | struct vcpu_info *vcpu; | ||
42 | unsigned long flags; | ||
43 | |||
44 | vcpu = x86_read_percpu(xen_vcpu); | ||
45 | |||
46 | /* flag has opposite sense of mask */ | ||
47 | flags = !vcpu->evtchn_upcall_mask; | ||
48 | |||
49 | /* convert to IF type flag | ||
50 | -0 -> 0x00000000 | ||
51 | -1 -> 0xffffffff | ||
52 | */ | ||
53 | return (-flags) & X86_EFLAGS_IF; | ||
54 | } | ||
55 | |||
56 | static void xen_restore_fl(unsigned long flags) | ||
57 | { | ||
58 | struct vcpu_info *vcpu; | ||
59 | |||
60 | /* convert from IF type flag */ | ||
61 | flags = !(flags & X86_EFLAGS_IF); | ||
62 | |||
63 | /* There's a one instruction preempt window here. We need to | ||
64 | make sure we're don't switch CPUs between getting the vcpu | ||
65 | pointer and updating the mask. */ | ||
66 | preempt_disable(); | ||
67 | vcpu = x86_read_percpu(xen_vcpu); | ||
68 | vcpu->evtchn_upcall_mask = flags; | ||
69 | preempt_enable_no_resched(); | ||
70 | |||
71 | /* Doesn't matter if we get preempted here, because any | ||
72 | pending event will get dealt with anyway. */ | ||
73 | |||
74 | if (flags == 0) { | ||
75 | preempt_check_resched(); | ||
76 | barrier(); /* unmask then check (avoid races) */ | ||
77 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
78 | xen_force_evtchn_callback(); | ||
79 | } | ||
80 | } | ||
81 | |||
82 | static void xen_irq_disable(void) | ||
83 | { | ||
84 | /* There's a one instruction preempt window here. We need to | ||
85 | make sure we're don't switch CPUs between getting the vcpu | ||
86 | pointer and updating the mask. */ | ||
87 | preempt_disable(); | ||
88 | x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; | ||
89 | preempt_enable_no_resched(); | ||
90 | } | ||
91 | |||
92 | static void xen_irq_enable(void) | ||
93 | { | ||
94 | struct vcpu_info *vcpu; | ||
95 | |||
96 | /* We don't need to worry about being preempted here, since | ||
97 | either a) interrupts are disabled, so no preemption, or b) | ||
98 | the caller is confused and is trying to re-enable interrupts | ||
99 | on an indeterminate processor. */ | ||
100 | |||
101 | vcpu = x86_read_percpu(xen_vcpu); | ||
102 | vcpu->evtchn_upcall_mask = 0; | ||
103 | |||
104 | /* Doesn't matter if we get preempted here, because any | ||
105 | pending event will get dealt with anyway. */ | ||
106 | |||
107 | barrier(); /* unmask then check (avoid races) */ | ||
108 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
109 | xen_force_evtchn_callback(); | ||
110 | } | ||
111 | |||
112 | static void xen_safe_halt(void) | ||
113 | { | ||
114 | /* Blocking includes an implicit local_irq_enable(). */ | ||
115 | if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) | ||
116 | BUG(); | ||
117 | } | ||
118 | |||
119 | static void xen_halt(void) | ||
120 | { | ||
121 | if (irqs_disabled()) | ||
122 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | ||
123 | else | ||
124 | xen_safe_halt(); | ||
125 | } | ||
126 | |||
127 | static const struct pv_irq_ops xen_irq_ops __initdata = { | ||
128 | .init_IRQ = __xen_init_IRQ, | ||
129 | .save_fl = xen_save_fl, | ||
130 | .restore_fl = xen_restore_fl, | ||
131 | .irq_disable = xen_irq_disable, | ||
132 | .irq_enable = xen_irq_enable, | ||
133 | .safe_halt = xen_safe_halt, | ||
134 | .halt = xen_halt, | ||
135 | #ifdef CONFIG_X86_64 | ||
136 | .adjust_exception_frame = xen_adjust_exception_frame, | ||
137 | #endif | ||
138 | }; | ||
139 | |||
140 | void __init xen_init_irq_ops() | ||
141 | { | ||
142 | pv_irq_ops = xen_irq_ops; | ||
143 | } | ||
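
xen_save_fl()/xen_restore_fl() translate between Xen's evtchn_upcall_mask (1 means events are masked) and the virtual EFLAGS.IF convention the rest of the kernel expects (IF set means interrupts enabled). A small standalone worked example of that conversion, outside any kernel context:

#include <stdio.h>

#define X86_EFLAGS_IF 0x200UL

int main(void)
{
	unsigned long mask;

	for (mask = 0; mask <= 1; mask++) {
		/* xen_save_fl(): mask 0 -> IF set, mask 1 -> IF clear */
		unsigned long flags = (-(unsigned long)!mask) & X86_EFLAGS_IF;
		/* xen_restore_fl(): IF set -> mask 0, IF clear -> mask 1 */
		unsigned long back = !(flags & X86_EFLAGS_IF);

		printf("mask=%lu -> flags=%#lx -> mask=%lu\n", mask, flags, back);
	}
	return 0;
}
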
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2e1b64088490..64e58681767e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -40,6 +40,7 @@ | |||
40 | */ | 40 | */ |
41 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | #include <linux/debugfs.h> | ||
43 | #include <linux/bug.h> | 44 | #include <linux/bug.h> |
44 | 45 | ||
45 | #include <asm/pgtable.h> | 46 | #include <asm/pgtable.h> |
@@ -57,6 +58,61 @@ | |||
57 | 58 | ||
58 | #include "multicalls.h" | 59 | #include "multicalls.h" |
59 | #include "mmu.h" | 60 | #include "mmu.h" |
61 | #include "debugfs.h" | ||
62 | |||
63 | #define MMU_UPDATE_HISTO 30 | ||
64 | |||
65 | #ifdef CONFIG_XEN_DEBUG_FS | ||
66 | |||
67 | static struct { | ||
68 | u32 pgd_update; | ||
69 | u32 pgd_update_pinned; | ||
70 | u32 pgd_update_batched; | ||
71 | |||
72 | u32 pud_update; | ||
73 | u32 pud_update_pinned; | ||
74 | u32 pud_update_batched; | ||
75 | |||
76 | u32 pmd_update; | ||
77 | u32 pmd_update_pinned; | ||
78 | u32 pmd_update_batched; | ||
79 | |||
80 | u32 pte_update; | ||
81 | u32 pte_update_pinned; | ||
82 | u32 pte_update_batched; | ||
83 | |||
84 | u32 mmu_update; | ||
85 | u32 mmu_update_extended; | ||
86 | u32 mmu_update_histo[MMU_UPDATE_HISTO]; | ||
87 | |||
88 | u32 prot_commit; | ||
89 | u32 prot_commit_batched; | ||
90 | |||
91 | u32 set_pte_at; | ||
92 | u32 set_pte_at_batched; | ||
93 | u32 set_pte_at_pinned; | ||
94 | u32 set_pte_at_current; | ||
95 | u32 set_pte_at_kernel; | ||
96 | } mmu_stats; | ||
97 | |||
98 | static u8 zero_stats; | ||
99 | |||
100 | static inline void check_zero(void) | ||
101 | { | ||
102 | if (unlikely(zero_stats)) { | ||
103 | memset(&mmu_stats, 0, sizeof(mmu_stats)); | ||
104 | zero_stats = 0; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | #define ADD_STATS(elem, val) \ | ||
109 | do { check_zero(); mmu_stats.elem += (val); } while(0) | ||
110 | |||
111 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
112 | |||
113 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | ||
114 | |||
115 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
60 | 116 | ||
61 | /* | 117 | /* |
62 | * Just beyond the highest usermode address. STACK_TOP_MAX has a | 118 | * Just beyond the highest usermode address. STACK_TOP_MAX has a |
@@ -229,25 +285,35 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
229 | } | 285 | } |
230 | 286 | ||
231 | 287 | ||
232 | static bool page_pinned(void *ptr) | 288 | static bool xen_page_pinned(void *ptr) |
233 | { | 289 | { |
234 | struct page *page = virt_to_page(ptr); | 290 | struct page *page = virt_to_page(ptr); |
235 | 291 | ||
236 | return PagePinned(page); | 292 | return PagePinned(page); |
237 | } | 293 | } |
238 | 294 | ||
239 | static void extend_mmu_update(const struct mmu_update *update) | 295 | static void xen_extend_mmu_update(const struct mmu_update *update) |
240 | { | 296 | { |
241 | struct multicall_space mcs; | 297 | struct multicall_space mcs; |
242 | struct mmu_update *u; | 298 | struct mmu_update *u; |
243 | 299 | ||
244 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); | 300 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); |
245 | 301 | ||
246 | if (mcs.mc != NULL) | 302 | if (mcs.mc != NULL) { |
303 | ADD_STATS(mmu_update_extended, 1); | ||
304 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1); | ||
305 | |||
247 | mcs.mc->args[1]++; | 306 | mcs.mc->args[1]++; |
248 | else { | 307 | |
308 | if (mcs.mc->args[1] < MMU_UPDATE_HISTO) | ||
309 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1); | ||
310 | else | ||
311 | ADD_STATS(mmu_update_histo[0], 1); | ||
312 | } else { | ||
313 | ADD_STATS(mmu_update, 1); | ||
249 | mcs = __xen_mc_entry(sizeof(*u)); | 314 | mcs = __xen_mc_entry(sizeof(*u)); |
250 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | 315 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); |
316 | ADD_STATS(mmu_update_histo[1], 1); | ||
251 | } | 317 | } |
252 | 318 | ||
253 | u = mcs.args; | 319 | u = mcs.args; |
@@ -265,7 +331,9 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
265 | /* ptr may be ioremapped for 64-bit pagetable setup */ | 331 | /* ptr may be ioremapped for 64-bit pagetable setup */ |
266 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; | 332 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; |
267 | u.val = pmd_val_ma(val); | 333 | u.val = pmd_val_ma(val); |
268 | extend_mmu_update(&u); | 334 | xen_extend_mmu_update(&u); |
335 | |||
336 | ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
269 | 337 | ||
270 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 338 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
271 | 339 | ||
@@ -274,13 +342,17 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
274 | 342 | ||
275 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | 343 | void xen_set_pmd(pmd_t *ptr, pmd_t val) |
276 | { | 344 | { |
345 | ADD_STATS(pmd_update, 1); | ||
346 | |||
277 | /* If page is not pinned, we can just update the entry | 347 | /* If page is not pinned, we can just update the entry |
278 | directly */ | 348 | directly */ |
279 | if (!page_pinned(ptr)) { | 349 | if (!xen_page_pinned(ptr)) { |
280 | *ptr = val; | 350 | *ptr = val; |
281 | return; | 351 | return; |
282 | } | 352 | } |
283 | 353 | ||
354 | ADD_STATS(pmd_update_pinned, 1); | ||
355 | |||
284 | xen_set_pmd_hyper(ptr, val); | 356 | xen_set_pmd_hyper(ptr, val); |
285 | } | 357 | } |
286 | 358 | ||
@@ -300,12 +372,18 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
300 | if (mm == &init_mm) | 372 | if (mm == &init_mm) |
301 | preempt_disable(); | 373 | preempt_disable(); |
302 | 374 | ||
375 | ADD_STATS(set_pte_at, 1); | ||
376 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); | ||
377 | ADD_STATS(set_pte_at_current, mm == current->mm); | ||
378 | ADD_STATS(set_pte_at_kernel, mm == &init_mm); | ||
379 | |||
303 | if (mm == current->mm || mm == &init_mm) { | 380 | if (mm == current->mm || mm == &init_mm) { |
304 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | 381 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { |
305 | struct multicall_space mcs; | 382 | struct multicall_space mcs; |
306 | mcs = xen_mc_entry(0); | 383 | mcs = xen_mc_entry(0); |
307 | 384 | ||
308 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); | 385 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); |
386 | ADD_STATS(set_pte_at_batched, 1); | ||
309 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 387 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
310 | goto out; | 388 | goto out; |
311 | } else | 389 | } else |
@@ -334,7 +412,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
334 | 412 | ||
335 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; | 413 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
336 | u.val = pte_val_ma(pte); | 414 | u.val = pte_val_ma(pte); |
337 | extend_mmu_update(&u); | 415 | xen_extend_mmu_update(&u); |
416 | |||
417 | ADD_STATS(prot_commit, 1); | ||
418 | ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
338 | 419 | ||
339 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 420 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
340 | } | 421 | } |
@@ -400,7 +481,9 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
400 | /* ptr may be ioremapped for 64-bit pagetable setup */ | 481 | /* ptr may be ioremapped for 64-bit pagetable setup */ |
401 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; | 482 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; |
402 | u.val = pud_val_ma(val); | 483 | u.val = pud_val_ma(val); |
403 | extend_mmu_update(&u); | 484 | xen_extend_mmu_update(&u); |
485 | |||
486 | ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
404 | 487 | ||
405 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 488 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
406 | 489 | ||
@@ -409,18 +492,26 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
409 | 492 | ||
410 | void xen_set_pud(pud_t *ptr, pud_t val) | 493 | void xen_set_pud(pud_t *ptr, pud_t val) |
411 | { | 494 | { |
495 | ADD_STATS(pud_update, 1); | ||
496 | |||
412 | /* If page is not pinned, we can just update the entry | 497 | /* If page is not pinned, we can just update the entry |
413 | directly */ | 498 | directly */ |
414 | if (!page_pinned(ptr)) { | 499 | if (!xen_page_pinned(ptr)) { |
415 | *ptr = val; | 500 | *ptr = val; |
416 | return; | 501 | return; |
417 | } | 502 | } |
418 | 503 | ||
504 | ADD_STATS(pud_update_pinned, 1); | ||
505 | |||
419 | xen_set_pud_hyper(ptr, val); | 506 | xen_set_pud_hyper(ptr, val); |
420 | } | 507 | } |
421 | 508 | ||
422 | void xen_set_pte(pte_t *ptep, pte_t pte) | 509 | void xen_set_pte(pte_t *ptep, pte_t pte) |
423 | { | 510 | { |
511 | ADD_STATS(pte_update, 1); | ||
512 | // ADD_STATS(pte_update_pinned, xen_page_pinned(ptep)); | ||
513 | ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
514 | |||
424 | #ifdef CONFIG_X86_PAE | 515 | #ifdef CONFIG_X86_PAE |
425 | ptep->pte_high = pte.pte_high; | 516 | ptep->pte_high = pte.pte_high; |
426 | smp_wmb(); | 517 | smp_wmb(); |
@@ -490,7 +581,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | |||
490 | 581 | ||
491 | u.ptr = virt_to_machine(ptr).maddr; | 582 | u.ptr = virt_to_machine(ptr).maddr; |
492 | u.val = pgd_val_ma(val); | 583 | u.val = pgd_val_ma(val); |
493 | extend_mmu_update(&u); | 584 | xen_extend_mmu_update(&u); |
494 | } | 585 | } |
495 | 586 | ||
496 | /* | 587 | /* |
@@ -517,17 +608,22 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
517 | { | 608 | { |
518 | pgd_t *user_ptr = xen_get_user_pgd(ptr); | 609 | pgd_t *user_ptr = xen_get_user_pgd(ptr); |
519 | 610 | ||
611 | ADD_STATS(pgd_update, 1); | ||
612 | |||
520 | /* If page is not pinned, we can just update the entry | 613 | /* If page is not pinned, we can just update the entry |
521 | directly */ | 614 | directly */ |
522 | if (!page_pinned(ptr)) { | 615 | if (!xen_page_pinned(ptr)) { |
523 | *ptr = val; | 616 | *ptr = val; |
524 | if (user_ptr) { | 617 | if (user_ptr) { |
525 | WARN_ON(page_pinned(user_ptr)); | 618 | WARN_ON(xen_page_pinned(user_ptr)); |
526 | *user_ptr = val; | 619 | *user_ptr = val; |
527 | } | 620 | } |
528 | return; | 621 | return; |
529 | } | 622 | } |
530 | 623 | ||
624 | ADD_STATS(pgd_update_pinned, 1); | ||
625 | ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
626 | |||
531 | /* If it's pinned, then we can at least batch the kernel and | 627 | /* If it's pinned, then we can at least batch the kernel and |
532 | user updates together. */ | 628 | user updates together. */ |
533 | xen_mc_batch(); | 629 | xen_mc_batch(); |
@@ -555,8 +651,8 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
555 | * For 64-bit, we must skip the Xen hole in the middle of the address | 651 | * For 64-bit, we must skip the Xen hole in the middle of the address |
556 | * space, just after the big x86-64 virtual hole. | 652 | * space, just after the big x86-64 virtual hole. |
557 | */ | 653 | */ |
558 | static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | 654 | static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), |
559 | unsigned long limit) | 655 | unsigned long limit) |
560 | { | 656 | { |
561 | int flush = 0; | 657 | int flush = 0; |
562 | unsigned hole_low, hole_high; | 658 | unsigned hole_low, hole_high; |
@@ -590,8 +686,6 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | |||
590 | pmdidx_limit = 0; | 686 | pmdidx_limit = 0; |
591 | #endif | 687 | #endif |
592 | 688 | ||
593 | flush |= (*func)(virt_to_page(pgd), PT_PGD); | ||
594 | |||
595 | for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { | 689 | for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { |
596 | pud_t *pud; | 690 | pud_t *pud; |
597 | 691 | ||
@@ -637,12 +731,18 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | |||
637 | } | 731 | } |
638 | } | 732 | } |
639 | } | 733 | } |
734 | |||
640 | out: | 735 | out: |
736 | /* Do the top level last, so that the callbacks can use it as | ||
737 | a cue to do final things like tlb flushes. */ | ||
738 | flush |= (*func)(virt_to_page(pgd), PT_PGD); | ||
641 | 739 | ||
642 | return flush; | 740 | return flush; |
643 | } | 741 | } |
644 | 742 | ||
645 | static spinlock_t *lock_pte(struct page *page) | 743 | /* If we're using split pte locks, then take the page's lock and |
744 | return a pointer to it. Otherwise return NULL. */ | ||
745 | static spinlock_t *xen_pte_lock(struct page *page) | ||
646 | { | 746 | { |
647 | spinlock_t *ptl = NULL; | 747 | spinlock_t *ptl = NULL; |
648 | 748 | ||
@@ -654,7 +754,7 @@ static spinlock_t *lock_pte(struct page *page) | |||
654 | return ptl; | 754 | return ptl; |
655 | } | 755 | } |
656 | 756 | ||
657 | static void do_unlock(void *v) | 757 | static void xen_pte_unlock(void *v) |
658 | { | 758 | { |
659 | spinlock_t *ptl = v; | 759 | spinlock_t *ptl = v; |
660 | spin_unlock(ptl); | 760 | spin_unlock(ptl); |
@@ -672,7 +772,7 @@ static void xen_do_pin(unsigned level, unsigned long pfn) | |||
672 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | 772 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); |
673 | } | 773 | } |
674 | 774 | ||
675 | static int pin_page(struct page *page, enum pt_level level) | 775 | static int xen_pin_page(struct page *page, enum pt_level level) |
676 | { | 776 | { |
677 | unsigned pgfl = TestSetPagePinned(page); | 777 | unsigned pgfl = TestSetPagePinned(page); |
678 | int flush; | 778 | int flush; |
@@ -691,21 +791,40 @@ static int pin_page(struct page *page, enum pt_level level) | |||
691 | 791 | ||
692 | flush = 0; | 792 | flush = 0; |
693 | 793 | ||
794 | /* | ||
795 | * We need to hold the pagetable lock between the time | ||
796 | * we make the pagetable RO and when we actually pin | ||
797 | * it. If we don't, then other users may come in and | ||
798 | * attempt to update the pagetable by writing it, | ||
799 | * which will fail because the memory is RO but not | ||
800 | * pinned, so Xen won't do the trap'n'emulate. | ||
801 | * | ||
802 | * If we're using split pte locks, we can't hold the | ||
803 | * entire pagetable's worth of locks during the | ||
804 | * traverse, because we may wrap the preempt count (8 | ||
805 | * bits). The solution is to mark RO and pin each PTE | ||
806 | * page while holding the lock. This means the number | ||
807 | * of locks we end up holding is never more than a | ||
808 | * batch size (~32 entries, at present). | ||
809 | * | ||
810 | * If we're not using split pte locks, we needn't pin | ||
811 | * the PTE pages independently, because we're | ||
812 | * protected by the overall pagetable lock. | ||
813 | */ | ||
694 | ptl = NULL; | 814 | ptl = NULL; |
695 | if (level == PT_PTE) | 815 | if (level == PT_PTE) |
696 | ptl = lock_pte(page); | 816 | ptl = xen_pte_lock(page); |
697 | 817 | ||
698 | MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, | 818 | MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, |
699 | pfn_pte(pfn, PAGE_KERNEL_RO), | 819 | pfn_pte(pfn, PAGE_KERNEL_RO), |
700 | level == PT_PGD ? UVMF_TLB_FLUSH : 0); | 820 | level == PT_PGD ? UVMF_TLB_FLUSH : 0); |
701 | 821 | ||
702 | if (level == PT_PTE) | 822 | if (ptl) { |
703 | xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); | 823 | xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); |
704 | 824 | ||
705 | if (ptl) { | ||
706 | /* Queue a deferred unlock for when this batch | 825 | /* Queue a deferred unlock for when this batch |
707 | is completed. */ | 826 | is completed. */ |
708 | xen_mc_callback(do_unlock, ptl); | 827 | xen_mc_callback(xen_pte_unlock, ptl); |
709 | } | 828 | } |
710 | } | 829 | } |
711 | 830 | ||
@@ -719,7 +838,7 @@ void xen_pgd_pin(pgd_t *pgd) | |||
719 | { | 838 | { |
720 | xen_mc_batch(); | 839 | xen_mc_batch(); |
721 | 840 | ||
722 | if (pgd_walk(pgd, pin_page, USER_LIMIT)) { | 841 | if (xen_pgd_walk(pgd, xen_pin_page, USER_LIMIT)) { |
723 | /* re-enable interrupts for kmap_flush_unused */ | 842 | /* re-enable interrupts for kmap_flush_unused */ |
724 | xen_mc_issue(0); | 843 | xen_mc_issue(0); |
725 | kmap_flush_unused(); | 844 | kmap_flush_unused(); |
@@ -733,14 +852,14 @@ void xen_pgd_pin(pgd_t *pgd) | |||
733 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); | 852 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); |
734 | 853 | ||
735 | if (user_pgd) { | 854 | if (user_pgd) { |
736 | pin_page(virt_to_page(user_pgd), PT_PGD); | 855 | xen_pin_page(virt_to_page(user_pgd), PT_PGD); |
737 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); | 856 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); |
738 | } | 857 | } |
739 | } | 858 | } |
740 | #else /* CONFIG_X86_32 */ | 859 | #else /* CONFIG_X86_32 */ |
741 | #ifdef CONFIG_X86_PAE | 860 | #ifdef CONFIG_X86_PAE |
742 | /* Need to make sure unshared kernel PMD is pinnable */ | 861 | /* Need to make sure unshared kernel PMD is pinnable */ |
743 | pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); | 862 | xen_pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); |
744 | #endif | 863 | #endif |
745 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); | 864 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); |
746 | #endif /* CONFIG_X86_64 */ | 865 | #endif /* CONFIG_X86_64 */ |
@@ -775,7 +894,7 @@ void xen_mm_pin_all(void) | |||
775 | * that's before we have page structures to store the bits. So do all | 894 | * that's before we have page structures to store the bits. So do all |
776 | * the book-keeping now. | 895 | * the book-keeping now. |
777 | */ | 896 | */ |
778 | static __init int mark_pinned(struct page *page, enum pt_level level) | 897 | static __init int xen_mark_pinned(struct page *page, enum pt_level level) |
779 | { | 898 | { |
780 | SetPagePinned(page); | 899 | SetPagePinned(page); |
781 | return 0; | 900 | return 0; |
@@ -783,10 +902,10 @@ static __init int mark_pinned(struct page *page, enum pt_level level) | |||
783 | 902 | ||
784 | void __init xen_mark_init_mm_pinned(void) | 903 | void __init xen_mark_init_mm_pinned(void) |
785 | { | 904 | { |
786 | pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP); | 905 | xen_pgd_walk(init_mm.pgd, xen_mark_pinned, FIXADDR_TOP); |
787 | } | 906 | } |
788 | 907 | ||
789 | static int unpin_page(struct page *page, enum pt_level level) | 908 | static int xen_unpin_page(struct page *page, enum pt_level level) |
790 | { | 909 | { |
791 | unsigned pgfl = TestClearPagePinned(page); | 910 | unsigned pgfl = TestClearPagePinned(page); |
792 | 911 | ||
@@ -796,10 +915,18 @@ static int unpin_page(struct page *page, enum pt_level level) | |||
796 | spinlock_t *ptl = NULL; | 915 | spinlock_t *ptl = NULL; |
797 | struct multicall_space mcs; | 916 | struct multicall_space mcs; |
798 | 917 | ||
918 | /* | ||
919 | * Do the converse to xen_pin_page. If we're using split | ||
920 | * pte locks, we must be holding the lock while | ||
921 | * the pte page is unpinned but still RO to prevent | ||
922 | * concurrent updates from seeing it in this | ||
923 | * partially-pinned state. | ||
924 | */ | ||
799 | if (level == PT_PTE) { | 925 | if (level == PT_PTE) { |
800 | ptl = lock_pte(page); | 926 | ptl = xen_pte_lock(page); |
801 | 927 | ||
802 | xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); | 928 | if (ptl) |
929 | xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); | ||
803 | } | 930 | } |
804 | 931 | ||
805 | mcs = __xen_mc_entry(0); | 932 | mcs = __xen_mc_entry(0); |
@@ -810,7 +937,7 @@ static int unpin_page(struct page *page, enum pt_level level) | |||
810 | 937 | ||
811 | if (ptl) { | 938 | if (ptl) { |
812 | /* unlock when batch completed */ | 939 | /* unlock when batch completed */ |
813 | xen_mc_callback(do_unlock, ptl); | 940 | xen_mc_callback(xen_pte_unlock, ptl); |
814 | } | 941 | } |
815 | } | 942 | } |
816 | 943 | ||
@@ -830,17 +957,17 @@ static void xen_pgd_unpin(pgd_t *pgd) | |||
830 | 957 | ||
831 | if (user_pgd) { | 958 | if (user_pgd) { |
832 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); | 959 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); |
833 | unpin_page(virt_to_page(user_pgd), PT_PGD); | 960 | xen_unpin_page(virt_to_page(user_pgd), PT_PGD); |
834 | } | 961 | } |
835 | } | 962 | } |
836 | #endif | 963 | #endif |
837 | 964 | ||
838 | #ifdef CONFIG_X86_PAE | 965 | #ifdef CONFIG_X86_PAE |
839 | /* Need to make sure unshared kernel PMD is unpinned */ | 966 | /* Need to make sure unshared kernel PMD is unpinned */ |
840 | pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); | 967 | xen_unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); |
841 | #endif | 968 | #endif |
842 | 969 | ||
843 | pgd_walk(pgd, unpin_page, USER_LIMIT); | 970 | xen_pgd_walk(pgd, xen_unpin_page, USER_LIMIT); |
844 | 971 | ||
845 | xen_mc_issue(0); | 972 | xen_mc_issue(0); |
846 | } | 973 | } |
@@ -907,7 +1034,7 @@ static void drop_other_mm_ref(void *info) | |||
907 | } | 1034 | } |
908 | } | 1035 | } |
909 | 1036 | ||
910 | static void drop_mm_ref(struct mm_struct *mm) | 1037 | static void xen_drop_mm_ref(struct mm_struct *mm) |
911 | { | 1038 | { |
912 | cpumask_t mask; | 1039 | cpumask_t mask; |
913 | unsigned cpu; | 1040 | unsigned cpu; |
@@ -937,7 +1064,7 @@ static void drop_mm_ref(struct mm_struct *mm) | |||
937 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); | 1064 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); |
938 | } | 1065 | } |
939 | #else | 1066 | #else |
940 | static void drop_mm_ref(struct mm_struct *mm) | 1067 | static void xen_drop_mm_ref(struct mm_struct *mm) |
941 | { | 1068 | { |
942 | if (current->active_mm == mm) | 1069 | if (current->active_mm == mm) |
943 | load_cr3(swapper_pg_dir); | 1070 | load_cr3(swapper_pg_dir); |
@@ -961,14 +1088,77 @@ static void drop_mm_ref(struct mm_struct *mm) | |||
961 | void xen_exit_mmap(struct mm_struct *mm) | 1088 | void xen_exit_mmap(struct mm_struct *mm) |
962 | { | 1089 | { |
963 | get_cpu(); /* make sure we don't move around */ | 1090 | get_cpu(); /* make sure we don't move around */ |
964 | drop_mm_ref(mm); | 1091 | xen_drop_mm_ref(mm); |
965 | put_cpu(); | 1092 | put_cpu(); |
966 | 1093 | ||
967 | spin_lock(&mm->page_table_lock); | 1094 | spin_lock(&mm->page_table_lock); |
968 | 1095 | ||
969 | /* pgd may not be pinned in the error exit path of execve */ | 1096 | /* pgd may not be pinned in the error exit path of execve */ |
970 | if (page_pinned(mm->pgd)) | 1097 | if (xen_page_pinned(mm->pgd)) |
971 | xen_pgd_unpin(mm->pgd); | 1098 | xen_pgd_unpin(mm->pgd); |
972 | 1099 | ||
973 | spin_unlock(&mm->page_table_lock); | 1100 | spin_unlock(&mm->page_table_lock); |
974 | } | 1101 | } |
1102 | |||
1103 | #ifdef CONFIG_XEN_DEBUG_FS | ||
1104 | |||
1105 | static struct dentry *d_mmu_debug; | ||
1106 | |||
1107 | static int __init xen_mmu_debugfs(void) | ||
1108 | { | ||
1109 | struct dentry *d_xen = xen_init_debugfs(); | ||
1110 | |||
1111 | if (d_xen == NULL) | ||
1112 | return -ENOMEM; | ||
1113 | |||
1114 | d_mmu_debug = debugfs_create_dir("mmu", d_xen); | ||
1115 | |||
1116 | debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats); | ||
1117 | |||
1118 | debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update); | ||
1119 | debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug, | ||
1120 | &mmu_stats.pgd_update_pinned); | ||
1121 | debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug, | ||
1122 | &mmu_stats.pgd_update_batched); | ||
1123 | |||
1124 | debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update); | ||
1125 | debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug, | ||
1126 | &mmu_stats.pud_update_pinned); | ||
1127 | debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug, | ||
1128 | &mmu_stats.pud_update_batched); | ||
1129 | |||
1130 | debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update); | ||
1131 | debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug, | ||
1132 | &mmu_stats.pmd_update_pinned); | ||
1133 | debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug, | ||
1134 | &mmu_stats.pmd_update_batched); | ||
1135 | |||
1136 | debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update); | ||
1137 | // debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug, | ||
1138 | // &mmu_stats.pte_update_pinned); | ||
1139 | debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug, | ||
1140 | &mmu_stats.pte_update_batched); | ||
1141 | |||
1142 | debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update); | ||
1143 | debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug, | ||
1144 | &mmu_stats.mmu_update_extended); | ||
1145 | xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug, | ||
1146 | mmu_stats.mmu_update_histo, 20); | ||
1147 | |||
1148 | debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at); | ||
1149 | debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug, | ||
1150 | &mmu_stats.set_pte_at_batched); | ||
1151 | debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug, | ||
1152 | &mmu_stats.set_pte_at_current); | ||
1153 | debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug, | ||
1154 | &mmu_stats.set_pte_at_kernel); | ||
1155 | |||
1156 | debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit); | ||
1157 | debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, | ||
1158 | &mmu_stats.prot_commit_batched); | ||
1159 | |||
1160 | return 0; | ||
1161 | } | ||
1162 | fs_initcall(xen_mmu_debugfs); | ||
1163 | |||
1164 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
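The reordering in xen_pgd_walk() above (visiting the top-level PGD page last instead of first) lets callbacks treat the PT_PGD visit as an end-of-walk cue, e.g. to fold a TLB flush into the final pin or unpin. A minimal user-space sketch of that pattern, with hypothetical names standing in for the kernel helpers:

#include <stdio.h>

enum pt_level { PT_PTE, PT_PGD };

/* Visit every leaf first, then the root last, so the callback can use
 * the PT_PGD call as a "walk finished" cue (e.g. to issue a TLB flush). */
static int walk(int nleaves, int (*func)(int idx, enum pt_level level))
{
	int flush = 0;
	int i;

	for (i = 0; i < nleaves; i++)
		flush |= func(i, PT_PTE);

	flush |= func(-1, PT_PGD);	/* top level last */
	return flush;
}

static int visit(int idx, enum pt_level level)
{
	if (level == PT_PGD)
		printf("root visited last: flush here\n");
	else
		printf("leaf %d\n", idx);
	return 0;
}

int main(void)
{
	walk(4, visit);
	return 0;
}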
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 9efd1c6c9776..8ea8a0d0b0de 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -21,16 +21,20 @@ | |||
21 | */ | 21 | */ |
22 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
23 | #include <linux/hardirq.h> | 23 | #include <linux/hardirq.h> |
24 | #include <linux/debugfs.h> | ||
24 | 25 | ||
25 | #include <asm/xen/hypercall.h> | 26 | #include <asm/xen/hypercall.h> |
26 | 27 | ||
27 | #include "multicalls.h" | 28 | #include "multicalls.h" |
29 | #include "debugfs.h" | ||
30 | |||
31 | #define MC_BATCH 32 | ||
28 | 32 | ||
29 | #define MC_DEBUG 1 | 33 | #define MC_DEBUG 1 |
30 | 34 | ||
31 | #define MC_BATCH 32 | ||
32 | #define MC_ARGS (MC_BATCH * 16) | 35 | #define MC_ARGS (MC_BATCH * 16) |
33 | 36 | ||
37 | |||
34 | struct mc_buffer { | 38 | struct mc_buffer { |
35 | struct multicall_entry entries[MC_BATCH]; | 39 | struct multicall_entry entries[MC_BATCH]; |
36 | #if MC_DEBUG | 40 | #if MC_DEBUG |
@@ -47,6 +51,76 @@ struct mc_buffer { | |||
47 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); | 51 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); |
48 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); | 52 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); |
49 | 53 | ||
54 | /* flush reasons 0- slots, 1- args, 2- callbacks */ | ||
55 | enum flush_reasons | ||
56 | { | ||
57 | FL_SLOTS, | ||
58 | FL_ARGS, | ||
59 | FL_CALLBACKS, | ||
60 | |||
61 | FL_N_REASONS | ||
62 | }; | ||
63 | |||
64 | #ifdef CONFIG_XEN_DEBUG_FS | ||
65 | #define NHYPERCALLS 40 /* not really */ | ||
66 | |||
67 | static struct { | ||
68 | unsigned histo[MC_BATCH+1]; | ||
69 | |||
70 | unsigned issued; | ||
71 | unsigned arg_total; | ||
72 | unsigned hypercalls; | ||
73 | unsigned histo_hypercalls[NHYPERCALLS]; | ||
74 | |||
75 | unsigned flush[FL_N_REASONS]; | ||
76 | } mc_stats; | ||
77 | |||
78 | static u8 zero_stats; | ||
79 | |||
80 | static inline void check_zero(void) | ||
81 | { | ||
82 | if (unlikely(zero_stats)) { | ||
83 | memset(&mc_stats, 0, sizeof(mc_stats)); | ||
84 | zero_stats = 0; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | static void mc_add_stats(const struct mc_buffer *mc) | ||
89 | { | ||
90 | int i; | ||
91 | |||
92 | check_zero(); | ||
93 | |||
94 | mc_stats.issued++; | ||
95 | mc_stats.hypercalls += mc->mcidx; | ||
96 | mc_stats.arg_total += mc->argidx; | ||
97 | |||
98 | mc_stats.histo[mc->mcidx]++; | ||
99 | for (i = 0; i < mc->mcidx; i++) { | ||
100 | unsigned op = mc->entries[i].op; | ||
101 | if (op < NHYPERCALLS) | ||
102 | mc_stats.histo_hypercalls[op]++; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | static void mc_stats_flush(enum flush_reasons idx) | ||
107 | { | ||
108 | check_zero(); | ||
109 | |||
110 | mc_stats.flush[idx]++; | ||
111 | } | ||
112 | |||
113 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
114 | |||
115 | static inline void mc_add_stats(const struct mc_buffer *mc) | ||
116 | { | ||
117 | } | ||
118 | |||
119 | static inline void mc_stats_flush(enum flush_reasons idx) | ||
120 | { | ||
121 | } | ||
122 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
123 | |||
50 | void xen_mc_flush(void) | 124 | void xen_mc_flush(void) |
51 | { | 125 | { |
52 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 126 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
@@ -60,6 +134,8 @@ void xen_mc_flush(void) | |||
60 | something in the middle */ | 134 | something in the middle */ |
61 | local_irq_save(flags); | 135 | local_irq_save(flags); |
62 | 136 | ||
137 | mc_add_stats(b); | ||
138 | |||
63 | if (b->mcidx) { | 139 | if (b->mcidx) { |
64 | #if MC_DEBUG | 140 | #if MC_DEBUG |
65 | memcpy(b->debug, b->entries, | 141 | memcpy(b->debug, b->entries, |
@@ -115,6 +191,7 @@ struct multicall_space __xen_mc_entry(size_t args) | |||
115 | 191 | ||
116 | if (b->mcidx == MC_BATCH || | 192 | if (b->mcidx == MC_BATCH || |
117 | (argidx + args) > MC_ARGS) { | 193 | (argidx + args) > MC_ARGS) { |
194 | mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS); | ||
118 | xen_mc_flush(); | 195 | xen_mc_flush(); |
119 | argidx = roundup(b->argidx, sizeof(u64)); | 196 | argidx = roundup(b->argidx, sizeof(u64)); |
120 | } | 197 | } |
@@ -158,10 +235,44 @@ void xen_mc_callback(void (*fn)(void *), void *data) | |||
158 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 235 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
159 | struct callback *cb; | 236 | struct callback *cb; |
160 | 237 | ||
161 | if (b->cbidx == MC_BATCH) | 238 | if (b->cbidx == MC_BATCH) { |
239 | mc_stats_flush(FL_CALLBACKS); | ||
162 | xen_mc_flush(); | 240 | xen_mc_flush(); |
241 | } | ||
163 | 242 | ||
164 | cb = &b->callbacks[b->cbidx++]; | 243 | cb = &b->callbacks[b->cbidx++]; |
165 | cb->fn = fn; | 244 | cb->fn = fn; |
166 | cb->data = data; | 245 | cb->data = data; |
167 | } | 246 | } |
247 | |||
248 | #ifdef CONFIG_XEN_DEBUG_FS | ||
249 | |||
250 | static struct dentry *d_mc_debug; | ||
251 | |||
252 | static int __init xen_mc_debugfs(void) | ||
253 | { | ||
254 | struct dentry *d_xen = xen_init_debugfs(); | ||
255 | |||
256 | if (d_xen == NULL) | ||
257 | return -ENOMEM; | ||
258 | |||
259 | d_mc_debug = debugfs_create_dir("multicalls", d_xen); | ||
260 | |||
261 | debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats); | ||
262 | |||
263 | debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued); | ||
264 | debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls); | ||
265 | debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total); | ||
266 | |||
267 | xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug, | ||
268 | mc_stats.histo, MC_BATCH); | ||
269 | xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug, | ||
270 | mc_stats.histo_hypercalls, NHYPERCALLS); | ||
271 | xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug, | ||
272 | mc_stats.flush, FL_N_REASONS); | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | fs_initcall(xen_mc_debugfs); | ||
277 | |||
278 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
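The zero_stats byte exported above is the whole reset interface for the multicall statistics: writing 1 to it from debugfs makes the next stats update clear every counter via check_zero(), so the hot path needs no locking. A stand-alone sketch of that lazy-reset idiom (illustrative names only, not the kernel API):

#include <stdio.h>
#include <string.h>

static struct {
	unsigned issued;
	unsigned hypercalls;
} stats;

static unsigned char zero_stats;	/* set to 1 by a debugfs write */

/* Clear the counters lazily, from the context that next touches them. */
static void check_zero(void)
{
	if (zero_stats) {
		memset(&stats, 0, sizeof(stats));
		zero_stats = 0;
	}
}

static void account_batch(unsigned ncalls)
{
	check_zero();
	stats.issued++;
	stats.hypercalls += ncalls;
}

int main(void)
{
	account_batch(3);
	zero_stats = 1;		/* what "echo 1 > zero_stats" would request */
	account_batch(5);	/* reset applied here, then this batch counted */
	printf("issued=%u hypercalls=%u\n", stats.issued, stats.hypercalls);
	return 0;
}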
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d8faf79a0a1d..d77da613b1d2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -11,11 +11,8 @@ | |||
11 | * useful topology information for the kernel to make use of. As a | 11 | * useful topology information for the kernel to make use of. As a |
12 | * result, all CPUs are treated as if they're single-core and | 12 | * result, all CPUs are treated as if they're single-core and |
13 | * single-threaded. | 13 | * single-threaded. |
14 | * | ||
15 | * This does not handle HOTPLUG_CPU yet. | ||
16 | */ | 14 | */ |
17 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
18 | #include <linux/kernel_stat.h> | ||
19 | #include <linux/err.h> | 16 | #include <linux/err.h> |
20 | #include <linux/smp.h> | 17 | #include <linux/smp.h> |
21 | 18 | ||
@@ -36,8 +33,6 @@ | |||
36 | #include "xen-ops.h" | 33 | #include "xen-ops.h" |
37 | #include "mmu.h" | 34 | #include "mmu.h" |
38 | 35 | ||
39 | static void __cpuinit xen_init_lock_cpu(int cpu); | ||
40 | |||
41 | cpumask_t xen_cpu_initialized_map; | 36 | cpumask_t xen_cpu_initialized_map; |
42 | 37 | ||
43 | static DEFINE_PER_CPU(int, resched_irq); | 38 | static DEFINE_PER_CPU(int, resched_irq); |
@@ -64,11 +59,12 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | |||
64 | return IRQ_HANDLED; | 59 | return IRQ_HANDLED; |
65 | } | 60 | } |
66 | 61 | ||
67 | static __cpuinit void cpu_bringup_and_idle(void) | 62 | static __cpuinit void cpu_bringup(void) |
68 | { | 63 | { |
69 | int cpu = smp_processor_id(); | 64 | int cpu = smp_processor_id(); |
70 | 65 | ||
71 | cpu_init(); | 66 | cpu_init(); |
67 | touch_softlockup_watchdog(); | ||
72 | preempt_disable(); | 68 | preempt_disable(); |
73 | 69 | ||
74 | xen_enable_sysenter(); | 70 | xen_enable_sysenter(); |
@@ -89,6 +85,11 @@ static __cpuinit void cpu_bringup_and_idle(void) | |||
89 | local_irq_enable(); | 85 | local_irq_enable(); |
90 | 86 | ||
91 | wmb(); /* make sure everything is out */ | 87 | wmb(); /* make sure everything is out */ |
88 | } | ||
89 | |||
90 | static __cpuinit void cpu_bringup_and_idle(void) | ||
91 | { | ||
92 | cpu_bringup(); | ||
92 | cpu_idle(); | 93 | cpu_idle(); |
93 | } | 94 | } |
94 | 95 | ||
@@ -212,8 +213,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
212 | 213 | ||
213 | cpu_set(cpu, cpu_present_map); | 214 | cpu_set(cpu, cpu_present_map); |
214 | } | 215 | } |
215 | |||
216 | //init_xenbus_allowed_cpumask(); | ||
217 | } | 216 | } |
218 | 217 | ||
219 | static __cpuinit int | 218 | static __cpuinit int |
@@ -281,12 +280,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) | |||
281 | struct task_struct *idle = idle_task(cpu); | 280 | struct task_struct *idle = idle_task(cpu); |
282 | int rc; | 281 | int rc; |
283 | 282 | ||
284 | #if 0 | ||
285 | rc = cpu_up_check(cpu); | ||
286 | if (rc) | ||
287 | return rc; | ||
288 | #endif | ||
289 | |||
290 | #ifdef CONFIG_X86_64 | 283 | #ifdef CONFIG_X86_64 |
291 | /* Allocate node local memory for AP pdas */ | 284 | /* Allocate node local memory for AP pdas */ |
292 | WARN_ON(cpu == 0); | 285 | WARN_ON(cpu == 0); |
@@ -339,6 +332,60 @@ static void xen_smp_cpus_done(unsigned int max_cpus) | |||
339 | { | 332 | { |
340 | } | 333 | } |
341 | 334 | ||
335 | #ifdef CONFIG_HOTPLUG_CPU | ||
336 | static int xen_cpu_disable(void) | ||
337 | { | ||
338 | unsigned int cpu = smp_processor_id(); | ||
339 | if (cpu == 0) | ||
340 | return -EBUSY; | ||
341 | |||
342 | cpu_disable_common(); | ||
343 | |||
344 | load_cr3(swapper_pg_dir); | ||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | static void xen_cpu_die(unsigned int cpu) | ||
349 | { | ||
350 | while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { | ||
351 | current->state = TASK_UNINTERRUPTIBLE; | ||
352 | schedule_timeout(HZ/10); | ||
353 | } | ||
354 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | ||
355 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | ||
356 | unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); | ||
357 | unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); | ||
358 | xen_uninit_lock_cpu(cpu); | ||
359 | xen_teardown_timer(cpu); | ||
360 | |||
361 | if (num_online_cpus() == 1) | ||
362 | alternatives_smp_switch(0); | ||
363 | } | ||
364 | |||
365 | static void xen_play_dead(void) | ||
366 | { | ||
367 | play_dead_common(); | ||
368 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | ||
369 | cpu_bringup(); | ||
370 | } | ||
371 | |||
372 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
373 | static int xen_cpu_disable(void) | ||
374 | { | ||
375 | return -ENOSYS; | ||
376 | } | ||
377 | |||
378 | static void xen_cpu_die(unsigned int cpu) | ||
379 | { | ||
380 | BUG(); | ||
381 | } | ||
382 | |||
383 | static void xen_play_dead(void) | ||
384 | { | ||
385 | BUG(); | ||
386 | } | ||
387 | |||
388 | #endif | ||
342 | static void stop_self(void *v) | 389 | static void stop_self(void *v) |
343 | { | 390 | { |
344 | int cpu = smp_processor_id(); | 391 | int cpu = smp_processor_id(); |
@@ -419,176 +466,16 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) | |||
419 | return IRQ_HANDLED; | 466 | return IRQ_HANDLED; |
420 | } | 467 | } |
421 | 468 | ||
422 | struct xen_spinlock { | ||
423 | unsigned char lock; /* 0 -> free; 1 -> locked */ | ||
424 | unsigned short spinners; /* count of waiting cpus */ | ||
425 | }; | ||
426 | |||
427 | static int xen_spin_is_locked(struct raw_spinlock *lock) | ||
428 | { | ||
429 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
430 | |||
431 | return xl->lock != 0; | ||
432 | } | ||
433 | |||
434 | static int xen_spin_is_contended(struct raw_spinlock *lock) | ||
435 | { | ||
436 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
437 | |||
438 | /* Not strictly true; this is only the count of contended | ||
439 | lock-takers entering the slow path. */ | ||
440 | return xl->spinners != 0; | ||
441 | } | ||
442 | |||
443 | static int xen_spin_trylock(struct raw_spinlock *lock) | ||
444 | { | ||
445 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
446 | u8 old = 1; | ||
447 | |||
448 | asm("xchgb %b0,%1" | ||
449 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
450 | |||
451 | return old == 0; | ||
452 | } | ||
453 | |||
454 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | ||
455 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | ||
456 | |||
457 | static inline void spinning_lock(struct xen_spinlock *xl) | ||
458 | { | ||
459 | __get_cpu_var(lock_spinners) = xl; | ||
460 | wmb(); /* set lock of interest before count */ | ||
461 | asm(LOCK_PREFIX " incw %0" | ||
462 | : "+m" (xl->spinners) : : "memory"); | ||
463 | } | ||
464 | |||
465 | static inline void unspinning_lock(struct xen_spinlock *xl) | ||
466 | { | ||
467 | asm(LOCK_PREFIX " decw %0" | ||
468 | : "+m" (xl->spinners) : : "memory"); | ||
469 | wmb(); /* decrement count before clearing lock */ | ||
470 | __get_cpu_var(lock_spinners) = NULL; | ||
471 | } | ||
472 | |||
473 | static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) | ||
474 | { | ||
475 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
476 | int irq = __get_cpu_var(lock_kicker_irq); | ||
477 | int ret; | ||
478 | |||
479 | /* If kicker interrupts not initialized yet, just spin */ | ||
480 | if (irq == -1) | ||
481 | return 0; | ||
482 | |||
483 | /* announce we're spinning */ | ||
484 | spinning_lock(xl); | ||
485 | |||
486 | /* clear pending */ | ||
487 | xen_clear_irq_pending(irq); | ||
488 | |||
489 | /* check again make sure it didn't become free while | ||
490 | we weren't looking */ | ||
491 | ret = xen_spin_trylock(lock); | ||
492 | if (ret) | ||
493 | goto out; | ||
494 | |||
495 | /* block until irq becomes pending */ | ||
496 | xen_poll_irq(irq); | ||
497 | kstat_this_cpu.irqs[irq]++; | ||
498 | |||
499 | out: | ||
500 | unspinning_lock(xl); | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | static void xen_spin_lock(struct raw_spinlock *lock) | ||
505 | { | ||
506 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
507 | int timeout; | ||
508 | u8 oldval; | ||
509 | |||
510 | do { | ||
511 | timeout = 1 << 10; | ||
512 | |||
513 | asm("1: xchgb %1,%0\n" | ||
514 | " testb %1,%1\n" | ||
515 | " jz 3f\n" | ||
516 | "2: rep;nop\n" | ||
517 | " cmpb $0,%0\n" | ||
518 | " je 1b\n" | ||
519 | " dec %2\n" | ||
520 | " jnz 2b\n" | ||
521 | "3:\n" | ||
522 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
523 | : "1" (1) | ||
524 | : "memory"); | ||
525 | |||
526 | } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); | ||
527 | } | ||
528 | |||
529 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | ||
530 | { | ||
531 | int cpu; | ||
532 | |||
533 | for_each_online_cpu(cpu) { | ||
534 | /* XXX should mix up next cpu selection */ | ||
535 | if (per_cpu(lock_spinners, cpu) == xl) { | ||
536 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | ||
537 | break; | ||
538 | } | ||
539 | } | ||
540 | } | ||
541 | |||
542 | static void xen_spin_unlock(struct raw_spinlock *lock) | ||
543 | { | ||
544 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
545 | |||
546 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
547 | xl->lock = 0; /* release lock */ | ||
548 | |||
549 | /* make sure unlock happens before kick */ | ||
550 | barrier(); | ||
551 | |||
552 | if (unlikely(xl->spinners)) | ||
553 | xen_spin_unlock_slow(xl); | ||
554 | } | ||
555 | |||
556 | static __cpuinit void xen_init_lock_cpu(int cpu) | ||
557 | { | ||
558 | int irq; | ||
559 | const char *name; | ||
560 | |||
561 | name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); | ||
562 | irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, | ||
563 | cpu, | ||
564 | xen_reschedule_interrupt, | ||
565 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
566 | name, | ||
567 | NULL); | ||
568 | |||
569 | if (irq >= 0) { | ||
570 | disable_irq(irq); /* make sure it's never delivered */ | ||
571 | per_cpu(lock_kicker_irq, cpu) = irq; | ||
572 | } | ||
573 | |||
574 | printk("cpu %d spinlock event irq %d\n", cpu, irq); | ||
575 | } | ||
576 | |||
577 | static void __init xen_init_spinlocks(void) | ||
578 | { | ||
579 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | ||
580 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | ||
581 | pv_lock_ops.spin_lock = xen_spin_lock; | ||
582 | pv_lock_ops.spin_trylock = xen_spin_trylock; | ||
583 | pv_lock_ops.spin_unlock = xen_spin_unlock; | ||
584 | } | ||
585 | |||
586 | static const struct smp_ops xen_smp_ops __initdata = { | 469 | static const struct smp_ops xen_smp_ops __initdata = { |
587 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | 470 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, |
588 | .smp_prepare_cpus = xen_smp_prepare_cpus, | 471 | .smp_prepare_cpus = xen_smp_prepare_cpus, |
589 | .cpu_up = xen_cpu_up, | ||
590 | .smp_cpus_done = xen_smp_cpus_done, | 472 | .smp_cpus_done = xen_smp_cpus_done, |
591 | 473 | ||
474 | .cpu_up = xen_cpu_up, | ||
475 | .cpu_die = xen_cpu_die, | ||
476 | .cpu_disable = xen_cpu_disable, | ||
477 | .play_dead = xen_play_dead, | ||
478 | |||
592 | .smp_send_stop = xen_smp_send_stop, | 479 | .smp_send_stop = xen_smp_send_stop, |
593 | .smp_send_reschedule = xen_smp_send_reschedule, | 480 | .smp_send_reschedule = xen_smp_send_reschedule, |
594 | 481 | ||
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c new file mode 100644 index 000000000000..dd71e3a021cd --- /dev/null +++ b/arch/x86/xen/spinlock.c | |||
@@ -0,0 +1,428 @@ | |||
1 | /* | ||
2 | * Split spinlock implementation out into its own file, so it can be | ||
3 | * compiled in a FTRACE-compatible way. | ||
4 | */ | ||
5 | #include <linux/kernel_stat.h> | ||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/debugfs.h> | ||
8 | #include <linux/log2.h> | ||
9 | |||
10 | #include <asm/paravirt.h> | ||
11 | |||
12 | #include <xen/interface/xen.h> | ||
13 | #include <xen/events.h> | ||
14 | |||
15 | #include "xen-ops.h" | ||
16 | #include "debugfs.h" | ||
17 | |||
18 | #ifdef CONFIG_XEN_DEBUG_FS | ||
19 | static struct xen_spinlock_stats | ||
20 | { | ||
21 | u64 taken; | ||
22 | u32 taken_slow; | ||
23 | u32 taken_slow_nested; | ||
24 | u32 taken_slow_pickup; | ||
25 | u32 taken_slow_spurious; | ||
26 | u32 taken_slow_irqenable; | ||
27 | |||
28 | u64 released; | ||
29 | u32 released_slow; | ||
30 | u32 released_slow_kicked; | ||
31 | |||
32 | #define HISTO_BUCKETS 30 | ||
33 | u32 histo_spin_total[HISTO_BUCKETS+1]; | ||
34 | u32 histo_spin_spinning[HISTO_BUCKETS+1]; | ||
35 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | ||
36 | |||
37 | u64 time_total; | ||
38 | u64 time_spinning; | ||
39 | u64 time_blocked; | ||
40 | } spinlock_stats; | ||
41 | |||
42 | static u8 zero_stats; | ||
43 | |||
44 | static unsigned lock_timeout = 1 << 10; | ||
45 | #define TIMEOUT lock_timeout | ||
46 | |||
47 | static inline void check_zero(void) | ||
48 | { | ||
49 | if (unlikely(zero_stats)) { | ||
50 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
51 | zero_stats = 0; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | #define ADD_STATS(elem, val) \ | ||
56 | do { check_zero(); spinlock_stats.elem += (val); } while(0) | ||
57 | |||
58 | static inline u64 spin_time_start(void) | ||
59 | { | ||
60 | return xen_clocksource_read(); | ||
61 | } | ||
62 | |||
63 | static void __spin_time_accum(u64 delta, u32 *array) | ||
64 | { | ||
65 | unsigned index = ilog2(delta); | ||
66 | |||
67 | check_zero(); | ||
68 | |||
69 | if (index < HISTO_BUCKETS) | ||
70 | array[index]++; | ||
71 | else | ||
72 | array[HISTO_BUCKETS]++; | ||
73 | } | ||
74 | |||
75 | static inline void spin_time_accum_spinning(u64 start) | ||
76 | { | ||
77 | u32 delta = xen_clocksource_read() - start; | ||
78 | |||
79 | __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); | ||
80 | spinlock_stats.time_spinning += delta; | ||
81 | } | ||
82 | |||
83 | static inline void spin_time_accum_total(u64 start) | ||
84 | { | ||
85 | u32 delta = xen_clocksource_read() - start; | ||
86 | |||
87 | __spin_time_accum(delta, spinlock_stats.histo_spin_total); | ||
88 | spinlock_stats.time_total += delta; | ||
89 | } | ||
90 | |||
91 | static inline void spin_time_accum_blocked(u64 start) | ||
92 | { | ||
93 | u32 delta = xen_clocksource_read() - start; | ||
94 | |||
95 | __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); | ||
96 | spinlock_stats.time_blocked += delta; | ||
97 | } | ||
98 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
99 | #define TIMEOUT (1 << 10) | ||
100 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | ||
101 | |||
102 | static inline u64 spin_time_start(void) | ||
103 | { | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | static inline void spin_time_accum_total(u64 start) | ||
108 | { | ||
109 | } | ||
110 | static inline void spin_time_accum_spinning(u64 start) | ||
111 | { | ||
112 | } | ||
113 | static inline void spin_time_accum_blocked(u64 start) | ||
114 | { | ||
115 | } | ||
116 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
117 | |||
118 | struct xen_spinlock { | ||
119 | unsigned char lock; /* 0 -> free; 1 -> locked */ | ||
120 | unsigned short spinners; /* count of waiting cpus */ | ||
121 | }; | ||
122 | |||
123 | static int xen_spin_is_locked(struct raw_spinlock *lock) | ||
124 | { | ||
125 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
126 | |||
127 | return xl->lock != 0; | ||
128 | } | ||
129 | |||
130 | static int xen_spin_is_contended(struct raw_spinlock *lock) | ||
131 | { | ||
132 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
133 | |||
134 | /* Not strictly true; this is only the count of contended | ||
135 | lock-takers entering the slow path. */ | ||
136 | return xl->spinners != 0; | ||
137 | } | ||
138 | |||
139 | static int xen_spin_trylock(struct raw_spinlock *lock) | ||
140 | { | ||
141 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
142 | u8 old = 1; | ||
143 | |||
144 | asm("xchgb %b0,%1" | ||
145 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
146 | |||
147 | return old == 0; | ||
148 | } | ||
149 | |||
150 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | ||
151 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | ||
152 | |||
153 | /* | ||
154 | * Mark a cpu as interested in a lock. Returns the CPU's previous | ||
155 | * lock of interest, in case we got preempted by an interrupt. | ||
156 | */ | ||
157 | static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | ||
158 | { | ||
159 | struct xen_spinlock *prev; | ||
160 | |||
161 | prev = __get_cpu_var(lock_spinners); | ||
162 | __get_cpu_var(lock_spinners) = xl; | ||
163 | |||
164 | wmb(); /* set lock of interest before count */ | ||
165 | |||
166 | asm(LOCK_PREFIX " incw %0" | ||
167 | : "+m" (xl->spinners) : : "memory"); | ||
168 | |||
169 | return prev; | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Mark a cpu as no longer interested in a lock. Restores previous | ||
174 | * lock of interest (NULL for none). | ||
175 | */ | ||
176 | static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) | ||
177 | { | ||
178 | asm(LOCK_PREFIX " decw %0" | ||
179 | : "+m" (xl->spinners) : : "memory"); | ||
180 | wmb(); /* decrement count before restoring lock */ | ||
181 | __get_cpu_var(lock_spinners) = prev; | ||
182 | } | ||
183 | |||
184 | static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable) | ||
185 | { | ||
186 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
187 | struct xen_spinlock *prev; | ||
188 | int irq = __get_cpu_var(lock_kicker_irq); | ||
189 | int ret; | ||
190 | unsigned long flags; | ||
191 | u64 start; | ||
192 | |||
193 | /* If kicker interrupts not initialized yet, just spin */ | ||
194 | if (irq == -1) | ||
195 | return 0; | ||
196 | |||
197 | start = spin_time_start(); | ||
198 | |||
199 | /* announce we're spinning */ | ||
200 | prev = spinning_lock(xl); | ||
201 | |||
202 | flags = __raw_local_save_flags(); | ||
203 | if (irq_enable) { | ||
204 | ADD_STATS(taken_slow_irqenable, 1); | ||
205 | raw_local_irq_enable(); | ||
206 | } | ||
207 | |||
208 | ADD_STATS(taken_slow, 1); | ||
209 | ADD_STATS(taken_slow_nested, prev != NULL); | ||
210 | |||
211 | do { | ||
212 | /* clear pending */ | ||
213 | xen_clear_irq_pending(irq); | ||
214 | |||
215 | /* check again to make sure it didn't become free while | ||
216 | we weren't looking */ | ||
217 | ret = xen_spin_trylock(lock); | ||
218 | if (ret) { | ||
219 | ADD_STATS(taken_slow_pickup, 1); | ||
220 | |||
221 | /* | ||
222 | * If we interrupted another spinlock while it | ||
223 | * was blocking, make sure it doesn't block | ||
224 | * without rechecking the lock. | ||
225 | */ | ||
226 | if (prev != NULL) | ||
227 | xen_set_irq_pending(irq); | ||
228 | goto out; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * Block until irq becomes pending. If we're | ||
233 | * interrupted at this point (after the trylock but | ||
234 | * before entering the block), then the nested lock | ||
235 | * handler guarantees that the irq will be left | ||
236 | * pending if there's any chance the lock became free; | ||
237 | * xen_poll_irq() returns immediately if the irq is | ||
238 | * pending. | ||
239 | */ | ||
240 | xen_poll_irq(irq); | ||
241 | ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); | ||
242 | } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ | ||
243 | |||
244 | kstat_this_cpu.irqs[irq]++; | ||
245 | |||
246 | out: | ||
247 | raw_local_irq_restore(flags); | ||
248 | unspinning_lock(xl, prev); | ||
249 | spin_time_accum_blocked(start); | ||
250 | |||
251 | return ret; | ||
252 | } | ||
253 | |||
254 | static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable) | ||
255 | { | ||
256 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
257 | unsigned timeout; | ||
258 | u8 oldval; | ||
259 | u64 start_spin; | ||
260 | |||
261 | ADD_STATS(taken, 1); | ||
262 | |||
263 | start_spin = spin_time_start(); | ||
264 | |||
265 | do { | ||
266 | u64 start_spin_fast = spin_time_start(); | ||
267 | |||
268 | timeout = TIMEOUT; | ||
269 | |||
270 | asm("1: xchgb %1,%0\n" | ||
271 | " testb %1,%1\n" | ||
272 | " jz 3f\n" | ||
273 | "2: rep;nop\n" | ||
274 | " cmpb $0,%0\n" | ||
275 | " je 1b\n" | ||
276 | " dec %2\n" | ||
277 | " jnz 2b\n" | ||
278 | "3:\n" | ||
279 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
280 | : "1" (1) | ||
281 | : "memory"); | ||
282 | |||
283 | spin_time_accum_spinning(start_spin_fast); | ||
284 | |||
285 | } while (unlikely(oldval != 0 && | ||
286 | (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); | ||
287 | |||
288 | spin_time_accum_total(start_spin); | ||
289 | } | ||
290 | |||
291 | static void xen_spin_lock(struct raw_spinlock *lock) | ||
292 | { | ||
293 | __xen_spin_lock(lock, false); | ||
294 | } | ||
295 | |||
296 | static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) | ||
297 | { | ||
298 | __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); | ||
299 | } | ||
300 | |||
301 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | ||
302 | { | ||
303 | int cpu; | ||
304 | |||
305 | ADD_STATS(released_slow, 1); | ||
306 | |||
307 | for_each_online_cpu(cpu) { | ||
308 | /* XXX should mix up next cpu selection */ | ||
309 | if (per_cpu(lock_spinners, cpu) == xl) { | ||
310 | ADD_STATS(released_slow_kicked, 1); | ||
311 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | ||
312 | break; | ||
313 | } | ||
314 | } | ||
315 | } | ||
316 | |||
317 | static void xen_spin_unlock(struct raw_spinlock *lock) | ||
318 | { | ||
319 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
320 | |||
321 | ADD_STATS(released, 1); | ||
322 | |||
323 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
324 | xl->lock = 0; /* release lock */ | ||
325 | |||
326 | /* make sure unlock happens before kick */ | ||
327 | barrier(); | ||
328 | |||
329 | if (unlikely(xl->spinners)) | ||
330 | xen_spin_unlock_slow(xl); | ||
331 | } | ||
332 | |||
333 | static irqreturn_t dummy_handler(int irq, void *dev_id) | ||
334 | { | ||
335 | BUG(); | ||
336 | return IRQ_HANDLED; | ||
337 | } | ||
338 | |||
339 | void __cpuinit xen_init_lock_cpu(int cpu) | ||
340 | { | ||
341 | int irq; | ||
342 | const char *name; | ||
343 | |||
344 | name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); | ||
345 | irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, | ||
346 | cpu, | ||
347 | dummy_handler, | ||
348 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
349 | name, | ||
350 | NULL); | ||
351 | |||
352 | if (irq >= 0) { | ||
353 | disable_irq(irq); /* make sure it's never delivered */ | ||
354 | per_cpu(lock_kicker_irq, cpu) = irq; | ||
355 | } | ||
356 | |||
357 | printk("cpu %d spinlock event irq %d\n", cpu, irq); | ||
358 | } | ||
359 | |||
360 | void xen_uninit_lock_cpu(int cpu) | ||
361 | { | ||
362 | unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); | ||
363 | } | ||
364 | |||
365 | void __init xen_init_spinlocks(void) | ||
366 | { | ||
367 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | ||
368 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | ||
369 | pv_lock_ops.spin_lock = xen_spin_lock; | ||
370 | pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; | ||
371 | pv_lock_ops.spin_trylock = xen_spin_trylock; | ||
372 | pv_lock_ops.spin_unlock = xen_spin_unlock; | ||
373 | } | ||
374 | |||
375 | #ifdef CONFIG_XEN_DEBUG_FS | ||
376 | |||
377 | static struct dentry *d_spin_debug; | ||
378 | |||
379 | static int __init xen_spinlock_debugfs(void) | ||
380 | { | ||
381 | struct dentry *d_xen = xen_init_debugfs(); | ||
382 | |||
383 | if (d_xen == NULL) | ||
384 | return -ENOMEM; | ||
385 | |||
386 | d_spin_debug = debugfs_create_dir("spinlocks", d_xen); | ||
387 | |||
388 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | ||
389 | |||
390 | debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout); | ||
391 | |||
392 | debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken); | ||
393 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | ||
394 | &spinlock_stats.taken_slow); | ||
395 | debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug, | ||
396 | &spinlock_stats.taken_slow_nested); | ||
397 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | ||
398 | &spinlock_stats.taken_slow_pickup); | ||
399 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, | ||
400 | &spinlock_stats.taken_slow_spurious); | ||
401 | debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug, | ||
402 | &spinlock_stats.taken_slow_irqenable); | ||
403 | |||
404 | debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released); | ||
405 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | ||
406 | &spinlock_stats.released_slow); | ||
407 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | ||
408 | &spinlock_stats.released_slow_kicked); | ||
409 | |||
410 | debugfs_create_u64("time_spinning", 0444, d_spin_debug, | ||
411 | &spinlock_stats.time_spinning); | ||
412 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | ||
413 | &spinlock_stats.time_blocked); | ||
414 | debugfs_create_u64("time_total", 0444, d_spin_debug, | ||
415 | &spinlock_stats.time_total); | ||
416 | |||
417 | xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug, | ||
418 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); | ||
419 | xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, | ||
420 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); | ||
421 | xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | ||
422 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | ||
423 | |||
424 | return 0; | ||
425 | } | ||
426 | fs_initcall(xen_spinlock_debugfs); | ||
427 | |||
428 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
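The byte lock moved into spinlock.c keeps its fast path as a single xchg of the lock byte; the spinners count and event-channel kick only come into play once that xchg fails. A user-space sketch of the same trylock/unlock pair using GCC atomic builtins rather than the kernel's inline asm:

#include <stdio.h>

struct byte_lock {
	unsigned char lock;	/* 0 -> free, 1 -> locked */
};

static int byte_trylock(struct byte_lock *l)
{
	/* Atomically swap in 1; we got the lock iff the old value was 0. */
	return __sync_lock_test_and_set(&l->lock, 1) == 0;
}

static void byte_unlock(struct byte_lock *l)
{
	/* Release store: critical-section writes become visible first. */
	__sync_lock_release(&l->lock);
}

int main(void)
{
	struct byte_lock l = { 0 };

	printf("first trylock:  %d\n", byte_trylock(&l));	/* 1: acquired */
	printf("second trylock: %d\n", byte_trylock(&l));	/* 0: contended */
	byte_unlock(&l);
	printf("after unlock:   %d\n", byte_trylock(&l));	/* 1 again */
	return 0;
}

In the kernel version the contended case additionally records the lock in per-cpu lock_spinners and blocks on the lock-kicker event channel, which is what xen_spin_unlock_slow() kicks.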
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 685b77470fc3..004ba86326ae 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -30,8 +30,6 @@ | |||
30 | #define TIMER_SLOP 100000 | 30 | #define TIMER_SLOP 100000 |
31 | #define NS_PER_TICK (1000000000LL / HZ) | 31 | #define NS_PER_TICK (1000000000LL / HZ) |
32 | 32 | ||
33 | static cycle_t xen_clocksource_read(void); | ||
34 | |||
35 | /* runstate info updated by Xen */ | 33 | /* runstate info updated by Xen */ |
36 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); | 34 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); |
37 | 35 | ||
@@ -213,7 +211,7 @@ unsigned long xen_tsc_khz(void) | |||
213 | return xen_khz; | 211 | return xen_khz; |
214 | } | 212 | } |
215 | 213 | ||
216 | static cycle_t xen_clocksource_read(void) | 214 | cycle_t xen_clocksource_read(void) |
217 | { | 215 | { |
218 | struct pvclock_vcpu_time_info *src; | 216 | struct pvclock_vcpu_time_info *src; |
219 | cycle_t ret; | 217 | cycle_t ret; |
@@ -452,6 +450,14 @@ void xen_setup_timer(int cpu) | |||
452 | setup_runstate_info(cpu); | 450 | setup_runstate_info(cpu); |
453 | } | 451 | } |
454 | 452 | ||
453 | void xen_teardown_timer(int cpu) | ||
454 | { | ||
455 | struct clock_event_device *evt; | ||
456 | BUG_ON(cpu == 0); | ||
457 | evt = &per_cpu(xen_clock_events, cpu); | ||
458 | unbind_from_irqhandler(evt->irq, NULL); | ||
459 | } | ||
460 | |||
455 | void xen_setup_cpu_clockevents(void) | 461 | void xen_setup_cpu_clockevents(void) |
456 | { | 462 | { |
457 | BUG_ON(preemptible()); | 463 | BUG_ON(preemptible()); |
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 2497a30f41de..42786f59d9c0 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S | |||
@@ -298,7 +298,7 @@ check_events: | |||
298 | push %eax | 298 | push %eax |
299 | push %ecx | 299 | push %ecx |
300 | push %edx | 300 | push %edx |
301 | call force_evtchn_callback | 301 | call xen_force_evtchn_callback |
302 | pop %edx | 302 | pop %edx |
303 | pop %ecx | 303 | pop %ecx |
304 | pop %eax | 304 | pop %eax |
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 7f58304fafb3..3b9bda46487a 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S | |||
@@ -122,7 +122,7 @@ check_events: | |||
122 | push %r9 | 122 | push %r9 |
123 | push %r10 | 123 | push %r10 |
124 | push %r11 | 124 | push %r11 |
125 | call force_evtchn_callback | 125 | call xen_force_evtchn_callback |
126 | pop %r11 | 126 | pop %r11 |
127 | pop %r10 | 127 | pop %r10 |
128 | pop %r9 | 128 | pop %r9 |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index dd3c23152a2e..d7422dc2a55c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define XEN_OPS_H | 2 | #define XEN_OPS_H |
3 | 3 | ||
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | #include <linux/clocksource.h> | ||
5 | #include <linux/irqreturn.h> | 6 | #include <linux/irqreturn.h> |
6 | #include <xen/xen-ops.h> | 7 | #include <xen/xen-ops.h> |
7 | 8 | ||
@@ -31,7 +32,10 @@ void xen_vcpu_restore(void); | |||
31 | 32 | ||
32 | void __init xen_build_dynamic_phys_to_machine(void); | 33 | void __init xen_build_dynamic_phys_to_machine(void); |
33 | 34 | ||
35 | void xen_init_irq_ops(void); | ||
34 | void xen_setup_timer(int cpu); | 36 | void xen_setup_timer(int cpu); |
37 | void xen_teardown_timer(int cpu); | ||
38 | cycle_t xen_clocksource_read(void); | ||
35 | void xen_setup_cpu_clockevents(void); | 39 | void xen_setup_cpu_clockevents(void); |
36 | unsigned long xen_tsc_khz(void); | 40 | unsigned long xen_tsc_khz(void); |
37 | void __init xen_time_init(void); | 41 | void __init xen_time_init(void); |
@@ -50,6 +54,10 @@ void __init xen_setup_vcpu_info_placement(void); | |||
50 | #ifdef CONFIG_SMP | 54 | #ifdef CONFIG_SMP |
51 | void xen_smp_init(void); | 55 | void xen_smp_init(void); |
52 | 56 | ||
57 | void __init xen_init_spinlocks(void); | ||
58 | __cpuinit void xen_init_lock_cpu(int cpu); | ||
59 | void xen_uninit_lock_cpu(int cpu); | ||
60 | |||
53 | extern cpumask_t xen_cpu_initialized_map; | 61 | extern cpumask_t xen_cpu_initialized_map; |
54 | #else | 62 | #else |
55 | static inline void xen_smp_init(void) {} | 63 | static inline void xen_smp_init(void) {} |
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3ca643cafccd..d5e753255153 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -1032,7 +1032,7 @@ static struct xenbus_driver blkfront = { | |||
1032 | 1032 | ||
1033 | static int __init xlblk_init(void) | 1033 | static int __init xlblk_init(void) |
1034 | { | 1034 | { |
1035 | if (!is_running_on_xen()) | 1035 | if (!xen_domain()) |
1036 | return -ENODEV; | 1036 | return -ENODEV; |
1037 | 1037 | ||
1038 | if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { | 1038 | if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { |
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index 6b70aa66a587..538ceea5e7df 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c | |||
@@ -108,8 +108,8 @@ static int __init xen_init(void) | |||
108 | { | 108 | { |
109 | struct hvc_struct *hp; | 109 | struct hvc_struct *hp; |
110 | 110 | ||
111 | if (!is_running_on_xen() || | 111 | if (!xen_pv_domain() || |
112 | is_initial_xendomain() || | 112 | xen_initial_domain() || |
113 | !xen_start_info->console.domU.evtchn) | 113 | !xen_start_info->console.domU.evtchn) |
114 | return -ENODEV; | 114 | return -ENODEV; |
115 | 115 | ||
@@ -142,7 +142,7 @@ static void __exit xen_fini(void) | |||
142 | 142 | ||
143 | static int xen_cons_init(void) | 143 | static int xen_cons_init(void) |
144 | { | 144 | { |
145 | if (!is_running_on_xen()) | 145 | if (!xen_pv_domain()) |
146 | return 0; | 146 | return 0; |
147 | 147 | ||
148 | hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); | 148 | hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); |
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index 9ce3b3baf3a2..3ab6362f043c 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c | |||
@@ -335,11 +335,11 @@ static struct xenbus_driver xenkbd = { | |||
335 | 335 | ||
336 | static int __init xenkbd_init(void) | 336 | static int __init xenkbd_init(void) |
337 | { | 337 | { |
338 | if (!is_running_on_xen()) | 338 | if (!xen_domain()) |
339 | return -ENODEV; | 339 | return -ENODEV; |
340 | 340 | ||
341 | /* Nothing to do if running in dom0. */ | 341 | /* Nothing to do if running in dom0. */ |
342 | if (is_initial_xendomain()) | 342 | if (xen_initial_domain()) |
343 | return -ENODEV; | 343 | return -ENODEV; |
344 | 344 | ||
345 | return xenbus_register_frontend(&xenkbd); | 345 | return xenbus_register_frontend(&xenkbd); |
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index c749bdba214c..3c3dd403f5dd 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c | |||
@@ -1794,10 +1794,10 @@ static struct xenbus_driver netfront = { | |||
1794 | 1794 | ||
1795 | static int __init netif_init(void) | 1795 | static int __init netif_init(void) |
1796 | { | 1796 | { |
1797 | if (!is_running_on_xen()) | 1797 | if (!xen_domain()) |
1798 | return -ENODEV; | 1798 | return -ENODEV; |
1799 | 1799 | ||
1800 | if (is_initial_xendomain()) | 1800 | if (xen_initial_domain()) |
1801 | return 0; | 1801 | return 0; |
1802 | 1802 | ||
1803 | printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); | 1803 | printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); |
@@ -1809,7 +1809,7 @@ module_init(netif_init); | |||
1809 | 1809 | ||
1810 | static void __exit netif_exit(void) | 1810 | static void __exit netif_exit(void) |
1811 | { | 1811 | { |
1812 | if (is_initial_xendomain()) | 1812 | if (xen_initial_domain()) |
1813 | return; | 1813 | return; |
1814 | 1814 | ||
1815 | xenbus_unregister_driver(&netfront); | 1815 | xenbus_unregister_driver(&netfront); |
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 47ed39b52f9c..a463b3dd837b 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c | |||
@@ -680,11 +680,11 @@ static struct xenbus_driver xenfb = { | |||
680 | 680 | ||
681 | static int __init xenfb_init(void) | 681 | static int __init xenfb_init(void) |
682 | { | 682 | { |
683 | if (!is_running_on_xen()) | 683 | if (!xen_domain()) |
684 | return -ENODEV; | 684 | return -ENODEV; |
685 | 685 | ||
686 | /* Nothing to do if running in dom0. */ | 686 | /* Nothing to do if running in dom0. */ |
687 | if (is_initial_xendomain()) | 687 | if (xen_initial_domain()) |
688 | return -ENODEV; | 688 | return -ENODEV; |
689 | 689 | ||
690 | return xenbus_register_frontend(&xenfb); | 690 | return xenbus_register_frontend(&xenfb); |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 363286c54290..d2a8fdf0e191 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | obj-y += grant-table.o features.o events.o manage.o | 1 | obj-y += grant-table.o features.o events.o manage.o |
2 | obj-y += xenbus/ | 2 | obj-y += xenbus/ |
3 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | ||
3 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | 4 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o |
4 | obj-$(CONFIG_XEN_BALLOON) += balloon.o | 5 | obj-$(CONFIG_XEN_BALLOON) += balloon.o |
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 2e15da5459cf..a51f3e17a5fd 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c | |||
@@ -226,9 +226,8 @@ static int increase_reservation(unsigned long nr_pages) | |||
226 | } | 226 | } |
227 | 227 | ||
228 | set_xen_guest_handle(reservation.extent_start, frame_list); | 228 | set_xen_guest_handle(reservation.extent_start, frame_list); |
229 | reservation.nr_extents = nr_pages; | 229 | reservation.nr_extents = nr_pages; |
230 | rc = HYPERVISOR_memory_op( | 230 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); |
231 | XENMEM_populate_physmap, &reservation); | ||
232 | if (rc < nr_pages) { | 231 | if (rc < nr_pages) { |
233 | if (rc > 0) { | 232 | if (rc > 0) { |
234 | int ret; | 233 | int ret; |
@@ -236,7 +235,7 @@ static int increase_reservation(unsigned long nr_pages) | |||
236 | /* We hit the Xen hard limit: reprobe. */ | 235 | /* We hit the Xen hard limit: reprobe. */ |
237 | reservation.nr_extents = rc; | 236 | reservation.nr_extents = rc; |
238 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | 237 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, |
239 | &reservation); | 238 | &reservation); |
240 | BUG_ON(ret != rc); | 239 | BUG_ON(ret != rc); |
241 | } | 240 | } |
242 | if (rc >= 0) | 241 | if (rc >= 0) |
@@ -420,7 +419,7 @@ static int __init balloon_init(void) | |||
420 | unsigned long pfn; | 419 | unsigned long pfn; |
421 | struct page *page; | 420 | struct page *page; |
422 | 421 | ||
423 | if (!is_running_on_xen()) | 422 | if (!xen_pv_domain()) |
424 | return -ENODEV; | 423 | return -ENODEV; |
425 | 424 | ||
426 | pr_info("xen_balloon: Initialising balloon driver.\n"); | 425 | pr_info("xen_balloon: Initialising balloon driver.\n"); |
@@ -464,136 +463,13 @@ static void balloon_exit(void) | |||
464 | 463 | ||
465 | module_exit(balloon_exit); | 464 | module_exit(balloon_exit); |
466 | 465 | ||
467 | static void balloon_update_driver_allowance(long delta) | 466 | #define BALLOON_SHOW(name, format, args...) \ |
468 | { | 467 | static ssize_t show_##name(struct sys_device *dev, \ |
469 | unsigned long flags; | 468 | struct sysdev_attribute *attr, \ |
470 | 469 | char *buf) \ | |
471 | spin_lock_irqsave(&balloon_lock, flags); | 470 | { \ |
472 | balloon_stats.driver_pages += delta; | 471 | return sprintf(buf, format, ##args); \ |
473 | spin_unlock_irqrestore(&balloon_lock, flags); | 472 | } \ |
474 | } | ||
475 | |||
476 | static int dealloc_pte_fn( | ||
477 | pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | ||
478 | { | ||
479 | unsigned long mfn = pte_mfn(*pte); | ||
480 | int ret; | ||
481 | struct xen_memory_reservation reservation = { | ||
482 | .nr_extents = 1, | ||
483 | .extent_order = 0, | ||
484 | .domid = DOMID_SELF | ||
485 | }; | ||
486 | set_xen_guest_handle(reservation.extent_start, &mfn); | ||
487 | set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); | ||
488 | set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); | ||
489 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | ||
490 | BUG_ON(ret != 1); | ||
491 | return 0; | ||
492 | } | ||
493 | |||
494 | static struct page **alloc_empty_pages_and_pagevec(int nr_pages) | ||
495 | { | ||
496 | unsigned long vaddr, flags; | ||
497 | struct page *page, **pagevec; | ||
498 | int i, ret; | ||
499 | |||
500 | pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); | ||
501 | if (pagevec == NULL) | ||
502 | return NULL; | ||
503 | |||
504 | for (i = 0; i < nr_pages; i++) { | ||
505 | page = pagevec[i] = alloc_page(GFP_KERNEL); | ||
506 | if (page == NULL) | ||
507 | goto err; | ||
508 | |||
509 | vaddr = (unsigned long)page_address(page); | ||
510 | |||
511 | scrub_page(page); | ||
512 | |||
513 | spin_lock_irqsave(&balloon_lock, flags); | ||
514 | |||
515 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
516 | unsigned long gmfn = page_to_pfn(page); | ||
517 | struct xen_memory_reservation reservation = { | ||
518 | .nr_extents = 1, | ||
519 | .extent_order = 0, | ||
520 | .domid = DOMID_SELF | ||
521 | }; | ||
522 | set_xen_guest_handle(reservation.extent_start, &gmfn); | ||
523 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
524 | &reservation); | ||
525 | if (ret == 1) | ||
526 | ret = 0; /* success */ | ||
527 | } else { | ||
528 | ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, | ||
529 | dealloc_pte_fn, NULL); | ||
530 | } | ||
531 | |||
532 | if (ret != 0) { | ||
533 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
534 | __free_page(page); | ||
535 | goto err; | ||
536 | } | ||
537 | |||
538 | totalram_pages = --balloon_stats.current_pages; | ||
539 | |||
540 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
541 | } | ||
542 | |||
543 | out: | ||
544 | schedule_work(&balloon_worker); | ||
545 | flush_tlb_all(); | ||
546 | return pagevec; | ||
547 | |||
548 | err: | ||
549 | spin_lock_irqsave(&balloon_lock, flags); | ||
550 | while (--i >= 0) | ||
551 | balloon_append(pagevec[i]); | ||
552 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
553 | kfree(pagevec); | ||
554 | pagevec = NULL; | ||
555 | goto out; | ||
556 | } | ||
557 | |||
558 | static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) | ||
559 | { | ||
560 | unsigned long flags; | ||
561 | int i; | ||
562 | |||
563 | if (pagevec == NULL) | ||
564 | return; | ||
565 | |||
566 | spin_lock_irqsave(&balloon_lock, flags); | ||
567 | for (i = 0; i < nr_pages; i++) { | ||
568 | BUG_ON(page_count(pagevec[i]) != 1); | ||
569 | balloon_append(pagevec[i]); | ||
570 | } | ||
571 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
572 | |||
573 | kfree(pagevec); | ||
574 | |||
575 | schedule_work(&balloon_worker); | ||
576 | } | ||
577 | |||
578 | static void balloon_release_driver_page(struct page *page) | ||
579 | { | ||
580 | unsigned long flags; | ||
581 | |||
582 | spin_lock_irqsave(&balloon_lock, flags); | ||
583 | balloon_append(page); | ||
584 | balloon_stats.driver_pages--; | ||
585 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
586 | |||
587 | schedule_work(&balloon_worker); | ||
588 | } | ||
589 | |||
590 | |||
591 | #define BALLOON_SHOW(name, format, args...) \ | ||
592 | static ssize_t show_##name(struct sys_device *dev, \ | ||
593 | char *buf) \ | ||
594 | { \ | ||
595 | return sprintf(buf, format, ##args); \ | ||
596 | } \ | ||
597 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) | 473 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) |
598 | 474 | ||
599 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); | 475 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); |
@@ -604,7 +480,8 @@ BALLOON_SHOW(hard_limit_kb, | |||
604 | (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); | 480 | (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); |
605 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); | 481 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); |
606 | 482 | ||
607 | static ssize_t show_target_kb(struct sys_device *dev, char *buf) | 483 | static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, |
484 | char *buf) | ||
608 | { | 485 | { |
609 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | 486 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); |
610 | } | 487 | } |
@@ -614,19 +491,14 @@ static ssize_t store_target_kb(struct sys_device *dev, | |||
614 | const char *buf, | 491 | const char *buf, |
615 | size_t count) | 492 | size_t count) |
616 | { | 493 | { |
617 | char memstring[64], *endchar; | 494 | char *endchar; |
618 | unsigned long long target_bytes; | 495 | unsigned long long target_bytes; |
619 | 496 | ||
620 | if (!capable(CAP_SYS_ADMIN)) | 497 | if (!capable(CAP_SYS_ADMIN)) |
621 | return -EPERM; | 498 | return -EPERM; |
622 | 499 | ||
623 | if (count <= 1) | 500 | target_bytes = memparse(buf, &endchar); |
624 | return -EBADMSG; /* runt */ | ||
625 | if (count > sizeof(memstring)) | ||
626 | return -EFBIG; /* too long */ | ||
627 | strcpy(memstring, buf); | ||
628 | 501 | ||
629 | target_bytes = memparse(memstring, &endchar); | ||
630 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | 502 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); |
631 | 503 | ||
632 | return count; | 504 | return count; |
@@ -694,20 +566,4 @@ static int register_balloon(struct sys_device *sysdev) | |||
694 | return error; | 566 | return error; |
695 | } | 567 | } |
696 | 568 | ||
697 | static void unregister_balloon(struct sys_device *sysdev) | ||
698 | { | ||
699 | int i; | ||
700 | |||
701 | sysfs_remove_group(&sysdev->kobj, &balloon_info_group); | ||
702 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) | ||
703 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
704 | sysdev_unregister(sysdev); | ||
705 | sysdev_class_unregister(&balloon_sysdev_class); | ||
706 | } | ||
707 | |||
708 | static void balloon_sysfs_exit(void) | ||
709 | { | ||
710 | unregister_balloon(&balloon_sysdev); | ||
711 | } | ||
712 | |||
713 | MODULE_LICENSE("GPL"); | 569 | MODULE_LICENSE("GPL"); |
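The balloon.c hunks above drop the unused driver-page helpers, move the show callbacks to the three-argument sysdev form, and let store_target_kb() hand the sysfs buffer straight to memparse(). As a hedged illustration, limited to a literal expansion of the macro shown above, BALLOON_SHOW(current_kb, ...) now produces:

static ssize_t show_current_kb(struct sys_device *dev,
			       struct sysdev_attribute *attr,
			       char *buf)
{
	return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.current_pages));
}
static SYSDEV_ATTR(current_kb, S_IRUGO, show_current_kb, NULL);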
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c new file mode 100644 index 000000000000..565280ec1c6a --- /dev/null +++ b/drivers/xen/cpu_hotplug.c | |||
@@ -0,0 +1,90 @@ | |||
1 | #include <linux/notifier.h> | ||
2 | |||
3 | #include <xen/xenbus.h> | ||
4 | |||
5 | #include <asm-x86/xen/hypervisor.h> | ||
6 | #include <asm/cpu.h> | ||
7 | |||
8 | static void enable_hotplug_cpu(int cpu) | ||
9 | { | ||
10 | if (!cpu_present(cpu)) | ||
11 | arch_register_cpu(cpu); | ||
12 | |||
13 | cpu_set(cpu, cpu_present_map); | ||
14 | } | ||
15 | |||
16 | static void disable_hotplug_cpu(int cpu) | ||
17 | { | ||
18 | if (cpu_present(cpu)) | ||
19 | arch_unregister_cpu(cpu); | ||
20 | |||
21 | cpu_clear(cpu, cpu_present_map); | ||
22 | } | ||
23 | |||
24 | static void vcpu_hotplug(unsigned int cpu) | ||
25 | { | ||
26 | int err; | ||
27 | char dir[32], state[32]; | ||
28 | |||
29 | if (!cpu_possible(cpu)) | ||
30 | return; | ||
31 | |||
32 | sprintf(dir, "cpu/%u", cpu); | ||
33 | err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); | ||
34 | if (err != 1) { | ||
35 | printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); | ||
36 | return; | ||
37 | } | ||
38 | |||
39 | if (strcmp(state, "online") == 0) { | ||
40 | enable_hotplug_cpu(cpu); | ||
41 | } else if (strcmp(state, "offline") == 0) { | ||
42 | (void)cpu_down(cpu); | ||
43 | disable_hotplug_cpu(cpu); | ||
44 | } else { | ||
45 | printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", | ||
46 | state, cpu); | ||
47 | } | ||
48 | } | ||
49 | |||
50 | static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, | ||
51 | const char **vec, unsigned int len) | ||
52 | { | ||
53 | unsigned int cpu; | ||
54 | char *cpustr; | ||
55 | const char *node = vec[XS_WATCH_PATH]; | ||
56 | |||
57 | cpustr = strstr(node, "cpu/"); | ||
58 | if (cpustr != NULL) { | ||
59 | sscanf(cpustr, "cpu/%u", &cpu); | ||
60 | vcpu_hotplug(cpu); | ||
61 | } | ||
62 | } | ||
63 | |||
64 | static int setup_cpu_watcher(struct notifier_block *notifier, | ||
65 | unsigned long event, void *data) | ||
66 | { | ||
67 | static struct xenbus_watch cpu_watch = { | ||
68 | .node = "cpu", | ||
69 | .callback = handle_vcpu_hotplug_event}; | ||
70 | |||
71 | (void)register_xenbus_watch(&cpu_watch); | ||
72 | |||
73 | return NOTIFY_DONE; | ||
74 | } | ||
75 | |||
76 | static int __init setup_vcpu_hotplug_event(void) | ||
77 | { | ||
78 | static struct notifier_block xsn_cpu = { | ||
79 | .notifier_call = setup_cpu_watcher }; | ||
80 | |||
81 | if (!xen_pv_domain()) | ||
82 | return -ENODEV; | ||
83 | |||
84 | register_xenstore_notifier(&xsn_cpu); | ||
85 | |||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | arch_initcall(setup_vcpu_hotplug_event); | ||
90 | |||
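The new cpu_hotplug.c registers a single xenstore watch on "cpu" and reacts to writes of each vcpu's "availability" node. An illustrative fragment of what the handler ends up doing when the toolstack flips a vcpu (the path below is an example, not taken from the patch):

	const char *node = "cpu/3/availability";	/* example watch path */
	unsigned int cpu;

	if (sscanf(strstr(node, "cpu/"), "cpu/%u", &cpu) == 1)
		vcpu_hotplug(cpu);	/* reads the node, onlines or offlines vcpu 3 */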
diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 0e0c28574af8..c3290bc186a0 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c | |||
@@ -84,17 +84,6 @@ static int irq_bindcount[NR_IRQS]; | |||
84 | /* Xen will never allocate port zero for any purpose. */ | 84 | /* Xen will never allocate port zero for any purpose. */ |
85 | #define VALID_EVTCHN(chn) ((chn) != 0) | 85 | #define VALID_EVTCHN(chn) ((chn) != 0) |
86 | 86 | ||
87 | /* | ||
88 | * Force a proper event-channel callback from Xen after clearing the | ||
89 | * callback mask. We do this in a very simple manner, by making a call | ||
90 | * down into Xen. The pending flag will be checked by Xen on return. | ||
91 | */ | ||
92 | void force_evtchn_callback(void) | ||
93 | { | ||
94 | (void)HYPERVISOR_xen_version(0, NULL); | ||
95 | } | ||
96 | EXPORT_SYMBOL_GPL(force_evtchn_callback); | ||
97 | |||
98 | static struct irq_chip xen_dynamic_chip; | 87 | static struct irq_chip xen_dynamic_chip; |
99 | 88 | ||
100 | /* Constructor for packed IRQ information. */ | 89 | /* Constructor for packed IRQ information. */ |
@@ -175,6 +164,12 @@ static inline void set_evtchn(int port) | |||
175 | sync_set_bit(port, &s->evtchn_pending[0]); | 164 | sync_set_bit(port, &s->evtchn_pending[0]); |
176 | } | 165 | } |
177 | 166 | ||
167 | static inline int test_evtchn(int port) | ||
168 | { | ||
169 | struct shared_info *s = HYPERVISOR_shared_info; | ||
170 | return sync_test_bit(port, &s->evtchn_pending[0]); | ||
171 | } | ||
172 | |||
178 | 173 | ||
179 | /** | 174 | /** |
180 | * notify_remote_via_irq - send event to remote end of event channel via irq | 175 | * notify_remote_via_irq - send event to remote end of event channel via irq |
@@ -365,6 +360,10 @@ static void unbind_from_irq(unsigned int irq) | |||
365 | per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) | 360 | per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) |
366 | [index_from_irq(irq)] = -1; | 361 | [index_from_irq(irq)] = -1; |
367 | break; | 362 | break; |
363 | case IRQT_IPI: | ||
364 | per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) | ||
365 | [index_from_irq(irq)] = -1; | ||
366 | break; | ||
368 | default: | 367 | default: |
369 | break; | 368 | break; |
370 | } | 369 | } |
@@ -743,6 +742,25 @@ void xen_clear_irq_pending(int irq) | |||
743 | clear_evtchn(evtchn); | 742 | clear_evtchn(evtchn); |
744 | } | 743 | } |
745 | 744 | ||
745 | void xen_set_irq_pending(int irq) | ||
746 | { | ||
747 | int evtchn = evtchn_from_irq(irq); | ||
748 | |||
749 | if (VALID_EVTCHN(evtchn)) | ||
750 | set_evtchn(evtchn); | ||
751 | } | ||
752 | |||
753 | bool xen_test_irq_pending(int irq) | ||
754 | { | ||
755 | int evtchn = evtchn_from_irq(irq); | ||
756 | bool ret = false; | ||
757 | |||
758 | if (VALID_EVTCHN(evtchn)) | ||
759 | ret = test_evtchn(evtchn); | ||
760 | |||
761 | return ret; | ||
762 | } | ||
763 | |||
746 | /* Poll waiting for an irq to become pending. In the usual case, the | 764 | /* Poll waiting for an irq to become pending. In the usual case, the |
747 | irq will be disabled so it won't deliver an interrupt. */ | 765 | irq will be disabled so it won't deliver an interrupt. */ |
748 | void xen_poll_irq(int irq) | 766 | void xen_poll_irq(int irq) |
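The events.c hunks retire the exported force_evtchn_callback() and add set/test counterparts to xen_clear_irq_pending(). Together with xen_poll_irq() these are the building blocks the paravirt spinlock slow path elsewhere in this series can use to park a lock waiter on its per-cpu kicker event channel; a hedged sketch of that polling pattern (the function name and irq setup are illustrative, not from these hunks):

static void wait_for_lock_kick(int irq)
{
	xen_clear_irq_pending(irq);	/* forget any stale kick */
	xen_poll_irq(irq);		/* block in Xen until kicked (or spuriously woken) */
	if (xen_test_irq_pending(irq))
		xen_clear_irq_pending(irq);	/* consume the kick we waited for */
}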
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index e9e11168616a..06592b9da83c 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -508,7 +508,7 @@ static int __devinit gnttab_init(void) | |||
508 | unsigned int max_nr_glist_frames, nr_glist_frames; | 508 | unsigned int max_nr_glist_frames, nr_glist_frames; |
509 | unsigned int nr_init_grefs; | 509 | unsigned int nr_init_grefs; |
510 | 510 | ||
511 | if (!is_running_on_xen()) | 511 | if (!xen_domain()) |
512 | return -ENODEV; | 512 | return -ENODEV; |
513 | 513 | ||
514 | nr_grant_frames = 1; | 514 | nr_grant_frames = 1; |
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 57ceb5346b74..7f24a98a446f 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c | |||
@@ -814,7 +814,7 @@ static int __init xenbus_probe_init(void) | |||
814 | DPRINTK(""); | 814 | DPRINTK(""); |
815 | 815 | ||
816 | err = -ENODEV; | 816 | err = -ENODEV; |
817 | if (!is_running_on_xen()) | 817 | if (!xen_domain()) |
818 | goto out_error; | 818 | goto out_error; |
819 | 819 | ||
820 | /* Register ourselves with the kernel bus subsystem */ | 820 | /* Register ourselves with the kernel bus subsystem */ |
@@ -829,7 +829,7 @@ static int __init xenbus_probe_init(void) | |||
829 | /* | 829 | /* |
830 | * Domain0 doesn't have a store_evtchn or store_mfn yet. | 830 | * Domain0 doesn't have a store_evtchn or store_mfn yet. |
831 | */ | 831 | */ |
832 | if (is_initial_xendomain()) { | 832 | if (xen_initial_domain()) { |
833 | /* dom0 not yet supported */ | 833 | /* dom0 not yet supported */ |
834 | } else { | 834 | } else { |
835 | xenstored_ready = 1; | 835 | xenstored_ready = 1; |
@@ -846,7 +846,7 @@ static int __init xenbus_probe_init(void) | |||
846 | goto out_unreg_back; | 846 | goto out_unreg_back; |
847 | } | 847 | } |
848 | 848 | ||
849 | if (!is_initial_xendomain()) | 849 | if (!xen_initial_domain()) |
850 | xenbus_probe(NULL); | 850 | xenbus_probe(NULL); |
851 | 851 | ||
852 | return 0; | 852 | return 0; |
@@ -937,7 +937,7 @@ static void wait_for_devices(struct xenbus_driver *xendrv) | |||
937 | unsigned long timeout = jiffies + 10*HZ; | 937 | unsigned long timeout = jiffies + 10*HZ; |
938 | struct device_driver *drv = xendrv ? &xendrv->driver : NULL; | 938 | struct device_driver *drv = xendrv ? &xendrv->driver : NULL; |
939 | 939 | ||
940 | if (!ready_to_wait_for_devices || !is_running_on_xen()) | 940 | if (!ready_to_wait_for_devices || !xen_domain()) |
941 | return; | 941 | return; |
942 | 942 | ||
943 | while (exists_disconnected_device(drv)) { | 943 | while (exists_disconnected_device(drv)) { |
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index a44c4dc70590..06f786f4b4fb 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h | |||
@@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_struct *desc, | |||
24 | desc->d = info->seg_32bit; | 24 | desc->d = info->seg_32bit; |
25 | desc->g = info->limit_in_pages; | 25 | desc->g = info->limit_in_pages; |
26 | desc->base2 = (info->base_addr & 0xff000000) >> 24; | 26 | desc->base2 = (info->base_addr & 0xff000000) >> 24; |
27 | /* | ||
28 | * Don't allow setting of the lm bit. It is useless anyway | ||
29 | * because 64bit system calls require __USER_CS: | ||
30 | */ | ||
31 | desc->l = 0; | ||
27 | } | 32 | } |
28 | 33 | ||
29 | extern struct desc_ptr idt_descr; | 34 | extern struct desc_ptr idt_descr; |
@@ -97,7 +102,15 @@ static inline int desc_empty(const void *ptr) | |||
97 | native_write_gdt_entry(dt, entry, desc, type) | 102 | native_write_gdt_entry(dt, entry, desc, type) |
98 | #define write_idt_entry(dt, entry, g) \ | 103 | #define write_idt_entry(dt, entry, g) \ |
99 | native_write_idt_entry(dt, entry, g) | 104 | native_write_idt_entry(dt, entry, g) |
100 | #endif | 105 | |
106 | static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) | ||
107 | { | ||
108 | } | ||
109 | |||
110 | static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) | ||
111 | { | ||
112 | } | ||
113 | #endif /* CONFIG_PARAVIRT */ | ||
101 | 114 | ||
102 | static inline void native_write_idt_entry(gate_desc *idt, int entry, | 115 | static inline void native_write_idt_entry(gate_desc *idt, int entry, |
103 | const gate_desc *gate) | 116 | const gate_desc *gate) |
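For non-paravirt builds, desc.h now also provides empty paravirt_alloc_ldt()/paravirt_free_ldt() stubs, so descriptor code can call the hooks unconditionally and have them compile away natively. A minimal illustrative caller (the function name is hypothetical):

static void example_install_ldt(struct desc_struct *ldt, unsigned entries)
{
	paravirt_alloc_ldt(ldt, entries);	/* no-op natively, hypervisor hook under paravirt */
	/* ... fill in and load the LDT ... */
}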
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index fbbde93f12d6..8e9b1266898c 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h | |||
@@ -124,6 +124,9 @@ struct pv_cpu_ops { | |||
124 | int entrynum, const void *desc, int size); | 124 | int entrynum, const void *desc, int size); |
125 | void (*write_idt_entry)(gate_desc *, | 125 | void (*write_idt_entry)(gate_desc *, |
126 | int entrynum, const gate_desc *gate); | 126 | int entrynum, const gate_desc *gate); |
127 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | ||
128 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | ||
129 | |||
127 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | 130 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); |
128 | 131 | ||
129 | void (*set_iopl_mask)(unsigned mask); | 132 | void (*set_iopl_mask)(unsigned mask); |
@@ -330,6 +333,7 @@ struct pv_lock_ops { | |||
330 | int (*spin_is_locked)(struct raw_spinlock *lock); | 333 | int (*spin_is_locked)(struct raw_spinlock *lock); |
331 | int (*spin_is_contended)(struct raw_spinlock *lock); | 334 | int (*spin_is_contended)(struct raw_spinlock *lock); |
332 | void (*spin_lock)(struct raw_spinlock *lock); | 335 | void (*spin_lock)(struct raw_spinlock *lock); |
336 | void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); | ||
333 | int (*spin_trylock)(struct raw_spinlock *lock); | 337 | int (*spin_trylock)(struct raw_spinlock *lock); |
334 | void (*spin_unlock)(struct raw_spinlock *lock); | 338 | void (*spin_unlock)(struct raw_spinlock *lock); |
335 | }; | 339 | }; |
@@ -824,6 +828,16 @@ do { \ | |||
824 | (aux) = __aux; \ | 828 | (aux) = __aux; \ |
825 | } while (0) | 829 | } while (0) |
826 | 830 | ||
831 | static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) | ||
832 | { | ||
833 | PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); | ||
834 | } | ||
835 | |||
836 | static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) | ||
837 | { | ||
838 | PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries); | ||
839 | } | ||
840 | |||
827 | static inline void load_TR_desc(void) | 841 | static inline void load_TR_desc(void) |
828 | { | 842 | { |
829 | PVOP_VCALL0(pv_cpu_ops.load_tr_desc); | 843 | PVOP_VCALL0(pv_cpu_ops.load_tr_desc); |
@@ -1401,6 +1415,12 @@ static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) | |||
1401 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); | 1415 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); |
1402 | } | 1416 | } |
1403 | 1417 | ||
1418 | static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock, | ||
1419 | unsigned long flags) | ||
1420 | { | ||
1421 | PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); | ||
1422 | } | ||
1423 | |||
1404 | static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) | 1424 | static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) |
1405 | { | 1425 | { |
1406 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); | 1426 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); |
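In paravirt.h, pv_cpu_ops grows alloc_ldt/free_ldt and pv_lock_ops grows spin_lock_flags, each with a PVOP wrapper so callers compile the same way on every backend. A hedged sketch of how a backend with no use for the saved flags could satisfy the new lock member (names are illustrative, not from the patch):

static void mybackend_spin_lock_flags(struct raw_spinlock *lock,
				      unsigned long flags)
{
	/* this backend ignores the saved flags and takes the lock normally */
	mybackend_spin_lock(lock);
}

/* wired up next to the existing pv_lock_ops members, e.g.
 *	.spin_lock	 = mybackend_spin_lock,
 *	.spin_lock_flags = mybackend_spin_lock_flags,
 */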
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 3c877f74f279..30b5146cc436 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h | |||
@@ -47,12 +47,16 @@ extern struct { | |||
47 | struct smp_ops { | 47 | struct smp_ops { |
48 | void (*smp_prepare_boot_cpu)(void); | 48 | void (*smp_prepare_boot_cpu)(void); |
49 | void (*smp_prepare_cpus)(unsigned max_cpus); | 49 | void (*smp_prepare_cpus)(unsigned max_cpus); |
50 | int (*cpu_up)(unsigned cpu); | ||
51 | void (*smp_cpus_done)(unsigned max_cpus); | 50 | void (*smp_cpus_done)(unsigned max_cpus); |
52 | 51 | ||
53 | void (*smp_send_stop)(void); | 52 | void (*smp_send_stop)(void); |
54 | void (*smp_send_reschedule)(int cpu); | 53 | void (*smp_send_reschedule)(int cpu); |
55 | 54 | ||
55 | int (*cpu_up)(unsigned cpu); | ||
56 | int (*cpu_disable)(void); | ||
57 | void (*cpu_die)(unsigned int cpu); | ||
58 | void (*play_dead)(void); | ||
59 | |||
56 | void (*send_call_func_ipi)(cpumask_t mask); | 60 | void (*send_call_func_ipi)(cpumask_t mask); |
57 | void (*send_call_func_single_ipi)(int cpu); | 61 | void (*send_call_func_single_ipi)(int cpu); |
58 | }; | 62 | }; |
@@ -91,6 +95,21 @@ static inline int __cpu_up(unsigned int cpu) | |||
91 | return smp_ops.cpu_up(cpu); | 95 | return smp_ops.cpu_up(cpu); |
92 | } | 96 | } |
93 | 97 | ||
98 | static inline int __cpu_disable(void) | ||
99 | { | ||
100 | return smp_ops.cpu_disable(); | ||
101 | } | ||
102 | |||
103 | static inline void __cpu_die(unsigned int cpu) | ||
104 | { | ||
105 | smp_ops.cpu_die(cpu); | ||
106 | } | ||
107 | |||
108 | static inline void play_dead(void) | ||
109 | { | ||
110 | smp_ops.play_dead(); | ||
111 | } | ||
112 | |||
94 | static inline void smp_send_reschedule(int cpu) | 113 | static inline void smp_send_reschedule(int cpu) |
95 | { | 114 | { |
96 | smp_ops.smp_send_reschedule(cpu); | 115 | smp_ops.smp_send_reschedule(cpu); |
@@ -106,16 +125,19 @@ static inline void arch_send_call_function_ipi(cpumask_t mask) | |||
106 | smp_ops.send_call_func_ipi(mask); | 125 | smp_ops.send_call_func_ipi(mask); |
107 | } | 126 | } |
108 | 127 | ||
128 | void cpu_disable_common(void); | ||
109 | void native_smp_prepare_boot_cpu(void); | 129 | void native_smp_prepare_boot_cpu(void); |
110 | void native_smp_prepare_cpus(unsigned int max_cpus); | 130 | void native_smp_prepare_cpus(unsigned int max_cpus); |
111 | void native_smp_cpus_done(unsigned int max_cpus); | 131 | void native_smp_cpus_done(unsigned int max_cpus); |
112 | int native_cpu_up(unsigned int cpunum); | 132 | int native_cpu_up(unsigned int cpunum); |
133 | int native_cpu_disable(void); | ||
134 | void native_cpu_die(unsigned int cpu); | ||
135 | void native_play_dead(void); | ||
136 | void play_dead_common(void); | ||
137 | |||
113 | void native_send_call_func_ipi(cpumask_t mask); | 138 | void native_send_call_func_ipi(cpumask_t mask); |
114 | void native_send_call_func_single_ipi(int cpu); | 139 | void native_send_call_func_single_ipi(int cpu); |
115 | 140 | ||
116 | extern int __cpu_disable(void); | ||
117 | extern void __cpu_die(unsigned int cpu); | ||
118 | |||
119 | void smp_store_cpu_info(int id); | 141 | void smp_store_cpu_info(int id); |
120 | #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) | 142 | #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) |
121 | 143 | ||
@@ -200,9 +222,5 @@ static inline int hard_smp_processor_id(void) | |||
200 | 222 | ||
201 | #endif /* CONFIG_X86_LOCAL_APIC */ | 223 | #endif /* CONFIG_X86_LOCAL_APIC */ |
202 | 224 | ||
203 | #ifdef CONFIG_HOTPLUG_CPU | ||
204 | extern void cpu_uninit(void); | ||
205 | #endif | ||
206 | |||
207 | #endif /* __ASSEMBLY__ */ | 225 | #endif /* __ASSEMBLY__ */ |
208 | #endif | 226 | #endif |
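smp.h folds CPU-offline support into smp_ops: cpu_disable, cpu_die and play_dead join cpu_up, and __cpu_disable()/__cpu_die() become inline dispatchers instead of externs, so a paravirt guest can interpose on CPU removal. A hedged sketch of an smp_ops initializer built from the native_* prototypes declared above (the initializer itself is illustrative, not copied from the patch):

struct smp_ops smp_ops = {
	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
	.smp_prepare_cpus	= native_smp_prepare_cpus,
	.smp_cpus_done		= native_smp_cpus_done,

	.cpu_up			= native_cpu_up,
	.cpu_disable		= native_cpu_disable,
	.cpu_die		= native_cpu_die,
	.play_dead		= native_play_dead,

	.send_call_func_ipi	= native_send_call_func_ipi,
	.send_call_func_single_ipi = native_send_call_func_single_ipi,
	/* .smp_send_stop and .smp_send_reschedule filled in as before */
};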
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index e39c790dbfd2..b755ea86367e 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h | |||
@@ -182,8 +182,6 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) | |||
182 | } | 182 | } |
183 | #endif | 183 | #endif |
184 | 184 | ||
185 | #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) | ||
186 | |||
187 | #ifdef CONFIG_PARAVIRT | 185 | #ifdef CONFIG_PARAVIRT |
188 | /* | 186 | /* |
189 | * Define virtualization-friendly old-style lock byte lock, for use in | 187 | * Define virtualization-friendly old-style lock byte lock, for use in |
@@ -272,6 +270,13 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) | |||
272 | { | 270 | { |
273 | __ticket_spin_unlock(lock); | 271 | __ticket_spin_unlock(lock); |
274 | } | 272 | } |
273 | |||
274 | static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, | ||
275 | unsigned long flags) | ||
276 | { | ||
277 | __raw_spin_lock(lock); | ||
278 | } | ||
279 | |||
275 | #endif /* CONFIG_PARAVIRT */ | 280 | #endif /* CONFIG_PARAVIRT */ |
276 | 281 | ||
277 | static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) | 282 | static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) |
diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h index 35c76ceb9f40..0e7bbb549116 100644 --- a/include/asm-x86/tlbflush.h +++ b/include/asm-x86/tlbflush.h | |||
@@ -119,6 +119,10 @@ static inline void native_flush_tlb_others(const cpumask_t *cpumask, | |||
119 | { | 119 | { |
120 | } | 120 | } |
121 | 121 | ||
122 | static inline void reset_lazy_tlbstate(void) | ||
123 | { | ||
124 | } | ||
125 | |||
122 | #else /* SMP */ | 126 | #else /* SMP */ |
123 | 127 | ||
124 | #include <asm/smp.h> | 128 | #include <asm/smp.h> |
@@ -151,6 +155,12 @@ struct tlb_state { | |||
151 | char __cacheline_padding[L1_CACHE_BYTES-8]; | 155 | char __cacheline_padding[L1_CACHE_BYTES-8]; |
152 | }; | 156 | }; |
153 | DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); | 157 | DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); |
158 | |||
159 | void reset_lazy_tlbstate(void); | ||
160 | #else | ||
161 | static inline void reset_lazy_tlbstate(void) | ||
162 | { | ||
163 | } | ||
154 | #endif | 164 | #endif |
155 | 165 | ||
156 | #endif /* SMP */ | 166 | #endif /* SMP */ |
diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h index 04ee0610014a..fca066febc35 100644 --- a/include/asm-x86/xen/hypervisor.h +++ b/include/asm-x86/xen/hypervisor.h | |||
@@ -54,7 +54,6 @@ | |||
54 | /* arch/i386/kernel/setup.c */ | 54 | /* arch/i386/kernel/setup.c */ |
55 | extern struct shared_info *HYPERVISOR_shared_info; | 55 | extern struct shared_info *HYPERVISOR_shared_info; |
56 | extern struct start_info *xen_start_info; | 56 | extern struct start_info *xen_start_info; |
57 | #define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN) | ||
58 | 57 | ||
59 | /* arch/i386/mach-xen/evtchn.c */ | 58 | /* arch/i386/mach-xen/evtchn.c */ |
60 | /* Force a proper event-channel callback from Xen. */ | 59 | /* Force a proper event-channel callback from Xen. */ |
@@ -67,6 +66,17 @@ u64 jiffies_to_st(unsigned long jiffies); | |||
67 | #define MULTI_UVMFLAGS_INDEX 3 | 66 | #define MULTI_UVMFLAGS_INDEX 3 |
68 | #define MULTI_UVMDOMID_INDEX 4 | 67 | #define MULTI_UVMDOMID_INDEX 4 |
69 | 68 | ||
70 | #define is_running_on_xen() (xen_start_info ? 1 : 0) | 69 | enum xen_domain_type { |
70 | XEN_NATIVE, | ||
71 | XEN_PV_DOMAIN, | ||
72 | XEN_HVM_DOMAIN, | ||
73 | }; | ||
74 | |||
75 | extern enum xen_domain_type xen_domain_type; | ||
76 | |||
77 | #define xen_domain() (xen_domain_type != XEN_NATIVE) | ||
78 | #define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) | ||
79 | #define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) | ||
80 | #define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) | ||
71 | 81 | ||
72 | #endif /* __HYPERVISOR_H__ */ | 82 | #endif /* __HYPERVISOR_H__ */ |
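hypervisor.h replaces the old is_running_on_xen()/is_initial_xendomain() tests with a tri-state xen_domain_type and the xen_domain()/xen_pv_domain()/xen_initial_domain()/xen_hvm_domain() predicates used throughout the hunks above. A hedged usage sketch for a frontend init path (the function name is illustrative):

static int __init example_front_init(void)
{
	if (!xen_domain())		/* not running under Xen at all */
		return -ENODEV;

	if (xen_initial_domain())	/* dom0 has no frontend to attach */
		return -ENODEV;

	if (xen_hvm_domain())
		pr_info("running as a Xen HVM guest\n");

	/* ... register the frontend here ... */
	return 0;
}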
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6d..75d81f157d2e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -182,7 +182,7 @@ extern int vsscanf(const char *, const char *, va_list) | |||
182 | 182 | ||
183 | extern int get_option(char **str, int *pint); | 183 | extern int get_option(char **str, int *pint); |
184 | extern char *get_options(const char *str, int nints, int *ints); | 184 | extern char *get_options(const char *str, int nints, int *ints); |
185 | extern unsigned long long memparse(char *ptr, char **retptr); | 185 | extern unsigned long long memparse(const char *ptr, char **retptr); |
186 | 186 | ||
187 | extern int core_kernel_text(unsigned long addr); | 187 | extern int core_kernel_text(unsigned long addr); |
188 | extern int __kernel_text_address(unsigned long addr); | 188 | extern int __kernel_text_address(unsigned long addr); |
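memparse() is constified to take a const char *, which is what allows store_target_kb() above to hand the sysfs buffer to it directly instead of staging it through a fixed-size local copy. A small hedged example of the parse itself (values are illustrative):

	const char *arg = "512M";
	char *end;
	unsigned long long bytes = memparse(arg, &end);
	/* bytes == 512ULL << 20; end points at the first character after the suffix */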
diff --git a/include/xen/events.h b/include/xen/events.h index 4680ff3fbc91..0d5f1adc0363 100644 --- a/include/xen/events.h +++ b/include/xen/events.h | |||
@@ -46,6 +46,8 @@ extern void xen_irq_resume(void); | |||
46 | 46 | ||
47 | /* Clear an irq's pending state, in preparation for polling on it */ | 47 | /* Clear an irq's pending state, in preparation for polling on it */ |
48 | void xen_clear_irq_pending(int irq); | 48 | void xen_clear_irq_pending(int irq); |
49 | void xen_set_irq_pending(int irq); | ||
50 | bool xen_test_irq_pending(int irq); | ||
49 | 51 | ||
50 | /* Poll waiting for an irq to become pending. In the usual case, the | 52 | /* Poll waiting for an irq to become pending. In the usual case, the |
51 | irq will be disabled so it won't deliver an interrupt. */ | 53 | irq will be disabled so it won't deliver an interrupt. */ |
diff --git a/lib/cmdline.c b/lib/cmdline.c index 5ba8a942a478..f5f3ad8b62ff 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c | |||
@@ -126,7 +126,7 @@ char *get_options(const char *str, int nints, int *ints) | |||
126 | * megabyte, or one gigabyte, respectively. | 126 | * megabyte, or one gigabyte, respectively. |
127 | */ | 127 | */ |
128 | 128 | ||
129 | unsigned long long memparse(char *ptr, char **retptr) | 129 | unsigned long long memparse(const char *ptr, char **retptr) |
130 | { | 130 | { |
131 | char *endptr; /* local pointer to end of parsed string */ | 131 | char *endptr; /* local pointer to end of parsed string */ |
132 | 132 | ||