diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kernel/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/ldt.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt-spinlocks.c | 37 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 27 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 39 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/smp.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 77 | ||||
-rw-r--r-- | arch/x86/kernel/tlb_32.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 14 | ||||
-rw-r--r-- | arch/x86/xen/Kconfig | 10 | ||||
-rw-r--r-- | arch/x86/xen/Makefile | 12 | ||||
-rw-r--r-- | arch/x86/xen/debugfs.c | 123 | ||||
-rw-r--r-- | arch/x86/xen/debugfs.h | 10 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 189 | ||||
-rw-r--r-- | arch/x86/xen/irq.c | 143 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 270 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.c | 115 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 245 | ||||
-rw-r--r-- | arch/x86/xen/spinlock.c | 428 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 12 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm_32.S | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm_64.S | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 8 |
25 files changed, 1354 insertions, 469 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3db651fc8ec5..d679cb2c79b4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE | |||
10 | # Do not profile debug and lowlevel utilities | 10 | # Do not profile debug and lowlevel utilities |
11 | CFLAGS_REMOVE_tsc.o = -pg | 11 | CFLAGS_REMOVE_tsc.o = -pg |
12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
13 | CFLAGS_REMOVE_paravirt.o = -pg | 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg |
14 | endif | 14 | endif |
15 | 15 | ||
16 | # | 16 | # |
@@ -89,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | |||
89 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | 89 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o |
90 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 90 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
91 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 91 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
92 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 92 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o |
93 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | 93 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o |
94 | 94 | ||
95 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o | 95 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4e456bd955bb..9983bc3f5d18 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -728,14 +728,3 @@ void __cpuinit cpu_init(void) | |||
728 | mxcsr_feature_mask_init(); | 728 | mxcsr_feature_mask_init(); |
729 | } | 729 | } |
730 | 730 | ||
731 | #ifdef CONFIG_HOTPLUG_CPU | ||
732 | void __cpuinit cpu_uninit(void) | ||
733 | { | ||
734 | int cpu = raw_smp_processor_id(); | ||
735 | cpu_clear(cpu, cpu_initialized); | ||
736 | |||
737 | /* lazy TLB state */ | ||
738 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
739 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
740 | } | ||
741 | #endif | ||
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index b68e21f06f4f..6e388412a854 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -51,6 +51,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
51 | memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, | 51 | memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, |
52 | (mincount - oldsize) * LDT_ENTRY_SIZE); | 52 | (mincount - oldsize) * LDT_ENTRY_SIZE); |
53 | 53 | ||
54 | paravirt_alloc_ldt(newldt, mincount); | ||
55 | |||
54 | #ifdef CONFIG_X86_64 | 56 | #ifdef CONFIG_X86_64 |
55 | /* CHECKME: Do we really need this ? */ | 57 | /* CHECKME: Do we really need this ? */ |
56 | wmb(); | 58 | wmb(); |
@@ -73,6 +75,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
73 | #endif | 75 | #endif |
74 | } | 76 | } |
75 | if (oldsize) { | 77 | if (oldsize) { |
78 | paravirt_free_ldt(oldldt, oldsize); | ||
76 | if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) | 79 | if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) |
77 | vfree(oldldt); | 80 | vfree(oldldt); |
78 | else | 81 | else |
@@ -84,10 +87,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
84 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | 87 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) |
85 | { | 88 | { |
86 | int err = alloc_ldt(new, old->size, 0); | 89 | int err = alloc_ldt(new, old->size, 0); |
90 | int i; | ||
87 | 91 | ||
88 | if (err < 0) | 92 | if (err < 0) |
89 | return err; | 93 | return err; |
90 | memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); | 94 | |
95 | for(i = 0; i < old->size; i++) | ||
96 | write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); | ||
91 | return 0; | 97 | return 0; |
92 | } | 98 | } |
93 | 99 | ||
@@ -124,6 +130,7 @@ void destroy_context(struct mm_struct *mm) | |||
124 | if (mm == current->active_mm) | 130 | if (mm == current->active_mm) |
125 | clear_LDT(); | 131 | clear_LDT(); |
126 | #endif | 132 | #endif |
133 | paravirt_free_ldt(mm->context.ldt, mm->context.size); | ||
127 | if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) | 134 | if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) |
128 | vfree(mm->context.ldt); | 135 | vfree(mm->context.ldt); |
129 | else | 136 | else |
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c new file mode 100644 index 000000000000..0e9f1982b1dd --- /dev/null +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * Split spinlock implementation out into its own file, so it can be | ||
3 | * compiled in a FTRACE-compatible way. | ||
4 | */ | ||
5 | #include <linux/spinlock.h> | ||
6 | #include <linux/module.h> | ||
7 | |||
8 | #include <asm/paravirt.h> | ||
9 | |||
10 | static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) | ||
11 | { | ||
12 | __raw_spin_lock(lock); | ||
13 | } | ||
14 | |||
15 | struct pv_lock_ops pv_lock_ops = { | ||
16 | #ifdef CONFIG_SMP | ||
17 | .spin_is_locked = __ticket_spin_is_locked, | ||
18 | .spin_is_contended = __ticket_spin_is_contended, | ||
19 | |||
20 | .spin_lock = __ticket_spin_lock, | ||
21 | .spin_lock_flags = default_spin_lock_flags, | ||
22 | .spin_trylock = __ticket_spin_trylock, | ||
23 | .spin_unlock = __ticket_spin_unlock, | ||
24 | #endif | ||
25 | }; | ||
26 | EXPORT_SYMBOL(pv_lock_ops); | ||
27 | |||
28 | void __init paravirt_use_bytelocks(void) | ||
29 | { | ||
30 | #ifdef CONFIG_SMP | ||
31 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
32 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
33 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
34 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
35 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
36 | #endif | ||
37 | } | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 300da17e61cb..7faea1817d05 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |||
268 | return __get_cpu_var(paravirt_lazy_mode); | 268 | return __get_cpu_var(paravirt_lazy_mode); |
269 | } | 269 | } |
270 | 270 | ||
271 | void __init paravirt_use_bytelocks(void) | ||
272 | { | ||
273 | #ifdef CONFIG_SMP | ||
274 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
275 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
276 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
277 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
278 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
279 | #endif | ||
280 | } | ||
281 | |||
282 | struct pv_info pv_info = { | 271 | struct pv_info pv_info = { |
283 | .name = "bare hardware", | 272 | .name = "bare hardware", |
284 | .paravirt_enabled = 0, | 273 | .paravirt_enabled = 0, |
@@ -348,6 +337,10 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
348 | .write_ldt_entry = native_write_ldt_entry, | 337 | .write_ldt_entry = native_write_ldt_entry, |
349 | .write_gdt_entry = native_write_gdt_entry, | 338 | .write_gdt_entry = native_write_gdt_entry, |
350 | .write_idt_entry = native_write_idt_entry, | 339 | .write_idt_entry = native_write_idt_entry, |
340 | |||
341 | .alloc_ldt = paravirt_nop, | ||
342 | .free_ldt = paravirt_nop, | ||
343 | |||
351 | .load_sp0 = native_load_sp0, | 344 | .load_sp0 = native_load_sp0, |
352 | 345 | ||
353 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) | 346 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
@@ -461,18 +454,6 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
461 | .set_fixmap = native_set_fixmap, | 454 | .set_fixmap = native_set_fixmap, |
462 | }; | 455 | }; |
463 | 456 | ||
464 | struct pv_lock_ops pv_lock_ops = { | ||
465 | #ifdef CONFIG_SMP | ||
466 | .spin_is_locked = __ticket_spin_is_locked, | ||
467 | .spin_is_contended = __ticket_spin_is_contended, | ||
468 | |||
469 | .spin_lock = __ticket_spin_lock, | ||
470 | .spin_trylock = __ticket_spin_trylock, | ||
471 | .spin_unlock = __ticket_spin_unlock, | ||
472 | #endif | ||
473 | }; | ||
474 | EXPORT_SYMBOL(pv_lock_ops); | ||
475 | |||
476 | EXPORT_SYMBOL_GPL(pv_time_ops); | 457 | EXPORT_SYMBOL_GPL(pv_time_ops); |
477 | EXPORT_SYMBOL (pv_cpu_ops); | 458 | EXPORT_SYMBOL (pv_cpu_ops); |
478 | EXPORT_SYMBOL (pv_mmu_ops); | 459 | EXPORT_SYMBOL (pv_mmu_ops); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4b3cfdf54216..b76b38ff962b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -72,47 +72,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk) | |||
72 | return ((unsigned long *)tsk->thread.sp)[3]; | 72 | return ((unsigned long *)tsk->thread.sp)[3]; |
73 | } | 73 | } |
74 | 74 | ||
75 | #ifdef CONFIG_HOTPLUG_CPU | 75 | #ifndef CONFIG_SMP |
76 | #include <asm/nmi.h> | ||
77 | |||
78 | static void cpu_exit_clear(void) | ||
79 | { | ||
80 | int cpu = raw_smp_processor_id(); | ||
81 | |||
82 | idle_task_exit(); | ||
83 | |||
84 | cpu_uninit(); | ||
85 | irq_ctx_exit(cpu); | ||
86 | |||
87 | cpu_clear(cpu, cpu_callout_map); | ||
88 | cpu_clear(cpu, cpu_callin_map); | ||
89 | |||
90 | numa_remove_cpu(cpu); | ||
91 | c1e_remove_cpu(cpu); | ||
92 | } | ||
93 | |||
94 | /* We don't actually take CPU down, just spin without interrupts. */ | ||
95 | static inline void play_dead(void) | ||
96 | { | ||
97 | /* This must be done before dead CPU ack */ | ||
98 | cpu_exit_clear(); | ||
99 | mb(); | ||
100 | /* Ack it */ | ||
101 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
102 | |||
103 | /* | ||
104 | * With physical CPU hotplug, we should halt the cpu | ||
105 | */ | ||
106 | local_irq_disable(); | ||
107 | /* mask all interrupts, flush any and all caches, and halt */ | ||
108 | wbinvd_halt(); | ||
109 | } | ||
110 | #else | ||
111 | static inline void play_dead(void) | 76 | static inline void play_dead(void) |
112 | { | 77 | { |
113 | BUG(); | 78 | BUG(); |
114 | } | 79 | } |
115 | #endif /* CONFIG_HOTPLUG_CPU */ | 80 | #endif |
116 | 81 | ||
117 | /* | 82 | /* |
118 | * The idle thread. There's no useful work to be | 83 | * The idle thread. There's no useful work to be |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e12e0e4dd256..ec27afa43d7e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -85,30 +85,12 @@ void exit_idle(void) | |||
85 | __exit_idle(); | 85 | __exit_idle(); |
86 | } | 86 | } |
87 | 87 | ||
88 | #ifdef CONFIG_HOTPLUG_CPU | 88 | #ifndef CONFIG_SMP |
89 | DECLARE_PER_CPU(int, cpu_state); | ||
90 | |||
91 | #include <asm/nmi.h> | ||
92 | /* We halt the CPU with physical CPU hotplug */ | ||
93 | static inline void play_dead(void) | ||
94 | { | ||
95 | idle_task_exit(); | ||
96 | c1e_remove_cpu(raw_smp_processor_id()); | ||
97 | |||
98 | mb(); | ||
99 | /* Ack it */ | ||
100 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
101 | |||
102 | local_irq_disable(); | ||
103 | /* mask all interrupts, flush any and all caches, and halt */ | ||
104 | wbinvd_halt(); | ||
105 | } | ||
106 | #else | ||
107 | static inline void play_dead(void) | 89 | static inline void play_dead(void) |
108 | { | 90 | { |
109 | BUG(); | 91 | BUG(); |
110 | } | 92 | } |
111 | #endif /* CONFIG_HOTPLUG_CPU */ | 93 | #endif |
112 | 94 | ||
113 | /* | 95 | /* |
114 | * The idle thread. There's no useful work to be | 96 | * The idle thread. There's no useful work to be |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 361b7a4c640c..18f9b19f5f8f 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
214 | struct smp_ops smp_ops = { | 214 | struct smp_ops smp_ops = { |
215 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 215 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
216 | .smp_prepare_cpus = native_smp_prepare_cpus, | 216 | .smp_prepare_cpus = native_smp_prepare_cpus, |
217 | .cpu_up = native_cpu_up, | ||
218 | .smp_cpus_done = native_smp_cpus_done, | 217 | .smp_cpus_done = native_smp_cpus_done, |
219 | 218 | ||
220 | .smp_send_stop = native_smp_send_stop, | 219 | .smp_send_stop = native_smp_send_stop, |
221 | .smp_send_reschedule = native_smp_send_reschedule, | 220 | .smp_send_reschedule = native_smp_send_reschedule, |
222 | 221 | ||
222 | .cpu_up = native_cpu_up, | ||
223 | .cpu_die = native_cpu_die, | ||
224 | .cpu_disable = native_cpu_disable, | ||
225 | .play_dead = native_play_dead, | ||
226 | |||
223 | .send_call_func_ipi = native_send_call_func_ipi, | 227 | .send_call_func_ipi = native_send_call_func_ipi, |
224 | .send_call_func_single_ipi = native_send_call_func_single_ipi, | 228 | .send_call_func_single_ipi = native_send_call_func_single_ipi, |
225 | }; | 229 | }; |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7985c5b3f916..06f1407d5542 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <asm/desc.h> | 52 | #include <asm/desc.h> |
53 | #include <asm/nmi.h> | 53 | #include <asm/nmi.h> |
54 | #include <asm/irq.h> | 54 | #include <asm/irq.h> |
55 | #include <asm/idle.h> | ||
55 | #include <asm/smp.h> | 56 | #include <asm/smp.h> |
56 | #include <asm/trampoline.h> | 57 | #include <asm/trampoline.h> |
57 | #include <asm/cpu.h> | 58 | #include <asm/cpu.h> |
@@ -1346,25 +1347,9 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1346 | numa_remove_cpu(cpu); | 1347 | numa_remove_cpu(cpu); |
1347 | } | 1348 | } |
1348 | 1349 | ||
1349 | int __cpu_disable(void) | 1350 | void cpu_disable_common(void) |
1350 | { | 1351 | { |
1351 | int cpu = smp_processor_id(); | 1352 | int cpu = smp_processor_id(); |
1352 | |||
1353 | /* | ||
1354 | * Perhaps use cpufreq to drop frequency, but that could go | ||
1355 | * into generic code. | ||
1356 | * | ||
1357 | * We won't take down the boot processor on i386 due to some | ||
1358 | * interrupts only being able to be serviced by the BSP. | ||
1359 | * Especially so if we're not using an IOAPIC -zwane | ||
1360 | */ | ||
1361 | if (cpu == 0) | ||
1362 | return -EBUSY; | ||
1363 | |||
1364 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1365 | stop_apic_nmi_watchdog(NULL); | ||
1366 | clear_local_APIC(); | ||
1367 | |||
1368 | /* | 1353 | /* |
1369 | * HACK: | 1354 | * HACK: |
1370 | * Allow any queued timer interrupts to get serviced | 1355 | * Allow any queued timer interrupts to get serviced |
@@ -1382,10 +1367,32 @@ int __cpu_disable(void) | |||
1382 | remove_cpu_from_maps(cpu); | 1367 | remove_cpu_from_maps(cpu); |
1383 | unlock_vector_lock(); | 1368 | unlock_vector_lock(); |
1384 | fixup_irqs(cpu_online_map); | 1369 | fixup_irqs(cpu_online_map); |
1370 | } | ||
1371 | |||
1372 | int native_cpu_disable(void) | ||
1373 | { | ||
1374 | int cpu = smp_processor_id(); | ||
1375 | |||
1376 | /* | ||
1377 | * Perhaps use cpufreq to drop frequency, but that could go | ||
1378 | * into generic code. | ||
1379 | * | ||
1380 | * We won't take down the boot processor on i386 due to some | ||
1381 | * interrupts only being able to be serviced by the BSP. | ||
1382 | * Especially so if we're not using an IOAPIC -zwane | ||
1383 | */ | ||
1384 | if (cpu == 0) | ||
1385 | return -EBUSY; | ||
1386 | |||
1387 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1388 | stop_apic_nmi_watchdog(NULL); | ||
1389 | clear_local_APIC(); | ||
1390 | |||
1391 | cpu_disable_common(); | ||
1385 | return 0; | 1392 | return 0; |
1386 | } | 1393 | } |
1387 | 1394 | ||
1388 | void __cpu_die(unsigned int cpu) | 1395 | void native_cpu_die(unsigned int cpu) |
1389 | { | 1396 | { |
1390 | /* We don't do anything here: idle task is faking death itself. */ | 1397 | /* We don't do anything here: idle task is faking death itself. */ |
1391 | unsigned int i; | 1398 | unsigned int i; |
@@ -1402,15 +1409,45 @@ void __cpu_die(unsigned int cpu) | |||
1402 | } | 1409 | } |
1403 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1410 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
1404 | } | 1411 | } |
1412 | |||
1413 | void play_dead_common(void) | ||
1414 | { | ||
1415 | idle_task_exit(); | ||
1416 | reset_lazy_tlbstate(); | ||
1417 | irq_ctx_exit(raw_smp_processor_id()); | ||
1418 | c1e_remove_cpu(raw_smp_processor_id()); | ||
1419 | |||
1420 | mb(); | ||
1421 | /* Ack it */ | ||
1422 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
1423 | |||
1424 | /* | ||
1425 | * With physical CPU hotplug, we should halt the cpu | ||
1426 | */ | ||
1427 | local_irq_disable(); | ||
1428 | } | ||
1429 | |||
1430 | void native_play_dead(void) | ||
1431 | { | ||
1432 | play_dead_common(); | ||
1433 | wbinvd_halt(); | ||
1434 | } | ||
1435 | |||
1405 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1436 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
1406 | int __cpu_disable(void) | 1437 | int native_cpu_disable(void) |
1407 | { | 1438 | { |
1408 | return -ENOSYS; | 1439 | return -ENOSYS; |
1409 | } | 1440 | } |
1410 | 1441 | ||
1411 | void __cpu_die(unsigned int cpu) | 1442 | void native_cpu_die(unsigned int cpu) |
1412 | { | 1443 | { |
1413 | /* We said "no" in __cpu_disable */ | 1444 | /* We said "no" in __cpu_disable */ |
1414 | BUG(); | 1445 | BUG(); |
1415 | } | 1446 | } |
1447 | |||
1448 | void native_play_dead(void) | ||
1449 | { | ||
1450 | BUG(); | ||
1451 | } | ||
1452 | |||
1416 | #endif | 1453 | #endif |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index fec1ecedc9b7..e00534b33534 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c | |||
@@ -241,3 +241,11 @@ void flush_tlb_all(void) | |||
241 | on_each_cpu(do_flush_tlb_all, NULL, 1); | 241 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
242 | } | 242 | } |
243 | 243 | ||
244 | void reset_lazy_tlbstate(void) | ||
245 | { | ||
246 | int cpu = raw_smp_processor_id(); | ||
247 | |||
248 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
249 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
250 | } | ||
251 | |||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 455f3fe67b42..356ed2dec3a6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -915,15 +915,15 @@ LIST_HEAD(pgd_list); | |||
915 | 915 | ||
916 | void vmalloc_sync_all(void) | 916 | void vmalloc_sync_all(void) |
917 | { | 917 | { |
918 | #ifdef CONFIG_X86_32 | ||
919 | unsigned long start = VMALLOC_START & PGDIR_MASK; | ||
920 | unsigned long address; | 918 | unsigned long address; |
921 | 919 | ||
920 | #ifdef CONFIG_X86_32 | ||
922 | if (SHARED_KERNEL_PMD) | 921 | if (SHARED_KERNEL_PMD) |
923 | return; | 922 | return; |
924 | 923 | ||
925 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); | 924 | for (address = VMALLOC_START & PMD_MASK; |
926 | for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { | 925 | address >= TASK_SIZE && address < FIXADDR_TOP; |
926 | address += PMD_SIZE) { | ||
927 | unsigned long flags; | 927 | unsigned long flags; |
928 | struct page *page; | 928 | struct page *page; |
929 | 929 | ||
@@ -936,10 +936,8 @@ void vmalloc_sync_all(void) | |||
936 | spin_unlock_irqrestore(&pgd_lock, flags); | 936 | spin_unlock_irqrestore(&pgd_lock, flags); |
937 | } | 937 | } |
938 | #else /* CONFIG_X86_64 */ | 938 | #else /* CONFIG_X86_64 */ |
939 | unsigned long start = VMALLOC_START & PGDIR_MASK; | 939 | for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; |
940 | unsigned long address; | 940 | address += PGDIR_SIZE) { |
941 | |||
942 | for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { | ||
943 | const pgd_t *pgd_ref = pgd_offset_k(address); | 941 | const pgd_t *pgd_ref = pgd_offset_k(address); |
944 | unsigned long flags; | 942 | unsigned long flags; |
945 | struct page *page; | 943 | struct page *page; |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 3815e425f470..d3e68465ace9 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -27,4 +27,12 @@ config XEN_MAX_DOMAIN_MEMORY | |||
27 | config XEN_SAVE_RESTORE | 27 | config XEN_SAVE_RESTORE |
28 | bool | 28 | bool |
29 | depends on PM | 29 | depends on PM |
30 | default y \ No newline at end of file | 30 | default y |
31 | |||
32 | config XEN_DEBUG_FS | ||
33 | bool "Enable Xen debug and tuning parameters in debugfs" | ||
34 | depends on XEN && DEBUG_FS | ||
35 | default n | ||
36 | help | ||
37 | Enable statistics output and various tuning options in debugfs. | ||
38 | Enabling this option may incur a significant performance overhead. \ No newline at end of file | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 59c1e539aed2..313947940a1a 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -1,4 +1,12 @@ | |||
1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ | 1 | ifdef CONFIG_FTRACE |
2 | # Do not profile debug and lowlevel utilities | ||
3 | CFLAGS_REMOVE_spinlock.o = -pg | ||
4 | CFLAGS_REMOVE_time.o = -pg | ||
5 | CFLAGS_REMOVE_irq.o = -pg | ||
6 | endif | ||
7 | |||
8 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | ||
2 | time.o xen-asm_$(BITS).o grant-table.o suspend.o | 9 | time.o xen-asm_$(BITS).o grant-table.o suspend.o |
3 | 10 | ||
4 | obj-$(CONFIG_SMP) += smp.o | 11 | obj-$(CONFIG_SMP) += smp.o spinlock.o |
12 | obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o \ No newline at end of file | ||
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c new file mode 100644 index 000000000000..b53225d2cac3 --- /dev/null +++ b/arch/x86/xen/debugfs.c | |||
@@ -0,0 +1,123 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/debugfs.h> | ||
3 | #include <linux/module.h> | ||
4 | |||
5 | #include "debugfs.h" | ||
6 | |||
7 | static struct dentry *d_xen_debug; | ||
8 | |||
9 | struct dentry * __init xen_init_debugfs(void) | ||
10 | { | ||
11 | if (!d_xen_debug) { | ||
12 | d_xen_debug = debugfs_create_dir("xen", NULL); | ||
13 | |||
14 | if (!d_xen_debug) | ||
15 | pr_warning("Could not create 'xen' debugfs directory\n"); | ||
16 | } | ||
17 | |||
18 | return d_xen_debug; | ||
19 | } | ||
20 | |||
21 | struct array_data | ||
22 | { | ||
23 | void *array; | ||
24 | unsigned elements; | ||
25 | }; | ||
26 | |||
27 | static int u32_array_open(struct inode *inode, struct file *file) | ||
28 | { | ||
29 | file->private_data = NULL; | ||
30 | return nonseekable_open(inode, file); | ||
31 | } | ||
32 | |||
33 | static size_t format_array(char *buf, size_t bufsize, const char *fmt, | ||
34 | u32 *array, unsigned array_size) | ||
35 | { | ||
36 | size_t ret = 0; | ||
37 | unsigned i; | ||
38 | |||
39 | for(i = 0; i < array_size; i++) { | ||
40 | size_t len; | ||
41 | |||
42 | len = snprintf(buf, bufsize, fmt, array[i]); | ||
43 | len++; /* ' ' or '\n' */ | ||
44 | ret += len; | ||
45 | |||
46 | if (buf) { | ||
47 | buf += len; | ||
48 | bufsize -= len; | ||
49 | buf[-1] = (i == array_size-1) ? '\n' : ' '; | ||
50 | } | ||
51 | } | ||
52 | |||
53 | ret++; /* \0 */ | ||
54 | if (buf) | ||
55 | *buf = '\0'; | ||
56 | |||
57 | return ret; | ||
58 | } | ||
59 | |||
60 | static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size) | ||
61 | { | ||
62 | size_t len = format_array(NULL, 0, fmt, array, array_size); | ||
63 | char *ret; | ||
64 | |||
65 | ret = kmalloc(len, GFP_KERNEL); | ||
66 | if (ret == NULL) | ||
67 | return NULL; | ||
68 | |||
69 | format_array(ret, len, fmt, array, array_size); | ||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, | ||
74 | loff_t *ppos) | ||
75 | { | ||
76 | struct inode *inode = file->f_path.dentry->d_inode; | ||
77 | struct array_data *data = inode->i_private; | ||
78 | size_t size; | ||
79 | |||
80 | if (*ppos == 0) { | ||
81 | if (file->private_data) { | ||
82 | kfree(file->private_data); | ||
83 | file->private_data = NULL; | ||
84 | } | ||
85 | |||
86 | file->private_data = format_array_alloc("%u", data->array, data->elements); | ||
87 | } | ||
88 | |||
89 | size = 0; | ||
90 | if (file->private_data) | ||
91 | size = strlen(file->private_data); | ||
92 | |||
93 | return simple_read_from_buffer(buf, len, ppos, file->private_data, size); | ||
94 | } | ||
95 | |||
96 | static int xen_array_release(struct inode *inode, struct file *file) | ||
97 | { | ||
98 | kfree(file->private_data); | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static struct file_operations u32_array_fops = { | ||
104 | .owner = THIS_MODULE, | ||
105 | .open = u32_array_open, | ||
106 | .release= xen_array_release, | ||
107 | .read = u32_array_read, | ||
108 | }; | ||
109 | |||
110 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | ||
111 | struct dentry *parent, | ||
112 | u32 *array, unsigned elements) | ||
113 | { | ||
114 | struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); | ||
115 | |||
116 | if (data == NULL) | ||
117 | return NULL; | ||
118 | |||
119 | data->array = array; | ||
120 | data->elements = elements; | ||
121 | |||
122 | return debugfs_create_file(name, mode, parent, data, &u32_array_fops); | ||
123 | } | ||
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h new file mode 100644 index 000000000000..e28132084832 --- /dev/null +++ b/arch/x86/xen/debugfs.h | |||
@@ -0,0 +1,10 @@ | |||
1 | #ifndef _XEN_DEBUGFS_H | ||
2 | #define _XEN_DEBUGFS_H | ||
3 | |||
4 | struct dentry * __init xen_init_debugfs(void); | ||
5 | |||
6 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | ||
7 | struct dentry *parent, | ||
8 | u32 *array, unsigned elements); | ||
9 | |||
10 | #endif /* _XEN_DEBUGFS_H */ | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a4e201b47f64..8ca2f88bde1e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <xen/interface/xen.h> | 30 | #include <xen/interface/xen.h> |
31 | #include <xen/interface/physdev.h> | 31 | #include <xen/interface/physdev.h> |
32 | #include <xen/interface/vcpu.h> | 32 | #include <xen/interface/vcpu.h> |
33 | #include <xen/interface/sched.h> | ||
34 | #include <xen/features.h> | 33 | #include <xen/features.h> |
35 | #include <xen/page.h> | 34 | #include <xen/page.h> |
36 | #include <xen/hvc-console.h> | 35 | #include <xen/hvc-console.h> |
@@ -57,6 +56,9 @@ EXPORT_SYMBOL_GPL(hypercall_page); | |||
57 | DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); | 56 | DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); |
58 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); | 57 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); |
59 | 58 | ||
59 | enum xen_domain_type xen_domain_type = XEN_NATIVE; | ||
60 | EXPORT_SYMBOL_GPL(xen_domain_type); | ||
61 | |||
60 | /* | 62 | /* |
61 | * Identity map, in addition to plain kernel map. This needs to be | 63 | * Identity map, in addition to plain kernel map. This needs to be |
62 | * large enough to allocate page table pages to allocate the rest. | 64 | * large enough to allocate page table pages to allocate the rest. |
@@ -226,103 +228,68 @@ static unsigned long xen_get_debugreg(int reg) | |||
226 | return HYPERVISOR_get_debugreg(reg); | 228 | return HYPERVISOR_get_debugreg(reg); |
227 | } | 229 | } |
228 | 230 | ||
229 | static unsigned long xen_save_fl(void) | 231 | static void xen_leave_lazy(void) |
230 | { | 232 | { |
231 | struct vcpu_info *vcpu; | 233 | paravirt_leave_lazy(paravirt_get_lazy_mode()); |
232 | unsigned long flags; | 234 | xen_mc_flush(); |
233 | |||
234 | vcpu = x86_read_percpu(xen_vcpu); | ||
235 | |||
236 | /* flag has opposite sense of mask */ | ||
237 | flags = !vcpu->evtchn_upcall_mask; | ||
238 | |||
239 | /* convert to IF type flag | ||
240 | -0 -> 0x00000000 | ||
241 | -1 -> 0xffffffff | ||
242 | */ | ||
243 | return (-flags) & X86_EFLAGS_IF; | ||
244 | } | 235 | } |
245 | 236 | ||
246 | static void xen_restore_fl(unsigned long flags) | 237 | static unsigned long xen_store_tr(void) |
247 | { | 238 | { |
248 | struct vcpu_info *vcpu; | 239 | return 0; |
249 | |||
250 | /* convert from IF type flag */ | ||
251 | flags = !(flags & X86_EFLAGS_IF); | ||
252 | |||
253 | /* There's a one instruction preempt window here. We need to | ||
254 | make sure we're don't switch CPUs between getting the vcpu | ||
255 | pointer and updating the mask. */ | ||
256 | preempt_disable(); | ||
257 | vcpu = x86_read_percpu(xen_vcpu); | ||
258 | vcpu->evtchn_upcall_mask = flags; | ||
259 | preempt_enable_no_resched(); | ||
260 | |||
261 | /* Doesn't matter if we get preempted here, because any | ||
262 | pending event will get dealt with anyway. */ | ||
263 | |||
264 | if (flags == 0) { | ||
265 | preempt_check_resched(); | ||
266 | barrier(); /* unmask then check (avoid races) */ | ||
267 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
268 | force_evtchn_callback(); | ||
269 | } | ||
270 | } | 240 | } |
271 | 241 | ||
272 | static void xen_irq_disable(void) | 242 | /* |
243 | * Set the page permissions for a particular virtual address. If the | ||
244 | * address is a vmalloc mapping (or other non-linear mapping), then | ||
245 | * find the linear mapping of the page and also set its protections to | ||
246 | * match. | ||
247 | */ | ||
248 | static void set_aliased_prot(void *v, pgprot_t prot) | ||
273 | { | 249 | { |
274 | /* There's a one instruction preempt window here. We need to | 250 | int level; |
275 | make sure we're don't switch CPUs between getting the vcpu | 251 | pte_t *ptep; |
276 | pointer and updating the mask. */ | 252 | pte_t pte; |
277 | preempt_disable(); | 253 | unsigned long pfn; |
278 | x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; | 254 | struct page *page; |
279 | preempt_enable_no_resched(); | ||
280 | } | ||
281 | 255 | ||
282 | static void xen_irq_enable(void) | 256 | ptep = lookup_address((unsigned long)v, &level); |
283 | { | 257 | BUG_ON(ptep == NULL); |
284 | struct vcpu_info *vcpu; | ||
285 | 258 | ||
286 | /* We don't need to worry about being preempted here, since | 259 | pfn = pte_pfn(*ptep); |
287 | either a) interrupts are disabled, so no preemption, or b) | 260 | page = pfn_to_page(pfn); |
288 | the caller is confused and is trying to re-enable interrupts | ||
289 | on an indeterminate processor. */ | ||
290 | 261 | ||
291 | vcpu = x86_read_percpu(xen_vcpu); | 262 | pte = pfn_pte(pfn, prot); |
292 | vcpu->evtchn_upcall_mask = 0; | ||
293 | 263 | ||
294 | /* Doesn't matter if we get preempted here, because any | 264 | if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) |
295 | pending event will get dealt with anyway. */ | 265 | BUG(); |
296 | 266 | ||
297 | barrier(); /* unmask then check (avoid races) */ | 267 | if (!PageHighMem(page)) { |
298 | if (unlikely(vcpu->evtchn_upcall_pending)) | 268 | void *av = __va(PFN_PHYS(pfn)); |
299 | force_evtchn_callback(); | ||
300 | } | ||
301 | 269 | ||
302 | static void xen_safe_halt(void) | 270 | if (av != v) |
303 | { | 271 | if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) |
304 | /* Blocking includes an implicit local_irq_enable(). */ | 272 | BUG(); |
305 | if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) | 273 | } else |
306 | BUG(); | 274 | kmap_flush_unused(); |
307 | } | 275 | } |
308 | 276 | ||
309 | static void xen_halt(void) | 277 | static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) |
310 | { | 278 | { |
311 | if (irqs_disabled()) | 279 | const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; |
312 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | 280 | int i; |
313 | else | ||
314 | xen_safe_halt(); | ||
315 | } | ||
316 | 281 | ||
317 | static void xen_leave_lazy(void) | 282 | for(i = 0; i < entries; i += entries_per_page) |
318 | { | 283 | set_aliased_prot(ldt + i, PAGE_KERNEL_RO); |
319 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
320 | xen_mc_flush(); | ||
321 | } | 284 | } |
322 | 285 | ||
323 | static unsigned long xen_store_tr(void) | 286 | static void xen_free_ldt(struct desc_struct *ldt, unsigned entries) |
324 | { | 287 | { |
325 | return 0; | 288 | const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; |
289 | int i; | ||
290 | |||
291 | for(i = 0; i < entries; i += entries_per_page) | ||
292 | set_aliased_prot(ldt + i, PAGE_KERNEL); | ||
326 | } | 293 | } |
327 | 294 | ||
328 | static void xen_set_ldt(const void *addr, unsigned entries) | 295 | static void xen_set_ldt(const void *addr, unsigned entries) |
@@ -425,8 +392,7 @@ static void xen_load_gs_index(unsigned int idx) | |||
425 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | 392 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, |
426 | const void *ptr) | 393 | const void *ptr) |
427 | { | 394 | { |
428 | unsigned long lp = (unsigned long)&dt[entrynum]; | 395 | xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); |
429 | xmaddr_t mach_lp = virt_to_machine(lp); | ||
430 | u64 entry = *(u64 *)ptr; | 396 | u64 entry = *(u64 *)ptr; |
431 | 397 | ||
432 | preempt_disable(); | 398 | preempt_disable(); |
@@ -559,7 +525,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | |||
559 | } | 525 | } |
560 | 526 | ||
561 | static void xen_load_sp0(struct tss_struct *tss, | 527 | static void xen_load_sp0(struct tss_struct *tss, |
562 | struct thread_struct *thread) | 528 | struct thread_struct *thread) |
563 | { | 529 | { |
564 | struct multicall_space mcs = xen_mc_entry(0); | 530 | struct multicall_space mcs = xen_mc_entry(0); |
565 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); | 531 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); |
@@ -803,6 +769,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | |||
803 | ret = -EFAULT; | 769 | ret = -EFAULT; |
804 | break; | 770 | break; |
805 | #endif | 771 | #endif |
772 | |||
773 | case MSR_STAR: | ||
774 | case MSR_CSTAR: | ||
775 | case MSR_LSTAR: | ||
776 | case MSR_SYSCALL_MASK: | ||
777 | case MSR_IA32_SYSENTER_CS: | ||
778 | case MSR_IA32_SYSENTER_ESP: | ||
779 | case MSR_IA32_SYSENTER_EIP: | ||
780 | /* Fast syscall setup is all done in hypercalls, so | ||
781 | these are all ignored. Stub them out here to stop | ||
782 | Xen console noise. */ | ||
783 | break; | ||
784 | |||
806 | default: | 785 | default: |
807 | ret = native_write_msr_safe(msr, low, high); | 786 | ret = native_write_msr_safe(msr, low, high); |
808 | } | 787 | } |
@@ -846,8 +825,8 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) | |||
846 | SetPagePinned(page); | 825 | SetPagePinned(page); |
847 | 826 | ||
848 | if (!PageHighMem(page)) { | 827 | if (!PageHighMem(page)) { |
849 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | 828 | make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); |
850 | if (level == PT_PTE) | 829 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) |
851 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); | 830 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); |
852 | } else | 831 | } else |
853 | /* make sure there are no stray mappings of | 832 | /* make sure there are no stray mappings of |
@@ -915,7 +894,7 @@ static void xen_release_ptpage(u32 pfn, unsigned level) | |||
915 | 894 | ||
916 | if (PagePinned(page)) { | 895 | if (PagePinned(page)) { |
917 | if (!PageHighMem(page)) { | 896 | if (!PageHighMem(page)) { |
918 | if (level == PT_PTE) | 897 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) |
919 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | 898 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); |
920 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 899 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
921 | } | 900 | } |
@@ -1220,6 +1199,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1220 | .load_gs_index = xen_load_gs_index, | 1199 | .load_gs_index = xen_load_gs_index, |
1221 | #endif | 1200 | #endif |
1222 | 1201 | ||
1202 | .alloc_ldt = xen_alloc_ldt, | ||
1203 | .free_ldt = xen_free_ldt, | ||
1204 | |||
1223 | .store_gdt = native_store_gdt, | 1205 | .store_gdt = native_store_gdt, |
1224 | .store_idt = native_store_idt, | 1206 | .store_idt = native_store_idt, |
1225 | .store_tr = xen_store_tr, | 1207 | .store_tr = xen_store_tr, |
@@ -1241,36 +1223,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1241 | }, | 1223 | }, |
1242 | }; | 1224 | }; |
1243 | 1225 | ||
1244 | static void __init __xen_init_IRQ(void) | ||
1245 | { | ||
1246 | #ifdef CONFIG_X86_64 | ||
1247 | int i; | ||
1248 | |||
1249 | /* Create identity vector->irq map */ | ||
1250 | for(i = 0; i < NR_VECTORS; i++) { | ||
1251 | int cpu; | ||
1252 | |||
1253 | for_each_possible_cpu(cpu) | ||
1254 | per_cpu(vector_irq, cpu)[i] = i; | ||
1255 | } | ||
1256 | #endif /* CONFIG_X86_64 */ | ||
1257 | |||
1258 | xen_init_IRQ(); | ||
1259 | } | ||
1260 | |||
1261 | static const struct pv_irq_ops xen_irq_ops __initdata = { | ||
1262 | .init_IRQ = __xen_init_IRQ, | ||
1263 | .save_fl = xen_save_fl, | ||
1264 | .restore_fl = xen_restore_fl, | ||
1265 | .irq_disable = xen_irq_disable, | ||
1266 | .irq_enable = xen_irq_enable, | ||
1267 | .safe_halt = xen_safe_halt, | ||
1268 | .halt = xen_halt, | ||
1269 | #ifdef CONFIG_X86_64 | ||
1270 | .adjust_exception_frame = xen_adjust_exception_frame, | ||
1271 | #endif | ||
1272 | }; | ||
1273 | |||
1274 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 1226 | static const struct pv_apic_ops xen_apic_ops __initdata = { |
1275 | #ifdef CONFIG_X86_LOCAL_APIC | 1227 | #ifdef CONFIG_X86_LOCAL_APIC |
1276 | .apic_write = xen_apic_write, | 1228 | .apic_write = xen_apic_write, |
@@ -1664,6 +1616,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1664 | if (!xen_start_info) | 1616 | if (!xen_start_info) |
1665 | return; | 1617 | return; |
1666 | 1618 | ||
1619 | xen_domain_type = XEN_PV_DOMAIN; | ||
1620 | |||
1667 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); | 1621 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); |
1668 | 1622 | ||
1669 | xen_setup_features(); | 1623 | xen_setup_features(); |
@@ -1673,10 +1627,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1673 | pv_init_ops = xen_init_ops; | 1627 | pv_init_ops = xen_init_ops; |
1674 | pv_time_ops = xen_time_ops; | 1628 | pv_time_ops = xen_time_ops; |
1675 | pv_cpu_ops = xen_cpu_ops; | 1629 | pv_cpu_ops = xen_cpu_ops; |
1676 | pv_irq_ops = xen_irq_ops; | ||
1677 | pv_apic_ops = xen_apic_ops; | 1630 | pv_apic_ops = xen_apic_ops; |
1678 | pv_mmu_ops = xen_mmu_ops; | 1631 | pv_mmu_ops = xen_mmu_ops; |
1679 | 1632 | ||
1633 | xen_init_irq_ops(); | ||
1634 | |||
1680 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { | 1635 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { |
1681 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; | 1636 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; |
1682 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; | 1637 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; |
@@ -1700,7 +1655,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1700 | 1655 | ||
1701 | /* Prevent unwanted bits from being set in PTEs. */ | 1656 | /* Prevent unwanted bits from being set in PTEs. */ |
1702 | __supported_pte_mask &= ~_PAGE_GLOBAL; | 1657 | __supported_pte_mask &= ~_PAGE_GLOBAL; |
1703 | if (!is_initial_xendomain()) | 1658 | if (!xen_initial_domain()) |
1704 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | 1659 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); |
1705 | 1660 | ||
1706 | /* Don't do the full vcpu_info placement stuff until we have a | 1661 | /* Don't do the full vcpu_info placement stuff until we have a |
@@ -1735,7 +1690,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1735 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; | 1690 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; |
1736 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); | 1691 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); |
1737 | 1692 | ||
1738 | if (!is_initial_xendomain()) { | 1693 | if (!xen_initial_domain()) { |
1739 | add_preferred_console("xenboot", 0, NULL); | 1694 | add_preferred_console("xenboot", 0, NULL); |
1740 | add_preferred_console("tty", 0, NULL); | 1695 | add_preferred_console("tty", 0, NULL); |
1741 | add_preferred_console("hvc", 0, NULL); | 1696 | add_preferred_console("hvc", 0, NULL); |
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c new file mode 100644 index 000000000000..28b85ab8422e --- /dev/null +++ b/arch/x86/xen/irq.c | |||
@@ -0,0 +1,143 @@ | |||
1 | #include <linux/hardirq.h> | ||
2 | |||
3 | #include <xen/interface/xen.h> | ||
4 | #include <xen/interface/sched.h> | ||
5 | #include <xen/interface/vcpu.h> | ||
6 | |||
7 | #include <asm/xen/hypercall.h> | ||
8 | #include <asm/xen/hypervisor.h> | ||
9 | |||
10 | #include "xen-ops.h" | ||
11 | |||
12 | /* | ||
13 | * Force a proper event-channel callback from Xen after clearing the | ||
14 | * callback mask. We do this in a very simple manner, by making a call | ||
15 | * down into Xen. The pending flag will be checked by Xen on return. | ||
16 | */ | ||
17 | void xen_force_evtchn_callback(void) | ||
18 | { | ||
19 | (void)HYPERVISOR_xen_version(0, NULL); | ||
20 | } | ||
21 | |||
22 | static void __init __xen_init_IRQ(void) | ||
23 | { | ||
24 | #ifdef CONFIG_X86_64 | ||
25 | int i; | ||
26 | |||
27 | /* Create identity vector->irq map */ | ||
28 | for(i = 0; i < NR_VECTORS; i++) { | ||
29 | int cpu; | ||
30 | |||
31 | for_each_possible_cpu(cpu) | ||
32 | per_cpu(vector_irq, cpu)[i] = i; | ||
33 | } | ||
34 | #endif /* CONFIG_X86_64 */ | ||
35 | |||
36 | xen_init_IRQ(); | ||
37 | } | ||
38 | |||
39 | static unsigned long xen_save_fl(void) | ||
40 | { | ||
41 | struct vcpu_info *vcpu; | ||
42 | unsigned long flags; | ||
43 | |||
44 | vcpu = x86_read_percpu(xen_vcpu); | ||
45 | |||
46 | /* flag has opposite sense of mask */ | ||
47 | flags = !vcpu->evtchn_upcall_mask; | ||
48 | |||
49 | /* convert to IF type flag | ||
50 | -0 -> 0x00000000 | ||
51 | -1 -> 0xffffffff | ||
52 | */ | ||
53 | return (-flags) & X86_EFLAGS_IF; | ||
54 | } | ||
55 | |||
56 | static void xen_restore_fl(unsigned long flags) | ||
57 | { | ||
58 | struct vcpu_info *vcpu; | ||
59 | |||
60 | /* convert from IF type flag */ | ||
61 | flags = !(flags & X86_EFLAGS_IF); | ||
62 | |||
63 | /* There's a one instruction preempt window here. We need to | ||
64 | make sure we don't switch CPUs between getting the vcpu | ||
65 | pointer and updating the mask. */ | ||
66 | preempt_disable(); | ||
67 | vcpu = x86_read_percpu(xen_vcpu); | ||
68 | vcpu->evtchn_upcall_mask = flags; | ||
69 | preempt_enable_no_resched(); | ||
70 | |||
71 | /* Doesn't matter if we get preempted here, because any | ||
72 | pending event will get dealt with anyway. */ | ||
73 | |||
74 | if (flags == 0) { | ||
75 | preempt_check_resched(); | ||
76 | barrier(); /* unmask then check (avoid races) */ | ||
77 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
78 | xen_force_evtchn_callback(); | ||
79 | } | ||
80 | } | ||
81 | |||
82 | static void xen_irq_disable(void) | ||
83 | { | ||
84 | /* There's a one instruction preempt window here. We need to | ||
85 | make sure we don't switch CPUs between getting the vcpu | ||
86 | pointer and updating the mask. */ | ||
87 | preempt_disable(); | ||
88 | x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; | ||
89 | preempt_enable_no_resched(); | ||
90 | } | ||
91 | |||
92 | static void xen_irq_enable(void) | ||
93 | { | ||
94 | struct vcpu_info *vcpu; | ||
95 | |||
96 | /* We don't need to worry about being preempted here, since | ||
97 | either a) interrupts are disabled, so no preemption, or b) | ||
98 | the caller is confused and is trying to re-enable interrupts | ||
99 | on an indeterminate processor. */ | ||
100 | |||
101 | vcpu = x86_read_percpu(xen_vcpu); | ||
102 | vcpu->evtchn_upcall_mask = 0; | ||
103 | |||
104 | /* Doesn't matter if we get preempted here, because any | ||
105 | pending event will get dealt with anyway. */ | ||
106 | |||
107 | barrier(); /* unmask then check (avoid races) */ | ||
108 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
109 | xen_force_evtchn_callback(); | ||
110 | } | ||
111 | |||
112 | static void xen_safe_halt(void) | ||
113 | { | ||
114 | /* Blocking includes an implicit local_irq_enable(). */ | ||
115 | if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) | ||
116 | BUG(); | ||
117 | } | ||
118 | |||
119 | static void xen_halt(void) | ||
120 | { | ||
121 | if (irqs_disabled()) | ||
122 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | ||
123 | else | ||
124 | xen_safe_halt(); | ||
125 | } | ||
126 | |||
127 | static const struct pv_irq_ops xen_irq_ops __initdata = { | ||
128 | .init_IRQ = __xen_init_IRQ, | ||
129 | .save_fl = xen_save_fl, | ||
130 | .restore_fl = xen_restore_fl, | ||
131 | .irq_disable = xen_irq_disable, | ||
132 | .irq_enable = xen_irq_enable, | ||
133 | .safe_halt = xen_safe_halt, | ||
134 | .halt = xen_halt, | ||
135 | #ifdef CONFIG_X86_64 | ||
136 | .adjust_exception_frame = xen_adjust_exception_frame, | ||
137 | #endif | ||
138 | }; | ||
139 | |||
140 | void __init xen_init_irq_ops() | ||
141 | { | ||
142 | pv_irq_ops = xen_irq_ops; | ||
143 | } | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index aa37469da696..64e58681767e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -40,6 +40,7 @@ | |||
40 | */ | 40 | */ |
41 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | #include <linux/debugfs.h> | ||
43 | #include <linux/bug.h> | 44 | #include <linux/bug.h> |
44 | 45 | ||
45 | #include <asm/pgtable.h> | 46 | #include <asm/pgtable.h> |
@@ -57,6 +58,61 @@ | |||
57 | 58 | ||
58 | #include "multicalls.h" | 59 | #include "multicalls.h" |
59 | #include "mmu.h" | 60 | #include "mmu.h" |
61 | #include "debugfs.h" | ||
62 | |||
63 | #define MMU_UPDATE_HISTO 30 | ||
64 | |||
65 | #ifdef CONFIG_XEN_DEBUG_FS | ||
66 | |||
67 | static struct { | ||
68 | u32 pgd_update; | ||
69 | u32 pgd_update_pinned; | ||
70 | u32 pgd_update_batched; | ||
71 | |||
72 | u32 pud_update; | ||
73 | u32 pud_update_pinned; | ||
74 | u32 pud_update_batched; | ||
75 | |||
76 | u32 pmd_update; | ||
77 | u32 pmd_update_pinned; | ||
78 | u32 pmd_update_batched; | ||
79 | |||
80 | u32 pte_update; | ||
81 | u32 pte_update_pinned; | ||
82 | u32 pte_update_batched; | ||
83 | |||
84 | u32 mmu_update; | ||
85 | u32 mmu_update_extended; | ||
86 | u32 mmu_update_histo[MMU_UPDATE_HISTO]; | ||
87 | |||
88 | u32 prot_commit; | ||
89 | u32 prot_commit_batched; | ||
90 | |||
91 | u32 set_pte_at; | ||
92 | u32 set_pte_at_batched; | ||
93 | u32 set_pte_at_pinned; | ||
94 | u32 set_pte_at_current; | ||
95 | u32 set_pte_at_kernel; | ||
96 | } mmu_stats; | ||
97 | |||
98 | static u8 zero_stats; | ||
99 | |||
100 | static inline void check_zero(void) | ||
101 | { | ||
102 | if (unlikely(zero_stats)) { | ||
103 | memset(&mmu_stats, 0, sizeof(mmu_stats)); | ||
104 | zero_stats = 0; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | #define ADD_STATS(elem, val) \ | ||
109 | do { check_zero(); mmu_stats.elem += (val); } while(0) | ||
110 | |||
111 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
112 | |||
113 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | ||
114 | |||
115 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
60 | 116 | ||
61 | /* | 117 | /* |
62 | * Just beyond the highest usermode address. STACK_TOP_MAX has a | 118 | * Just beyond the highest usermode address. STACK_TOP_MAX has a |
@@ -229,25 +285,35 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
229 | } | 285 | } |
230 | 286 | ||
231 | 287 | ||
232 | static bool page_pinned(void *ptr) | 288 | static bool xen_page_pinned(void *ptr) |
233 | { | 289 | { |
234 | struct page *page = virt_to_page(ptr); | 290 | struct page *page = virt_to_page(ptr); |
235 | 291 | ||
236 | return PagePinned(page); | 292 | return PagePinned(page); |
237 | } | 293 | } |
238 | 294 | ||
239 | static void extend_mmu_update(const struct mmu_update *update) | 295 | static void xen_extend_mmu_update(const struct mmu_update *update) |
240 | { | 296 | { |
241 | struct multicall_space mcs; | 297 | struct multicall_space mcs; |
242 | struct mmu_update *u; | 298 | struct mmu_update *u; |
243 | 299 | ||
244 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); | 300 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); |
245 | 301 | ||
246 | if (mcs.mc != NULL) | 302 | if (mcs.mc != NULL) { |
303 | ADD_STATS(mmu_update_extended, 1); | ||
304 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1); | ||
305 | |||
247 | mcs.mc->args[1]++; | 306 | mcs.mc->args[1]++; |
248 | else { | 307 | |
308 | if (mcs.mc->args[1] < MMU_UPDATE_HISTO) | ||
309 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1); | ||
310 | else | ||
311 | ADD_STATS(mmu_update_histo[0], 1); | ||
312 | } else { | ||
313 | ADD_STATS(mmu_update, 1); | ||
249 | mcs = __xen_mc_entry(sizeof(*u)); | 314 | mcs = __xen_mc_entry(sizeof(*u)); |
250 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | 315 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); |
316 | ADD_STATS(mmu_update_histo[1], 1); | ||
251 | } | 317 | } |
252 | 318 | ||
253 | u = mcs.args; | 319 | u = mcs.args; |
@@ -265,7 +331,9 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
265 | /* ptr may be ioremapped for 64-bit pagetable setup */ | 331 | /* ptr may be ioremapped for 64-bit pagetable setup */ |
266 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; | 332 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; |
267 | u.val = pmd_val_ma(val); | 333 | u.val = pmd_val_ma(val); |
268 | extend_mmu_update(&u); | 334 | xen_extend_mmu_update(&u); |
335 | |||
336 | ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
269 | 337 | ||
270 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 338 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
271 | 339 | ||
@@ -274,13 +342,17 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
274 | 342 | ||
275 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | 343 | void xen_set_pmd(pmd_t *ptr, pmd_t val) |
276 | { | 344 | { |
345 | ADD_STATS(pmd_update, 1); | ||
346 | |||
277 | /* If page is not pinned, we can just update the entry | 347 | /* If page is not pinned, we can just update the entry |
278 | directly */ | 348 | directly */ |
279 | if (!page_pinned(ptr)) { | 349 | if (!xen_page_pinned(ptr)) { |
280 | *ptr = val; | 350 | *ptr = val; |
281 | return; | 351 | return; |
282 | } | 352 | } |
283 | 353 | ||
354 | ADD_STATS(pmd_update_pinned, 1); | ||
355 | |||
284 | xen_set_pmd_hyper(ptr, val); | 356 | xen_set_pmd_hyper(ptr, val); |
285 | } | 357 | } |
286 | 358 | ||
@@ -300,12 +372,18 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
300 | if (mm == &init_mm) | 372 | if (mm == &init_mm) |
301 | preempt_disable(); | 373 | preempt_disable(); |
302 | 374 | ||
375 | ADD_STATS(set_pte_at, 1); | ||
376 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); | ||
377 | ADD_STATS(set_pte_at_current, mm == current->mm); | ||
378 | ADD_STATS(set_pte_at_kernel, mm == &init_mm); | ||
379 | |||
303 | if (mm == current->mm || mm == &init_mm) { | 380 | if (mm == current->mm || mm == &init_mm) { |
304 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | 381 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { |
305 | struct multicall_space mcs; | 382 | struct multicall_space mcs; |
306 | mcs = xen_mc_entry(0); | 383 | mcs = xen_mc_entry(0); |
307 | 384 | ||
308 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); | 385 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); |
386 | ADD_STATS(set_pte_at_batched, 1); | ||
309 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 387 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
310 | goto out; | 388 | goto out; |
311 | } else | 389 | } else |
@@ -334,7 +412,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
334 | 412 | ||
335 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; | 413 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
336 | u.val = pte_val_ma(pte); | 414 | u.val = pte_val_ma(pte); |
337 | extend_mmu_update(&u); | 415 | xen_extend_mmu_update(&u); |
416 | |||
417 | ADD_STATS(prot_commit, 1); | ||
418 | ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
338 | 419 | ||
339 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 420 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
340 | } | 421 | } |
@@ -400,7 +481,9 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
400 | /* ptr may be ioremapped for 64-bit pagetable setup */ | 481 | /* ptr may be ioremapped for 64-bit pagetable setup */ |
401 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; | 482 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; |
402 | u.val = pud_val_ma(val); | 483 | u.val = pud_val_ma(val); |
403 | extend_mmu_update(&u); | 484 | xen_extend_mmu_update(&u); |
485 | |||
486 | ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
404 | 487 | ||
405 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 488 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
406 | 489 | ||
@@ -409,18 +492,26 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
409 | 492 | ||
410 | void xen_set_pud(pud_t *ptr, pud_t val) | 493 | void xen_set_pud(pud_t *ptr, pud_t val) |
411 | { | 494 | { |
495 | ADD_STATS(pud_update, 1); | ||
496 | |||
412 | /* If page is not pinned, we can just update the entry | 497 | /* If page is not pinned, we can just update the entry |
413 | directly */ | 498 | directly */ |
414 | if (!page_pinned(ptr)) { | 499 | if (!xen_page_pinned(ptr)) { |
415 | *ptr = val; | 500 | *ptr = val; |
416 | return; | 501 | return; |
417 | } | 502 | } |
418 | 503 | ||
504 | ADD_STATS(pud_update_pinned, 1); | ||
505 | |||
419 | xen_set_pud_hyper(ptr, val); | 506 | xen_set_pud_hyper(ptr, val); |
420 | } | 507 | } |
421 | 508 | ||
422 | void xen_set_pte(pte_t *ptep, pte_t pte) | 509 | void xen_set_pte(pte_t *ptep, pte_t pte) |
423 | { | 510 | { |
511 | ADD_STATS(pte_update, 1); | ||
512 | // ADD_STATS(pte_update_pinned, xen_page_pinned(ptep)); | ||
513 | ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
514 | |||
424 | #ifdef CONFIG_X86_PAE | 515 | #ifdef CONFIG_X86_PAE |
425 | ptep->pte_high = pte.pte_high; | 516 | ptep->pte_high = pte.pte_high; |
426 | smp_wmb(); | 517 | smp_wmb(); |
@@ -490,7 +581,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | |||
490 | 581 | ||
491 | u.ptr = virt_to_machine(ptr).maddr; | 582 | u.ptr = virt_to_machine(ptr).maddr; |
492 | u.val = pgd_val_ma(val); | 583 | u.val = pgd_val_ma(val); |
493 | extend_mmu_update(&u); | 584 | xen_extend_mmu_update(&u); |
494 | } | 585 | } |
495 | 586 | ||
496 | /* | 587 | /* |
@@ -517,17 +608,22 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
517 | { | 608 | { |
518 | pgd_t *user_ptr = xen_get_user_pgd(ptr); | 609 | pgd_t *user_ptr = xen_get_user_pgd(ptr); |
519 | 610 | ||
611 | ADD_STATS(pgd_update, 1); | ||
612 | |||
520 | /* If page is not pinned, we can just update the entry | 613 | /* If page is not pinned, we can just update the entry |
521 | directly */ | 614 | directly */ |
522 | if (!page_pinned(ptr)) { | 615 | if (!xen_page_pinned(ptr)) { |
523 | *ptr = val; | 616 | *ptr = val; |
524 | if (user_ptr) { | 617 | if (user_ptr) { |
525 | WARN_ON(page_pinned(user_ptr)); | 618 | WARN_ON(xen_page_pinned(user_ptr)); |
526 | *user_ptr = val; | 619 | *user_ptr = val; |
527 | } | 620 | } |
528 | return; | 621 | return; |
529 | } | 622 | } |
530 | 623 | ||
624 | ADD_STATS(pgd_update_pinned, 1); | ||
625 | ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
626 | |||
531 | /* If it's pinned, then we can at least batch the kernel and | 627 | /* If it's pinned, then we can at least batch the kernel and |
532 | user updates together. */ | 628 | user updates together. */ |
533 | xen_mc_batch(); | 629 | xen_mc_batch(); |
@@ -555,8 +651,8 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
555 | * For 64-bit, we must skip the Xen hole in the middle of the address | 651 | * For 64-bit, we must skip the Xen hole in the middle of the address |
556 | * space, just after the big x86-64 virtual hole. | 652 | * space, just after the big x86-64 virtual hole. |
557 | */ | 653 | */ |
558 | static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | 654 | static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), |
559 | unsigned long limit) | 655 | unsigned long limit) |
560 | { | 656 | { |
561 | int flush = 0; | 657 | int flush = 0; |
562 | unsigned hole_low, hole_high; | 658 | unsigned hole_low, hole_high; |
@@ -590,8 +686,6 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | |||
590 | pmdidx_limit = 0; | 686 | pmdidx_limit = 0; |
591 | #endif | 687 | #endif |
592 | 688 | ||
593 | flush |= (*func)(virt_to_page(pgd), PT_PGD); | ||
594 | |||
595 | for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { | 689 | for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { |
596 | pud_t *pud; | 690 | pud_t *pud; |
597 | 691 | ||
@@ -637,16 +731,22 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | |||
637 | } | 731 | } |
638 | } | 732 | } |
639 | } | 733 | } |
734 | |||
640 | out: | 735 | out: |
736 | /* Do the top level last, so that the callbacks can use it as | ||
737 | a cue to do final things like tlb flushes. */ | ||
738 | flush |= (*func)(virt_to_page(pgd), PT_PGD); | ||
641 | 739 | ||
642 | return flush; | 740 | return flush; |
643 | } | 741 | } |
644 | 742 | ||
645 | static spinlock_t *lock_pte(struct page *page) | 743 | /* If we're using split pte locks, then take the page's lock and |
744 | return a pointer to it. Otherwise return NULL. */ | ||
745 | static spinlock_t *xen_pte_lock(struct page *page) | ||
646 | { | 746 | { |
647 | spinlock_t *ptl = NULL; | 747 | spinlock_t *ptl = NULL; |
648 | 748 | ||
649 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | 749 | #if USE_SPLIT_PTLOCKS |
650 | ptl = __pte_lockptr(page); | 750 | ptl = __pte_lockptr(page); |
651 | spin_lock(ptl); | 751 | spin_lock(ptl); |
652 | #endif | 752 | #endif |
@@ -654,7 +754,7 @@ static spinlock_t *lock_pte(struct page *page) | |||
654 | return ptl; | 754 | return ptl; |
655 | } | 755 | } |
656 | 756 | ||
657 | static void do_unlock(void *v) | 757 | static void xen_pte_unlock(void *v) |
658 | { | 758 | { |
659 | spinlock_t *ptl = v; | 759 | spinlock_t *ptl = v; |
660 | spin_unlock(ptl); | 760 | spin_unlock(ptl); |
@@ -672,7 +772,7 @@ static void xen_do_pin(unsigned level, unsigned long pfn) | |||
672 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | 772 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); |
673 | } | 773 | } |
674 | 774 | ||
675 | static int pin_page(struct page *page, enum pt_level level) | 775 | static int xen_pin_page(struct page *page, enum pt_level level) |
676 | { | 776 | { |
677 | unsigned pgfl = TestSetPagePinned(page); | 777 | unsigned pgfl = TestSetPagePinned(page); |
678 | int flush; | 778 | int flush; |
@@ -691,21 +791,40 @@ static int pin_page(struct page *page, enum pt_level level) | |||
691 | 791 | ||
692 | flush = 0; | 792 | flush = 0; |
693 | 793 | ||
794 | /* | ||
795 | * We need to hold the pagetable lock between the time | ||
796 | * we make the pagetable RO and when we actually pin | ||
797 | * it. If we don't, then other users may come in and | ||
798 | * attempt to update the pagetable by writing it, | ||
799 | * which will fail because the memory is RO but not | ||
800 | * pinned, so Xen won't do the trap'n'emulate. | ||
801 | * | ||
802 | * If we're using split pte locks, we can't hold the | ||
803 | * entire pagetable's worth of locks during the | ||
804 | * traverse, because we may wrap the preempt count (8 | ||
805 | * bits). The solution is to mark RO and pin each PTE | ||
806 | * page while holding the lock. This means the number | ||
807 | * of locks we end up holding is never more than a | ||
808 | * batch size (~32 entries, at present). | ||
809 | * | ||
810 | * If we're not using split pte locks, we needn't pin | ||
811 | * the PTE pages independently, because we're | ||
812 | * protected by the overall pagetable lock. | ||
813 | */ | ||
694 | ptl = NULL; | 814 | ptl = NULL; |
695 | if (level == PT_PTE) | 815 | if (level == PT_PTE) |
696 | ptl = lock_pte(page); | 816 | ptl = xen_pte_lock(page); |
697 | 817 | ||
698 | MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, | 818 | MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, |
699 | pfn_pte(pfn, PAGE_KERNEL_RO), | 819 | pfn_pte(pfn, PAGE_KERNEL_RO), |
700 | level == PT_PGD ? UVMF_TLB_FLUSH : 0); | 820 | level == PT_PGD ? UVMF_TLB_FLUSH : 0); |
701 | 821 | ||
702 | if (level == PT_PTE) | 822 | if (ptl) { |
703 | xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); | 823 | xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); |
704 | 824 | ||
705 | if (ptl) { | ||
706 | /* Queue a deferred unlock for when this batch | 825 | /* Queue a deferred unlock for when this batch |
707 | is completed. */ | 826 | is completed. */ |
708 | xen_mc_callback(do_unlock, ptl); | 827 | xen_mc_callback(xen_pte_unlock, ptl); |
709 | } | 828 | } |
710 | } | 829 | } |
711 | 830 | ||
@@ -719,7 +838,7 @@ void xen_pgd_pin(pgd_t *pgd) | |||
719 | { | 838 | { |
720 | xen_mc_batch(); | 839 | xen_mc_batch(); |
721 | 840 | ||
722 | if (pgd_walk(pgd, pin_page, USER_LIMIT)) { | 841 | if (xen_pgd_walk(pgd, xen_pin_page, USER_LIMIT)) { |
723 | /* re-enable interrupts for kmap_flush_unused */ | 842 | /* re-enable interrupts for kmap_flush_unused */ |
724 | xen_mc_issue(0); | 843 | xen_mc_issue(0); |
725 | kmap_flush_unused(); | 844 | kmap_flush_unused(); |
@@ -733,14 +852,14 @@ void xen_pgd_pin(pgd_t *pgd) | |||
733 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); | 852 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); |
734 | 853 | ||
735 | if (user_pgd) { | 854 | if (user_pgd) { |
736 | pin_page(virt_to_page(user_pgd), PT_PGD); | 855 | xen_pin_page(virt_to_page(user_pgd), PT_PGD); |
737 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); | 856 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); |
738 | } | 857 | } |
739 | } | 858 | } |
740 | #else /* CONFIG_X86_32 */ | 859 | #else /* CONFIG_X86_32 */ |
741 | #ifdef CONFIG_X86_PAE | 860 | #ifdef CONFIG_X86_PAE |
742 | /* Need to make sure unshared kernel PMD is pinnable */ | 861 | /* Need to make sure unshared kernel PMD is pinnable */ |
743 | pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); | 862 | xen_pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); |
744 | #endif | 863 | #endif |
745 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); | 864 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); |
746 | #endif /* CONFIG_X86_64 */ | 865 | #endif /* CONFIG_X86_64 */ |
@@ -775,7 +894,7 @@ void xen_mm_pin_all(void) | |||
775 | * that's before we have page structures to store the bits. So do all | 894 | * that's before we have page structures to store the bits. So do all |
776 | * the book-keeping now. | 895 | * the book-keeping now. |
777 | */ | 896 | */ |
778 | static __init int mark_pinned(struct page *page, enum pt_level level) | 897 | static __init int xen_mark_pinned(struct page *page, enum pt_level level) |
779 | { | 898 | { |
780 | SetPagePinned(page); | 899 | SetPagePinned(page); |
781 | return 0; | 900 | return 0; |
@@ -783,10 +902,10 @@ static __init int mark_pinned(struct page *page, enum pt_level level) | |||
783 | 902 | ||
784 | void __init xen_mark_init_mm_pinned(void) | 903 | void __init xen_mark_init_mm_pinned(void) |
785 | { | 904 | { |
786 | pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP); | 905 | xen_pgd_walk(init_mm.pgd, xen_mark_pinned, FIXADDR_TOP); |
787 | } | 906 | } |
788 | 907 | ||
789 | static int unpin_page(struct page *page, enum pt_level level) | 908 | static int xen_unpin_page(struct page *page, enum pt_level level) |
790 | { | 909 | { |
791 | unsigned pgfl = TestClearPagePinned(page); | 910 | unsigned pgfl = TestClearPagePinned(page); |
792 | 911 | ||
@@ -796,10 +915,18 @@ static int unpin_page(struct page *page, enum pt_level level) | |||
796 | spinlock_t *ptl = NULL; | 915 | spinlock_t *ptl = NULL; |
797 | struct multicall_space mcs; | 916 | struct multicall_space mcs; |
798 | 917 | ||
918 | /* | ||
919 | * Do the converse to pin_page. If we're using split | ||
920 | * pte locks, we must be holding the lock for while | ||
921 | * the pte page is unpinned but still RO to prevent | ||
922 | * concurrent updates from seeing it in this | ||
923 | * partially-pinned state. | ||
924 | */ | ||
799 | if (level == PT_PTE) { | 925 | if (level == PT_PTE) { |
800 | ptl = lock_pte(page); | 926 | ptl = xen_pte_lock(page); |
801 | 927 | ||
802 | xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); | 928 | if (ptl) |
929 | xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); | ||
803 | } | 930 | } |
804 | 931 | ||
805 | mcs = __xen_mc_entry(0); | 932 | mcs = __xen_mc_entry(0); |
@@ -810,7 +937,7 @@ static int unpin_page(struct page *page, enum pt_level level) | |||
810 | 937 | ||
811 | if (ptl) { | 938 | if (ptl) { |
812 | /* unlock when batch completed */ | 939 | /* unlock when batch completed */ |
813 | xen_mc_callback(do_unlock, ptl); | 940 | xen_mc_callback(xen_pte_unlock, ptl); |
814 | } | 941 | } |
815 | } | 942 | } |
816 | 943 | ||
@@ -830,17 +957,17 @@ static void xen_pgd_unpin(pgd_t *pgd) | |||
830 | 957 | ||
831 | if (user_pgd) { | 958 | if (user_pgd) { |
832 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); | 959 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); |
833 | unpin_page(virt_to_page(user_pgd), PT_PGD); | 960 | xen_unpin_page(virt_to_page(user_pgd), PT_PGD); |
834 | } | 961 | } |
835 | } | 962 | } |
836 | #endif | 963 | #endif |
837 | 964 | ||
838 | #ifdef CONFIG_X86_PAE | 965 | #ifdef CONFIG_X86_PAE |
839 | /* Need to make sure unshared kernel PMD is unpinned */ | 966 | /* Need to make sure unshared kernel PMD is unpinned */ |
840 | pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); | 967 | xen_unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); |
841 | #endif | 968 | #endif |
842 | 969 | ||
843 | pgd_walk(pgd, unpin_page, USER_LIMIT); | 970 | xen_pgd_walk(pgd, xen_unpin_page, USER_LIMIT); |
844 | 971 | ||
845 | xen_mc_issue(0); | 972 | xen_mc_issue(0); |
846 | } | 973 | } |
@@ -907,7 +1034,7 @@ static void drop_other_mm_ref(void *info) | |||
907 | } | 1034 | } |
908 | } | 1035 | } |
909 | 1036 | ||
910 | static void drop_mm_ref(struct mm_struct *mm) | 1037 | static void xen_drop_mm_ref(struct mm_struct *mm) |
911 | { | 1038 | { |
912 | cpumask_t mask; | 1039 | cpumask_t mask; |
913 | unsigned cpu; | 1040 | unsigned cpu; |
@@ -937,7 +1064,7 @@ static void drop_mm_ref(struct mm_struct *mm) | |||
937 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); | 1064 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); |
938 | } | 1065 | } |
939 | #else | 1066 | #else |
940 | static void drop_mm_ref(struct mm_struct *mm) | 1067 | static void xen_drop_mm_ref(struct mm_struct *mm) |
941 | { | 1068 | { |
942 | if (current->active_mm == mm) | 1069 | if (current->active_mm == mm) |
943 | load_cr3(swapper_pg_dir); | 1070 | load_cr3(swapper_pg_dir); |
@@ -961,14 +1088,77 @@ static void drop_mm_ref(struct mm_struct *mm) | |||
961 | void xen_exit_mmap(struct mm_struct *mm) | 1088 | void xen_exit_mmap(struct mm_struct *mm) |
962 | { | 1089 | { |
963 | get_cpu(); /* make sure we don't move around */ | 1090 | get_cpu(); /* make sure we don't move around */ |
964 | drop_mm_ref(mm); | 1091 | xen_drop_mm_ref(mm); |
965 | put_cpu(); | 1092 | put_cpu(); |
966 | 1093 | ||
967 | spin_lock(&mm->page_table_lock); | 1094 | spin_lock(&mm->page_table_lock); |
968 | 1095 | ||
969 | /* pgd may not be pinned in the error exit path of execve */ | 1096 | /* pgd may not be pinned in the error exit path of execve */ |
970 | if (page_pinned(mm->pgd)) | 1097 | if (xen_page_pinned(mm->pgd)) |
971 | xen_pgd_unpin(mm->pgd); | 1098 | xen_pgd_unpin(mm->pgd); |
972 | 1099 | ||
973 | spin_unlock(&mm->page_table_lock); | 1100 | spin_unlock(&mm->page_table_lock); |
974 | } | 1101 | } |
1102 | |||
1103 | #ifdef CONFIG_XEN_DEBUG_FS | ||
1104 | |||
1105 | static struct dentry *d_mmu_debug; | ||
1106 | |||
1107 | static int __init xen_mmu_debugfs(void) | ||
1108 | { | ||
1109 | struct dentry *d_xen = xen_init_debugfs(); | ||
1110 | |||
1111 | if (d_xen == NULL) | ||
1112 | return -ENOMEM; | ||
1113 | |||
1114 | d_mmu_debug = debugfs_create_dir("mmu", d_xen); | ||
1115 | |||
1116 | debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats); | ||
1117 | |||
1118 | debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update); | ||
1119 | debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug, | ||
1120 | &mmu_stats.pgd_update_pinned); | ||
1121 | debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug, | ||
1122 | &mmu_stats.pgd_update_pinned); | ||
1123 | |||
1124 | debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update); | ||
1125 | debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug, | ||
1126 | &mmu_stats.pud_update_pinned); | ||
1127 | debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug, | ||
1128 | &mmu_stats.pud_update_pinned); | ||
1129 | |||
1130 | debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update); | ||
1131 | debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug, | ||
1132 | &mmu_stats.pmd_update_pinned); | ||
1133 | debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug, | ||
1134 | &mmu_stats.pmd_update_pinned); | ||
1135 | |||
1136 | debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update); | ||
1137 | // debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug, | ||
1138 | // &mmu_stats.pte_update_pinned); | ||
1139 | debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug, | ||
1140 | &mmu_stats.pte_update_pinned); | ||
1141 | |||
1142 | debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update); | ||
1143 | debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug, | ||
1144 | &mmu_stats.mmu_update_extended); | ||
1145 | xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug, | ||
1146 | mmu_stats.mmu_update_histo, 20); | ||
1147 | |||
1148 | debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at); | ||
1149 | debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug, | ||
1150 | &mmu_stats.set_pte_at_batched); | ||
1151 | debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug, | ||
1152 | &mmu_stats.set_pte_at_current); | ||
1153 | debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug, | ||
1154 | &mmu_stats.set_pte_at_kernel); | ||
1155 | |||
1156 | debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit); | ||
1157 | debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, | ||
1158 | &mmu_stats.prot_commit_batched); | ||
1159 | |||
1160 | return 0; | ||
1161 | } | ||
1162 | fs_initcall(xen_mmu_debugfs); | ||
1163 | |||
1164 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 9efd1c6c9776..8ea8a0d0b0de 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -21,16 +21,20 @@ | |||
21 | */ | 21 | */ |
22 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
23 | #include <linux/hardirq.h> | 23 | #include <linux/hardirq.h> |
24 | #include <linux/debugfs.h> | ||
24 | 25 | ||
25 | #include <asm/xen/hypercall.h> | 26 | #include <asm/xen/hypercall.h> |
26 | 27 | ||
27 | #include "multicalls.h" | 28 | #include "multicalls.h" |
29 | #include "debugfs.h" | ||
30 | |||
31 | #define MC_BATCH 32 | ||
28 | 32 | ||
29 | #define MC_DEBUG 1 | 33 | #define MC_DEBUG 1 |
30 | 34 | ||
31 | #define MC_BATCH 32 | ||
32 | #define MC_ARGS (MC_BATCH * 16) | 35 | #define MC_ARGS (MC_BATCH * 16) |
33 | 36 | ||
37 | |||
34 | struct mc_buffer { | 38 | struct mc_buffer { |
35 | struct multicall_entry entries[MC_BATCH]; | 39 | struct multicall_entry entries[MC_BATCH]; |
36 | #if MC_DEBUG | 40 | #if MC_DEBUG |
@@ -47,6 +51,76 @@ struct mc_buffer { | |||
47 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); | 51 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); |
48 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); | 52 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); |
49 | 53 | ||
54 | /* flush reasons 0- slots, 1- args, 2- callbacks */ | ||
55 | enum flush_reasons | ||
56 | { | ||
57 | FL_SLOTS, | ||
58 | FL_ARGS, | ||
59 | FL_CALLBACKS, | ||
60 | |||
61 | FL_N_REASONS | ||
62 | }; | ||
63 | |||
64 | #ifdef CONFIG_XEN_DEBUG_FS | ||
65 | #define NHYPERCALLS 40 /* not really */ | ||
66 | |||
67 | static struct { | ||
68 | unsigned histo[MC_BATCH+1]; | ||
69 | |||
70 | unsigned issued; | ||
71 | unsigned arg_total; | ||
72 | unsigned hypercalls; | ||
73 | unsigned histo_hypercalls[NHYPERCALLS]; | ||
74 | |||
75 | unsigned flush[FL_N_REASONS]; | ||
76 | } mc_stats; | ||
77 | |||
78 | static u8 zero_stats; | ||
79 | |||
80 | static inline void check_zero(void) | ||
81 | { | ||
82 | if (unlikely(zero_stats)) { | ||
83 | memset(&mc_stats, 0, sizeof(mc_stats)); | ||
84 | zero_stats = 0; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | static void mc_add_stats(const struct mc_buffer *mc) | ||
89 | { | ||
90 | int i; | ||
91 | |||
92 | check_zero(); | ||
93 | |||
94 | mc_stats.issued++; | ||
95 | mc_stats.hypercalls += mc->mcidx; | ||
96 | mc_stats.arg_total += mc->argidx; | ||
97 | |||
98 | mc_stats.histo[mc->mcidx]++; | ||
99 | for(i = 0; i < mc->mcidx; i++) { | ||
100 | unsigned op = mc->entries[i].op; | ||
101 | if (op < NHYPERCALLS) | ||
102 | mc_stats.histo_hypercalls[op]++; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | static void mc_stats_flush(enum flush_reasons idx) | ||
107 | { | ||
108 | check_zero(); | ||
109 | |||
110 | mc_stats.flush[idx]++; | ||
111 | } | ||
112 | |||
113 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
114 | |||
115 | static inline void mc_add_stats(const struct mc_buffer *mc) | ||
116 | { | ||
117 | } | ||
118 | |||
119 | static inline void mc_stats_flush(enum flush_reasons idx) | ||
120 | { | ||
121 | } | ||
122 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
123 | |||
50 | void xen_mc_flush(void) | 124 | void xen_mc_flush(void) |
51 | { | 125 | { |
52 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 126 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
@@ -60,6 +134,8 @@ void xen_mc_flush(void) | |||
60 | something in the middle */ | 134 | something in the middle */ |
61 | local_irq_save(flags); | 135 | local_irq_save(flags); |
62 | 136 | ||
137 | mc_add_stats(b); | ||
138 | |||
63 | if (b->mcidx) { | 139 | if (b->mcidx) { |
64 | #if MC_DEBUG | 140 | #if MC_DEBUG |
65 | memcpy(b->debug, b->entries, | 141 | memcpy(b->debug, b->entries, |
@@ -115,6 +191,7 @@ struct multicall_space __xen_mc_entry(size_t args) | |||
115 | 191 | ||
116 | if (b->mcidx == MC_BATCH || | 192 | if (b->mcidx == MC_BATCH || |
117 | (argidx + args) > MC_ARGS) { | 193 | (argidx + args) > MC_ARGS) { |
194 | mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS); | ||
118 | xen_mc_flush(); | 195 | xen_mc_flush(); |
119 | argidx = roundup(b->argidx, sizeof(u64)); | 196 | argidx = roundup(b->argidx, sizeof(u64)); |
120 | } | 197 | } |
@@ -158,10 +235,44 @@ void xen_mc_callback(void (*fn)(void *), void *data) | |||
158 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 235 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
159 | struct callback *cb; | 236 | struct callback *cb; |
160 | 237 | ||
161 | if (b->cbidx == MC_BATCH) | 238 | if (b->cbidx == MC_BATCH) { |
239 | mc_stats_flush(FL_CALLBACKS); | ||
162 | xen_mc_flush(); | 240 | xen_mc_flush(); |
241 | } | ||
163 | 242 | ||
164 | cb = &b->callbacks[b->cbidx++]; | 243 | cb = &b->callbacks[b->cbidx++]; |
165 | cb->fn = fn; | 244 | cb->fn = fn; |
166 | cb->data = data; | 245 | cb->data = data; |
167 | } | 246 | } |
247 | |||
248 | #ifdef CONFIG_XEN_DEBUG_FS | ||
249 | |||
250 | static struct dentry *d_mc_debug; | ||
251 | |||
252 | static int __init xen_mc_debugfs(void) | ||
253 | { | ||
254 | struct dentry *d_xen = xen_init_debugfs(); | ||
255 | |||
256 | if (d_xen == NULL) | ||
257 | return -ENOMEM; | ||
258 | |||
259 | d_mc_debug = debugfs_create_dir("multicalls", d_xen); | ||
260 | |||
261 | debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats); | ||
262 | |||
263 | debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued); | ||
264 | debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls); | ||
265 | debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total); | ||
266 | |||
267 | xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug, | ||
268 | mc_stats.histo, MC_BATCH); | ||
269 | xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug, | ||
270 | mc_stats.histo_hypercalls, NHYPERCALLS); | ||
271 | xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug, | ||
272 | mc_stats.flush, FL_N_REASONS); | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | fs_initcall(xen_mc_debugfs); | ||
277 | |||
278 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d8faf79a0a1d..d77da613b1d2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -11,11 +11,8 @@ | |||
11 | * useful topology information for the kernel to make use of. As a | 11 | * useful topology information for the kernel to make use of. As a |
12 | * result, all CPUs are treated as if they're single-core and | 12 | * result, all CPUs are treated as if they're single-core and |
13 | * single-threaded. | 13 | * single-threaded. |
14 | * | ||
15 | * This does not handle HOTPLUG_CPU yet. | ||
16 | */ | 14 | */ |
17 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
18 | #include <linux/kernel_stat.h> | ||
19 | #include <linux/err.h> | 16 | #include <linux/err.h> |
20 | #include <linux/smp.h> | 17 | #include <linux/smp.h> |
21 | 18 | ||
@@ -36,8 +33,6 @@ | |||
36 | #include "xen-ops.h" | 33 | #include "xen-ops.h" |
37 | #include "mmu.h" | 34 | #include "mmu.h" |
38 | 35 | ||
39 | static void __cpuinit xen_init_lock_cpu(int cpu); | ||
40 | |||
41 | cpumask_t xen_cpu_initialized_map; | 36 | cpumask_t xen_cpu_initialized_map; |
42 | 37 | ||
43 | static DEFINE_PER_CPU(int, resched_irq); | 38 | static DEFINE_PER_CPU(int, resched_irq); |
@@ -64,11 +59,12 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | |||
64 | return IRQ_HANDLED; | 59 | return IRQ_HANDLED; |
65 | } | 60 | } |
66 | 61 | ||
67 | static __cpuinit void cpu_bringup_and_idle(void) | 62 | static __cpuinit void cpu_bringup(void) |
68 | { | 63 | { |
69 | int cpu = smp_processor_id(); | 64 | int cpu = smp_processor_id(); |
70 | 65 | ||
71 | cpu_init(); | 66 | cpu_init(); |
67 | touch_softlockup_watchdog(); | ||
72 | preempt_disable(); | 68 | preempt_disable(); |
73 | 69 | ||
74 | xen_enable_sysenter(); | 70 | xen_enable_sysenter(); |
@@ -89,6 +85,11 @@ static __cpuinit void cpu_bringup_and_idle(void) | |||
89 | local_irq_enable(); | 85 | local_irq_enable(); |
90 | 86 | ||
91 | wmb(); /* make sure everything is out */ | 87 | wmb(); /* make sure everything is out */ |
88 | } | ||
89 | |||
90 | static __cpuinit void cpu_bringup_and_idle(void) | ||
91 | { | ||
92 | cpu_bringup(); | ||
92 | cpu_idle(); | 93 | cpu_idle(); |
93 | } | 94 | } |
94 | 95 | ||
@@ -212,8 +213,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
212 | 213 | ||
213 | cpu_set(cpu, cpu_present_map); | 214 | cpu_set(cpu, cpu_present_map); |
214 | } | 215 | } |
215 | |||
216 | //init_xenbus_allowed_cpumask(); | ||
217 | } | 216 | } |
218 | 217 | ||
219 | static __cpuinit int | 218 | static __cpuinit int |
@@ -281,12 +280,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) | |||
281 | struct task_struct *idle = idle_task(cpu); | 280 | struct task_struct *idle = idle_task(cpu); |
282 | int rc; | 281 | int rc; |
283 | 282 | ||
284 | #if 0 | ||
285 | rc = cpu_up_check(cpu); | ||
286 | if (rc) | ||
287 | return rc; | ||
288 | #endif | ||
289 | |||
290 | #ifdef CONFIG_X86_64 | 283 | #ifdef CONFIG_X86_64 |
291 | /* Allocate node local memory for AP pdas */ | 284 | /* Allocate node local memory for AP pdas */ |
292 | WARN_ON(cpu == 0); | 285 | WARN_ON(cpu == 0); |
@@ -339,6 +332,60 @@ static void xen_smp_cpus_done(unsigned int max_cpus) | |||
339 | { | 332 | { |
340 | } | 333 | } |
341 | 334 | ||
335 | #ifdef CONFIG_HOTPLUG_CPU | ||
336 | static int xen_cpu_disable(void) | ||
337 | { | ||
338 | unsigned int cpu = smp_processor_id(); | ||
339 | if (cpu == 0) | ||
340 | return -EBUSY; | ||
341 | |||
342 | cpu_disable_common(); | ||
343 | |||
344 | load_cr3(swapper_pg_dir); | ||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | static void xen_cpu_die(unsigned int cpu) | ||
349 | { | ||
350 | while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { | ||
351 | current->state = TASK_UNINTERRUPTIBLE; | ||
352 | schedule_timeout(HZ/10); | ||
353 | } | ||
354 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | ||
355 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | ||
356 | unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); | ||
357 | unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); | ||
358 | xen_uninit_lock_cpu(cpu); | ||
359 | xen_teardown_timer(cpu); | ||
360 | |||
361 | if (num_online_cpus() == 1) | ||
362 | alternatives_smp_switch(0); | ||
363 | } | ||
364 | |||
365 | static void xen_play_dead(void) | ||
366 | { | ||
367 | play_dead_common(); | ||
368 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | ||
369 | cpu_bringup(); | ||
370 | } | ||
371 | |||
372 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
373 | static int xen_cpu_disable(void) | ||
374 | { | ||
375 | return -ENOSYS; | ||
376 | } | ||
377 | |||
378 | static void xen_cpu_die(unsigned int cpu) | ||
379 | { | ||
380 | BUG(); | ||
381 | } | ||
382 | |||
383 | static void xen_play_dead(void) | ||
384 | { | ||
385 | BUG(); | ||
386 | } | ||
387 | |||
388 | #endif | ||
342 | static void stop_self(void *v) | 389 | static void stop_self(void *v) |
343 | { | 390 | { |
344 | int cpu = smp_processor_id(); | 391 | int cpu = smp_processor_id(); |
@@ -419,176 +466,16 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) | |||
419 | return IRQ_HANDLED; | 466 | return IRQ_HANDLED; |
420 | } | 467 | } |
421 | 468 | ||
422 | struct xen_spinlock { | ||
423 | unsigned char lock; /* 0 -> free; 1 -> locked */ | ||
424 | unsigned short spinners; /* count of waiting cpus */ | ||
425 | }; | ||
426 | |||
427 | static int xen_spin_is_locked(struct raw_spinlock *lock) | ||
428 | { | ||
429 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
430 | |||
431 | return xl->lock != 0; | ||
432 | } | ||
433 | |||
434 | static int xen_spin_is_contended(struct raw_spinlock *lock) | ||
435 | { | ||
436 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
437 | |||
438 | /* Not strictly true; this is only the count of contended | ||
439 | lock-takers entering the slow path. */ | ||
440 | return xl->spinners != 0; | ||
441 | } | ||
442 | |||
443 | static int xen_spin_trylock(struct raw_spinlock *lock) | ||
444 | { | ||
445 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
446 | u8 old = 1; | ||
447 | |||
448 | asm("xchgb %b0,%1" | ||
449 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
450 | |||
451 | return old == 0; | ||
452 | } | ||
453 | |||
454 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | ||
455 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | ||
456 | |||
457 | static inline void spinning_lock(struct xen_spinlock *xl) | ||
458 | { | ||
459 | __get_cpu_var(lock_spinners) = xl; | ||
460 | wmb(); /* set lock of interest before count */ | ||
461 | asm(LOCK_PREFIX " incw %0" | ||
462 | : "+m" (xl->spinners) : : "memory"); | ||
463 | } | ||
464 | |||
465 | static inline void unspinning_lock(struct xen_spinlock *xl) | ||
466 | { | ||
467 | asm(LOCK_PREFIX " decw %0" | ||
468 | : "+m" (xl->spinners) : : "memory"); | ||
469 | wmb(); /* decrement count before clearing lock */ | ||
470 | __get_cpu_var(lock_spinners) = NULL; | ||
471 | } | ||
472 | |||
473 | static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) | ||
474 | { | ||
475 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
476 | int irq = __get_cpu_var(lock_kicker_irq); | ||
477 | int ret; | ||
478 | |||
479 | /* If kicker interrupts not initialized yet, just spin */ | ||
480 | if (irq == -1) | ||
481 | return 0; | ||
482 | |||
483 | /* announce we're spinning */ | ||
484 | spinning_lock(xl); | ||
485 | |||
486 | /* clear pending */ | ||
487 | xen_clear_irq_pending(irq); | ||
488 | |||
489 | /* check again make sure it didn't become free while | ||
490 | we weren't looking */ | ||
491 | ret = xen_spin_trylock(lock); | ||
492 | if (ret) | ||
493 | goto out; | ||
494 | |||
495 | /* block until irq becomes pending */ | ||
496 | xen_poll_irq(irq); | ||
497 | kstat_this_cpu.irqs[irq]++; | ||
498 | |||
499 | out: | ||
500 | unspinning_lock(xl); | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | static void xen_spin_lock(struct raw_spinlock *lock) | ||
505 | { | ||
506 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
507 | int timeout; | ||
508 | u8 oldval; | ||
509 | |||
510 | do { | ||
511 | timeout = 1 << 10; | ||
512 | |||
513 | asm("1: xchgb %1,%0\n" | ||
514 | " testb %1,%1\n" | ||
515 | " jz 3f\n" | ||
516 | "2: rep;nop\n" | ||
517 | " cmpb $0,%0\n" | ||
518 | " je 1b\n" | ||
519 | " dec %2\n" | ||
520 | " jnz 2b\n" | ||
521 | "3:\n" | ||
522 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
523 | : "1" (1) | ||
524 | : "memory"); | ||
525 | |||
526 | } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); | ||
527 | } | ||
528 | |||
529 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | ||
530 | { | ||
531 | int cpu; | ||
532 | |||
533 | for_each_online_cpu(cpu) { | ||
534 | /* XXX should mix up next cpu selection */ | ||
535 | if (per_cpu(lock_spinners, cpu) == xl) { | ||
536 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | ||
537 | break; | ||
538 | } | ||
539 | } | ||
540 | } | ||
541 | |||
542 | static void xen_spin_unlock(struct raw_spinlock *lock) | ||
543 | { | ||
544 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
545 | |||
546 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
547 | xl->lock = 0; /* release lock */ | ||
548 | |||
549 | /* make sure unlock happens before kick */ | ||
550 | barrier(); | ||
551 | |||
552 | if (unlikely(xl->spinners)) | ||
553 | xen_spin_unlock_slow(xl); | ||
554 | } | ||
555 | |||
556 | static __cpuinit void xen_init_lock_cpu(int cpu) | ||
557 | { | ||
558 | int irq; | ||
559 | const char *name; | ||
560 | |||
561 | name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); | ||
562 | irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, | ||
563 | cpu, | ||
564 | xen_reschedule_interrupt, | ||
565 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
566 | name, | ||
567 | NULL); | ||
568 | |||
569 | if (irq >= 0) { | ||
570 | disable_irq(irq); /* make sure it's never delivered */ | ||
571 | per_cpu(lock_kicker_irq, cpu) = irq; | ||
572 | } | ||
573 | |||
574 | printk("cpu %d spinlock event irq %d\n", cpu, irq); | ||
575 | } | ||
576 | |||
577 | static void __init xen_init_spinlocks(void) | ||
578 | { | ||
579 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | ||
580 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | ||
581 | pv_lock_ops.spin_lock = xen_spin_lock; | ||
582 | pv_lock_ops.spin_trylock = xen_spin_trylock; | ||
583 | pv_lock_ops.spin_unlock = xen_spin_unlock; | ||
584 | } | ||
585 | |||
586 | static const struct smp_ops xen_smp_ops __initdata = { | 469 | static const struct smp_ops xen_smp_ops __initdata = { |
587 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | 470 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, |
588 | .smp_prepare_cpus = xen_smp_prepare_cpus, | 471 | .smp_prepare_cpus = xen_smp_prepare_cpus, |
589 | .cpu_up = xen_cpu_up, | ||
590 | .smp_cpus_done = xen_smp_cpus_done, | 472 | .smp_cpus_done = xen_smp_cpus_done, |
591 | 473 | ||
474 | .cpu_up = xen_cpu_up, | ||
475 | .cpu_die = xen_cpu_die, | ||
476 | .cpu_disable = xen_cpu_disable, | ||
477 | .play_dead = xen_play_dead, | ||
478 | |||
592 | .smp_send_stop = xen_smp_send_stop, | 479 | .smp_send_stop = xen_smp_send_stop, |
593 | .smp_send_reschedule = xen_smp_send_reschedule, | 480 | .smp_send_reschedule = xen_smp_send_reschedule, |
594 | 481 | ||
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c new file mode 100644 index 000000000000..dd71e3a021cd --- /dev/null +++ b/arch/x86/xen/spinlock.c | |||
@@ -0,0 +1,428 @@ | |||
1 | /* | ||
2 | * Split spinlock implementation out into its own file, so it can be | ||
3 | * compiled in a FTRACE-compatible way. | ||
4 | */ | ||
5 | #include <linux/kernel_stat.h> | ||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/debugfs.h> | ||
8 | #include <linux/log2.h> | ||
9 | |||
10 | #include <asm/paravirt.h> | ||
11 | |||
12 | #include <xen/interface/xen.h> | ||
13 | #include <xen/events.h> | ||
14 | |||
15 | #include "xen-ops.h" | ||
16 | #include "debugfs.h" | ||
17 | |||
18 | #ifdef CONFIG_XEN_DEBUG_FS | ||
19 | static struct xen_spinlock_stats | ||
20 | { | ||
21 | u64 taken; | ||
22 | u32 taken_slow; | ||
23 | u32 taken_slow_nested; | ||
24 | u32 taken_slow_pickup; | ||
25 | u32 taken_slow_spurious; | ||
26 | u32 taken_slow_irqenable; | ||
27 | |||
28 | u64 released; | ||
29 | u32 released_slow; | ||
30 | u32 released_slow_kicked; | ||
31 | |||
32 | #define HISTO_BUCKETS 30 | ||
33 | u32 histo_spin_total[HISTO_BUCKETS+1]; | ||
34 | u32 histo_spin_spinning[HISTO_BUCKETS+1]; | ||
35 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | ||
36 | |||
37 | u64 time_total; | ||
38 | u64 time_spinning; | ||
39 | u64 time_blocked; | ||
40 | } spinlock_stats; | ||
41 | |||
42 | static u8 zero_stats; | ||
43 | |||
44 | static unsigned lock_timeout = 1 << 10; | ||
45 | #define TIMEOUT lock_timeout | ||
46 | |||
47 | static inline void check_zero(void) | ||
48 | { | ||
49 | if (unlikely(zero_stats)) { | ||
50 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
51 | zero_stats = 0; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | #define ADD_STATS(elem, val) \ | ||
56 | do { check_zero(); spinlock_stats.elem += (val); } while(0) | ||
57 | |||
58 | static inline u64 spin_time_start(void) | ||
59 | { | ||
60 | return xen_clocksource_read(); | ||
61 | } | ||
62 | |||
63 | static void __spin_time_accum(u64 delta, u32 *array) | ||
64 | { | ||
65 | unsigned index = ilog2(delta); | ||
66 | |||
67 | check_zero(); | ||
68 | |||
69 | if (index < HISTO_BUCKETS) | ||
70 | array[index]++; | ||
71 | else | ||
72 | array[HISTO_BUCKETS]++; | ||
73 | } | ||
74 | |||
75 | static inline void spin_time_accum_spinning(u64 start) | ||
76 | { | ||
77 | u32 delta = xen_clocksource_read() - start; | ||
78 | |||
79 | __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); | ||
80 | spinlock_stats.time_spinning += delta; | ||
81 | } | ||
82 | |||
83 | static inline void spin_time_accum_total(u64 start) | ||
84 | { | ||
85 | u32 delta = xen_clocksource_read() - start; | ||
86 | |||
87 | __spin_time_accum(delta, spinlock_stats.histo_spin_total); | ||
88 | spinlock_stats.time_total += delta; | ||
89 | } | ||
90 | |||
91 | static inline void spin_time_accum_blocked(u64 start) | ||
92 | { | ||
93 | u32 delta = xen_clocksource_read() - start; | ||
94 | |||
95 | __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); | ||
96 | spinlock_stats.time_blocked += delta; | ||
97 | } | ||
98 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
99 | #define TIMEOUT (1 << 10) | ||
100 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | ||
101 | |||
102 | static inline u64 spin_time_start(void) | ||
103 | { | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | static inline void spin_time_accum_total(u64 start) | ||
108 | { | ||
109 | } | ||
110 | static inline void spin_time_accum_spinning(u64 start) | ||
111 | { | ||
112 | } | ||
113 | static inline void spin_time_accum_blocked(u64 start) | ||
114 | { | ||
115 | } | ||
116 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
117 | |||
118 | struct xen_spinlock { | ||
119 | unsigned char lock; /* 0 -> free; 1 -> locked */ | ||
120 | unsigned short spinners; /* count of waiting cpus */ | ||
121 | }; | ||
122 | |||
123 | static int xen_spin_is_locked(struct raw_spinlock *lock) | ||
124 | { | ||
125 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
126 | |||
127 | return xl->lock != 0; | ||
128 | } | ||
129 | |||
130 | static int xen_spin_is_contended(struct raw_spinlock *lock) | ||
131 | { | ||
132 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
133 | |||
134 | /* Not strictly true; this is only the count of contended | ||
135 | lock-takers entering the slow path. */ | ||
136 | return xl->spinners != 0; | ||
137 | } | ||
138 | |||
139 | static int xen_spin_trylock(struct raw_spinlock *lock) | ||
140 | { | ||
141 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
142 | u8 old = 1; | ||
143 | |||
144 | asm("xchgb %b0,%1" | ||
145 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
146 | |||
147 | return old == 0; | ||
148 | } | ||
149 | |||
150 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | ||
151 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | ||
152 | |||
153 | /* | ||
154 | * Mark a cpu as interested in a lock. Returns the CPU's previous | ||
155 | * lock of interest, in case we got preempted by an interrupt. | ||
156 | */ | ||
157 | static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | ||
158 | { | ||
159 | struct xen_spinlock *prev; | ||
160 | |||
161 | prev = __get_cpu_var(lock_spinners); | ||
162 | __get_cpu_var(lock_spinners) = xl; | ||
163 | |||
164 | wmb(); /* set lock of interest before count */ | ||
165 | |||
166 | asm(LOCK_PREFIX " incw %0" | ||
167 | : "+m" (xl->spinners) : : "memory"); | ||
168 | |||
169 | return prev; | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Mark a cpu as no longer interested in a lock. Restores previous | ||
174 | * lock of interest (NULL for none). | ||
175 | */ | ||
176 | static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) | ||
177 | { | ||
178 | asm(LOCK_PREFIX " decw %0" | ||
179 | : "+m" (xl->spinners) : : "memory"); | ||
180 | wmb(); /* decrement count before restoring lock */ | ||
181 | __get_cpu_var(lock_spinners) = prev; | ||
182 | } | ||
183 | |||
184 | static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable) | ||
185 | { | ||
186 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
187 | struct xen_spinlock *prev; | ||
188 | int irq = __get_cpu_var(lock_kicker_irq); | ||
189 | int ret; | ||
190 | unsigned long flags; | ||
191 | u64 start; | ||
192 | |||
193 | /* If kicker interrupts not initialized yet, just spin */ | ||
194 | if (irq == -1) | ||
195 | return 0; | ||
196 | |||
197 | start = spin_time_start(); | ||
198 | |||
199 | /* announce we're spinning */ | ||
200 | prev = spinning_lock(xl); | ||
201 | |||
202 | flags = __raw_local_save_flags(); | ||
203 | if (irq_enable) { | ||
204 | ADD_STATS(taken_slow_irqenable, 1); | ||
205 | raw_local_irq_enable(); | ||
206 | } | ||
207 | |||
208 | ADD_STATS(taken_slow, 1); | ||
209 | ADD_STATS(taken_slow_nested, prev != NULL); | ||
210 | |||
211 | do { | ||
212 | /* clear pending */ | ||
213 | xen_clear_irq_pending(irq); | ||
214 | |||
215 | /* check again make sure it didn't become free while | ||
216 | we weren't looking */ | ||
217 | ret = xen_spin_trylock(lock); | ||
218 | if (ret) { | ||
219 | ADD_STATS(taken_slow_pickup, 1); | ||
220 | |||
221 | /* | ||
222 | * If we interrupted another spinlock while it | ||
223 | * was blocking, make sure it doesn't block | ||
224 | * without rechecking the lock. | ||
225 | */ | ||
226 | if (prev != NULL) | ||
227 | xen_set_irq_pending(irq); | ||
228 | goto out; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * Block until irq becomes pending. If we're | ||
233 | * interrupted at this point (after the trylock but | ||
234 | * before entering the block), then the nested lock | ||
235 | * handler guarantees that the irq will be left | ||
236 | * pending if there's any chance the lock became free; | ||
237 | * xen_poll_irq() returns immediately if the irq is | ||
238 | * pending. | ||
239 | */ | ||
240 | xen_poll_irq(irq); | ||
241 | ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); | ||
242 | } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ | ||
243 | |||
244 | kstat_this_cpu.irqs[irq]++; | ||
245 | |||
246 | out: | ||
247 | raw_local_irq_restore(flags); | ||
248 | unspinning_lock(xl, prev); | ||
249 | spin_time_accum_blocked(start); | ||
250 | |||
251 | return ret; | ||
252 | } | ||
253 | |||
254 | static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable) | ||
255 | { | ||
256 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
257 | unsigned timeout; | ||
258 | u8 oldval; | ||
259 | u64 start_spin; | ||
260 | |||
261 | ADD_STATS(taken, 1); | ||
262 | |||
263 | start_spin = spin_time_start(); | ||
264 | |||
265 | do { | ||
266 | u64 start_spin_fast = spin_time_start(); | ||
267 | |||
268 | timeout = TIMEOUT; | ||
269 | |||
270 | asm("1: xchgb %1,%0\n" | ||
271 | " testb %1,%1\n" | ||
272 | " jz 3f\n" | ||
273 | "2: rep;nop\n" | ||
274 | " cmpb $0,%0\n" | ||
275 | " je 1b\n" | ||
276 | " dec %2\n" | ||
277 | " jnz 2b\n" | ||
278 | "3:\n" | ||
279 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
280 | : "1" (1) | ||
281 | : "memory"); | ||
282 | |||
283 | spin_time_accum_spinning(start_spin_fast); | ||
284 | |||
285 | } while (unlikely(oldval != 0 && | ||
286 | (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); | ||
287 | |||
288 | spin_time_accum_total(start_spin); | ||
289 | } | ||
290 | |||
291 | static void xen_spin_lock(struct raw_spinlock *lock) | ||
292 | { | ||
293 | __xen_spin_lock(lock, false); | ||
294 | } | ||
295 | |||
296 | static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) | ||
297 | { | ||
298 | __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); | ||
299 | } | ||
300 | |||
301 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | ||
302 | { | ||
303 | int cpu; | ||
304 | |||
305 | ADD_STATS(released_slow, 1); | ||
306 | |||
307 | for_each_online_cpu(cpu) { | ||
308 | /* XXX should mix up next cpu selection */ | ||
309 | if (per_cpu(lock_spinners, cpu) == xl) { | ||
310 | ADD_STATS(released_slow_kicked, 1); | ||
311 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | ||
312 | break; | ||
313 | } | ||
314 | } | ||
315 | } | ||
316 | |||
317 | static void xen_spin_unlock(struct raw_spinlock *lock) | ||
318 | { | ||
319 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
320 | |||
321 | ADD_STATS(released, 1); | ||
322 | |||
323 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
324 | xl->lock = 0; /* release lock */ | ||
325 | |||
326 | /* make sure unlock happens before kick */ | ||
327 | barrier(); | ||
328 | |||
329 | if (unlikely(xl->spinners)) | ||
330 | xen_spin_unlock_slow(xl); | ||
331 | } | ||
332 | |||
333 | static irqreturn_t dummy_handler(int irq, void *dev_id) | ||
334 | { | ||
335 | BUG(); | ||
336 | return IRQ_HANDLED; | ||
337 | } | ||
338 | |||
339 | void __cpuinit xen_init_lock_cpu(int cpu) | ||
340 | { | ||
341 | int irq; | ||
342 | const char *name; | ||
343 | |||
344 | name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); | ||
345 | irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, | ||
346 | cpu, | ||
347 | dummy_handler, | ||
348 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
349 | name, | ||
350 | NULL); | ||
351 | |||
352 | if (irq >= 0) { | ||
353 | disable_irq(irq); /* make sure it's never delivered */ | ||
354 | per_cpu(lock_kicker_irq, cpu) = irq; | ||
355 | } | ||
356 | |||
357 | printk("cpu %d spinlock event irq %d\n", cpu, irq); | ||
358 | } | ||
359 | |||
360 | void xen_uninit_lock_cpu(int cpu) | ||
361 | { | ||
362 | unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); | ||
363 | } | ||
364 | |||
365 | void __init xen_init_spinlocks(void) | ||
366 | { | ||
367 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | ||
368 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | ||
369 | pv_lock_ops.spin_lock = xen_spin_lock; | ||
370 | pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; | ||
371 | pv_lock_ops.spin_trylock = xen_spin_trylock; | ||
372 | pv_lock_ops.spin_unlock = xen_spin_unlock; | ||
373 | } | ||
374 | |||
375 | #ifdef CONFIG_XEN_DEBUG_FS | ||
376 | |||
377 | static struct dentry *d_spin_debug; | ||
378 | |||
379 | static int __init xen_spinlock_debugfs(void) | ||
380 | { | ||
381 | struct dentry *d_xen = xen_init_debugfs(); | ||
382 | |||
383 | if (d_xen == NULL) | ||
384 | return -ENOMEM; | ||
385 | |||
386 | d_spin_debug = debugfs_create_dir("spinlocks", d_xen); | ||
387 | |||
388 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | ||
389 | |||
390 | debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout); | ||
391 | |||
392 | debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken); | ||
393 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | ||
394 | &spinlock_stats.taken_slow); | ||
395 | debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug, | ||
396 | &spinlock_stats.taken_slow_nested); | ||
397 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | ||
398 | &spinlock_stats.taken_slow_pickup); | ||
399 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, | ||
400 | &spinlock_stats.taken_slow_spurious); | ||
401 | debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug, | ||
402 | &spinlock_stats.taken_slow_irqenable); | ||
403 | |||
404 | debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released); | ||
405 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | ||
406 | &spinlock_stats.released_slow); | ||
407 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | ||
408 | &spinlock_stats.released_slow_kicked); | ||
409 | |||
410 | debugfs_create_u64("time_spinning", 0444, d_spin_debug, | ||
411 | &spinlock_stats.time_spinning); | ||
412 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | ||
413 | &spinlock_stats.time_blocked); | ||
414 | debugfs_create_u64("time_total", 0444, d_spin_debug, | ||
415 | &spinlock_stats.time_total); | ||
416 | |||
417 | xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug, | ||
418 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); | ||
419 | xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, | ||
420 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); | ||
421 | xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | ||
422 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | ||
423 | |||
424 | return 0; | ||
425 | } | ||
426 | fs_initcall(xen_spinlock_debugfs); | ||
427 | |||
428 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 685b77470fc3..004ba86326ae 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -30,8 +30,6 @@ | |||
30 | #define TIMER_SLOP 100000 | 30 | #define TIMER_SLOP 100000 |
31 | #define NS_PER_TICK (1000000000LL / HZ) | 31 | #define NS_PER_TICK (1000000000LL / HZ) |
32 | 32 | ||
33 | static cycle_t xen_clocksource_read(void); | ||
34 | |||
35 | /* runstate info updated by Xen */ | 33 | /* runstate info updated by Xen */ |
36 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); | 34 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); |
37 | 35 | ||
@@ -213,7 +211,7 @@ unsigned long xen_tsc_khz(void) | |||
213 | return xen_khz; | 211 | return xen_khz; |
214 | } | 212 | } |
215 | 213 | ||
216 | static cycle_t xen_clocksource_read(void) | 214 | cycle_t xen_clocksource_read(void) |
217 | { | 215 | { |
218 | struct pvclock_vcpu_time_info *src; | 216 | struct pvclock_vcpu_time_info *src; |
219 | cycle_t ret; | 217 | cycle_t ret; |
@@ -452,6 +450,14 @@ void xen_setup_timer(int cpu) | |||
452 | setup_runstate_info(cpu); | 450 | setup_runstate_info(cpu); |
453 | } | 451 | } |
454 | 452 | ||
453 | void xen_teardown_timer(int cpu) | ||
454 | { | ||
455 | struct clock_event_device *evt; | ||
456 | BUG_ON(cpu == 0); | ||
457 | evt = &per_cpu(xen_clock_events, cpu); | ||
458 | unbind_from_irqhandler(evt->irq, NULL); | ||
459 | } | ||
460 | |||
455 | void xen_setup_cpu_clockevents(void) | 461 | void xen_setup_cpu_clockevents(void) |
456 | { | 462 | { |
457 | BUG_ON(preemptible()); | 463 | BUG_ON(preemptible()); |
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 2497a30f41de..42786f59d9c0 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S | |||
@@ -298,7 +298,7 @@ check_events: | |||
298 | push %eax | 298 | push %eax |
299 | push %ecx | 299 | push %ecx |
300 | push %edx | 300 | push %edx |
301 | call force_evtchn_callback | 301 | call xen_force_evtchn_callback |
302 | pop %edx | 302 | pop %edx |
303 | pop %ecx | 303 | pop %ecx |
304 | pop %eax | 304 | pop %eax |
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 7f58304fafb3..3b9bda46487a 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S | |||
@@ -122,7 +122,7 @@ check_events: | |||
122 | push %r9 | 122 | push %r9 |
123 | push %r10 | 123 | push %r10 |
124 | push %r11 | 124 | push %r11 |
125 | call force_evtchn_callback | 125 | call xen_force_evtchn_callback |
126 | pop %r11 | 126 | pop %r11 |
127 | pop %r10 | 127 | pop %r10 |
128 | pop %r9 | 128 | pop %r9 |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index dd3c23152a2e..d7422dc2a55c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define XEN_OPS_H | 2 | #define XEN_OPS_H |
3 | 3 | ||
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | #include <linux/clocksource.h> | ||
5 | #include <linux/irqreturn.h> | 6 | #include <linux/irqreturn.h> |
6 | #include <xen/xen-ops.h> | 7 | #include <xen/xen-ops.h> |
7 | 8 | ||
@@ -31,7 +32,10 @@ void xen_vcpu_restore(void); | |||
31 | 32 | ||
32 | void __init xen_build_dynamic_phys_to_machine(void); | 33 | void __init xen_build_dynamic_phys_to_machine(void); |
33 | 34 | ||
35 | void xen_init_irq_ops(void); | ||
34 | void xen_setup_timer(int cpu); | 36 | void xen_setup_timer(int cpu); |
37 | void xen_teardown_timer(int cpu); | ||
38 | cycle_t xen_clocksource_read(void); | ||
35 | void xen_setup_cpu_clockevents(void); | 39 | void xen_setup_cpu_clockevents(void); |
36 | unsigned long xen_tsc_khz(void); | 40 | unsigned long xen_tsc_khz(void); |
37 | void __init xen_time_init(void); | 41 | void __init xen_time_init(void); |
@@ -50,6 +54,10 @@ void __init xen_setup_vcpu_info_placement(void); | |||
50 | #ifdef CONFIG_SMP | 54 | #ifdef CONFIG_SMP |
51 | void xen_smp_init(void); | 55 | void xen_smp_init(void); |
52 | 56 | ||
57 | void __init xen_init_spinlocks(void); | ||
58 | __cpuinit void xen_init_lock_cpu(int cpu); | ||
59 | void xen_uninit_lock_cpu(int cpu); | ||
60 | |||
53 | extern cpumask_t xen_cpu_initialized_map; | 61 | extern cpumask_t xen_cpu_initialized_map; |
54 | #else | 62 | #else |
55 | static inline void xen_smp_init(void) {} | 63 | static inline void xen_smp_init(void) {} |