aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 13:23:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 13:23:49 -0400
commitb8cb48aae1b8c50b37dcb7710363aa69a7a0d9ca (patch)
treec7b04bfc8ece8b0121a9030c7f49e7bf9bf570d1 /arch/x86/xen
parent0cc6d77e55eca9557bbe41bf2db94b31aa8fcb2a (diff)
parent78c86e5e5691fc84d5fbea0cd4ac7147e87b7490 (diff)
Merge branch 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: x86: split __phys_addr out into separate file xen: use stronger barrier after unlocking lock xen: only enable interrupts while actually blocking for spinlock xen: make -fstack-protector work under Xen
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/enlighten.c131
-rw-r--r--arch/x86/xen/smp.c1
-rw-r--r--arch/x86/xen/spinlock.c28
4 files changed, 133 insertions, 29 deletions
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 7410640db173..3bb4fc21f4f2 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -8,6 +8,7 @@ endif
8# Make sure early boot has no stackprotector 8# Make sure early boot has no stackprotector
9nostackp := $(call cc-option, -fno-stack-protector) 9nostackp := $(call cc-option, -fno-stack-protector)
10CFLAGS_enlighten.o := $(nostackp) 10CFLAGS_enlighten.o := $(nostackp)
11CFLAGS_mmu.o := $(nostackp)
11 12
12obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ 13obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
13 time.o xen-asm.o xen-asm_$(BITS).o \ 14 time.o xen-asm.o xen-asm_$(BITS).o \
@@ -16,3 +17,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
16obj-$(CONFIG_SMP) += smp.o 17obj-$(CONFIG_SMP) += smp.o
17obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o 18obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
18obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o 19obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
20
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b62ccb840cfb..0dd0c2c6cae0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -51,6 +51,7 @@
51#include <asm/pgtable.h> 51#include <asm/pgtable.h>
52#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
53#include <asm/reboot.h> 53#include <asm/reboot.h>
54#include <asm/stackprotector.h>
54 55
55#include "xen-ops.h" 56#include "xen-ops.h"
56#include "mmu.h" 57#include "mmu.h"
@@ -330,18 +331,28 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
330 unsigned long frames[pages]; 331 unsigned long frames[pages];
331 int f; 332 int f;
332 333
333 /* A GDT can be up to 64k in size, which corresponds to 8192 334 /*
334 8-byte entries, or 16 4k pages.. */ 335 * A GDT can be up to 64k in size, which corresponds to 8192
336 * 8-byte entries, or 16 4k pages..
337 */
335 338
336 BUG_ON(size > 65536); 339 BUG_ON(size > 65536);
337 BUG_ON(va & ~PAGE_MASK); 340 BUG_ON(va & ~PAGE_MASK);
338 341
339 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { 342 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
340 int level; 343 int level;
341 pte_t *ptep = lookup_address(va, &level); 344 pte_t *ptep;
342 unsigned long pfn, mfn; 345 unsigned long pfn, mfn;
343 void *virt; 346 void *virt;
344 347
348 /*
349 * The GDT is per-cpu and is in the percpu data area.
350 * That can be virtually mapped, so we need to do a
351 * page-walk to get the underlying MFN for the
352 * hypercall. The page can also be in the kernel's
353 * linear range, so we need to RO that mapping too.
354 */
355 ptep = lookup_address(va, &level);
345 BUG_ON(ptep == NULL); 356 BUG_ON(ptep == NULL);
346 357
347 pfn = pte_pfn(*ptep); 358 pfn = pte_pfn(*ptep);
@@ -358,6 +369,44 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
358 BUG(); 369 BUG();
359} 370}
360 371
372/*
373 * load_gdt for early boot, when the gdt is only mapped once
374 */
375static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
376{
377 unsigned long va = dtr->address;
378 unsigned int size = dtr->size + 1;
379 unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
380 unsigned long frames[pages];
381 int f;
382
383 /*
384 * A GDT can be up to 64k in size, which corresponds to 8192
385 * 8-byte entries, or 16 4k pages..
386 */
387
388 BUG_ON(size > 65536);
389 BUG_ON(va & ~PAGE_MASK);
390
391 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
392 pte_t pte;
393 unsigned long pfn, mfn;
394
395 pfn = virt_to_pfn(va);
396 mfn = pfn_to_mfn(pfn);
397
398 pte = pfn_pte(pfn, PAGE_KERNEL_RO);
399
400 if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
401 BUG();
402
403 frames[f] = mfn;
404 }
405
406 if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
407 BUG();
408}
409
361static void load_TLS_descriptor(struct thread_struct *t, 410static void load_TLS_descriptor(struct thread_struct *t,
362 unsigned int cpu, unsigned int i) 411 unsigned int cpu, unsigned int i)
363{ 412{
@@ -581,6 +630,29 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
581 preempt_enable(); 630 preempt_enable();
582} 631}
583 632
633/*
634 * Version of write_gdt_entry for use at early boot-time needed to
635 * update an entry as simply as possible.
636 */
637static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
638 const void *desc, int type)
639{
640 switch (type) {
641 case DESC_LDT:
642 case DESC_TSS:
643 /* ignore */
644 break;
645
646 default: {
647 xmaddr_t maddr = virt_to_machine(&dt[entry]);
648
649 if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
650 dt[entry] = *(struct desc_struct *)desc;
651 }
652
653 }
654}
655
584static void xen_load_sp0(struct tss_struct *tss, 656static void xen_load_sp0(struct tss_struct *tss,
585 struct thread_struct *thread) 657 struct thread_struct *thread)
586{ 658{
@@ -965,6 +1037,23 @@ static const struct machine_ops __initdata xen_machine_ops = {
965 .emergency_restart = xen_emergency_restart, 1037 .emergency_restart = xen_emergency_restart,
966}; 1038};
967 1039
1040/*
1041 * Set up the GDT and segment registers for -fstack-protector. Until
1042 * we do this, we have to be careful not to call any stack-protected
1043 * function, which is most of the kernel.
1044 */
1045static void __init xen_setup_stackprotector(void)
1046{
1047 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
1048 pv_cpu_ops.load_gdt = xen_load_gdt_boot;
1049
1050 setup_stack_canary_segment(0);
1051 switch_to_new_gdt(0);
1052
1053 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
1054 pv_cpu_ops.load_gdt = xen_load_gdt;
1055}
1056
968/* First C function to be called on Xen boot */ 1057/* First C function to be called on Xen boot */
969asmlinkage void __init xen_start_kernel(void) 1058asmlinkage void __init xen_start_kernel(void)
970{ 1059{
@@ -983,13 +1072,28 @@ asmlinkage void __init xen_start_kernel(void)
983 pv_apic_ops = xen_apic_ops; 1072 pv_apic_ops = xen_apic_ops;
984 pv_mmu_ops = xen_mmu_ops; 1073 pv_mmu_ops = xen_mmu_ops;
985 1074
986#ifdef CONFIG_X86_64
987 /* 1075 /*
988 * Setup percpu state. We only need to do this for 64-bit 1076 * Set up some pagetable state before starting to set any ptes.
989 * because 32-bit already has %fs set properly.
990 */ 1077 */
991 load_percpu_segment(0); 1078
992#endif 1079 /* Prevent unwanted bits from being set in PTEs. */
1080 __supported_pte_mask &= ~_PAGE_GLOBAL;
1081 if (!xen_initial_domain())
1082 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1083
1084 __supported_pte_mask |= _PAGE_IOMAP;
1085
1086 xen_setup_features();
1087
1088 /* Get mfn list */
1089 if (!xen_feature(XENFEAT_auto_translated_physmap))
1090 xen_build_dynamic_phys_to_machine();
1091
1092 /*
1093 * Set up kernel GDT and segment registers, mainly so that
1094 * -fstack-protector code can be executed.
1095 */
1096 xen_setup_stackprotector();
993 1097
994 xen_init_irq_ops(); 1098 xen_init_irq_ops();
995 xen_init_cpuid_mask(); 1099 xen_init_cpuid_mask();
@@ -1001,8 +1105,6 @@ asmlinkage void __init xen_start_kernel(void)
1001 set_xen_basic_apic_ops(); 1105 set_xen_basic_apic_ops();
1002#endif 1106#endif
1003 1107
1004 xen_setup_features();
1005
1006 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1108 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1007 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1109 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1008 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1110 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
@@ -1019,17 +1121,8 @@ asmlinkage void __init xen_start_kernel(void)
1019 1121
1020 xen_smp_init(); 1122 xen_smp_init();
1021 1123
1022 /* Get mfn list */
1023 if (!xen_feature(XENFEAT_auto_translated_physmap))
1024 xen_build_dynamic_phys_to_machine();
1025
1026 pgd = (pgd_t *)xen_start_info->pt_base; 1124 pgd = (pgd_t *)xen_start_info->pt_base;
1027 1125
1028 /* Prevent unwanted bits from being set in PTEs. */
1029 __supported_pte_mask &= ~_PAGE_GLOBAL;
1030 if (!xen_initial_domain())
1031 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1032
1033#ifdef CONFIG_X86_64 1126#ifdef CONFIG_X86_64
1034 /* Work out if we support NX */ 1127 /* Work out if we support NX */
1035 check_efer(); 1128 check_efer();
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 429834ec1687..fe03eeed7b48 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -236,6 +236,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
236 ctxt->user_regs.ss = __KERNEL_DS; 236 ctxt->user_regs.ss = __KERNEL_DS;
237#ifdef CONFIG_X86_32 237#ifdef CONFIG_X86_32
238 ctxt->user_regs.fs = __KERNEL_PERCPU; 238 ctxt->user_regs.fs = __KERNEL_PERCPU;
239 ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
239#else 240#else
240 ctxt->gs_base_kernel = per_cpu_offset(cpu); 241 ctxt->gs_base_kernel = per_cpu_offset(cpu);
241#endif 242#endif
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 5601506f2dd9..36a5141108df 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -187,7 +187,6 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
187 struct xen_spinlock *prev; 187 struct xen_spinlock *prev;
188 int irq = __get_cpu_var(lock_kicker_irq); 188 int irq = __get_cpu_var(lock_kicker_irq);
189 int ret; 189 int ret;
190 unsigned long flags;
191 u64 start; 190 u64 start;
192 191
193 /* If kicker interrupts not initialized yet, just spin */ 192 /* If kicker interrupts not initialized yet, just spin */
@@ -199,16 +198,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
199 /* announce we're spinning */ 198 /* announce we're spinning */
200 prev = spinning_lock(xl); 199 prev = spinning_lock(xl);
201 200
202 flags = __raw_local_save_flags();
203 if (irq_enable) {
204 ADD_STATS(taken_slow_irqenable, 1);
205 raw_local_irq_enable();
206 }
207
208 ADD_STATS(taken_slow, 1); 201 ADD_STATS(taken_slow, 1);
209 ADD_STATS(taken_slow_nested, prev != NULL); 202 ADD_STATS(taken_slow_nested, prev != NULL);
210 203
211 do { 204 do {
205 unsigned long flags;
206
212 /* clear pending */ 207 /* clear pending */
213 xen_clear_irq_pending(irq); 208 xen_clear_irq_pending(irq);
214 209
@@ -228,6 +223,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
228 goto out; 223 goto out;
229 } 224 }
230 225
226 flags = __raw_local_save_flags();
227 if (irq_enable) {
228 ADD_STATS(taken_slow_irqenable, 1);
229 raw_local_irq_enable();
230 }
231
231 /* 232 /*
232 * Block until irq becomes pending. If we're 233 * Block until irq becomes pending. If we're
233 * interrupted at this point (after the trylock but 234 * interrupted at this point (after the trylock but
@@ -238,13 +239,15 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
238 * pending. 239 * pending.
239 */ 240 */
240 xen_poll_irq(irq); 241 xen_poll_irq(irq);
242
243 raw_local_irq_restore(flags);
244
241 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); 245 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
242 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ 246 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
243 247
244 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); 248 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
245 249
246out: 250out:
247 raw_local_irq_restore(flags);
248 unspinning_lock(xl, prev); 251 unspinning_lock(xl, prev);
249 spin_time_accum_blocked(start); 252 spin_time_accum_blocked(start);
250 253
@@ -323,8 +326,13 @@ static void xen_spin_unlock(struct raw_spinlock *lock)
323 smp_wmb(); /* make sure no writes get moved after unlock */ 326 smp_wmb(); /* make sure no writes get moved after unlock */
324 xl->lock = 0; /* release lock */ 327 xl->lock = 0; /* release lock */
325 328
326 /* make sure unlock happens before kick */ 329 /*
327 barrier(); 330 * Make sure unlock happens before checking for waiting
331 * spinners. We need a strong barrier to enforce the
332 * write-read ordering to different memory locations, as the
333 * CPU makes no implied guarantees about their ordering.
334 */
335 mb();
328 336
329 if (unlikely(xl->spinners)) 337 if (unlikely(xl->spinners))
330 xen_spin_unlock_slow(xl); 338 xen_spin_unlock_slow(xl);