diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-14 13:23:49 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-14 13:23:49 -0400 |
commit | b8cb48aae1b8c50b37dcb7710363aa69a7a0d9ca (patch) | |
tree | c7b04bfc8ece8b0121a9030c7f49e7bf9bf570d1 /arch/x86/xen | |
parent | 0cc6d77e55eca9557bbe41bf2db94b31aa8fcb2a (diff) | |
parent | 78c86e5e5691fc84d5fbea0cd4ac7147e87b7490 (diff) |
Merge branch 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86: split __phys_addr out into separate file
xen: use stronger barrier after unlocking lock
xen: only enable interrupts while actually blocking for spinlock
xen: make -fstack-protector work under Xen
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 131 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/spinlock.c | 28 |
4 files changed, 133 insertions, 29 deletions
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 7410640db173..3bb4fc21f4f2 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -8,6 +8,7 @@ endif | |||
8 | # Make sure early boot has no stackprotector | 8 | # Make sure early boot has no stackprotector |
9 | nostackp := $(call cc-option, -fno-stack-protector) | 9 | nostackp := $(call cc-option, -fno-stack-protector) |
10 | CFLAGS_enlighten.o := $(nostackp) | 10 | CFLAGS_enlighten.o := $(nostackp) |
11 | CFLAGS_mmu.o := $(nostackp) | ||
11 | 12 | ||
12 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | 13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ |
13 | time.o xen-asm.o xen-asm_$(BITS).o \ | 14 | time.o xen-asm.o xen-asm_$(BITS).o \ |
@@ -16,3 +17,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | |||
16 | obj-$(CONFIG_SMP) += smp.o | 17 | obj-$(CONFIG_SMP) += smp.o |
17 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o | 18 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o |
18 | obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o | 19 | obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o |
20 | |||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b62ccb840cfb..0dd0c2c6cae0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <asm/pgtable.h> | 51 | #include <asm/pgtable.h> |
52 | #include <asm/tlbflush.h> | 52 | #include <asm/tlbflush.h> |
53 | #include <asm/reboot.h> | 53 | #include <asm/reboot.h> |
54 | #include <asm/stackprotector.h> | ||
54 | 55 | ||
55 | #include "xen-ops.h" | 56 | #include "xen-ops.h" |
56 | #include "mmu.h" | 57 | #include "mmu.h" |
@@ -330,18 +331,28 @@ static void xen_load_gdt(const struct desc_ptr *dtr) | |||
330 | unsigned long frames[pages]; | 331 | unsigned long frames[pages]; |
331 | int f; | 332 | int f; |
332 | 333 | ||
333 | /* A GDT can be up to 64k in size, which corresponds to 8192 | 334 | /* |
334 | 8-byte entries, or 16 4k pages.. */ | 335 | * A GDT can be up to 64k in size, which corresponds to 8192 |
336 | * 8-byte entries, or 16 4k pages.. | ||
337 | */ | ||
335 | 338 | ||
336 | BUG_ON(size > 65536); | 339 | BUG_ON(size > 65536); |
337 | BUG_ON(va & ~PAGE_MASK); | 340 | BUG_ON(va & ~PAGE_MASK); |
338 | 341 | ||
339 | for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { | 342 | for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { |
340 | int level; | 343 | int level; |
341 | pte_t *ptep = lookup_address(va, &level); | 344 | pte_t *ptep; |
342 | unsigned long pfn, mfn; | 345 | unsigned long pfn, mfn; |
343 | void *virt; | 346 | void *virt; |
344 | 347 | ||
348 | /* | ||
349 | * The GDT is per-cpu and is in the percpu data area. | ||
350 | * That can be virtually mapped, so we need to do a | ||
351 | * page-walk to get the underlying MFN for the | ||
352 | * hypercall. The page can also be in the kernel's | ||
353 | * linear range, so we need to RO that mapping too. | ||
354 | */ | ||
355 | ptep = lookup_address(va, &level); | ||
345 | BUG_ON(ptep == NULL); | 356 | BUG_ON(ptep == NULL); |
346 | 357 | ||
347 | pfn = pte_pfn(*ptep); | 358 | pfn = pte_pfn(*ptep); |
@@ -358,6 +369,44 @@ static void xen_load_gdt(const struct desc_ptr *dtr) | |||
358 | BUG(); | 369 | BUG(); |
359 | } | 370 | } |
360 | 371 | ||
372 | /* | ||
373 | * load_gdt for early boot, when the gdt is only mapped once | ||
374 | */ | ||
375 | static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) | ||
376 | { | ||
377 | unsigned long va = dtr->address; | ||
378 | unsigned int size = dtr->size + 1; | ||
379 | unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; | ||
380 | unsigned long frames[pages]; | ||
381 | int f; | ||
382 | |||
383 | /* | ||
384 | * A GDT can be up to 64k in size, which corresponds to 8192 | ||
385 | * 8-byte entries, or 16 4k pages.. | ||
386 | */ | ||
387 | |||
388 | BUG_ON(size > 65536); | ||
389 | BUG_ON(va & ~PAGE_MASK); | ||
390 | |||
391 | for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { | ||
392 | pte_t pte; | ||
393 | unsigned long pfn, mfn; | ||
394 | |||
395 | pfn = virt_to_pfn(va); | ||
396 | mfn = pfn_to_mfn(pfn); | ||
397 | |||
398 | pte = pfn_pte(pfn, PAGE_KERNEL_RO); | ||
399 | |||
400 | if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) | ||
401 | BUG(); | ||
402 | |||
403 | frames[f] = mfn; | ||
404 | } | ||
405 | |||
406 | if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) | ||
407 | BUG(); | ||
408 | } | ||
409 | |||
361 | static void load_TLS_descriptor(struct thread_struct *t, | 410 | static void load_TLS_descriptor(struct thread_struct *t, |
362 | unsigned int cpu, unsigned int i) | 411 | unsigned int cpu, unsigned int i) |
363 | { | 412 | { |
@@ -581,6 +630,29 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | |||
581 | preempt_enable(); | 630 | preempt_enable(); |
582 | } | 631 | } |
583 | 632 | ||
633 | /* | ||
634 | * Version of write_gdt_entry for use at early boot-time needed to | ||
635 | * update an entry as simply as possible. | ||
636 | */ | ||
637 | static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, | ||
638 | const void *desc, int type) | ||
639 | { | ||
640 | switch (type) { | ||
641 | case DESC_LDT: | ||
642 | case DESC_TSS: | ||
643 | /* ignore */ | ||
644 | break; | ||
645 | |||
646 | default: { | ||
647 | xmaddr_t maddr = virt_to_machine(&dt[entry]); | ||
648 | |||
649 | if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) | ||
650 | dt[entry] = *(struct desc_struct *)desc; | ||
651 | } | ||
652 | |||
653 | } | ||
654 | } | ||
655 | |||
584 | static void xen_load_sp0(struct tss_struct *tss, | 656 | static void xen_load_sp0(struct tss_struct *tss, |
585 | struct thread_struct *thread) | 657 | struct thread_struct *thread) |
586 | { | 658 | { |
@@ -965,6 +1037,23 @@ static const struct machine_ops __initdata xen_machine_ops = { | |||
965 | .emergency_restart = xen_emergency_restart, | 1037 | .emergency_restart = xen_emergency_restart, |
966 | }; | 1038 | }; |
967 | 1039 | ||
1040 | /* | ||
1041 | * Set up the GDT and segment registers for -fstack-protector. Until | ||
1042 | * we do this, we have to be careful not to call any stack-protected | ||
1043 | * function, which is most of the kernel. | ||
1044 | */ | ||
1045 | static void __init xen_setup_stackprotector(void) | ||
1046 | { | ||
1047 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; | ||
1048 | pv_cpu_ops.load_gdt = xen_load_gdt_boot; | ||
1049 | |||
1050 | setup_stack_canary_segment(0); | ||
1051 | switch_to_new_gdt(0); | ||
1052 | |||
1053 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry; | ||
1054 | pv_cpu_ops.load_gdt = xen_load_gdt; | ||
1055 | } | ||
1056 | |||
968 | /* First C function to be called on Xen boot */ | 1057 | /* First C function to be called on Xen boot */ |
969 | asmlinkage void __init xen_start_kernel(void) | 1058 | asmlinkage void __init xen_start_kernel(void) |
970 | { | 1059 | { |
@@ -983,13 +1072,28 @@ asmlinkage void __init xen_start_kernel(void) | |||
983 | pv_apic_ops = xen_apic_ops; | 1072 | pv_apic_ops = xen_apic_ops; |
984 | pv_mmu_ops = xen_mmu_ops; | 1073 | pv_mmu_ops = xen_mmu_ops; |
985 | 1074 | ||
986 | #ifdef CONFIG_X86_64 | ||
987 | /* | 1075 | /* |
988 | * Setup percpu state. We only need to do this for 64-bit | 1076 | * Set up some pagetable state before starting to set any ptes. |
989 | * because 32-bit already has %fs set properly. | ||
990 | */ | 1077 | */ |
991 | load_percpu_segment(0); | 1078 | |
992 | #endif | 1079 | /* Prevent unwanted bits from being set in PTEs. */ |
1080 | __supported_pte_mask &= ~_PAGE_GLOBAL; | ||
1081 | if (!xen_initial_domain()) | ||
1082 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | ||
1083 | |||
1084 | __supported_pte_mask |= _PAGE_IOMAP; | ||
1085 | |||
1086 | xen_setup_features(); | ||
1087 | |||
1088 | /* Get mfn list */ | ||
1089 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
1090 | xen_build_dynamic_phys_to_machine(); | ||
1091 | |||
1092 | /* | ||
1093 | * Set up kernel GDT and segment registers, mainly so that | ||
1094 | * -fstack-protector code can be executed. | ||
1095 | */ | ||
1096 | xen_setup_stackprotector(); | ||
993 | 1097 | ||
994 | xen_init_irq_ops(); | 1098 | xen_init_irq_ops(); |
995 | xen_init_cpuid_mask(); | 1099 | xen_init_cpuid_mask(); |
@@ -1001,8 +1105,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1001 | set_xen_basic_apic_ops(); | 1105 | set_xen_basic_apic_ops(); |
1002 | #endif | 1106 | #endif |
1003 | 1107 | ||
1004 | xen_setup_features(); | ||
1005 | |||
1006 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { | 1108 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { |
1007 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; | 1109 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; |
1008 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; | 1110 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; |
@@ -1019,17 +1121,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1019 | 1121 | ||
1020 | xen_smp_init(); | 1122 | xen_smp_init(); |
1021 | 1123 | ||
1022 | /* Get mfn list */ | ||
1023 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
1024 | xen_build_dynamic_phys_to_machine(); | ||
1025 | |||
1026 | pgd = (pgd_t *)xen_start_info->pt_base; | 1124 | pgd = (pgd_t *)xen_start_info->pt_base; |
1027 | 1125 | ||
1028 | /* Prevent unwanted bits from being set in PTEs. */ | ||
1029 | __supported_pte_mask &= ~_PAGE_GLOBAL; | ||
1030 | if (!xen_initial_domain()) | ||
1031 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | ||
1032 | |||
1033 | #ifdef CONFIG_X86_64 | 1126 | #ifdef CONFIG_X86_64 |
1034 | /* Work out if we support NX */ | 1127 | /* Work out if we support NX */ |
1035 | check_efer(); | 1128 | check_efer(); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 429834ec1687..fe03eeed7b48 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -236,6 +236,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
236 | ctxt->user_regs.ss = __KERNEL_DS; | 236 | ctxt->user_regs.ss = __KERNEL_DS; |
237 | #ifdef CONFIG_X86_32 | 237 | #ifdef CONFIG_X86_32 |
238 | ctxt->user_regs.fs = __KERNEL_PERCPU; | 238 | ctxt->user_regs.fs = __KERNEL_PERCPU; |
239 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; | ||
239 | #else | 240 | #else |
240 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | 241 | ctxt->gs_base_kernel = per_cpu_offset(cpu); |
241 | #endif | 242 | #endif |
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 5601506f2dd9..36a5141108df 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -187,7 +187,6 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl | |||
187 | struct xen_spinlock *prev; | 187 | struct xen_spinlock *prev; |
188 | int irq = __get_cpu_var(lock_kicker_irq); | 188 | int irq = __get_cpu_var(lock_kicker_irq); |
189 | int ret; | 189 | int ret; |
190 | unsigned long flags; | ||
191 | u64 start; | 190 | u64 start; |
192 | 191 | ||
193 | /* If kicker interrupts not initialized yet, just spin */ | 192 | /* If kicker interrupts not initialized yet, just spin */ |
@@ -199,16 +198,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl | |||
199 | /* announce we're spinning */ | 198 | /* announce we're spinning */ |
200 | prev = spinning_lock(xl); | 199 | prev = spinning_lock(xl); |
201 | 200 | ||
202 | flags = __raw_local_save_flags(); | ||
203 | if (irq_enable) { | ||
204 | ADD_STATS(taken_slow_irqenable, 1); | ||
205 | raw_local_irq_enable(); | ||
206 | } | ||
207 | |||
208 | ADD_STATS(taken_slow, 1); | 201 | ADD_STATS(taken_slow, 1); |
209 | ADD_STATS(taken_slow_nested, prev != NULL); | 202 | ADD_STATS(taken_slow_nested, prev != NULL); |
210 | 203 | ||
211 | do { | 204 | do { |
205 | unsigned long flags; | ||
206 | |||
212 | /* clear pending */ | 207 | /* clear pending */ |
213 | xen_clear_irq_pending(irq); | 208 | xen_clear_irq_pending(irq); |
214 | 209 | ||
@@ -228,6 +223,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl | |||
228 | goto out; | 223 | goto out; |
229 | } | 224 | } |
230 | 225 | ||
226 | flags = __raw_local_save_flags(); | ||
227 | if (irq_enable) { | ||
228 | ADD_STATS(taken_slow_irqenable, 1); | ||
229 | raw_local_irq_enable(); | ||
230 | } | ||
231 | |||
231 | /* | 232 | /* |
232 | * Block until irq becomes pending. If we're | 233 | * Block until irq becomes pending. If we're |
233 | * interrupted at this point (after the trylock but | 234 | * interrupted at this point (after the trylock but |
@@ -238,13 +239,15 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl | |||
238 | * pending. | 239 | * pending. |
239 | */ | 240 | */ |
240 | xen_poll_irq(irq); | 241 | xen_poll_irq(irq); |
242 | |||
243 | raw_local_irq_restore(flags); | ||
244 | |||
241 | ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); | 245 | ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); |
242 | } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ | 246 | } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ |
243 | 247 | ||
244 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); | 248 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); |
245 | 249 | ||
246 | out: | 250 | out: |
247 | raw_local_irq_restore(flags); | ||
248 | unspinning_lock(xl, prev); | 251 | unspinning_lock(xl, prev); |
249 | spin_time_accum_blocked(start); | 252 | spin_time_accum_blocked(start); |
250 | 253 | ||
@@ -323,8 +326,13 @@ static void xen_spin_unlock(struct raw_spinlock *lock) | |||
323 | smp_wmb(); /* make sure no writes get moved after unlock */ | 326 | smp_wmb(); /* make sure no writes get moved after unlock */ |
324 | xl->lock = 0; /* release lock */ | 327 | xl->lock = 0; /* release lock */ |
325 | 328 | ||
326 | /* make sure unlock happens before kick */ | 329 | /* |
327 | barrier(); | 330 | * Make sure unlock happens before checking for waiting |
331 | * spinners. We need a strong barrier to enforce the | ||
332 | * write-read ordering to different memory locations, as the | ||
333 | * CPU makes no implied guarantees about their ordering. | ||
334 | */ | ||
335 | mb(); | ||
328 | 336 | ||
329 | if (unlikely(xl->spinners)) | 337 | if (unlikely(xl->spinners)) |
330 | xen_spin_unlock_slow(xl); | 338 | xen_spin_unlock_slow(xl); |