diff options
author | Mukesh Rathor <mukesh.rathor@oracle.com> | 2013-12-13 11:48:08 -0500 |
---|---|---|
committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2014-01-06 10:44:12 -0500 |
commit | 5840c84b16aad223d5305d8a569ea55de4120d67 (patch) | |
tree | f0c880e57877c420226ac058a483cae17f56db53 /arch/x86 | |
parent | 8d656bbe43aee6d1be6b49fcf8acbc04588472bc (diff) |
xen/pvh: Secondary VCPU bringup (non-bootup CPUs)
The VCPU bringup protocol follows the PV with certain twists.
From xen/include/public/arch-x86/xen.h:
Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise
for HVM and PVH guests, not all information in this structure is updated:
- For HVM guests, the structures read include: fpu_ctxt (if
VGCT_I387_VALID is set), flags, user_regs, debugreg[*]
- PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to
set cr3. All other fields not used should be set to 0.
This is what we do. We piggyback on 'xen_setup_gdt', modified slightly:
we need to call 'load_percpu_segment' first so that 'switch_to_new_gdt'
can load the per-cpu data structures. This has no effect on VCPU0.
We also piggyback on the %rdi register to pass in the CPU number, so
that when we boot up a new CPU, cpu_bringup_and_idle receives the CPU
number as its first parameter (via %rdi on 64-bit).
Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/xen/enlighten.c | 11 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 49 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 1 |
3 files changed, 42 insertions, 19 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1170d00879d5..2eca6187fc92 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1409,14 +1409,19 @@ static void __init xen_boot_params_init_edd(void) | |||
1409 | * Set up the GDT and segment registers for -fstack-protector. Until | 1409 | * Set up the GDT and segment registers for -fstack-protector. Until |
1410 | * we do this, we have to be careful not to call any stack-protected | 1410 | * we do this, we have to be careful not to call any stack-protected |
1411 | * function, which is most of the kernel. | 1411 | * function, which is most of the kernel. |
1412 | * | ||
1413 | * Note, that it is __ref because the only caller of this after init | ||
1414 | * is PVH which is not going to use xen_load_gdt_boot or other | ||
1415 | * __init functions. | ||
1412 | */ | 1416 | */ |
1413 | static void __init xen_setup_gdt(void) | 1417 | void __ref xen_setup_gdt(int cpu) |
1414 | { | 1418 | { |
1415 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | 1419 | if (xen_feature(XENFEAT_auto_translated_physmap)) { |
1416 | #ifdef CONFIG_X86_64 | 1420 | #ifdef CONFIG_X86_64 |
1417 | unsigned long dummy; | 1421 | unsigned long dummy; |
1418 | 1422 | ||
1419 | switch_to_new_gdt(0); /* GDT and GS set */ | 1423 | load_percpu_segment(cpu); /* We need to access per-cpu area */ |
1424 | switch_to_new_gdt(cpu); /* GDT and GS set */ | ||
1420 | 1425 | ||
1421 | /* We are switching of the Xen provided GDT to our HVM mode | 1426 | /* We are switching of the Xen provided GDT to our HVM mode |
1422 | * GDT. The new GDT has __KERNEL_CS with CS.L = 1 | 1427 | * GDT. The new GDT has __KERNEL_CS with CS.L = 1 |
@@ -1529,7 +1534,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1529 | * Set up kernel GDT and segment registers, mainly so that | 1534 | * Set up kernel GDT and segment registers, mainly so that |
1530 | * -fstack-protector code can be executed. | 1535 | * -fstack-protector code can be executed. |
1531 | */ | 1536 | */ |
1532 | xen_setup_gdt(); | 1537 | xen_setup_gdt(0); |
1533 | 1538 | ||
1534 | xen_init_irq_ops(); | 1539 | xen_init_irq_ops(); |
1535 | xen_init_cpuid_mask(); | 1540 | xen_init_cpuid_mask(); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c36b325abd83..5e46190133b2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -73,9 +73,11 @@ static void cpu_bringup(void) | |||
73 | touch_softlockup_watchdog(); | 73 | touch_softlockup_watchdog(); |
74 | preempt_disable(); | 74 | preempt_disable(); |
75 | 75 | ||
76 | xen_enable_sysenter(); | 76 | /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ |
77 | xen_enable_syscall(); | 77 | if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { |
78 | 78 | xen_enable_sysenter(); | |
79 | xen_enable_syscall(); | ||
80 | } | ||
79 | cpu = smp_processor_id(); | 81 | cpu = smp_processor_id(); |
80 | smp_store_cpu_info(cpu); | 82 | smp_store_cpu_info(cpu); |
81 | cpu_data(cpu).x86_max_cores = 1; | 83 | cpu_data(cpu).x86_max_cores = 1; |
@@ -97,8 +99,14 @@ static void cpu_bringup(void) | |||
97 | wmb(); /* make sure everything is out */ | 99 | wmb(); /* make sure everything is out */ |
98 | } | 100 | } |
99 | 101 | ||
100 | static void cpu_bringup_and_idle(void) | 102 | /* Note: cpu parameter is only relevant for PVH */ |
103 | static void cpu_bringup_and_idle(int cpu) | ||
101 | { | 104 | { |
105 | #ifdef CONFIG_X86_64 | ||
106 | if (xen_feature(XENFEAT_auto_translated_physmap) && | ||
107 | xen_feature(XENFEAT_supervisor_mode_kernel)) | ||
108 | xen_setup_gdt(cpu); | ||
109 | #endif | ||
102 | cpu_bringup(); | 110 | cpu_bringup(); |
103 | cpu_startup_entry(CPUHP_ONLINE); | 111 | cpu_startup_entry(CPUHP_ONLINE); |
104 | } | 112 | } |
@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
274 | native_smp_prepare_boot_cpu(); | 282 | native_smp_prepare_boot_cpu(); |
275 | 283 | ||
276 | if (xen_pv_domain()) { | 284 | if (xen_pv_domain()) { |
277 | /* We've switched to the "real" per-cpu gdt, so make sure the | 285 | if (!xen_feature(XENFEAT_writable_page_tables)) |
278 | old memory can be recycled */ | 286 | /* We've switched to the "real" per-cpu gdt, so make |
279 | make_lowmem_page_readwrite(xen_initial_gdt); | 287 | * sure the old memory can be recycled. */ |
288 | make_lowmem_page_readwrite(xen_initial_gdt); | ||
280 | 289 | ||
281 | #ifdef CONFIG_X86_32 | 290 | #ifdef CONFIG_X86_32 |
282 | /* | 291 | /* |
@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
360 | 369 | ||
361 | gdt = get_cpu_gdt_table(cpu); | 370 | gdt = get_cpu_gdt_table(cpu); |
362 | 371 | ||
363 | ctxt->flags = VGCF_IN_KERNEL; | ||
364 | ctxt->user_regs.ss = __KERNEL_DS; | ||
365 | #ifdef CONFIG_X86_32 | 372 | #ifdef CONFIG_X86_32 |
373 | /* Note: PVH is not yet supported on x86_32. */ | ||
366 | ctxt->user_regs.fs = __KERNEL_PERCPU; | 374 | ctxt->user_regs.fs = __KERNEL_PERCPU; |
367 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; | 375 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; |
368 | #else | ||
369 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
370 | #endif | 376 | #endif |
371 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; | 377 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
372 | 378 | ||
373 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); | 379 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); |
374 | 380 | ||
375 | { | 381 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
382 | ctxt->flags = VGCF_IN_KERNEL; | ||
376 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ | 383 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ |
377 | ctxt->user_regs.ds = __USER_DS; | 384 | ctxt->user_regs.ds = __USER_DS; |
378 | ctxt->user_regs.es = __USER_DS; | 385 | ctxt->user_regs.es = __USER_DS; |
386 | ctxt->user_regs.ss = __KERNEL_DS; | ||
379 | 387 | ||
380 | xen_copy_trap_info(ctxt->trap_ctxt); | 388 | xen_copy_trap_info(ctxt->trap_ctxt); |
381 | 389 | ||
@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
396 | #ifdef CONFIG_X86_32 | 404 | #ifdef CONFIG_X86_32 |
397 | ctxt->event_callback_cs = __KERNEL_CS; | 405 | ctxt->event_callback_cs = __KERNEL_CS; |
398 | ctxt->failsafe_callback_cs = __KERNEL_CS; | 406 | ctxt->failsafe_callback_cs = __KERNEL_CS; |
407 | #else | ||
408 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
399 | #endif | 409 | #endif |
400 | ctxt->event_callback_eip = | 410 | ctxt->event_callback_eip = |
401 | (unsigned long)xen_hypervisor_callback; | 411 | (unsigned long)xen_hypervisor_callback; |
402 | ctxt->failsafe_callback_eip = | 412 | ctxt->failsafe_callback_eip = |
403 | (unsigned long)xen_failsafe_callback; | 413 | (unsigned long)xen_failsafe_callback; |
414 | ctxt->user_regs.cs = __KERNEL_CS; | ||
415 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
416 | #ifdef CONFIG_X86_32 | ||
404 | } | 417 | } |
405 | ctxt->user_regs.cs = __KERNEL_CS; | 418 | #else |
419 | } else | ||
420 | /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with | ||
421 | * %rdi having the cpu number - which means are passing in | ||
422 | * as the first parameter the cpu. Subtle! | ||
423 | */ | ||
424 | ctxt->user_regs.rdi = cpu; | ||
425 | #endif | ||
406 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); | 426 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); |
407 | |||
408 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
409 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | 427 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); |
410 | |||
411 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) | 428 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) |
412 | BUG(); | 429 | BUG(); |
413 | 430 | ||
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 95f8c6142328..9059c24ed564 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void); | |||
123 | 123 | ||
124 | extern int xen_panic_handler_init(void); | 124 | extern int xen_panic_handler_init(void); |
125 | 125 | ||
126 | void xen_setup_gdt(int cpu); | ||
126 | #endif /* XEN_OPS_H */ | 127 | #endif /* XEN_OPS_H */ |