Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r--	arch/x86/xen/enlighten.c	688
1 files changed, 543 insertions, 145 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bb508456ef52..3da6acb7eafc 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,7 @@
 #include <xen/interface/sched.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/hvc-console.h>
 
 #include <asm/paravirt.h>
 #include <asm/page.h>
@@ -40,12 +41,12 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
+#include <asm/msr-index.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
-#include <asm/pgalloc.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -57,6 +58,18 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
 /*
+ * Identity map, in addition to plain kernel map. This needs to be
+ * large enough to allocate page table pages to allocate the rest.
+ * Each page can map 2MB.
+ */
+static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+
+#ifdef CONFIG_X86_64
+/* l3 pud for userspace vsyscall mapping */
+static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
+#endif /* CONFIG_X86_64 */
+
+/*
  * Note about cr3 (pagetable base) values:
  *
  * xen_cr3 contains the current logical cr3 value; it contains the
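An arithmetic aside, not part of the patch: under the usual x86 assumptions (4 KiB pages, PTRS_PER_PTE == 512), the buffer added above holds 512 * 4 = 2048 PTEs. One full page of PTEs maps 512 * 4 KiB = 2 MiB, which is the "2MB" the comment refers to, so the four-page level1_ident_pgt can identity-map at most 4 * 2 MiB = 8 MiB before the real allocator takes over.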
@@ -363,14 +376,6 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	xen_mc_batch();
-
-	load_TLS_descriptor(t, cpu, 0);
-	load_TLS_descriptor(t, cpu, 1);
-	load_TLS_descriptor(t, cpu, 2);
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-
 	/*
 	 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
 	 * it means we're in a context switch, and %gs has just been
@@ -379,10 +384,39 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 	 * Either way, it has been saved, and the new value will get
 	 * loaded properly. This will go away as soon as Xen has been
 	 * modified to not save/restore %gs for normal hypercalls.
+	 *
+	 * On x86_64, this hack is not used for %gs, because gs points
+	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
+	 * must not zero %gs on x86_64
+	 *
+	 * For x86_64, we need to zero %fs, otherwise we may get an
+	 * exception between the new %fs descriptor being loaded and
+	 * %fs being effectively cleared at __switch_to().
 	 */
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+#ifdef CONFIG_X86_32
 		loadsegment(gs, 0);
+#else
+		loadsegment(fs, 0);
+#endif
+	}
+
+	xen_mc_batch();
+
+	load_TLS_descriptor(t, cpu, 0);
+	load_TLS_descriptor(t, cpu, 1);
+	load_TLS_descriptor(t, cpu, 2);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+#ifdef CONFIG_X86_64
+static void xen_load_gs_index(unsigned int idx)
+{
+	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
+		BUG();
 }
+#endif
 
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
@@ -400,23 +434,18 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 	preempt_enable();
 }
 
-static int cvt_gate_to_trap(int vector, u32 low, u32 high,
+static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
-	u8 type, dpl;
-
-	type = (high >> 8) & 0x1f;
-	dpl = (high >> 13) & 3;
-
-	if (type != 0xf && type != 0xe)
+	if (val->type != 0xf && val->type != 0xe)
 		return 0;
 
 	info->vector = vector;
-	info->address = (high & 0xffff0000) | (low & 0x0000ffff);
-	info->cs = low >> 16;
-	info->flags = dpl;
+	info->address = gate_offset(*val);
+	info->cs = gate_segment(*val);
+	info->flags = val->dpl;
 	/* interrupt gates clear IF */
-	if (type == 0xe)
+	if (val->type == 0xe)
 		info->flags |= 4;
 
 	return 1;
@@ -443,11 +472,10 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
 
 	if (p >= start && (p + 8) <= end) {
 		struct trap_info info[2];
-		u32 *desc = (u32 *)g;
 
 		info[1].address = 0;
 
-		if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0]))
+		if (cvt_gate_to_trap(entrynum, g, &info[0]))
 			if (HYPERVISOR_set_trap_table(info))
 				BUG();
 	}
@@ -460,13 +488,13 @@ static void xen_convert_trap_info(const struct desc_ptr *desc,
 {
 	unsigned in, out, count;
 
-	count = (desc->size+1) / 8;
+	count = (desc->size+1) / sizeof(gate_desc);
 	BUG_ON(count > 256);
 
 	for (in = out = 0; in < count; in++) {
-		const u32 *entry = (u32 *)(desc->address + in * 8);
+		gate_desc *entry = (gate_desc*)(desc->address) + in;
 
-		if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
+		if (cvt_gate_to_trap(in, entry, &traps[out]))
 			out++;
 	}
 	traps[out].address = 0;
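An illustrative aside, not part of the patch: on 32-bit, the gate_desc accessors that the reworked cvt_gate_to_trap() relies on compute exactly what the old open-coded version extracted from the two 32-bit words of an IDT entry. A minimal sketch of that equivalence, using hypothetical helper names and assuming the i386 gate layout:

	/* i386 gate layout: low word = offset[15:0] | selector << 16;
	 * high word = type in bits 8-12, dpl in bits 13-14, offset[31:16] in bits 16-31. */
	static unsigned long gate32_offset(u32 low, u32 high)
	{
		return (high & 0xffff0000) | (low & 0x0000ffff);	/* what gate_offset() yields */
	}

	static unsigned gate32_segment(u32 low)
	{
		return low >> 16;					/* what gate_segment() yields */
	}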
@@ -695,33 +723,89 @@ static void set_current_cr3(void *v)
 	x86_write_percpu(xen_current_cr3, (unsigned long)v);
 }
 
-static void xen_write_cr3(unsigned long cr3)
+static void __xen_write_cr3(bool kernel, unsigned long cr3)
 {
 	struct mmuext_op *op;
 	struct multicall_space mcs;
-	unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	unsigned long mfn;
 
-	BUG_ON(preemptible());
+	if (cr3)
+		mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	else
+		mfn = 0;
 
-	mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */
+	WARN_ON(mfn == 0 && kernel);
 
-	/* Update while interrupts are disabled, so its atomic with
-	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
+	mcs = __xen_mc_entry(sizeof(*op));
 
 	op = mcs.args;
-	op->cmd = MMUEXT_NEW_BASEPTR;
+	op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
 	op->arg1.mfn = mfn;
 
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
-	/* Update xen_update_cr3 once the batch has actually
-	   been submitted. */
-	xen_mc_callback(set_current_cr3, (void *)cr3);
+	if (kernel) {
+		x86_write_percpu(xen_cr3, cr3);
+
+		/* Update xen_current_cr3 once the batch has actually
+		   been submitted. */
+		xen_mc_callback(set_current_cr3, (void *)cr3);
+	}
+}
+
+static void xen_write_cr3(unsigned long cr3)
+{
+	BUG_ON(preemptible());
+
+	xen_mc_batch(); /* disables interrupts */
+
+	/* Update while interrupts are disabled, so its atomic with
+	   respect to ipis */
+	x86_write_percpu(xen_cr3, cr3);
+
+	__xen_write_cr3(true, cr3);
+
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+		if (user_pgd)
+			__xen_write_cr3(false, __pa(user_pgd));
+		else
+			__xen_write_cr3(false, 0);
+	}
+#endif
 
 	xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
 }
 
+static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+{
+	int ret;
+
+	ret = 0;
+
+	switch(msr) {
+#ifdef CONFIG_X86_64
+	unsigned which;
+	u64 base;
+
+	case MSR_FS_BASE:		which = SEGBASE_FS; goto set;
+	case MSR_KERNEL_GS_BASE:	which = SEGBASE_GS_USER; goto set;
+	case MSR_GS_BASE:		which = SEGBASE_GS_KERNEL; goto set;
+
+	set:
+		base = ((u64)high << 32) | low;
+		if (HYPERVISOR_set_segment_base(which, base) != 0)
+			ret = -EFAULT;
+		break;
+#endif
+	default:
+		ret = native_write_msr_safe(msr, low, high);
+	}
+
+	return ret;
+}
+
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
@@ -778,6 +862,48 @@ static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
 
+static int xen_pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = mm->pgd;
+	int ret = 0;
+
+	BUG_ON(PagePinned(virt_to_page(pgd)));
+
+#ifdef CONFIG_X86_64
+	{
+		struct page *page = virt_to_page(pgd);
+		pgd_t *user_pgd;
+
+		BUG_ON(page->private != 0);
+
+		ret = -ENOMEM;
+
+		user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		page->private = (unsigned long)user_pgd;
+
+		if (user_pgd != NULL) {
+			user_pgd[pgd_index(VSYSCALL_START)] =
+				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+			ret = 0;
+		}
+
+		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+	}
+#endif
+
+	return ret;
+}
+
+static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+	pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+	if (user_pgd)
+		free_page((unsigned long)user_pgd);
+#endif
+}
+
 /* This should never happen until we're OK to use struct page */
 static void xen_release_ptpage(u32 pfn, unsigned level)
 {
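For reference: xen_get_user_pgd(), used by __xen_write_cr3() and xen_pgd_free() above, is defined elsewhere in the Xen MMU code and is not part of this file. A simplified sketch of what it returns, inferred from the page->private convention that xen_pgd_alloc() establishes (sketch only; the real helper also copes with pgd pointers that are offset into the page):

	static pgd_t *xen_get_user_pgd(pgd_t *pgd)
	{
	#ifdef CONFIG_X86_64
		/* xen_pgd_alloc() stashed the user-mode pgd in page->private */
		return (pgd_t *)virt_to_page(pgd)->private;
	#else
		return NULL;	/* no separate user pagetable on 32-bit */
	#endif
	}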
@@ -803,6 +929,18 @@ static void xen_release_pmd(u32 pfn)
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
+#if PAGETABLE_LEVELS == 4
+static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+{
+	xen_alloc_ptpage(mm, pfn, PT_PUD);
+}
+
+static void xen_release_pud(u32 pfn)
+{
+	xen_release_ptpage(pfn, PT_PUD);
+}
+#endif
+
 #ifdef CONFIG_HIGHPTE
 static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
 {
@@ -841,68 +979,16 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
-	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
-	int i;
-
-	/* special set_pte for pagetable initialization */
-	pv_mmu_ops.set_pte = xen_set_pte_init;
-
-	init_mm.pgd = base;
-	/*
-	 * copy top-level of Xen-supplied pagetable into place. This
-	 * is a stand-in while we copy the pmd pages.
-	 */
-	memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
-
-	/*
-	 * For PAE, need to allocate new pmds, rather than
-	 * share Xen's, since Xen doesn't like pmd's being
-	 * shared between address spaces.
-	 */
-	for (i = 0; i < PTRS_PER_PGD; i++) {
-		if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-			pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-
-			memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-			       PAGE_SIZE);
-
-			make_lowmem_page_readonly(pmd);
-
-			set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-		} else
-			pgd_clear(&base[i]);
-	}
-
-	/* make sure zero_page is mapped RO so we can use it in pagetables */
-	make_lowmem_page_readonly(empty_zero_page);
-	make_lowmem_page_readonly(base);
-	/*
-	 * Switch to new pagetable. This is done before
-	 * pagetable_init has done anything so that the new pages
-	 * added to the table can be prepared properly for Xen.
-	 */
-	xen_write_cr3(__pa(base));
-
-	/* Unpin initial Xen pagetable */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
-			  PFN_DOWN(__pa(xen_start_info->pt_base)));
 }
 
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-
-		/*
-		 * Create a mapping for the shared info page.
-		 * Should be set_fixmap(), but shared_info is a machine
-		 * address with no corresponding pseudo-phys address.
-		 */
-		set_pte_mfn(addr,
-			    PFN_DOWN(xen_start_info->shared_info),
-			    PAGE_KERNEL);
-
-		HYPERVISOR_shared_info = (struct shared_info *)addr;
+		set_fixmap(FIX_PARAVIRT_BOOTMAP,
+			   xen_start_info->shared_info);
+
+		HYPERVISOR_shared_info =
+			(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
 	} else
 		HYPERVISOR_shared_info =
 			(struct shared_info *)__va(xen_start_info->shared_info);
@@ -917,26 +1003,32 @@ void xen_setup_shared_info(void)
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
 {
-	/* This will work as long as patching hasn't happened yet
-	   (which it hasn't) */
-	pv_mmu_ops.alloc_pte = xen_alloc_pte;
-	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
-	pv_mmu_ops.release_pte = xen_release_pte;
-	pv_mmu_ops.release_pmd = xen_release_pmd;
-	pv_mmu_ops.set_pte = xen_set_pte;
-
 	xen_setup_shared_info();
-
-	/* Actually pin the pagetable down, but we can't set PG_pinned
-	   yet because the page structures don't exist yet. */
-	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
 }
 
 static __init void xen_post_allocator_init(void)
 {
+	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.set_pgd = xen_set_pgd;
+#endif
+
+	/* This will work as long as patching hasn't happened yet
+	   (which it hasn't) */
+	pv_mmu_ops.alloc_pte = xen_alloc_pte;
+	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+	pv_mmu_ops.release_pte = xen_release_pte;
+	pv_mmu_ops.release_pmd = xen_release_pmd;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.alloc_pud = xen_alloc_pud;
+	pv_mmu_ops.release_pud = xen_release_pud;
+#endif
 
+#ifdef CONFIG_X86_64
+	SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
 	xen_mark_init_mm_pinned();
 }
 
@@ -950,6 +1042,7 @@ void xen_setup_vcpu_info_placement(void)
 
 	/* xen_vcpu_setup managed to place the vcpu_info within the
 	   percpu area for all cpus, so make use of it */
+#ifdef CONFIG_X86_32
 	if (have_vcpu_info_placement) {
 		printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
@@ -959,6 +1052,7 @@ void xen_setup_vcpu_info_placement(void)
 		pv_irq_ops.irq_enable = xen_irq_enable_direct;
 		pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 	}
+#endif
 }
 
 static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
@@ -979,10 +1073,12 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 		goto patch_site
 
 	switch (type) {
+#ifdef CONFIG_X86_32
 		SITE(pv_irq_ops, irq_enable);
 		SITE(pv_irq_ops, irq_disable);
 		SITE(pv_irq_ops, save_fl);
 		SITE(pv_irq_ops, restore_fl);
+#endif /* CONFIG_X86_32 */
 #undef SITE
 
 	patch_site:
@@ -1025,8 +1121,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 #ifdef CONFIG_X86_F00F_BUG
 	case FIX_F00F_IDT:
 #endif
+#ifdef CONFIG_X86_32
 	case FIX_WP_TEST:
 	case FIX_VDSO:
+# ifdef CONFIG_HIGHMEM
+	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+# endif
+#else
+	case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+#endif
 #ifdef CONFIG_X86_LOCAL_APIC
 	case FIX_APIC_BASE:	/* maps dummy local APIC */
 #endif
@@ -1039,6 +1142,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 	}
 
 	__native_set_fixmap(idx, pte);
+
+#ifdef CONFIG_X86_64
+	/* Replicate changes to map the vsyscall page into the user
+	   pagetable vsyscall mapping. */
+	if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+		unsigned long vaddr = __fix_to_virt(idx);
+		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
+	}
+#endif
 }
 
 static const struct pv_info xen_info __initdata = {
@@ -1084,18 +1196,25 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
-	.write_msr = native_write_msr_safe,
+	.write_msr = xen_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
 
 	.iret = xen_iret,
 	.irq_enable_sysexit = xen_sysexit,
+#ifdef CONFIG_X86_64
+	.usergs_sysret32 = xen_sysret32,
+	.usergs_sysret64 = xen_sysret64,
+#endif
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
 	.load_gdt = xen_load_gdt,
 	.load_idt = xen_load_idt,
 	.load_tls = xen_load_tls,
+#ifdef CONFIG_X86_64
+	.load_gs_index = xen_load_gs_index,
+#endif
 
 	.store_gdt = native_store_gdt,
 	.store_idt = native_store_idt,
@@ -1109,14 +1228,34 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.set_iopl_mask = xen_set_iopl_mask,
 	.io_delay = xen_io_delay,
 
+	/* Xen takes care of %gs when switching to usermode for us */
+	.swapgs = paravirt_nop,
+
 	.lazy_mode = {
 		.enter = paravirt_enter_lazy_cpu,
 		.leave = xen_leave_lazy,
 	},
 };
 
+static void __init __xen_init_IRQ(void)
+{
+#ifdef CONFIG_X86_64
+	int i;
+
+	/* Create identity vector->irq map */
+	for(i = 0; i < NR_VECTORS; i++) {
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			per_cpu(vector_irq, cpu)[i] = i;
+	}
+#endif /* CONFIG_X86_64 */
+
+	xen_init_IRQ();
+}
+
 static const struct pv_irq_ops xen_irq_ops __initdata = {
-	.init_IRQ = xen_init_IRQ,
+	.init_IRQ = __xen_init_IRQ,
 	.save_fl = xen_save_fl,
 	.restore_fl = xen_restore_fl,
 	.irq_disable = xen_irq_disable,
@@ -1124,7 +1263,7 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
 	.safe_halt = xen_safe_halt,
 	.halt = xen_halt,
 #ifdef CONFIG_X86_64
-	.adjust_exception_frame = paravirt_nop,
+	.adjust_exception_frame = xen_adjust_exception_frame,
 #endif
 };
 
@@ -1157,8 +1296,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
-	.pgd_alloc = __paravirt_pgd_alloc,
-	.pgd_free = paravirt_nop,
+	.pgd_alloc = xen_pgd_alloc,
+	.pgd_free = xen_pgd_free,
 
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
@@ -1170,7 +1309,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
 #endif
 
-	.set_pte = NULL,	/* see xen_pagetable_setup_* */
+#ifdef CONFIG_X86_64
+	.set_pte = xen_set_pte,
+#else
+	.set_pte = xen_set_pte_init,
+#endif
 	.set_pte_at = xen_set_pte_at,
 	.set_pmd = xen_set_pmd_hyper,
 
@@ -1184,15 +1327,26 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.make_pte = xen_make_pte,
 	.make_pgd = xen_make_pgd,
 
+#ifdef CONFIG_X86_PAE
 	.set_pte_atomic = xen_set_pte_atomic,
 	.set_pte_present = xen_set_pte_at,
-	.set_pud = xen_set_pud_hyper,
 	.pte_clear = xen_pte_clear,
 	.pmd_clear = xen_pmd_clear,
+#endif	/* CONFIG_X86_PAE */
+	.set_pud = xen_set_pud_hyper,
 
 	.make_pmd = xen_make_pmd,
 	.pmd_val = xen_pmd_val,
 
+#if PAGETABLE_LEVELS == 4
+	.pud_val = xen_pud_val,
+	.make_pud = xen_make_pud,
+	.set_pgd = xen_set_pgd_hyper,
+
+	.alloc_pud = xen_alloc_pte_init,
+	.release_pud = xen_release_pte_init,
+#endif	/* PAGETABLE_LEVELS == 4 */
+
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
 	.exit_mmap = xen_exit_mmap,
@@ -1205,21 +1359,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.set_fixmap = xen_set_fixmap,
 };
 
-#ifdef CONFIG_SMP
-static const struct smp_ops xen_smp_ops __initdata = {
-	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
-	.smp_prepare_cpus = xen_smp_prepare_cpus,
-	.cpu_up = xen_cpu_up,
-	.smp_cpus_done = xen_smp_cpus_done,
-
-	.smp_send_stop = xen_smp_send_stop,
-	.smp_send_reschedule = xen_smp_send_reschedule,
-
-	.send_call_func_ipi = xen_smp_send_call_function_ipi,
-	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
-};
-#endif	/* CONFIG_SMP */
-
 static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
@@ -1264,6 +1403,7 @@ static const struct machine_ops __initdata xen_machine_ops = {
 
 static void __init xen_reserve_top(void)
 {
+#ifdef CONFIG_X86_32
 	unsigned long top = HYPERVISOR_VIRT_START;
 	struct xen_platform_parameters pp;
 
@@ -1271,7 +1411,247 @@ static void __init xen_reserve_top(void)
 		top = pp.virt_start;
 
 	reserve_top_address(-top + 2 * PAGE_SIZE);
+#endif	/* CONFIG_X86_32 */
+}
+
+/*
+ * Like __va(), but returns address in the kernel mapping (which is
+ * all we have until the physical memory mapping has been set up.
+ */
+static void *__ka(phys_addr_t paddr)
+{
+#ifdef CONFIG_X86_64
+	return (void *)(paddr + __START_KERNEL_map);
+#else
+	return __va(paddr);
+#endif
+}
+
+/* Convert a machine address to physical address */
+static unsigned long m2p(phys_addr_t maddr)
+{
+	phys_addr_t paddr;
+
+	maddr &= PTE_MASK;
+	paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
+
+	return paddr;
+}
+
+/* Convert a machine address to kernel virtual */
+static void *m2v(phys_addr_t maddr)
+{
+	return __ka(m2p(maddr));
+}
+
+#ifdef CONFIG_X86_64
+static void walk(pgd_t *pgd, unsigned long addr)
+{
+	unsigned l4idx = pgd_index(addr);
+	unsigned l3idx = pud_index(addr);
+	unsigned l2idx = pmd_index(addr);
+	unsigned l1idx = pte_index(addr);
+	pgd_t l4;
+	pud_t l3;
+	pmd_t l2;
+	pte_t l1;
+
+	xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
+		       pgd, addr, l4idx, l3idx, l2idx, l1idx);
+
+	l4 = pgd[l4idx];
+	xen_raw_printk("  l4: %016lx\n", l4.pgd);
+	xen_raw_printk("      %016lx\n", pgd_val(l4));
+
+	l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
+	xen_raw_printk("  l3: %016lx\n", l3.pud);
+	xen_raw_printk("      %016lx\n", pud_val(l3));
+
+	l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
+	xen_raw_printk("  l2: %016lx\n", l2.pmd);
+	xen_raw_printk("      %016lx\n", pmd_val(l2));
+
+	l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
+	xen_raw_printk("  l1: %016lx\n", l1.pte);
+	xen_raw_printk("      %016lx\n", pte_val(l1));
+}
+#endif
+
+static void set_page_prot(void *addr, pgprot_t prot)
+{
+	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
+	pte_t pte = pfn_pte(pfn, prot);
+
+	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
+		       addr, pfn, get_phys_to_machine(pfn),
+		       pgprot_val(prot), pte.pte);
+
+	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+		BUG();
+}
+
+static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+{
+	unsigned pmdidx, pteidx;
+	unsigned ident_pte;
+	unsigned long pfn;
+
+	ident_pte = 0;
+	pfn = 0;
+	for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
+		pte_t *pte_page;
+
+		/* Reuse or allocate a page of ptes */
+		if (pmd_present(pmd[pmdidx]))
+			pte_page = m2v(pmd[pmdidx].pmd);
+		else {
+			/* Check for free pte pages */
+			if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
+				break;
+
+			pte_page = &level1_ident_pgt[ident_pte];
+			ident_pte += PTRS_PER_PTE;
+
+			pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
+		}
+
+		/* Install mappings */
+		for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
+			pte_t pte;
+
+			if (pfn > max_pfn_mapped)
+				max_pfn_mapped = pfn;
+
+			if (!pte_none(pte_page[pteidx]))
+				continue;
+
+			pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
+			pte_page[pteidx] = pte;
+		}
+	}
+
+	for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
+		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+
+	set_page_prot(pmd, PAGE_KERNEL_RO);
+}
+
+#ifdef CONFIG_X86_64
+static void convert_pfn_mfn(void *v)
+{
+	pte_t *pte = v;
+	int i;
+
+	/* All levels are converted the same way, so just treat them
+	   as ptes. */
+	for(i = 0; i < PTRS_PER_PTE; i++)
+		pte[i] = xen_make_pte(pte[i].pte);
+}
+
+/*
+ * Set up the inital kernel pagetable.
+ *
+ * We can construct this by grafting the Xen provided pagetable into
+ * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
+ * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt.  This
+ * means that only the kernel has a physical mapping to start with -
+ * but that's enough to get __va working.  We need to fill in the rest
+ * of the physical mapping once some sort of allocator has been set
+ * up.
+ */
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+{
+	pud_t *l3;
+	pmd_t *l2;
+
+	/* Zap identity mapping */
+	init_level4_pgt[0] = __pgd(0);
+
+	/* Pre-constructed entries are in pfn, so convert to mfn */
+	convert_pfn_mfn(init_level4_pgt);
+	convert_pfn_mfn(level3_ident_pgt);
+	convert_pfn_mfn(level3_kernel_pgt);
+
+	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
+	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
+
+	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
+	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
+	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	/* Set up identity map */
+	xen_map_identity_early(level2_ident_pgt, max_pfn);
+
+	/* Make pagetable pieces RO */
+	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+
+	/* Pin down new L4 */
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
+
+	/* Unpin Xen-provided one */
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+	/* Switch over */
+	pgd = init_level4_pgt;
+
+	/*
+	 * At this stage there can be no user pgd, and no page
+	 * structure to attach it to, so make sure we just set kernel
+	 * pgd.
+	 */
+	xen_mc_batch();
+	__xen_write_cr3(true, __pa(pgd));
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+
+	reserve_early(__pa(xen_start_info->pt_base),
+		      __pa(xen_start_info->pt_base +
+			   xen_start_info->nr_pt_frames * PAGE_SIZE),
+		      "XEN PAGETABLES");
+
+	return pgd;
+}
+#else	/* !CONFIG_X86_64 */
+static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
+
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+{
+	pmd_t *kernel_pmd;
+
+	init_pg_tables_start = __pa(pgd);
+	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+
+	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+	memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	xen_map_identity_early(level2_kernel_pgt, max_pfn);
+
+	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
+			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+	set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+	xen_write_cr3(__pa(swapper_pg_dir));
+
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+
+	return swapper_pg_dir;
 }
+#endif	/* CONFIG_X86_64 */
 
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
@@ -1301,53 +1681,56 @@ asmlinkage void __init xen_start_kernel(void)
 
 	machine_ops = xen_machine_ops;
 
-#ifdef CONFIG_SMP
-	smp_ops = xen_smp_ops;
+#ifdef CONFIG_X86_64
+	/* Disable until direct per-cpu data access. */
+	have_vcpu_info_placement = 0;
+	x86_64_init_pda();
 #endif
 
+	xen_smp_init();
+
 	/* Get mfn list */
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
 		xen_build_dynamic_phys_to_machine();
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
-	init_pg_tables_start = __pa(pgd);
-	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-	max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
-
-	init_mm.pgd = pgd;	/* use the Xen pagetables to start */
-
-	/* keep using Xen gdt for now; no urgent need to change it */
-
-	x86_write_percpu(xen_cr3, __pa(pgd));
-	x86_write_percpu(xen_current_cr3, __pa(pgd));
+	/* Prevent unwanted bits from being set in PTEs. */
+	__supported_pte_mask &= ~_PAGE_GLOBAL;
+	if (!is_initial_xendomain())
+		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
 
+	xen_raw_console_write("mapping kernel into physical memory\n");
+	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+
+	init_mm.pgd = pgd;
+
+	/* keep using Xen gdt for now; no urgent need to change it */
+
 	pv_info.kernel_rpl = 1;
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
 		pv_info.kernel_rpl = 0;
 
-	/* Prevent unwanted bits from being set in PTEs. */
-	__supported_pte_mask &= ~_PAGE_GLOBAL;
-	if (!is_initial_xendomain())
-		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
-
 	/* set the limit of our address space */
 	xen_reserve_top();
 
+#ifdef CONFIG_X86_32
 	/* set up basic CPUID stuff */
 	cpu_detect(&new_cpu_data);
 	new_cpu_data.hard_math = 1;
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+#endif
 
 	/* Poke various useful things into boot_params */
 	boot_params.hdr.type_of_loader = (9 << 4) | 0;
 	boot_params.hdr.ramdisk_image = xen_start_info->mod_start
 		? __pa(xen_start_info->mod_start) : 0;
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
+	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
 	if (!is_initial_xendomain()) {
 		add_preferred_console("xenboot", 0, NULL);
@@ -1355,6 +1738,21 @@ asmlinkage void __init xen_start_kernel(void)
 		add_preferred_console("hvc", 0, NULL);
 	}
 
+	xen_raw_console_write("about to get started...\n");
+
+#if 0
+	xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
+		       &boot_params, __pa_symbol(&boot_params),
+		       __va(__pa_symbol(&boot_params)));
+
+	walk(pgd, &boot_params);
+	walk(pgd, __va(__pa(&boot_params)));
+#endif
+
 	/* Start the world */
+#ifdef CONFIG_X86_32
 	i386_start_kernel();
+#else
+	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
+#endif
 }