aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/xen/enlighten.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r--arch/x86/xen/enlighten.c697
1 files changed, 549 insertions, 148 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bb508456ef52..194bbd6e3241 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,7 @@
33#include <xen/interface/sched.h> 33#include <xen/interface/sched.h>
34#include <xen/features.h> 34#include <xen/features.h>
35#include <xen/page.h> 35#include <xen/page.h>
36#include <xen/hvc-console.h>
36 37
37#include <asm/paravirt.h> 38#include <asm/paravirt.h>
38#include <asm/page.h> 39#include <asm/page.h>
@@ -40,12 +41,12 @@
40#include <asm/xen/hypervisor.h> 41#include <asm/xen/hypervisor.h>
41#include <asm/fixmap.h> 42#include <asm/fixmap.h>
42#include <asm/processor.h> 43#include <asm/processor.h>
44#include <asm/msr-index.h>
43#include <asm/setup.h> 45#include <asm/setup.h>
44#include <asm/desc.h> 46#include <asm/desc.h>
45#include <asm/pgtable.h> 47#include <asm/pgtable.h>
46#include <asm/tlbflush.h> 48#include <asm/tlbflush.h>
47#include <asm/reboot.h> 49#include <asm/reboot.h>
48#include <asm/pgalloc.h>
49 50
50#include "xen-ops.h" 51#include "xen-ops.h"
51#include "mmu.h" 52#include "mmu.h"
@@ -57,6 +58,18 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
57DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); 58DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
58 59
59/* 60/*
61 * Identity map, in addition to plain kernel map. This needs to be
62 * large enough to allocate page table pages to allocate the rest.
63 * Each page can map 2MB.
64 */
65static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
66
67#ifdef CONFIG_X86_64
68/* l3 pud for userspace vsyscall mapping */
69static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
70#endif /* CONFIG_X86_64 */
71
72/*
60 * Note about cr3 (pagetable base) values: 73 * Note about cr3 (pagetable base) values:
61 * 74 *
62 * xen_cr3 contains the current logical cr3 value; it contains the 75 * xen_cr3 contains the current logical cr3 value; it contains the
@@ -167,10 +180,14 @@ void xen_vcpu_restore(void)
167 180
168static void __init xen_banner(void) 181static void __init xen_banner(void)
169{ 182{
183 unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
184 struct xen_extraversion extra;
185 HYPERVISOR_xen_version(XENVER_extraversion, &extra);
186
170 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 187 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
171 pv_info.name); 188 pv_info.name);
172 printk(KERN_INFO "Hypervisor signature: %s%s\n", 189 printk(KERN_INFO "Xen version: %d.%d%s%s\n",
173 xen_start_info->magic, 190 version >> 16, version & 0xffff, extra.extraversion,
174 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); 191 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
175} 192}
176 193
@@ -363,14 +380,6 @@ static void load_TLS_descriptor(struct thread_struct *t,
363 380
364static void xen_load_tls(struct thread_struct *t, unsigned int cpu) 381static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
365{ 382{
366 xen_mc_batch();
367
368 load_TLS_descriptor(t, cpu, 0);
369 load_TLS_descriptor(t, cpu, 1);
370 load_TLS_descriptor(t, cpu, 2);
371
372 xen_mc_issue(PARAVIRT_LAZY_CPU);
373
374 /* 383 /*
375 * XXX sleazy hack: If we're being called in a lazy-cpu zone, 384 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
376 * it means we're in a context switch, and %gs has just been 385 * it means we're in a context switch, and %gs has just been
@@ -379,10 +388,39 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
379 * Either way, it has been saved, and the new value will get 388 * Either way, it has been saved, and the new value will get
380 * loaded properly. This will go away as soon as Xen has been 389 * loaded properly. This will go away as soon as Xen has been
381 * modified to not save/restore %gs for normal hypercalls. 390 * modified to not save/restore %gs for normal hypercalls.
391 *
392 * On x86_64, this hack is not used for %gs, because gs points
393 * to KERNEL_GS_BASE (and uses it for PDA references), so we
394 * must not zero %gs on x86_64
395 *
396 * For x86_64, we need to zero %fs, otherwise we may get an
397 * exception between the new %fs descriptor being loaded and
398 * %fs being effectively cleared at __switch_to().
382 */ 399 */
383 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) 400 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
401#ifdef CONFIG_X86_32
384 loadsegment(gs, 0); 402 loadsegment(gs, 0);
403#else
404 loadsegment(fs, 0);
405#endif
406 }
407
408 xen_mc_batch();
409
410 load_TLS_descriptor(t, cpu, 0);
411 load_TLS_descriptor(t, cpu, 1);
412 load_TLS_descriptor(t, cpu, 2);
413
414 xen_mc_issue(PARAVIRT_LAZY_CPU);
415}
416
417#ifdef CONFIG_X86_64
418static void xen_load_gs_index(unsigned int idx)
419{
420 if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
421 BUG();
385} 422}
423#endif
386 424
387static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, 425static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
388 const void *ptr) 426 const void *ptr)
@@ -400,23 +438,18 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
400 preempt_enable(); 438 preempt_enable();
401} 439}
402 440
403static int cvt_gate_to_trap(int vector, u32 low, u32 high, 441static int cvt_gate_to_trap(int vector, const gate_desc *val,
404 struct trap_info *info) 442 struct trap_info *info)
405{ 443{
406 u8 type, dpl; 444 if (val->type != 0xf && val->type != 0xe)
407
408 type = (high >> 8) & 0x1f;
409 dpl = (high >> 13) & 3;
410
411 if (type != 0xf && type != 0xe)
412 return 0; 445 return 0;
413 446
414 info->vector = vector; 447 info->vector = vector;
415 info->address = (high & 0xffff0000) | (low & 0x0000ffff); 448 info->address = gate_offset(*val);
416 info->cs = low >> 16; 449 info->cs = gate_segment(*val);
417 info->flags = dpl; 450 info->flags = val->dpl;
418 /* interrupt gates clear IF */ 451 /* interrupt gates clear IF */
419 if (type == 0xe) 452 if (val->type == 0xe)
420 info->flags |= 4; 453 info->flags |= 4;
421 454
422 return 1; 455 return 1;
@@ -443,11 +476,10 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
443 476
444 if (p >= start && (p + 8) <= end) { 477 if (p >= start && (p + 8) <= end) {
445 struct trap_info info[2]; 478 struct trap_info info[2];
446 u32 *desc = (u32 *)g;
447 479
448 info[1].address = 0; 480 info[1].address = 0;
449 481
450 if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0])) 482 if (cvt_gate_to_trap(entrynum, g, &info[0]))
451 if (HYPERVISOR_set_trap_table(info)) 483 if (HYPERVISOR_set_trap_table(info))
452 BUG(); 484 BUG();
453 } 485 }
@@ -460,13 +492,13 @@ static void xen_convert_trap_info(const struct desc_ptr *desc,
460{ 492{
461 unsigned in, out, count; 493 unsigned in, out, count;
462 494
463 count = (desc->size+1) / 8; 495 count = (desc->size+1) / sizeof(gate_desc);
464 BUG_ON(count > 256); 496 BUG_ON(count > 256);
465 497
466 for (in = out = 0; in < count; in++) { 498 for (in = out = 0; in < count; in++) {
467 const u32 *entry = (u32 *)(desc->address + in * 8); 499 gate_desc *entry = (gate_desc*)(desc->address) + in;
468 500
469 if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out])) 501 if (cvt_gate_to_trap(in, entry, &traps[out]))
470 out++; 502 out++;
471 } 503 }
472 traps[out].address = 0; 504 traps[out].address = 0;
@@ -695,33 +727,89 @@ static void set_current_cr3(void *v)
695 x86_write_percpu(xen_current_cr3, (unsigned long)v); 727 x86_write_percpu(xen_current_cr3, (unsigned long)v);
696} 728}
697 729
698static void xen_write_cr3(unsigned long cr3) 730static void __xen_write_cr3(bool kernel, unsigned long cr3)
699{ 731{
700 struct mmuext_op *op; 732 struct mmuext_op *op;
701 struct multicall_space mcs; 733 struct multicall_space mcs;
702 unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); 734 unsigned long mfn;
703 735
704 BUG_ON(preemptible()); 736 if (cr3)
737 mfn = pfn_to_mfn(PFN_DOWN(cr3));
738 else
739 mfn = 0;
705 740
706 mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */ 741 WARN_ON(mfn == 0 && kernel);
707 742
708 /* Update while interrupts are disabled, so its atomic with 743 mcs = __xen_mc_entry(sizeof(*op));
709 respect to ipis */
710 x86_write_percpu(xen_cr3, cr3);
711 744
712 op = mcs.args; 745 op = mcs.args;
713 op->cmd = MMUEXT_NEW_BASEPTR; 746 op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
714 op->arg1.mfn = mfn; 747 op->arg1.mfn = mfn;
715 748
716 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 749 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
717 750
718 /* Update xen_update_cr3 once the batch has actually 751 if (kernel) {
719 been submitted. */ 752 x86_write_percpu(xen_cr3, cr3);
720 xen_mc_callback(set_current_cr3, (void *)cr3); 753
754 /* Update xen_current_cr3 once the batch has actually
755 been submitted. */
756 xen_mc_callback(set_current_cr3, (void *)cr3);
757 }
758}
759
760static void xen_write_cr3(unsigned long cr3)
761{
762 BUG_ON(preemptible());
763
764 xen_mc_batch(); /* disables interrupts */
765
766 /* Update while interrupts are disabled, so its atomic with
767 respect to ipis */
768 x86_write_percpu(xen_cr3, cr3);
769
770 __xen_write_cr3(true, cr3);
771
772#ifdef CONFIG_X86_64
773 {
774 pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
775 if (user_pgd)
776 __xen_write_cr3(false, __pa(user_pgd));
777 else
778 __xen_write_cr3(false, 0);
779 }
780#endif
721 781
722 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ 782 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
723} 783}
724 784
785static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
786{
787 int ret;
788
789 ret = 0;
790
791 switch(msr) {
792#ifdef CONFIG_X86_64
793 unsigned which;
794 u64 base;
795
796 case MSR_FS_BASE: which = SEGBASE_FS; goto set;
797 case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
798 case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
799
800 set:
801 base = ((u64)high << 32) | low;
802 if (HYPERVISOR_set_segment_base(which, base) != 0)
803 ret = -EFAULT;
804 break;
805#endif
806 default:
807 ret = native_write_msr_safe(msr, low, high);
808 }
809
810 return ret;
811}
812
725/* Early in boot, while setting up the initial pagetable, assume 813/* Early in boot, while setting up the initial pagetable, assume
726 everything is pinned. */ 814 everything is pinned. */
727static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) 815static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
@@ -778,6 +866,48 @@ static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
778 xen_alloc_ptpage(mm, pfn, PT_PMD); 866 xen_alloc_ptpage(mm, pfn, PT_PMD);
779} 867}
780 868
869static int xen_pgd_alloc(struct mm_struct *mm)
870{
871 pgd_t *pgd = mm->pgd;
872 int ret = 0;
873
874 BUG_ON(PagePinned(virt_to_page(pgd)));
875
876#ifdef CONFIG_X86_64
877 {
878 struct page *page = virt_to_page(pgd);
879 pgd_t *user_pgd;
880
881 BUG_ON(page->private != 0);
882
883 ret = -ENOMEM;
884
885 user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
886 page->private = (unsigned long)user_pgd;
887
888 if (user_pgd != NULL) {
889 user_pgd[pgd_index(VSYSCALL_START)] =
890 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
891 ret = 0;
892 }
893
894 BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
895 }
896#endif
897
898 return ret;
899}
900
901static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
902{
903#ifdef CONFIG_X86_64
904 pgd_t *user_pgd = xen_get_user_pgd(pgd);
905
906 if (user_pgd)
907 free_page((unsigned long)user_pgd);
908#endif
909}
910
781/* This should never happen until we're OK to use struct page */ 911/* This should never happen until we're OK to use struct page */
782static void xen_release_ptpage(u32 pfn, unsigned level) 912static void xen_release_ptpage(u32 pfn, unsigned level)
783{ 913{
@@ -803,6 +933,18 @@ static void xen_release_pmd(u32 pfn)
803 xen_release_ptpage(pfn, PT_PMD); 933 xen_release_ptpage(pfn, PT_PMD);
804} 934}
805 935
936#if PAGETABLE_LEVELS == 4
937static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
938{
939 xen_alloc_ptpage(mm, pfn, PT_PUD);
940}
941
942static void xen_release_pud(u32 pfn)
943{
944 xen_release_ptpage(pfn, PT_PUD);
945}
946#endif
947
806#ifdef CONFIG_HIGHPTE 948#ifdef CONFIG_HIGHPTE
807static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) 949static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
808{ 950{
@@ -841,68 +983,16 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
841 983
842static __init void xen_pagetable_setup_start(pgd_t *base) 984static __init void xen_pagetable_setup_start(pgd_t *base)
843{ 985{
844 pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
845 int i;
846
847 /* special set_pte for pagetable initialization */
848 pv_mmu_ops.set_pte = xen_set_pte_init;
849
850 init_mm.pgd = base;
851 /*
852 * copy top-level of Xen-supplied pagetable into place. This
853 * is a stand-in while we copy the pmd pages.
854 */
855 memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
856
857 /*
858 * For PAE, need to allocate new pmds, rather than
859 * share Xen's, since Xen doesn't like pmd's being
860 * shared between address spaces.
861 */
862 for (i = 0; i < PTRS_PER_PGD; i++) {
863 if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
864 pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
865
866 memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
867 PAGE_SIZE);
868
869 make_lowmem_page_readonly(pmd);
870
871 set_pgd(&base[i], __pgd(1 + __pa(pmd)));
872 } else
873 pgd_clear(&base[i]);
874 }
875
876 /* make sure zero_page is mapped RO so we can use it in pagetables */
877 make_lowmem_page_readonly(empty_zero_page);
878 make_lowmem_page_readonly(base);
879 /*
880 * Switch to new pagetable. This is done before
881 * pagetable_init has done anything so that the new pages
882 * added to the table can be prepared properly for Xen.
883 */
884 xen_write_cr3(__pa(base));
885
886 /* Unpin initial Xen pagetable */
887 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
888 PFN_DOWN(__pa(xen_start_info->pt_base)));
889} 986}
890 987
891void xen_setup_shared_info(void) 988void xen_setup_shared_info(void)
892{ 989{
893 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 990 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
894 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); 991 set_fixmap(FIX_PARAVIRT_BOOTMAP,
895 992 xen_start_info->shared_info);
896 /* 993
897 * Create a mapping for the shared info page. 994 HYPERVISOR_shared_info =
898 * Should be set_fixmap(), but shared_info is a machine 995 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
899 * address with no corresponding pseudo-phys address.
900 */
901 set_pte_mfn(addr,
902 PFN_DOWN(xen_start_info->shared_info),
903 PAGE_KERNEL);
904
905 HYPERVISOR_shared_info = (struct shared_info *)addr;
906 } else 996 } else
907 HYPERVISOR_shared_info = 997 HYPERVISOR_shared_info =
908 (struct shared_info *)__va(xen_start_info->shared_info); 998 (struct shared_info *)__va(xen_start_info->shared_info);
@@ -917,26 +1007,32 @@ void xen_setup_shared_info(void)
917 1007
918static __init void xen_pagetable_setup_done(pgd_t *base) 1008static __init void xen_pagetable_setup_done(pgd_t *base)
919{ 1009{
920 /* This will work as long as patching hasn't happened yet
921 (which it hasn't) */
922 pv_mmu_ops.alloc_pte = xen_alloc_pte;
923 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
924 pv_mmu_ops.release_pte = xen_release_pte;
925 pv_mmu_ops.release_pmd = xen_release_pmd;
926 pv_mmu_ops.set_pte = xen_set_pte;
927
928 xen_setup_shared_info(); 1010 xen_setup_shared_info();
929
930 /* Actually pin the pagetable down, but we can't set PG_pinned
931 yet because the page structures don't exist yet. */
932 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
933} 1011}
934 1012
935static __init void xen_post_allocator_init(void) 1013static __init void xen_post_allocator_init(void)
936{ 1014{
1015 pv_mmu_ops.set_pte = xen_set_pte;
937 pv_mmu_ops.set_pmd = xen_set_pmd; 1016 pv_mmu_ops.set_pmd = xen_set_pmd;
938 pv_mmu_ops.set_pud = xen_set_pud; 1017 pv_mmu_ops.set_pud = xen_set_pud;
1018#if PAGETABLE_LEVELS == 4
1019 pv_mmu_ops.set_pgd = xen_set_pgd;
1020#endif
1021
1022 /* This will work as long as patching hasn't happened yet
1023 (which it hasn't) */
1024 pv_mmu_ops.alloc_pte = xen_alloc_pte;
1025 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
1026 pv_mmu_ops.release_pte = xen_release_pte;
1027 pv_mmu_ops.release_pmd = xen_release_pmd;
1028#if PAGETABLE_LEVELS == 4
1029 pv_mmu_ops.alloc_pud = xen_alloc_pud;
1030 pv_mmu_ops.release_pud = xen_release_pud;
1031#endif
939 1032
1033#ifdef CONFIG_X86_64
1034 SetPagePinned(virt_to_page(level3_user_vsyscall));
1035#endif
940 xen_mark_init_mm_pinned(); 1036 xen_mark_init_mm_pinned();
941} 1037}
942 1038
@@ -950,6 +1046,7 @@ void xen_setup_vcpu_info_placement(void)
950 1046
951 /* xen_vcpu_setup managed to place the vcpu_info within the 1047 /* xen_vcpu_setup managed to place the vcpu_info within the
952 percpu area for all cpus, so make use of it */ 1048 percpu area for all cpus, so make use of it */
1049#ifdef CONFIG_X86_32
953 if (have_vcpu_info_placement) { 1050 if (have_vcpu_info_placement) {
954 printk(KERN_INFO "Xen: using vcpu_info placement\n"); 1051 printk(KERN_INFO "Xen: using vcpu_info placement\n");
955 1052
@@ -959,6 +1056,7 @@ void xen_setup_vcpu_info_placement(void)
959 pv_irq_ops.irq_enable = xen_irq_enable_direct; 1056 pv_irq_ops.irq_enable = xen_irq_enable_direct;
960 pv_mmu_ops.read_cr2 = xen_read_cr2_direct; 1057 pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
961 } 1058 }
1059#endif
962} 1060}
963 1061
964static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, 1062static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
@@ -979,10 +1077,12 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
979 goto patch_site 1077 goto patch_site
980 1078
981 switch (type) { 1079 switch (type) {
1080#ifdef CONFIG_X86_32
982 SITE(pv_irq_ops, irq_enable); 1081 SITE(pv_irq_ops, irq_enable);
983 SITE(pv_irq_ops, irq_disable); 1082 SITE(pv_irq_ops, irq_disable);
984 SITE(pv_irq_ops, save_fl); 1083 SITE(pv_irq_ops, save_fl);
985 SITE(pv_irq_ops, restore_fl); 1084 SITE(pv_irq_ops, restore_fl);
1085#endif /* CONFIG_X86_32 */
986#undef SITE 1086#undef SITE
987 1087
988 patch_site: 1088 patch_site:
@@ -1025,8 +1125,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
1025#ifdef CONFIG_X86_F00F_BUG 1125#ifdef CONFIG_X86_F00F_BUG
1026 case FIX_F00F_IDT: 1126 case FIX_F00F_IDT:
1027#endif 1127#endif
1128#ifdef CONFIG_X86_32
1028 case FIX_WP_TEST: 1129 case FIX_WP_TEST:
1029 case FIX_VDSO: 1130 case FIX_VDSO:
1131# ifdef CONFIG_HIGHMEM
1132 case FIX_KMAP_BEGIN ... FIX_KMAP_END:
1133# endif
1134#else
1135 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
1136#endif
1030#ifdef CONFIG_X86_LOCAL_APIC 1137#ifdef CONFIG_X86_LOCAL_APIC
1031 case FIX_APIC_BASE: /* maps dummy local APIC */ 1138 case FIX_APIC_BASE: /* maps dummy local APIC */
1032#endif 1139#endif
@@ -1039,6 +1146,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
1039 } 1146 }
1040 1147
1041 __native_set_fixmap(idx, pte); 1148 __native_set_fixmap(idx, pte);
1149
1150#ifdef CONFIG_X86_64
1151 /* Replicate changes to map the vsyscall page into the user
1152 pagetable vsyscall mapping. */
1153 if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
1154 unsigned long vaddr = __fix_to_virt(idx);
1155 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
1156 }
1157#endif
1042} 1158}
1043 1159
1044static const struct pv_info xen_info __initdata = { 1160static const struct pv_info xen_info __initdata = {
@@ -1084,18 +1200,25 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
1084 .wbinvd = native_wbinvd, 1200 .wbinvd = native_wbinvd,
1085 1201
1086 .read_msr = native_read_msr_safe, 1202 .read_msr = native_read_msr_safe,
1087 .write_msr = native_write_msr_safe, 1203 .write_msr = xen_write_msr_safe,
1088 .read_tsc = native_read_tsc, 1204 .read_tsc = native_read_tsc,
1089 .read_pmc = native_read_pmc, 1205 .read_pmc = native_read_pmc,
1090 1206
1091 .iret = xen_iret, 1207 .iret = xen_iret,
1092 .irq_enable_sysexit = xen_sysexit, 1208 .irq_enable_sysexit = xen_sysexit,
1209#ifdef CONFIG_X86_64
1210 .usergs_sysret32 = xen_sysret32,
1211 .usergs_sysret64 = xen_sysret64,
1212#endif
1093 1213
1094 .load_tr_desc = paravirt_nop, 1214 .load_tr_desc = paravirt_nop,
1095 .set_ldt = xen_set_ldt, 1215 .set_ldt = xen_set_ldt,
1096 .load_gdt = xen_load_gdt, 1216 .load_gdt = xen_load_gdt,
1097 .load_idt = xen_load_idt, 1217 .load_idt = xen_load_idt,
1098 .load_tls = xen_load_tls, 1218 .load_tls = xen_load_tls,
1219#ifdef CONFIG_X86_64
1220 .load_gs_index = xen_load_gs_index,
1221#endif
1099 1222
1100 .store_gdt = native_store_gdt, 1223 .store_gdt = native_store_gdt,
1101 .store_idt = native_store_idt, 1224 .store_idt = native_store_idt,
@@ -1109,14 +1232,34 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
1109 .set_iopl_mask = xen_set_iopl_mask, 1232 .set_iopl_mask = xen_set_iopl_mask,
1110 .io_delay = xen_io_delay, 1233 .io_delay = xen_io_delay,
1111 1234
1235 /* Xen takes care of %gs when switching to usermode for us */
1236 .swapgs = paravirt_nop,
1237
1112 .lazy_mode = { 1238 .lazy_mode = {
1113 .enter = paravirt_enter_lazy_cpu, 1239 .enter = paravirt_enter_lazy_cpu,
1114 .leave = xen_leave_lazy, 1240 .leave = xen_leave_lazy,
1115 }, 1241 },
1116}; 1242};
1117 1243
1244static void __init __xen_init_IRQ(void)
1245{
1246#ifdef CONFIG_X86_64
1247 int i;
1248
1249 /* Create identity vector->irq map */
1250 for(i = 0; i < NR_VECTORS; i++) {
1251 int cpu;
1252
1253 for_each_possible_cpu(cpu)
1254 per_cpu(vector_irq, cpu)[i] = i;
1255 }
1256#endif /* CONFIG_X86_64 */
1257
1258 xen_init_IRQ();
1259}
1260
1118static const struct pv_irq_ops xen_irq_ops __initdata = { 1261static const struct pv_irq_ops xen_irq_ops __initdata = {
1119 .init_IRQ = xen_init_IRQ, 1262 .init_IRQ = __xen_init_IRQ,
1120 .save_fl = xen_save_fl, 1263 .save_fl = xen_save_fl,
1121 .restore_fl = xen_restore_fl, 1264 .restore_fl = xen_restore_fl,
1122 .irq_disable = xen_irq_disable, 1265 .irq_disable = xen_irq_disable,
@@ -1124,14 +1267,13 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
1124 .safe_halt = xen_safe_halt, 1267 .safe_halt = xen_safe_halt,
1125 .halt = xen_halt, 1268 .halt = xen_halt,
1126#ifdef CONFIG_X86_64 1269#ifdef CONFIG_X86_64
1127 .adjust_exception_frame = paravirt_nop, 1270 .adjust_exception_frame = xen_adjust_exception_frame,
1128#endif 1271#endif
1129}; 1272};
1130 1273
1131static const struct pv_apic_ops xen_apic_ops __initdata = { 1274static const struct pv_apic_ops xen_apic_ops __initdata = {
1132#ifdef CONFIG_X86_LOCAL_APIC 1275#ifdef CONFIG_X86_LOCAL_APIC
1133 .apic_write = xen_apic_write, 1276 .apic_write = xen_apic_write,
1134 .apic_write_atomic = xen_apic_write,
1135 .apic_read = xen_apic_read, 1277 .apic_read = xen_apic_read,
1136 .setup_boot_clock = paravirt_nop, 1278 .setup_boot_clock = paravirt_nop,
1137 .setup_secondary_clock = paravirt_nop, 1279 .setup_secondary_clock = paravirt_nop,
@@ -1157,8 +1299,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1157 .pte_update = paravirt_nop, 1299 .pte_update = paravirt_nop,
1158 .pte_update_defer = paravirt_nop, 1300 .pte_update_defer = paravirt_nop,
1159 1301
1160 .pgd_alloc = __paravirt_pgd_alloc, 1302 .pgd_alloc = xen_pgd_alloc,
1161 .pgd_free = paravirt_nop, 1303 .pgd_free = xen_pgd_free,
1162 1304
1163 .alloc_pte = xen_alloc_pte_init, 1305 .alloc_pte = xen_alloc_pte_init,
1164 .release_pte = xen_release_pte_init, 1306 .release_pte = xen_release_pte_init,
@@ -1170,7 +1312,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1170 .kmap_atomic_pte = xen_kmap_atomic_pte, 1312 .kmap_atomic_pte = xen_kmap_atomic_pte,
1171#endif 1313#endif
1172 1314
1173 .set_pte = NULL, /* see xen_pagetable_setup_* */ 1315#ifdef CONFIG_X86_64
1316 .set_pte = xen_set_pte,
1317#else
1318 .set_pte = xen_set_pte_init,
1319#endif
1174 .set_pte_at = xen_set_pte_at, 1320 .set_pte_at = xen_set_pte_at,
1175 .set_pmd = xen_set_pmd_hyper, 1321 .set_pmd = xen_set_pmd_hyper,
1176 1322
@@ -1184,15 +1330,26 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1184 .make_pte = xen_make_pte, 1330 .make_pte = xen_make_pte,
1185 .make_pgd = xen_make_pgd, 1331 .make_pgd = xen_make_pgd,
1186 1332
1333#ifdef CONFIG_X86_PAE
1187 .set_pte_atomic = xen_set_pte_atomic, 1334 .set_pte_atomic = xen_set_pte_atomic,
1188 .set_pte_present = xen_set_pte_at, 1335 .set_pte_present = xen_set_pte_at,
1189 .set_pud = xen_set_pud_hyper,
1190 .pte_clear = xen_pte_clear, 1336 .pte_clear = xen_pte_clear,
1191 .pmd_clear = xen_pmd_clear, 1337 .pmd_clear = xen_pmd_clear,
1338#endif /* CONFIG_X86_PAE */
1339 .set_pud = xen_set_pud_hyper,
1192 1340
1193 .make_pmd = xen_make_pmd, 1341 .make_pmd = xen_make_pmd,
1194 .pmd_val = xen_pmd_val, 1342 .pmd_val = xen_pmd_val,
1195 1343
1344#if PAGETABLE_LEVELS == 4
1345 .pud_val = xen_pud_val,
1346 .make_pud = xen_make_pud,
1347 .set_pgd = xen_set_pgd_hyper,
1348
1349 .alloc_pud = xen_alloc_pte_init,
1350 .release_pud = xen_release_pte_init,
1351#endif /* PAGETABLE_LEVELS == 4 */
1352
1196 .activate_mm = xen_activate_mm, 1353 .activate_mm = xen_activate_mm,
1197 .dup_mmap = xen_dup_mmap, 1354 .dup_mmap = xen_dup_mmap,
1198 .exit_mmap = xen_exit_mmap, 1355 .exit_mmap = xen_exit_mmap,
@@ -1205,21 +1362,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1205 .set_fixmap = xen_set_fixmap, 1362 .set_fixmap = xen_set_fixmap,
1206}; 1363};
1207 1364
1208#ifdef CONFIG_SMP
1209static const struct smp_ops xen_smp_ops __initdata = {
1210 .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
1211 .smp_prepare_cpus = xen_smp_prepare_cpus,
1212 .cpu_up = xen_cpu_up,
1213 .smp_cpus_done = xen_smp_cpus_done,
1214
1215 .smp_send_stop = xen_smp_send_stop,
1216 .smp_send_reschedule = xen_smp_send_reschedule,
1217
1218 .send_call_func_ipi = xen_smp_send_call_function_ipi,
1219 .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
1220};
1221#endif /* CONFIG_SMP */
1222
1223static void xen_reboot(int reason) 1365static void xen_reboot(int reason)
1224{ 1366{
1225 struct sched_shutdown r = { .reason = reason }; 1367 struct sched_shutdown r = { .reason = reason };
@@ -1264,6 +1406,7 @@ static const struct machine_ops __initdata xen_machine_ops = {
1264 1406
1265static void __init xen_reserve_top(void) 1407static void __init xen_reserve_top(void)
1266{ 1408{
1409#ifdef CONFIG_X86_32
1267 unsigned long top = HYPERVISOR_VIRT_START; 1410 unsigned long top = HYPERVISOR_VIRT_START;
1268 struct xen_platform_parameters pp; 1411 struct xen_platform_parameters pp;
1269 1412
@@ -1271,8 +1414,248 @@ static void __init xen_reserve_top(void)
1271 top = pp.virt_start; 1414 top = pp.virt_start;
1272 1415
1273 reserve_top_address(-top + 2 * PAGE_SIZE); 1416 reserve_top_address(-top + 2 * PAGE_SIZE);
1417#endif /* CONFIG_X86_32 */
1418}
1419
1420/*
1421 * Like __va(), but returns address in the kernel mapping (which is
1422 * all we have until the physical memory mapping has been set up.
1423 */
1424static void *__ka(phys_addr_t paddr)
1425{
1426#ifdef CONFIG_X86_64
1427 return (void *)(paddr + __START_KERNEL_map);
1428#else
1429 return __va(paddr);
1430#endif
1274} 1431}
1275 1432
1433/* Convert a machine address to physical address */
1434static unsigned long m2p(phys_addr_t maddr)
1435{
1436 phys_addr_t paddr;
1437
1438 maddr &= PTE_MASK;
1439 paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
1440
1441 return paddr;
1442}
1443
1444/* Convert a machine address to kernel virtual */
1445static void *m2v(phys_addr_t maddr)
1446{
1447 return __ka(m2p(maddr));
1448}
1449
1450#ifdef CONFIG_X86_64
1451static void walk(pgd_t *pgd, unsigned long addr)
1452{
1453 unsigned l4idx = pgd_index(addr);
1454 unsigned l3idx = pud_index(addr);
1455 unsigned l2idx = pmd_index(addr);
1456 unsigned l1idx = pte_index(addr);
1457 pgd_t l4;
1458 pud_t l3;
1459 pmd_t l2;
1460 pte_t l1;
1461
1462 xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
1463 pgd, addr, l4idx, l3idx, l2idx, l1idx);
1464
1465 l4 = pgd[l4idx];
1466 xen_raw_printk(" l4: %016lx\n", l4.pgd);
1467 xen_raw_printk(" %016lx\n", pgd_val(l4));
1468
1469 l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
1470 xen_raw_printk(" l3: %016lx\n", l3.pud);
1471 xen_raw_printk(" %016lx\n", pud_val(l3));
1472
1473 l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
1474 xen_raw_printk(" l2: %016lx\n", l2.pmd);
1475 xen_raw_printk(" %016lx\n", pmd_val(l2));
1476
1477 l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
1478 xen_raw_printk(" l1: %016lx\n", l1.pte);
1479 xen_raw_printk(" %016lx\n", pte_val(l1));
1480}
1481#endif
1482
1483static void set_page_prot(void *addr, pgprot_t prot)
1484{
1485 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1486 pte_t pte = pfn_pte(pfn, prot);
1487
1488 xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
1489 addr, pfn, get_phys_to_machine(pfn),
1490 pgprot_val(prot), pte.pte);
1491
1492 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
1493 BUG();
1494}
1495
1496static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1497{
1498 unsigned pmdidx, pteidx;
1499 unsigned ident_pte;
1500 unsigned long pfn;
1501
1502 ident_pte = 0;
1503 pfn = 0;
1504 for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
1505 pte_t *pte_page;
1506
1507 /* Reuse or allocate a page of ptes */
1508 if (pmd_present(pmd[pmdidx]))
1509 pte_page = m2v(pmd[pmdidx].pmd);
1510 else {
1511 /* Check for free pte pages */
1512 if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
1513 break;
1514
1515 pte_page = &level1_ident_pgt[ident_pte];
1516 ident_pte += PTRS_PER_PTE;
1517
1518 pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
1519 }
1520
1521 /* Install mappings */
1522 for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1523 pte_t pte;
1524
1525 if (pfn > max_pfn_mapped)
1526 max_pfn_mapped = pfn;
1527
1528 if (!pte_none(pte_page[pteidx]))
1529 continue;
1530
1531 pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
1532 pte_page[pteidx] = pte;
1533 }
1534 }
1535
1536 for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
1537 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
1538
1539 set_page_prot(pmd, PAGE_KERNEL_RO);
1540}
1541
1542#ifdef CONFIG_X86_64
1543static void convert_pfn_mfn(void *v)
1544{
1545 pte_t *pte = v;
1546 int i;
1547
1548 /* All levels are converted the same way, so just treat them
1549 as ptes. */
1550 for(i = 0; i < PTRS_PER_PTE; i++)
1551 pte[i] = xen_make_pte(pte[i].pte);
1552}
1553
1554/*
1555 * Set up the inital kernel pagetable.
1556 *
1557 * We can construct this by grafting the Xen provided pagetable into
1558 * head_64.S's preconstructed pagetables. We copy the Xen L2's into
1559 * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
1560 * means that only the kernel has a physical mapping to start with -
1561 * but that's enough to get __va working. We need to fill in the rest
1562 * of the physical mapping once some sort of allocator has been set
1563 * up.
1564 */
1565static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1566{
1567 pud_t *l3;
1568 pmd_t *l2;
1569
1570 /* Zap identity mapping */
1571 init_level4_pgt[0] = __pgd(0);
1572
1573 /* Pre-constructed entries are in pfn, so convert to mfn */
1574 convert_pfn_mfn(init_level4_pgt);
1575 convert_pfn_mfn(level3_ident_pgt);
1576 convert_pfn_mfn(level3_kernel_pgt);
1577
1578 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1579 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1580
1581 memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
1582 memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
1583
1584 l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
1585 l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
1586 memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
1587
1588 /* Set up identity map */
1589 xen_map_identity_early(level2_ident_pgt, max_pfn);
1590
1591 /* Make pagetable pieces RO */
1592 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1593 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1594 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1595 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1596 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1597 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1598
1599 /* Pin down new L4 */
1600 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1601 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1602
1603 /* Unpin Xen-provided one */
1604 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1605
1606 /* Switch over */
1607 pgd = init_level4_pgt;
1608
1609 /*
1610 * At this stage there can be no user pgd, and no page
1611 * structure to attach it to, so make sure we just set kernel
1612 * pgd.
1613 */
1614 xen_mc_batch();
1615 __xen_write_cr3(true, __pa(pgd));
1616 xen_mc_issue(PARAVIRT_LAZY_CPU);
1617
1618 reserve_early(__pa(xen_start_info->pt_base),
1619 __pa(xen_start_info->pt_base +
1620 xen_start_info->nr_pt_frames * PAGE_SIZE),
1621 "XEN PAGETABLES");
1622
1623 return pgd;
1624}
1625#else /* !CONFIG_X86_64 */
1626static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
1627
1628static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1629{
1630 pmd_t *kernel_pmd;
1631
1632 init_pg_tables_start = __pa(pgd);
1633 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
1634 max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
1635
1636 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1637 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
1638
1639 xen_map_identity_early(level2_kernel_pgt, max_pfn);
1640
1641 memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
1642 set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
1643 __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
1644
1645 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1646 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
1647 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
1648
1649 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1650
1651 xen_write_cr3(__pa(swapper_pg_dir));
1652
1653 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
1654
1655 return swapper_pg_dir;
1656}
1657#endif /* CONFIG_X86_64 */
1658
1276/* First C function to be called on Xen boot */ 1659/* First C function to be called on Xen boot */
1277asmlinkage void __init xen_start_kernel(void) 1660asmlinkage void __init xen_start_kernel(void)
1278{ 1661{
@@ -1301,53 +1684,56 @@ asmlinkage void __init xen_start_kernel(void)
1301 1684
1302 machine_ops = xen_machine_ops; 1685 machine_ops = xen_machine_ops;
1303 1686
1304#ifdef CONFIG_SMP 1687#ifdef CONFIG_X86_64
1305 smp_ops = xen_smp_ops; 1688 /* Disable until direct per-cpu data access. */
1689 have_vcpu_info_placement = 0;
1690 x86_64_init_pda();
1306#endif 1691#endif
1307 1692
1693 xen_smp_init();
1694
1308 /* Get mfn list */ 1695 /* Get mfn list */
1309 if (!xen_feature(XENFEAT_auto_translated_physmap)) 1696 if (!xen_feature(XENFEAT_auto_translated_physmap))
1310 xen_build_dynamic_phys_to_machine(); 1697 xen_build_dynamic_phys_to_machine();
1311 1698
1312 pgd = (pgd_t *)xen_start_info->pt_base; 1699 pgd = (pgd_t *)xen_start_info->pt_base;
1313 1700
1314 init_pg_tables_start = __pa(pgd); 1701 /* Prevent unwanted bits from being set in PTEs. */
1315 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; 1702 __supported_pte_mask &= ~_PAGE_GLOBAL;
1316 max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT; 1703 if (!is_initial_xendomain())
1317 1704 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1318 init_mm.pgd = pgd; /* use the Xen pagetables to start */
1319
1320 /* keep using Xen gdt for now; no urgent need to change it */
1321
1322 x86_write_percpu(xen_cr3, __pa(pgd));
1323 x86_write_percpu(xen_current_cr3, __pa(pgd));
1324 1705
1325 /* Don't do the full vcpu_info placement stuff until we have a 1706 /* Don't do the full vcpu_info placement stuff until we have a
1326 possible map and a non-dummy shared_info. */ 1707 possible map and a non-dummy shared_info. */
1327 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 1708 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
1328 1709
1710 xen_raw_console_write("mapping kernel into physical memory\n");
1711 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
1712
1713 init_mm.pgd = pgd;
1714
1715 /* keep using Xen gdt for now; no urgent need to change it */
1716
1329 pv_info.kernel_rpl = 1; 1717 pv_info.kernel_rpl = 1;
1330 if (xen_feature(XENFEAT_supervisor_mode_kernel)) 1718 if (xen_feature(XENFEAT_supervisor_mode_kernel))
1331 pv_info.kernel_rpl = 0; 1719 pv_info.kernel_rpl = 0;
1332 1720
1333 /* Prevent unwanted bits from being set in PTEs. */
1334 __supported_pte_mask &= ~_PAGE_GLOBAL;
1335 if (!is_initial_xendomain())
1336 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1337
1338 /* set the limit of our address space */ 1721 /* set the limit of our address space */
1339 xen_reserve_top(); 1722 xen_reserve_top();
1340 1723
1724#ifdef CONFIG_X86_32
1341 /* set up basic CPUID stuff */ 1725 /* set up basic CPUID stuff */
1342 cpu_detect(&new_cpu_data); 1726 cpu_detect(&new_cpu_data);
1343 new_cpu_data.hard_math = 1; 1727 new_cpu_data.hard_math = 1;
1344 new_cpu_data.x86_capability[0] = cpuid_edx(1); 1728 new_cpu_data.x86_capability[0] = cpuid_edx(1);
1729#endif
1345 1730
1346 /* Poke various useful things into boot_params */ 1731 /* Poke various useful things into boot_params */
1347 boot_params.hdr.type_of_loader = (9 << 4) | 0; 1732 boot_params.hdr.type_of_loader = (9 << 4) | 0;
1348 boot_params.hdr.ramdisk_image = xen_start_info->mod_start 1733 boot_params.hdr.ramdisk_image = xen_start_info->mod_start
1349 ? __pa(xen_start_info->mod_start) : 0; 1734 ? __pa(xen_start_info->mod_start) : 0;
1350 boot_params.hdr.ramdisk_size = xen_start_info->mod_len; 1735 boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
1736 boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
1351 1737
1352 if (!is_initial_xendomain()) { 1738 if (!is_initial_xendomain()) {
1353 add_preferred_console("xenboot", 0, NULL); 1739 add_preferred_console("xenboot", 0, NULL);
@@ -1355,6 +1741,21 @@ asmlinkage void __init xen_start_kernel(void)
1355 add_preferred_console("hvc", 0, NULL); 1741 add_preferred_console("hvc", 0, NULL);
1356 } 1742 }
1357 1743
1744 xen_raw_console_write("about to get started...\n");
1745
1746#if 0
1747 xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
1748 &boot_params, __pa_symbol(&boot_params),
1749 __va(__pa_symbol(&boot_params)));
1750
1751 walk(pgd, &boot_params);
1752 walk(pgd, __va(__pa(&boot_params)));
1753#endif
1754
1358 /* Start the world */ 1755 /* Start the world */
1756#ifdef CONFIG_X86_32
1359 i386_start_kernel(); 1757 i386_start_kernel();
1758#else
1759 x86_64_start_reservations((char *)__pa_symbol(&boot_params));
1760#endif
1360} 1761}