author    Ingo Molnar <mingo@elte.hu>    2008-07-22 03:06:21 -0400
committer Ingo Molnar <mingo@elte.hu>    2008-07-22 03:06:21 -0400
commit    76c3bb15d6786a0b8da0ad0090e0c9c3672fc08b (patch)
tree      3824e008db9d554229a70c85fbbc13238276bd7a /arch/x86/xen/enlighten.c
parent    7be42004065ce4df193aeef5befd26805267d0d9 (diff)
parent    93ded9b8fd42abe2c3607097963d8de6ad9117eb (diff)
Merge branch 'linus' into x86/x2apic
Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r--	arch/x86/xen/enlighten.c | 696
1 file changed, 549 insertions(+), 147 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e4d1459a63df..c910345860c3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,7 @@
 #include <xen/interface/sched.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/hvc-console.h>
 
 #include <asm/paravirt.h>
 #include <asm/apic.h>
@@ -41,12 +42,12 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
+#include <asm/msr-index.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
-#include <asm/pgalloc.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -58,6 +59,18 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
 /*
+ * Identity map, in addition to plain kernel map.  This needs to be
+ * large enough to allocate page table pages to allocate the rest.
+ * Each page can map 2MB.
+ */
+static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+
+#ifdef CONFIG_X86_64
+/* l3 pud for userspace vsyscall mapping */
+static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
+#endif /* CONFIG_X86_64 */
+
+/*
  * Note about cr3 (pagetable base) values:
  *
  * xen_cr3 contains the current logical cr3 value; it contains the
@@ -168,10 +181,14 @@ void xen_vcpu_restore(void)
 
 static void __init xen_banner(void)
 {
+	unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	struct xen_extraversion extra;
+	HYPERVISOR_xen_version(XENVER_extraversion, &extra);
+
 	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
 	       pv_info.name);
-	printk(KERN_INFO "Hypervisor signature: %s%s\n",
-	       xen_start_info->magic,
+	printk(KERN_INFO "Xen version: %d.%d%s%s\n",
+	       version >> 16, version & 0xffff, extra.extraversion,
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
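For reference, the word returned by the XENVER_version hypercall packs the hypervisor's major release in its upper 16 bits and the minor release in its lower 16 bits, which is why the new banner prints "version >> 16" and "version & 0xffff". A minimal illustration with a made-up value (not taken from this patch):

	/* A return value of 0x00030002 would be reported as Xen 3.2 */
	unsigned version = 0x00030002;
	printk(KERN_INFO "Xen version: %d.%d\n", version >> 16, version & 0xffff);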
@@ -364,14 +381,6 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	xen_mc_batch();
-
-	load_TLS_descriptor(t, cpu, 0);
-	load_TLS_descriptor(t, cpu, 1);
-	load_TLS_descriptor(t, cpu, 2);
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-
 	/*
 	 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
 	 * it means we're in a context switch, and %gs has just been
@@ -380,10 +389,39 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 	 * Either way, it has been saved, and the new value will get
 	 * loaded properly.  This will go away as soon as Xen has been
 	 * modified to not save/restore %gs for normal hypercalls.
+	 *
+	 * On x86_64, this hack is not used for %gs, because gs points
+	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
+	 * must not zero %gs on x86_64
+	 *
+	 * For x86_64, we need to zero %fs, otherwise we may get an
+	 * exception between the new %fs descriptor being loaded and
+	 * %fs being effectively cleared at __switch_to().
 	 */
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+#ifdef CONFIG_X86_32
 		loadsegment(gs, 0);
+#else
+		loadsegment(fs, 0);
+#endif
+	}
+
+	xen_mc_batch();
+
+	load_TLS_descriptor(t, cpu, 0);
+	load_TLS_descriptor(t, cpu, 1);
+	load_TLS_descriptor(t, cpu, 2);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+#ifdef CONFIG_X86_64
+static void xen_load_gs_index(unsigned int idx)
+{
+	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
+		BUG();
 }
+#endif
 
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
@@ -401,23 +439,18 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 	preempt_enable();
 }
 
-static int cvt_gate_to_trap(int vector, u32 low, u32 high,
+static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
-	u8 type, dpl;
-
-	type = (high >> 8) & 0x1f;
-	dpl = (high >> 13) & 3;
-
-	if (type != 0xf && type != 0xe)
+	if (val->type != 0xf && val->type != 0xe)
 		return 0;
 
 	info->vector = vector;
-	info->address = (high & 0xffff0000) | (low & 0x0000ffff);
-	info->cs = low >> 16;
-	info->flags = dpl;
+	info->address = gate_offset(*val);
+	info->cs = gate_segment(*val);
+	info->flags = val->dpl;
 	/* interrupt gates clear IF */
-	if (type == 0xe)
+	if (val->type == 0xe)
 		info->flags |= 4;
 
 	return 1;
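For context, the two raw words the old code decoded lay out a 32-bit IDT gate with the selector in the top half of the low word, the handler offset split between the low half of the low word and the top half of the high word, and the type/DPL bits in the middle of the high word. A rough standalone sketch of that decode, equivalent to the lines removed above (the struct and function names here are illustrative only):

	struct legacy_gate_fields {
		unsigned long offset;
		unsigned short cs;
		unsigned char type, dpl;
	};

	static void decode_gate32(u32 low, u32 high, struct legacy_gate_fields *g)
	{
		g->offset = (high & 0xffff0000) | (low & 0x0000ffff);	/* handler address */
		g->cs     = low >> 16;					/* code segment selector */
		g->type   = (high >> 8) & 0x1f;				/* gate type */
		g->dpl    = (high >> 13) & 3;				/* privilege level */
	}

Switching to the gate_desc accessors lets the same conversion also cover 64-bit gates, where the offset no longer fits into two 16-bit halves.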
@@ -444,11 +477,10 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
 
 	if (p >= start && (p + 8) <= end) {
 		struct trap_info info[2];
-		u32 *desc = (u32 *)g;
 
 		info[1].address = 0;
 
-		if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0]))
+		if (cvt_gate_to_trap(entrynum, g, &info[0]))
 			if (HYPERVISOR_set_trap_table(info))
 				BUG();
 	}
@@ -461,13 +493,13 @@ static void xen_convert_trap_info(const struct desc_ptr *desc,
 {
 	unsigned in, out, count;
 
-	count = (desc->size+1) / 8;
+	count = (desc->size+1) / sizeof(gate_desc);
 	BUG_ON(count > 256);
 
 	for (in = out = 0; in < count; in++) {
-		const u32 *entry = (u32 *)(desc->address + in * 8);
+		gate_desc *entry = (gate_desc*)(desc->address) + in;
 
-		if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
+		if (cvt_gate_to_trap(in, entry, &traps[out]))
 			out++;
 	}
 	traps[out].address = 0;
@@ -727,33 +759,89 @@ static void set_current_cr3(void *v)
 	x86_write_percpu(xen_current_cr3, (unsigned long)v);
 }
 
-static void xen_write_cr3(unsigned long cr3)
+static void __xen_write_cr3(bool kernel, unsigned long cr3)
 {
 	struct mmuext_op *op;
 	struct multicall_space mcs;
-	unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	unsigned long mfn;
 
-	BUG_ON(preemptible());
+	if (cr3)
+		mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	else
+		mfn = 0;
 
-	mcs = xen_mc_entry(sizeof(*op));  /* disables interrupts */
+	WARN_ON(mfn == 0 && kernel);
 
-	/* Update while interrupts are disabled, so its atomic with
-	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
+	mcs = __xen_mc_entry(sizeof(*op));
 
 	op = mcs.args;
-	op->cmd = MMUEXT_NEW_BASEPTR;
+	op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
 	op->arg1.mfn = mfn;
 
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
-	/* Update xen_update_cr3 once the batch has actually
-	   been submitted. */
-	xen_mc_callback(set_current_cr3, (void *)cr3);
+	if (kernel) {
+		x86_write_percpu(xen_cr3, cr3);
+
+		/* Update xen_current_cr3 once the batch has actually
+		   been submitted. */
+		xen_mc_callback(set_current_cr3, (void *)cr3);
+	}
+}
+
+static void xen_write_cr3(unsigned long cr3)
+{
+	BUG_ON(preemptible());
+
+	xen_mc_batch();  /* disables interrupts */
+
+	/* Update while interrupts are disabled, so its atomic with
+	   respect to ipis */
+	x86_write_percpu(xen_cr3, cr3);
+
+	__xen_write_cr3(true, cr3);
+
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+		if (user_pgd)
+			__xen_write_cr3(false, __pa(user_pgd));
+		else
+			__xen_write_cr3(false, 0);
+	}
+#endif
 
 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
 
+static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+{
+	int ret;
+
+	ret = 0;
+
+	switch(msr) {
+#ifdef CONFIG_X86_64
+		unsigned which;
+		u64 base;
+
+	case MSR_FS_BASE:		which = SEGBASE_FS; goto set;
+	case MSR_KERNEL_GS_BASE:	which = SEGBASE_GS_USER; goto set;
+	case MSR_GS_BASE:		which = SEGBASE_GS_KERNEL; goto set;
+
+	set:
+		base = ((u64)high << 32) | low;
+		if (HYPERVISOR_set_segment_base(which, base) != 0)
+			ret = -EFAULT;
+		break;
+#endif
+	default:
+		ret = native_write_msr_safe(msr, low, high);
+	}
+
+	return ret;
+}
+
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
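The paravirt write_msr hook receives the 64-bit MSR value already split into two 32-bit halves, so xen_write_msr_safe above has to reassemble it before handing it to HYPERVISOR_set_segment_base. A small sketch of that round trip, using illustrative values that are not part of the patch:

	/* wrmsrl(MSR_FS_BASE, base) reaches the hook as (low, high) */
	u64 base = 0xffff880012345000ULL;
	u32 low  = (u32)base;			/* 0x12345000 */
	u32 high = (u32)(base >> 32);		/* 0xffff8800 */

	/* ...and the hook rebuilds the original 64-bit base: */
	u64 rebuilt = ((u64)high << 32) | low;	/* == base */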
@@ -810,6 +898,48 @@ static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
 
+static int xen_pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = mm->pgd;
+	int ret = 0;
+
+	BUG_ON(PagePinned(virt_to_page(pgd)));
+
+#ifdef CONFIG_X86_64
+	{
+		struct page *page = virt_to_page(pgd);
+		pgd_t *user_pgd;
+
+		BUG_ON(page->private != 0);
+
+		ret = -ENOMEM;
+
+		user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		page->private = (unsigned long)user_pgd;
+
+		if (user_pgd != NULL) {
+			user_pgd[pgd_index(VSYSCALL_START)] =
+				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+			ret = 0;
+		}
+
+		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+	}
+#endif
+
+	return ret;
+}
+
+static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+	pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+	if (user_pgd)
+		free_page((unsigned long)user_pgd);
+#endif
+}
+
 /* This should never happen until we're OK to use struct page */
 static void xen_release_ptpage(u32 pfn, unsigned level)
 {
@@ -835,6 +965,18 @@ static void xen_release_pmd(u32 pfn)
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
+#if PAGETABLE_LEVELS == 4
+static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+{
+	xen_alloc_ptpage(mm, pfn, PT_PUD);
+}
+
+static void xen_release_pud(u32 pfn)
+{
+	xen_release_ptpage(pfn, PT_PUD);
+}
+#endif
+
 #ifdef CONFIG_HIGHPTE
 static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
 {
@@ -873,68 +1015,16 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
-	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
-	int i;
-
-	/* special set_pte for pagetable initialization */
-	pv_mmu_ops.set_pte = xen_set_pte_init;
-
-	init_mm.pgd = base;
-	/*
-	 * copy top-level of Xen-supplied pagetable into place.  This
-	 * is a stand-in while we copy the pmd pages.
-	 */
-	memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
-
-	/*
-	 * For PAE, need to allocate new pmds, rather than
-	 * share Xen's, since Xen doesn't like pmd's being
-	 * shared between address spaces.
-	 */
-	for (i = 0; i < PTRS_PER_PGD; i++) {
-		if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-			pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-
-			memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-			       PAGE_SIZE);
-
-			make_lowmem_page_readonly(pmd);
-
-			set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-		} else
-			pgd_clear(&base[i]);
-	}
-
-	/* make sure zero_page is mapped RO so we can use it in pagetables */
-	make_lowmem_page_readonly(empty_zero_page);
-	make_lowmem_page_readonly(base);
-	/*
-	 * Switch to new pagetable.  This is done before
-	 * pagetable_init has done anything so that the new pages
-	 * added to the table can be prepared properly for Xen.
-	 */
-	xen_write_cr3(__pa(base));
-
-	/* Unpin initial Xen pagetable */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
-			  PFN_DOWN(__pa(xen_start_info->pt_base)));
 }
 
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-
-		/*
-		 * Create a mapping for the shared info page.
-		 * Should be set_fixmap(), but shared_info is a machine
-		 * address with no corresponding pseudo-phys address.
-		 */
-		set_pte_mfn(addr,
-			    PFN_DOWN(xen_start_info->shared_info),
-			    PAGE_KERNEL);
-
-		HYPERVISOR_shared_info = (struct shared_info *)addr;
+		set_fixmap(FIX_PARAVIRT_BOOTMAP,
+			   xen_start_info->shared_info);
+
+		HYPERVISOR_shared_info =
+			(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
 	} else
 		HYPERVISOR_shared_info =
 			(struct shared_info *)__va(xen_start_info->shared_info);
@@ -949,26 +1039,32 @@ void xen_setup_shared_info(void)
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
 {
-	/* This will work as long as patching hasn't happened yet
-	   (which it hasn't) */
-	pv_mmu_ops.alloc_pte = xen_alloc_pte;
-	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
-	pv_mmu_ops.release_pte = xen_release_pte;
-	pv_mmu_ops.release_pmd = xen_release_pmd;
-	pv_mmu_ops.set_pte = xen_set_pte;
-
 	xen_setup_shared_info();
-
-	/* Actually pin the pagetable down, but we can't set PG_pinned
-	   yet because the page structures don't exist yet. */
-	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
 }
 
 static __init void xen_post_allocator_init(void)
 {
+	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.set_pgd = xen_set_pgd;
+#endif
+
+	/* This will work as long as patching hasn't happened yet
+	   (which it hasn't) */
+	pv_mmu_ops.alloc_pte = xen_alloc_pte;
+	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+	pv_mmu_ops.release_pte = xen_release_pte;
+	pv_mmu_ops.release_pmd = xen_release_pmd;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.alloc_pud = xen_alloc_pud;
+	pv_mmu_ops.release_pud = xen_release_pud;
+#endif
 
+#ifdef CONFIG_X86_64
+	SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
 	xen_mark_init_mm_pinned();
 }
 
@@ -982,6 +1078,7 @@ void xen_setup_vcpu_info_placement(void)
 
 	/* xen_vcpu_setup managed to place the vcpu_info within the
 	   percpu area for all cpus, so make use of it */
+#ifdef CONFIG_X86_32
 	if (have_vcpu_info_placement) {
 		printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
@@ -991,6 +1088,7 @@ void xen_setup_vcpu_info_placement(void)
 		pv_irq_ops.irq_enable = xen_irq_enable_direct;
 		pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 	}
+#endif
 }
 
 static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
@@ -1011,10 +1109,12 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 		goto patch_site
 
 	switch (type) {
+#ifdef CONFIG_X86_32
 		SITE(pv_irq_ops, irq_enable);
 		SITE(pv_irq_ops, irq_disable);
 		SITE(pv_irq_ops, save_fl);
 		SITE(pv_irq_ops, restore_fl);
+#endif /* CONFIG_X86_32 */
 #undef SITE
 
 	patch_site:
@@ -1057,8 +1157,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 #ifdef CONFIG_X86_F00F_BUG
 	case FIX_F00F_IDT:
 #endif
+#ifdef CONFIG_X86_32
 	case FIX_WP_TEST:
 	case FIX_VDSO:
+# ifdef CONFIG_HIGHMEM
+	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+# endif
+#else
+	case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+#endif
 #ifdef CONFIG_X86_LOCAL_APIC
 	case FIX_APIC_BASE:	/* maps dummy local APIC */
 #endif
@@ -1071,6 +1178,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 	}
 
 	__native_set_fixmap(idx, pte);
+
+#ifdef CONFIG_X86_64
+	/* Replicate changes to map the vsyscall page into the user
+	   pagetable vsyscall mapping. */
+	if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+		unsigned long vaddr = __fix_to_virt(idx);
+		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
+	}
+#endif
 }
 
 static const struct pv_info xen_info __initdata = {
@@ -1116,18 +1232,25 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
-	.write_msr = native_write_msr_safe,
+	.write_msr = xen_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
 
 	.iret = xen_iret,
 	.irq_enable_sysexit = xen_sysexit,
+#ifdef CONFIG_X86_64
+	.usergs_sysret32 = xen_sysret32,
+	.usergs_sysret64 = xen_sysret64,
+#endif
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
 	.load_gdt = xen_load_gdt,
 	.load_idt = xen_load_idt,
 	.load_tls = xen_load_tls,
+#ifdef CONFIG_X86_64
+	.load_gs_index = xen_load_gs_index,
+#endif
 
 	.store_gdt = native_store_gdt,
 	.store_idt = native_store_idt,
@@ -1141,14 +1264,34 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.set_iopl_mask = xen_set_iopl_mask,
 	.io_delay = xen_io_delay,
 
+	/* Xen takes care of %gs when switching to usermode for us */
+	.swapgs = paravirt_nop,
+
 	.lazy_mode = {
 		.enter = paravirt_enter_lazy_cpu,
 		.leave = xen_leave_lazy,
 	},
 };
 
+static void __init __xen_init_IRQ(void)
+{
+#ifdef CONFIG_X86_64
+	int i;
+
+	/* Create identity vector->irq map */
+	for(i = 0; i < NR_VECTORS; i++) {
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			per_cpu(vector_irq, cpu)[i] = i;
+	}
+#endif	/* CONFIG_X86_64 */
+
+	xen_init_IRQ();
+}
+
 static const struct pv_irq_ops xen_irq_ops __initdata = {
-	.init_IRQ = xen_init_IRQ,
+	.init_IRQ = __xen_init_IRQ,
 	.save_fl = xen_save_fl,
 	.restore_fl = xen_restore_fl,
 	.irq_disable = xen_irq_disable,
@@ -1156,7 +1299,7 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
 	.safe_halt = xen_safe_halt,
 	.halt = xen_halt,
 #ifdef CONFIG_X86_64
-	.adjust_exception_frame = paravirt_nop,
+	.adjust_exception_frame = xen_adjust_exception_frame,
 #endif
 };
 
@@ -1186,8 +1329,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
-	.pgd_alloc = __paravirt_pgd_alloc,
-	.pgd_free = paravirt_nop,
+	.pgd_alloc = xen_pgd_alloc,
+	.pgd_free = xen_pgd_free,
 
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
@@ -1199,7 +1342,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
 #endif
 
-	.set_pte = NULL,	/* see xen_pagetable_setup_* */
+#ifdef CONFIG_X86_64
+	.set_pte = xen_set_pte,
+#else
+	.set_pte = xen_set_pte_init,
+#endif
 	.set_pte_at = xen_set_pte_at,
 	.set_pmd = xen_set_pmd_hyper,
 
@@ -1213,15 +1360,26 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.make_pte = xen_make_pte,
 	.make_pgd = xen_make_pgd,
 
+#ifdef CONFIG_X86_PAE
 	.set_pte_atomic = xen_set_pte_atomic,
 	.set_pte_present = xen_set_pte_at,
-	.set_pud = xen_set_pud_hyper,
 	.pte_clear = xen_pte_clear,
 	.pmd_clear = xen_pmd_clear,
+#endif	/* CONFIG_X86_PAE */
+	.set_pud = xen_set_pud_hyper,
 
 	.make_pmd = xen_make_pmd,
 	.pmd_val = xen_pmd_val,
 
+#if PAGETABLE_LEVELS == 4
+	.pud_val = xen_pud_val,
+	.make_pud = xen_make_pud,
+	.set_pgd = xen_set_pgd_hyper,
+
+	.alloc_pud = xen_alloc_pte_init,
+	.release_pud = xen_release_pte_init,
+#endif	/* PAGETABLE_LEVELS == 4 */
+
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
 	.exit_mmap = xen_exit_mmap,
@@ -1234,21 +1392,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.set_fixmap = xen_set_fixmap,
 };
 
-#ifdef CONFIG_SMP
-static const struct smp_ops xen_smp_ops __initdata = {
-	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
-	.smp_prepare_cpus = xen_smp_prepare_cpus,
-	.cpu_up = xen_cpu_up,
-	.smp_cpus_done = xen_smp_cpus_done,
-
-	.smp_send_stop = xen_smp_send_stop,
-	.smp_send_reschedule = xen_smp_send_reschedule,
-
-	.send_call_func_ipi = xen_smp_send_call_function_ipi,
-	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
-};
-#endif	/* CONFIG_SMP */
-
 static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
@@ -1293,6 +1436,7 @@ static const struct machine_ops __initdata xen_machine_ops = {
 
 static void __init xen_reserve_top(void)
 {
+#ifdef CONFIG_X86_32
 	unsigned long top = HYPERVISOR_VIRT_START;
 	struct xen_platform_parameters pp;
 
@@ -1300,8 +1444,248 @@ static void __init xen_reserve_top(void)
 		top = pp.virt_start;
 
 	reserve_top_address(-top + 2 * PAGE_SIZE);
+#endif	/* CONFIG_X86_32 */
+}
+
+/*
+ * Like __va(), but returns address in the kernel mapping (which is
+ * all we have until the physical memory mapping has been set up.
+ */
+static void *__ka(phys_addr_t paddr)
+{
+#ifdef CONFIG_X86_64
+	return (void *)(paddr + __START_KERNEL_map);
+#else
+	return __va(paddr);
+#endif
 }
 
+/* Convert a machine address to physical address */
+static unsigned long m2p(phys_addr_t maddr)
+{
+	phys_addr_t paddr;
+
+	maddr &= PTE_MASK;
+	paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
+
+	return paddr;
+}
+
+/* Convert a machine address to kernel virtual */
+static void *m2v(phys_addr_t maddr)
+{
+	return __ka(m2p(maddr));
+}
+
+#ifdef CONFIG_X86_64
+static void walk(pgd_t *pgd, unsigned long addr)
+{
+	unsigned l4idx = pgd_index(addr);
+	unsigned l3idx = pud_index(addr);
+	unsigned l2idx = pmd_index(addr);
+	unsigned l1idx = pte_index(addr);
+	pgd_t l4;
+	pud_t l3;
+	pmd_t l2;
+	pte_t l1;
+
+	xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
+		       pgd, addr, l4idx, l3idx, l2idx, l1idx);
+
+	l4 = pgd[l4idx];
+	xen_raw_printk("  l4: %016lx\n", l4.pgd);
+	xen_raw_printk("      %016lx\n", pgd_val(l4));
+
+	l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
+	xen_raw_printk("  l3: %016lx\n", l3.pud);
+	xen_raw_printk("      %016lx\n", pud_val(l3));
+
+	l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
+	xen_raw_printk("  l2: %016lx\n", l2.pmd);
+	xen_raw_printk("      %016lx\n", pmd_val(l2));
+
+	l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
+	xen_raw_printk("  l1: %016lx\n", l1.pte);
+	xen_raw_printk("      %016lx\n", pte_val(l1));
+}
+#endif
+
+static void set_page_prot(void *addr, pgprot_t prot)
+{
+	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
+	pte_t pte = pfn_pte(pfn, prot);
+
+	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
+		       addr, pfn, get_phys_to_machine(pfn),
+		       pgprot_val(prot), pte.pte);
+
+	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+		BUG();
+}
+
+static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+{
+	unsigned pmdidx, pteidx;
+	unsigned ident_pte;
+	unsigned long pfn;
+
+	ident_pte = 0;
+	pfn = 0;
+	for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
+		pte_t *pte_page;
+
+		/* Reuse or allocate a page of ptes */
+		if (pmd_present(pmd[pmdidx]))
+			pte_page = m2v(pmd[pmdidx].pmd);
+		else {
+			/* Check for free pte pages */
+			if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
+				break;
+
+			pte_page = &level1_ident_pgt[ident_pte];
+			ident_pte += PTRS_PER_PTE;
+
+			pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
+		}
+
+		/* Install mappings */
+		for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
+			pte_t pte;
+
+			if (pfn > max_pfn_mapped)
+				max_pfn_mapped = pfn;
+
+			if (!pte_none(pte_page[pteidx]))
+				continue;
+
+			pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
+			pte_page[pteidx] = pte;
+		}
+	}
+
+	for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
+		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+
+	set_page_prot(pmd, PAGE_KERNEL_RO);
+}
+
+#ifdef CONFIG_X86_64
+static void convert_pfn_mfn(void *v)
+{
+	pte_t *pte = v;
+	int i;
+
+	/* All levels are converted the same way, so just treat them
+	   as ptes. */
+	for(i = 0; i < PTRS_PER_PTE; i++)
+		pte[i] = xen_make_pte(pte[i].pte);
+}
+
+/*
+ * Set up the inital kernel pagetable.
+ *
+ * We can construct this by grafting the Xen provided pagetable into
+ * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
+ * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt.  This
+ * means that only the kernel has a physical mapping to start with -
+ * but that's enough to get __va working.  We need to fill in the rest
+ * of the physical mapping once some sort of allocator has been set
+ * up.
+ */
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+{
+	pud_t *l3;
+	pmd_t *l2;
+
+	/* Zap identity mapping */
+	init_level4_pgt[0] = __pgd(0);
+
+	/* Pre-constructed entries are in pfn, so convert to mfn */
+	convert_pfn_mfn(init_level4_pgt);
+	convert_pfn_mfn(level3_ident_pgt);
+	convert_pfn_mfn(level3_kernel_pgt);
+
+	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
+	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
+
+	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
+	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
+	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	/* Set up identity map */
+	xen_map_identity_early(level2_ident_pgt, max_pfn);
+
+	/* Make pagetable pieces RO */
+	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+
+	/* Pin down new L4 */
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
+
+	/* Unpin Xen-provided one */
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+	/* Switch over */
+	pgd = init_level4_pgt;
+
+	/*
+	 * At this stage there can be no user pgd, and no page
+	 * structure to attach it to, so make sure we just set kernel
+	 * pgd.
+	 */
+	xen_mc_batch();
+	__xen_write_cr3(true, __pa(pgd));
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+
+	reserve_early(__pa(xen_start_info->pt_base),
+		      __pa(xen_start_info->pt_base +
+			   xen_start_info->nr_pt_frames * PAGE_SIZE),
+		      "XEN PAGETABLES");
+
+	return pgd;
+}
+#else	/* !CONFIG_X86_64 */
+static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
+
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+{
+	pmd_t *kernel_pmd;
+
+	init_pg_tables_start = __pa(pgd);
+	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+
+	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+	memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	xen_map_identity_early(level2_kernel_pgt, max_pfn);
+
+	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
+			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+	set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+	xen_write_cr3(__pa(swapper_pg_dir));
+
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+
+	return swapper_pg_dir;
+}
+#endif	/* CONFIG_X86_64 */
+
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
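Every entry in the Xen-provided pagetable holds a machine address, so before it can be dereferenced it has to be translated machine -> pseudo-physical -> kernel virtual, which is exactly the m2v() = __ka(m2p()) chain introduced in the hunk above. A rough sketch of following one level of the table with those helpers (illustrative only; the function name here is hypothetical, and it assumes the m2v()/pgd_index() definitions already shown):

	/* Follow the kernel-map entry of a Xen-provided pgd one level down */
	static pud_t *follow_kernel_pud(pgd_t *pgd)
	{
		pgd_t entry = pgd[pgd_index(__START_KERNEL_map)];

		/* entry.pgd is a machine address: strip the flag bits,
		   convert the mfn to a pfn, then map it through the
		   kernel mapping so it can be dereferenced */
		return (pud_t *)m2v(entry.pgd);
	}

This mirrors the "l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);" step that xen_setup_kernel_pagetable() performs when grafting the Xen pagetable into head_64.S's preconstructed tables.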
@@ -1337,53 +1721,56 @@ asmlinkage void __init xen_start_kernel(void)
 
 	machine_ops = xen_machine_ops;
 
-#ifdef CONFIG_SMP
-	smp_ops = xen_smp_ops;
+#ifdef CONFIG_X86_64
+	/* Disable until direct per-cpu data access. */
+	have_vcpu_info_placement = 0;
+	x86_64_init_pda();
 #endif
 
+	xen_smp_init();
+
 	/* Get mfn list */
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
 		xen_build_dynamic_phys_to_machine();
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
-	init_pg_tables_start = __pa(pgd);
-	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-	max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
-
-	init_mm.pgd = pgd; /* use the Xen pagetables to start */
-
-	/* keep using Xen gdt for now; no urgent need to change it */
-
-	x86_write_percpu(xen_cr3, __pa(pgd));
-	x86_write_percpu(xen_current_cr3, __pa(pgd));
+	/* Prevent unwanted bits from being set in PTEs. */
+	__supported_pte_mask &= ~_PAGE_GLOBAL;
+	if (!is_initial_xendomain())
+		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
 
+	xen_raw_console_write("mapping kernel into physical memory\n");
+	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+
+	init_mm.pgd = pgd;
+
+	/* keep using Xen gdt for now; no urgent need to change it */
+
 	pv_info.kernel_rpl = 1;
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
 		pv_info.kernel_rpl = 0;
 
-	/* Prevent unwanted bits from being set in PTEs. */
-	__supported_pte_mask &= ~_PAGE_GLOBAL;
-	if (!is_initial_xendomain())
-		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
-
 	/* set the limit of our address space */
 	xen_reserve_top();
 
+#ifdef CONFIG_X86_32
 	/* set up basic CPUID stuff */
 	cpu_detect(&new_cpu_data);
 	new_cpu_data.hard_math = 1;
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+#endif
 
 	/* Poke various useful things into boot_params */
 	boot_params.hdr.type_of_loader = (9 << 4) | 0;
 	boot_params.hdr.ramdisk_image = xen_start_info->mod_start
 		? __pa(xen_start_info->mod_start) : 0;
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
+	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
 	if (!is_initial_xendomain()) {
 		add_preferred_console("xenboot", 0, NULL);
@@ -1391,6 +1778,21 @@ asmlinkage void __init xen_start_kernel(void)
 		add_preferred_console("hvc", 0, NULL);
 	}
 
+	xen_raw_console_write("about to get started...\n");
+
+#if 0
+	xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
+		       &boot_params, __pa_symbol(&boot_params),
+		       __va(__pa_symbol(&boot_params)));
+
+	walk(pgd, &boot_params);
+	walk(pgd, __va(__pa(&boot_params)));
+#endif
+
 	/* Start the world */
+#ifdef CONFIG_X86_32
 	i386_start_kernel();
+#else
+	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
+#endif
 }