Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r--	arch/x86/xen/enlighten.c	816
1 file changed, 43 insertions, 773 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b58e96338149..82cd39a6cbd3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -61,40 +61,13 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 enum xen_domain_type xen_domain_type = XEN_NATIVE;
 EXPORT_SYMBOL_GPL(xen_domain_type);
 
-/*
- * Identity map, in addition to plain kernel map.  This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
-
-#ifdef CONFIG_X86_64
-/* l3 pud for userspace vsyscall mapping */
-static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
-#endif /* CONFIG_X86_64 */
-
-/*
- * Note about cr3 (pagetable base) values:
- *
- * xen_cr3 contains the current logical cr3 value; it contains the
- * last set cr3.  This may not be the current effective cr3, because
- * its update may be being lazily deferred.  However, a vcpu looking
- * at its own cr3 can use this value knowing that it everything will
- * be self-consistent.
- *
- * xen_current_cr3 contains the actual vcpu cr3; it is set once the
- * hypercall to set the vcpu cr3 is complete (so it may be a little
- * out of date, but it will never be set early).  If one vcpu is
- * looking at another vcpu's cr3 value, it should use this variable.
- */
-DEFINE_PER_CPU(unsigned long, xen_cr3);	 /* cr3 stored as physaddr */
-DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */
-
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
 
 struct shared_info xen_dummy_shared_info;
 
+void *xen_initial_gdt;
+
 /*
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
@@ -114,14 +87,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  *
  * 0: not available, 1: available
  */
-static int have_vcpu_info_placement =
-#ifdef CONFIG_X86_32
-	1
-#else
-	0
-#endif
-	;
-
+static int have_vcpu_info_placement = 1;
 
 static void xen_vcpu_setup(int cpu)
 {
@@ -137,7 +103,7 @@ static void xen_vcpu_setup(int cpu)
 
 	vcpup = &per_cpu(xen_vcpu_info, cpu);
 
-	info.mfn = virt_to_mfn(vcpup);
+	info.mfn = arbitrary_virt_to_mfn(vcpup);
 	info.offset = offset_in_page(vcpup);
 
 	printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
@@ -237,7 +203,7 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-static void xen_leave_lazy(void)
+void xen_leave_lazy(void)
 {
 	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	xen_mc_flush();
@@ -335,8 +301,10 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 	frames = mcs.args;
 
 	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
-		frames[f] = virt_to_mfn(va);
+		frames[f] = arbitrary_virt_to_mfn((void *)va);
+
 		make_lowmem_page_readonly((void *)va);
+		make_lowmem_page_readonly(mfn_to_virt(frames[f]));
 	}
 
 	MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
@@ -348,7 +316,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
 				 unsigned int cpu, unsigned int i)
 {
 	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
-	xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
+	xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
 	struct multicall_space mc = __xen_mc_entry(0);
 
 	MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
@@ -357,13 +325,14 @@ static void load_TLS_descriptor(struct thread_struct *t,
 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 {
 	/*
-	 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
-	 * it means we're in a context switch, and %gs has just been
-	 * saved.  This means we can zero it out to prevent faults on
-	 * exit from the hypervisor if the next process has no %gs.
-	 * Either way, it has been saved, and the new value will get
-	 * loaded properly.  This will go away as soon as Xen has been
-	 * modified to not save/restore %gs for normal hypercalls.
+	 * XXX sleazy hack: If we're being called in a lazy-cpu zone
+	 * and lazy gs handling is enabled, it means we're in a
+	 * context switch, and %gs has just been saved.  This means we
+	 * can zero it out to prevent faults on exit from the
+	 * hypervisor if the next process has no %gs.  Either way, it
+	 * has been saved, and the new value will get loaded properly.
+	 * This will go away as soon as Xen has been modified to not
+	 * save/restore %gs for normal hypercalls.
 	 *
 	 * On x86_64, this hack is not used for %gs, because gs points
 	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
@@ -375,7 +344,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 	 */
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
 #ifdef CONFIG_X86_32
-		loadsegment(gs, 0);
+		lazy_load_gs(0);
 #else
 		loadsegment(fs, 0);
 #endif
@@ -521,7 +490,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 		break;
 
 	default: {
-		xmaddr_t maddr = virt_to_machine(&dt[entry]);
+		xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]);
 
 		xen_mc_flush();
 		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
@@ -587,94 +556,18 @@ static u32 xen_safe_apic_wait_icr_idle(void)
 	return 0;
 }
 
-static struct apic_ops xen_basic_apic_ops = {
-	.read = xen_apic_read,
-	.write = xen_apic_write,
-	.icr_read = xen_apic_icr_read,
-	.icr_write = xen_apic_icr_write,
-	.wait_icr_idle = xen_apic_wait_icr_idle,
-	.safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
-};
-
-#endif
-
-static void xen_flush_tlb(void)
-{
-	struct mmuext_op *op;
-	struct multicall_space mcs;
-
-	preempt_disable();
-
-	mcs = xen_mc_entry(sizeof(*op));
-
-	op = mcs.args;
-	op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
-	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-
-	preempt_enable();
-}
-
-static void xen_flush_tlb_single(unsigned long addr)
+static void set_xen_basic_apic_ops(void)
 {
-	struct mmuext_op *op;
-	struct multicall_space mcs;
-
-	preempt_disable();
-
-	mcs = xen_mc_entry(sizeof(*op));
-	op = mcs.args;
-	op->cmd = MMUEXT_INVLPG_LOCAL;
-	op->arg1.linear_addr = addr & PAGE_MASK;
-	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-
-	preempt_enable();
+	apic->read = xen_apic_read;
+	apic->write = xen_apic_write;
+	apic->icr_read = xen_apic_icr_read;
+	apic->icr_write = xen_apic_icr_write;
+	apic->wait_icr_idle = xen_apic_wait_icr_idle;
+	apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle;
 }
 
-static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
-				 unsigned long va)
-{
-	struct {
-		struct mmuext_op op;
-		cpumask_t mask;
-	} *args;
-	cpumask_t cpumask = *cpus;
-	struct multicall_space mcs;
-
-	/*
-	 * A couple of (to be removed) sanity checks:
-	 *
-	 * - current CPU must not be in mask
-	 * - mask must exist :)
-	 */
-	BUG_ON(cpus_empty(cpumask));
-	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
-	BUG_ON(!mm);
-
-	/* If a CPU which we ran on has gone down, OK. */
-	cpus_and(cpumask, cpumask, cpu_online_map);
-	if (cpus_empty(cpumask))
-		return;
-
-	mcs = xen_mc_entry(sizeof(*args));
-	args = mcs.args;
-	args->mask = cpumask;
-	args->op.arg2.vcpumask = &args->mask;
-
-	if (va == TLB_FLUSH_ALL) {
-		args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-	} else {
-		args->op.cmd = MMUEXT_INVLPG_MULTI;
-		args->op.arg1.linear_addr = va;
-	}
-
-	MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
+#endif
 
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
 
 static void xen_clts(void)
 {
@@ -700,21 +593,6 @@ static void xen_write_cr0(unsigned long cr0)
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
-static void xen_write_cr2(unsigned long cr2)
-{
-	x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
-}
-
-static unsigned long xen_read_cr2(void)
-{
-	return x86_read_percpu(xen_vcpu)->arch.cr2;
-}
-
-static unsigned long xen_read_cr2_direct(void)
-{
-	return x86_read_percpu(xen_vcpu_info.arch.cr2);
-}
-
 static void xen_write_cr4(unsigned long cr4)
 {
 	cr4 &= ~X86_CR4_PGE;
@@ -723,71 +601,6 @@ static void xen_write_cr4(unsigned long cr4)
 	native_write_cr4(cr4);
 }
 
-static unsigned long xen_read_cr3(void)
-{
-	return x86_read_percpu(xen_cr3);
-}
-
-static void set_current_cr3(void *v)
-{
-	x86_write_percpu(xen_current_cr3, (unsigned long)v);
-}
-
-static void __xen_write_cr3(bool kernel, unsigned long cr3)
-{
-	struct mmuext_op *op;
-	struct multicall_space mcs;
-	unsigned long mfn;
-
-	if (cr3)
-		mfn = pfn_to_mfn(PFN_DOWN(cr3));
-	else
-		mfn = 0;
-
-	WARN_ON(mfn == 0 && kernel);
-
-	mcs = __xen_mc_entry(sizeof(*op));
-
-	op = mcs.args;
-	op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
-	op->arg1.mfn = mfn;
-
-	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
-	if (kernel) {
-		x86_write_percpu(xen_cr3, cr3);
-
-		/* Update xen_current_cr3 once the batch has actually
-		   been submitted. */
-		xen_mc_callback(set_current_cr3, (void *)cr3);
-	}
-}
-
-static void xen_write_cr3(unsigned long cr3)
-{
-	BUG_ON(preemptible());
-
-	xen_mc_batch();  /* disables interrupts */
-
-	/* Update while interrupts are disabled, so its atomic with
-	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
-
-	__xen_write_cr3(true, cr3);
-
-#ifdef CONFIG_X86_64
-	{
-		pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
-		if (user_pgd)
-			__xen_write_cr3(false, __pa(user_pgd));
-		else
-			__xen_write_cr3(false, 0);
-	}
-#endif
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
-}
-
 static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 {
 	int ret;
@@ -829,185 +642,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 	return ret;
 }
 
-/* Early in boot, while setting up the initial pagetable, assume
-   everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
-{
-#ifdef CONFIG_FLATMEM
-	BUG_ON(mem_map);	/* should only be used early */
-#endif
-	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-}
-
-/* Early release_pte assumes that all pts are pinned, since there's
-   only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(unsigned long pfn)
-{
-	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
-}
-
-static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
-{
-	struct mmuext_op op;
-	op.cmd = cmd;
-	op.arg1.mfn = pfn_to_mfn(pfn);
-	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
-		BUG();
-}
-
-/* This needs to make sure the new pte page is pinned iff its being
-   attached to a pinned pagetable. */
-static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
-{
-	struct page *page = pfn_to_page(pfn);
-
-	if (PagePinned(virt_to_page(mm->pgd))) {
-		SetPagePinned(page);
-
-		vm_unmap_aliases();
-		if (!PageHighMem(page)) {
-			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
-			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
-		} else {
-			/* make sure there are no stray mappings of
-			   this page */
-			kmap_flush_unused();
-		}
-	}
-}
-
-static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
-{
-	xen_alloc_ptpage(mm, pfn, PT_PTE);
-}
-
-static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
-{
-	xen_alloc_ptpage(mm, pfn, PT_PMD);
-}
-
-static int xen_pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = mm->pgd;
-	int ret = 0;
-
-	BUG_ON(PagePinned(virt_to_page(pgd)));
-
-#ifdef CONFIG_X86_64
-	{
-		struct page *page = virt_to_page(pgd);
-		pgd_t *user_pgd;
-
-		BUG_ON(page->private != 0);
-
-		ret = -ENOMEM;
-
-		user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-		page->private = (unsigned long)user_pgd;
-
-		if (user_pgd != NULL) {
-			user_pgd[pgd_index(VSYSCALL_START)] =
-				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
-			ret = 0;
-		}
-
-		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
-	}
-#endif
-
-	return ret;
-}
-
-static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-#ifdef CONFIG_X86_64
-	pgd_t *user_pgd = xen_get_user_pgd(pgd);
-
-	if (user_pgd)
-		free_page((unsigned long)user_pgd);
-#endif
-}
-
-/* This should never happen until we're OK to use struct page */
-static void xen_release_ptpage(unsigned long pfn, unsigned level)
-{
-	struct page *page = pfn_to_page(pfn);
-
-	if (PagePinned(page)) {
-		if (!PageHighMem(page)) {
-			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-				pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
-			make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
-		}
-		ClearPagePinned(page);
-	}
-}
-
-static void xen_release_pte(unsigned long pfn)
-{
-	xen_release_ptpage(pfn, PT_PTE);
-}
-
-static void xen_release_pmd(unsigned long pfn)
-{
-	xen_release_ptpage(pfn, PT_PMD);
-}
-
-#if PAGETABLE_LEVELS == 4
-static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
-{
-	xen_alloc_ptpage(mm, pfn, PT_PUD);
-}
-
-static void xen_release_pud(unsigned long pfn)
-{
-	xen_release_ptpage(pfn, PT_PUD);
-}
-#endif
-
-#ifdef CONFIG_HIGHPTE
-static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
-{
-	pgprot_t prot = PAGE_KERNEL;
-
-	if (PagePinned(page))
-		prot = PAGE_KERNEL_RO;
-
-	if (0 && PageHighMem(page))
-		printk("mapping highpte %lx type %d prot %s\n",
-		       page_to_pfn(page), type,
-		       (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
-
-	return kmap_atomic_prot(page, type, prot);
-}
-#endif
-
-#ifdef CONFIG_X86_32
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
-{
-	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
-	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
-		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
-			       pte_val_ma(pte));
-
-	return pte;
-}
-
-/* Init-time set_pte while constructing initial pagetables, which
-   doesn't allow RO pagetable pages to be remapped RW */
-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
-{
-	pte = mask_rw_pte(ptep, pte);
-
-	xen_set_pte(ptep, pte);
-}
-#endif
-
-static __init void xen_pagetable_setup_start(pgd_t *base)
-{
-}
-
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1028,37 +662,6 @@ void xen_setup_shared_info(void)
 	xen_setup_mfn_list_list();
 }
 
-static __init void xen_pagetable_setup_done(pgd_t *base)
-{
-	xen_setup_shared_info();
-}
-
-static __init void xen_post_allocator_init(void)
-{
-	pv_mmu_ops.set_pte = xen_set_pte;
-	pv_mmu_ops.set_pmd = xen_set_pmd;
-	pv_mmu_ops.set_pud = xen_set_pud;
-#if PAGETABLE_LEVELS == 4
-	pv_mmu_ops.set_pgd = xen_set_pgd;
-#endif
-
-	/* This will work as long as patching hasn't happened yet
-	   (which it hasn't) */
-	pv_mmu_ops.alloc_pte = xen_alloc_pte;
-	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
-	pv_mmu_ops.release_pte = xen_release_pte;
-	pv_mmu_ops.release_pmd = xen_release_pmd;
-#if PAGETABLE_LEVELS == 4
-	pv_mmu_ops.alloc_pud = xen_alloc_pud;
-	pv_mmu_ops.release_pud = xen_release_pud;
-#endif
-
-#ifdef CONFIG_X86_64
-	SetPagePinned(virt_to_page(level3_user_vsyscall));
-#endif
-	xen_mark_init_mm_pinned();
-}
-
 /* This is called once we have the cpu_possible_map */
 void xen_setup_vcpu_info_placement(void)
 {
@@ -1072,10 +675,10 @@ void xen_setup_vcpu_info_placement(void)
 	if (have_vcpu_info_placement) {
 		printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
-		pv_irq_ops.save_fl = xen_save_fl_direct;
-		pv_irq_ops.restore_fl = xen_restore_fl_direct;
-		pv_irq_ops.irq_disable = xen_irq_disable_direct;
-		pv_irq_ops.irq_enable = xen_irq_enable_direct;
+		pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+		pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
+		pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+		pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
 		pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 	}
 }
@@ -1133,49 +736,6 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 	return ret;
 }
 
-static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
-{
-	pte_t pte;
-
-	phys >>= PAGE_SHIFT;
-
-	switch (idx) {
-	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-#ifdef CONFIG_X86_F00F_BUG
-	case FIX_F00F_IDT:
-#endif
-#ifdef CONFIG_X86_32
-	case FIX_WP_TEST:
-	case FIX_VDSO:
-# ifdef CONFIG_HIGHMEM
-	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
-# endif
-#else
-	case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-	case FIX_APIC_BASE:	/* maps dummy local APIC */
-#endif
-		pte = pfn_pte(phys, prot);
-		break;
-
-	default:
-		pte = mfn_pte(phys, prot);
-		break;
-	}
-
-	__native_set_fixmap(idx, pte);
-
-#ifdef CONFIG_X86_64
-	/* Replicate changes to map the vsyscall page into the user
-	   pagetable vsyscall mapping. */
-	if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
-		unsigned long vaddr = __fix_to_virt(idx);
-		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
-	}
-#endif
-}
-
 static const struct pv_info xen_info __initdata = {
 	.paravirt_enabled = 1,
 	.shared_kernel_pmd = 0,
@@ -1271,87 +831,6 @@ static const struct pv_apic_ops xen_apic_ops __initdata = {
 #endif
 };
 
-static const struct pv_mmu_ops xen_mmu_ops __initdata = {
-	.pagetable_setup_start = xen_pagetable_setup_start,
-	.pagetable_setup_done = xen_pagetable_setup_done,
-
-	.read_cr2 = xen_read_cr2,
-	.write_cr2 = xen_write_cr2,
-
-	.read_cr3 = xen_read_cr3,
-	.write_cr3 = xen_write_cr3,
-
-	.flush_tlb_user = xen_flush_tlb,
-	.flush_tlb_kernel = xen_flush_tlb,
-	.flush_tlb_single = xen_flush_tlb_single,
-	.flush_tlb_others = xen_flush_tlb_others,
-
-	.pte_update = paravirt_nop,
-	.pte_update_defer = paravirt_nop,
-
-	.pgd_alloc = xen_pgd_alloc,
-	.pgd_free = xen_pgd_free,
-
-	.alloc_pte = xen_alloc_pte_init,
-	.release_pte = xen_release_pte_init,
-	.alloc_pmd = xen_alloc_pte_init,
-	.alloc_pmd_clone = paravirt_nop,
-	.release_pmd = xen_release_pte_init,
-
-#ifdef CONFIG_HIGHPTE
-	.kmap_atomic_pte = xen_kmap_atomic_pte,
-#endif
-
-#ifdef CONFIG_X86_64
-	.set_pte = xen_set_pte,
-#else
-	.set_pte = xen_set_pte_init,
-#endif
-	.set_pte_at = xen_set_pte_at,
-	.set_pmd = xen_set_pmd_hyper,
-
-	.ptep_modify_prot_start = __ptep_modify_prot_start,
-	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
-
-	.pte_val = xen_pte_val,
-	.pte_flags = native_pte_flags,
-	.pgd_val = xen_pgd_val,
-
-	.make_pte = xen_make_pte,
-	.make_pgd = xen_make_pgd,
-
-#ifdef CONFIG_X86_PAE
-	.set_pte_atomic = xen_set_pte_atomic,
-	.set_pte_present = xen_set_pte_at,
-	.pte_clear = xen_pte_clear,
-	.pmd_clear = xen_pmd_clear,
-#endif	/* CONFIG_X86_PAE */
-	.set_pud = xen_set_pud_hyper,
-
-	.make_pmd = xen_make_pmd,
-	.pmd_val = xen_pmd_val,
-
-#if PAGETABLE_LEVELS == 4
-	.pud_val = xen_pud_val,
-	.make_pud = xen_make_pud,
-	.set_pgd = xen_set_pgd_hyper,
-
-	.alloc_pud = xen_alloc_pte_init,
-	.release_pud = xen_release_pte_init,
-#endif	/* PAGETABLE_LEVELS == 4 */
-
-	.activate_mm = xen_activate_mm,
-	.dup_mmap = xen_dup_mmap,
-	.exit_mmap = xen_exit_mmap,
-
-	.lazy_mode = {
-		.enter = paravirt_enter_lazy_mmu,
-		.leave = xen_leave_lazy,
-	},
-
-	.set_fixmap = xen_set_fixmap,
-};
-
 static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
@@ -1394,223 +873,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
 };
 
 
-static void __init xen_reserve_top(void)
-{
-#ifdef CONFIG_X86_32
-	unsigned long top = HYPERVISOR_VIRT_START;
-	struct xen_platform_parameters pp;
-
-	if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
-		top = pp.virt_start;
-
-	reserve_top_address(-top);
-#endif	/* CONFIG_X86_32 */
-}
-
-/*
- * Like __va(), but returns address in the kernel mapping (which is
- * all we have until the physical memory mapping has been set up.
- */
-static void *__ka(phys_addr_t paddr)
-{
-#ifdef CONFIG_X86_64
-	return (void *)(paddr + __START_KERNEL_map);
-#else
-	return __va(paddr);
-#endif
-}
-
-/* Convert a machine address to physical address */
-static unsigned long m2p(phys_addr_t maddr)
-{
-	phys_addr_t paddr;
-
-	maddr &= PTE_PFN_MASK;
-	paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	return paddr;
-}
-
-/* Convert a machine address to kernel virtual */
-static void *m2v(phys_addr_t maddr)
-{
-	return __ka(m2p(maddr));
-}
-
-static void set_page_prot(void *addr, pgprot_t prot)
-{
-	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
-	pte_t pte = pfn_pte(pfn, prot);
-
-	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
-		BUG();
-}
-
-static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
-{
-	unsigned pmdidx, pteidx;
-	unsigned ident_pte;
-	unsigned long pfn;
-
-	ident_pte = 0;
-	pfn = 0;
-	for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
-		pte_t *pte_page;
-
-		/* Reuse or allocate a page of ptes */
-		if (pmd_present(pmd[pmdidx]))
-			pte_page = m2v(pmd[pmdidx].pmd);
-		else {
-			/* Check for free pte pages */
-			if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
-				break;
-
-			pte_page = &level1_ident_pgt[ident_pte];
-			ident_pte += PTRS_PER_PTE;
-
-			pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
-		}
-
-		/* Install mappings */
-		for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
-			pte_t pte;
-
-			if (pfn > max_pfn_mapped)
-				max_pfn_mapped = pfn;
-
-			if (!pte_none(pte_page[pteidx]))
-				continue;
-
-			pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
-			pte_page[pteidx] = pte;
-		}
-	}
-
-	for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
-		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
-
-	set_page_prot(pmd, PAGE_KERNEL_RO);
-}
-
-#ifdef CONFIG_X86_64
-static void convert_pfn_mfn(void *v)
-{
-	pte_t *pte = v;
-	int i;
-
-	/* All levels are converted the same way, so just treat them
-	   as ptes. */
-	for (i = 0; i < PTRS_PER_PTE; i++)
-		pte[i] = xen_make_pte(pte[i].pte);
-}
-
-/*
- * Set up the inital kernel pagetable.
- *
- * We can construct this by grafting the Xen provided pagetable into
- * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt.  This
- * means that only the kernel has a physical mapping to start with -
- * but that's enough to get __va working.  We need to fill in the rest
- * of the physical mapping once some sort of allocator has been set
- * up.
- */
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
-						unsigned long max_pfn)
-{
-	pud_t *l3;
-	pmd_t *l2;
-
-	/* Zap identity mapping */
-	init_level4_pgt[0] = __pgd(0);
-
-	/* Pre-constructed entries are in pfn, so convert to mfn */
-	convert_pfn_mfn(init_level4_pgt);
-	convert_pfn_mfn(level3_ident_pgt);
-	convert_pfn_mfn(level3_kernel_pgt);
-
-	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
-	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
-
-	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
-	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
-	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
-	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
-	/* Set up identity map */
-	xen_map_identity_early(level2_ident_pgt, max_pfn);
-
-	/* Make pagetable pieces RO */
-	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
-	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-
-	/* Pin down new L4 */
-	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
-			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
-
-	/* Unpin Xen-provided one */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
-	/* Switch over */
-	pgd = init_level4_pgt;
-
-	/*
-	 * At this stage there can be no user pgd, and no page
-	 * structure to attach it to, so make sure we just set kernel
-	 * pgd.
-	 */
-	xen_mc_batch();
-	__xen_write_cr3(true, __pa(pgd));
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-
-	reserve_early(__pa(xen_start_info->pt_base),
-		      __pa(xen_start_info->pt_base +
-			   xen_start_info->nr_pt_frames * PAGE_SIZE),
-		      "XEN PAGETABLES");
-
-	return pgd;
-}
-#else	/* !CONFIG_X86_64 */
-static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
-
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
-						unsigned long max_pfn)
-{
-	pmd_t *kernel_pmd;
-
-	init_pg_tables_start = __pa(pgd);
-	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
-
-	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
-	memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
-
-	xen_map_identity_early(level2_kernel_pgt, max_pfn);
-
-	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
-	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
-			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
-
-	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
-	set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
-
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
-	xen_write_cr3(__pa(swapper_pg_dir));
-
-	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
-
-	return swapper_pg_dir;
-}
-#endif	/* CONFIG_X86_64 */
-
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -1639,7 +901,7 @@ asmlinkage void __init xen_start_kernel(void)
 	/*
 	 * set up the basic apic ops.
 	 */
-	apic_ops = &xen_basic_apic_ops;
+	set_xen_basic_apic_ops();
 #endif
 
 	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
@@ -1650,10 +912,18 @@ asmlinkage void __init xen_start_kernel(void)
 	machine_ops = xen_machine_ops;
 
 #ifdef CONFIG_X86_64
-	/* Disable until direct per-cpu data access. */
-	have_vcpu_info_placement = 0;
-	x86_64_init_pda();
+	/*
+	 * Setup percpu state.  We only need to do this for 64-bit
+	 * because 32-bit already has %fs set properly.
+	 */
+	load_percpu_segment(0);
 #endif
+	/*
+	 * The only reliable way to retain the initial address of the
+	 * percpu gdt_page is to remember it here, so we can go and
+	 * mark it RW later, when the initial percpu area is freed.
+	 */
+	xen_initial_gdt = &per_cpu(gdt_page, 0);
 
 	xen_smp_init();
 