Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r--  arch/x86/xen/enlighten.c | 772
1 file changed, 19 insertions(+), 753 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bea215230b20..37230342c2c4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -61,40 +61,13 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 enum xen_domain_type xen_domain_type = XEN_NATIVE;
 EXPORT_SYMBOL_GPL(xen_domain_type);
 
-/*
- * Identity map, in addition to plain kernel map. This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
-
-#ifdef CONFIG_X86_64
-/* l3 pud for userspace vsyscall mapping */
-static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
-#endif /* CONFIG_X86_64 */
-
-/*
- * Note about cr3 (pagetable base) values:
- *
- * xen_cr3 contains the current logical cr3 value; it contains the
- * last set cr3. This may not be the current effective cr3, because
- * its update may be being lazily deferred. However, a vcpu looking
- * at its own cr3 can use this value knowing that it everything will
- * be self-consistent.
- *
- * xen_current_cr3 contains the actual vcpu cr3; it is set once the
- * hypercall to set the vcpu cr3 is complete (so it may be a little
- * out of date, but it will never be set early). If one vcpu is
- * looking at another vcpu's cr3 value, it should use this variable.
- */
-DEFINE_PER_CPU(unsigned long, xen_cr3);	/* cr3 stored as physaddr */
-DEFINE_PER_CPU(unsigned long, xen_current_cr3);	/* actual vcpu cr3 */
-
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
 
 struct shared_info xen_dummy_shared_info;
 
+void *xen_initial_gdt;
+
 /*
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
@@ -114,14 +87,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  *
  * 0: not available, 1: available
  */
-static int have_vcpu_info_placement =
-#ifdef CONFIG_X86_32
-	1
-#else
-	0
-#endif
-	;
-
+static int have_vcpu_info_placement = 1;
 
 static void xen_vcpu_setup(int cpu)
 {
@@ -237,7 +203,7 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-static void xen_leave_lazy(void)
+void xen_leave_lazy(void)
 {
 	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	xen_mc_flush();
@@ -598,83 +564,6 @@ static struct apic_ops xen_basic_apic_ops = {
 
 #endif
 
-static void xen_flush_tlb(void)
-{
-	struct mmuext_op *op;
-	struct multicall_space mcs;
-
-	preempt_disable();
-
-	mcs = xen_mc_entry(sizeof(*op));
-
-	op = mcs.args;
-	op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
-	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-
-	preempt_enable();
-}
-
-static void xen_flush_tlb_single(unsigned long addr)
-{
-	struct mmuext_op *op;
-	struct multicall_space mcs;
-
-	preempt_disable();
-
-	mcs = xen_mc_entry(sizeof(*op));
-	op = mcs.args;
-	op->cmd = MMUEXT_INVLPG_LOCAL;
-	op->arg1.linear_addr = addr & PAGE_MASK;
-	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-
-	preempt_enable();
-}
-
-static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
-				 unsigned long va)
-{
-	struct {
-		struct mmuext_op op;
-		cpumask_t mask;
-	} *args;
-	cpumask_t cpumask = *cpus;
-	struct multicall_space mcs;
-
-	/*
-	 * A couple of (to be removed) sanity checks:
-	 *
-	 * - current CPU must not be in mask
-	 * - mask must exist :)
-	 */
-	BUG_ON(cpus_empty(cpumask));
-	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
-	BUG_ON(!mm);
-
-	/* If a CPU which we ran on has gone down, OK. */
-	cpus_and(cpumask, cpumask, cpu_online_map);
-	if (cpus_empty(cpumask))
-		return;
-
-	mcs = xen_mc_entry(sizeof(*args));
-	args = mcs.args;
-	args->mask = cpumask;
-	args->op.arg2.vcpumask = &args->mask;
-
-	if (va == TLB_FLUSH_ALL) {
-		args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-	} else {
-		args->op.cmd = MMUEXT_INVLPG_MULTI;
-		args->op.arg1.linear_addr = va;
-	}
-
-	MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
 
 static void xen_clts(void)
 {
@@ -700,21 +589,6 @@ static void xen_write_cr0(unsigned long cr0)
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
-static void xen_write_cr2(unsigned long cr2)
-{
-	x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
-}
-
-static unsigned long xen_read_cr2(void)
-{
-	return x86_read_percpu(xen_vcpu)->arch.cr2;
-}
-
-static unsigned long xen_read_cr2_direct(void)
-{
-	return x86_read_percpu(xen_vcpu_info.arch.cr2);
-}
-
 static void xen_write_cr4(unsigned long cr4)
 {
 	cr4 &= ~X86_CR4_PGE;
@@ -723,71 +597,6 @@ static void xen_write_cr4(unsigned long cr4)
 	native_write_cr4(cr4);
 }
 
-static unsigned long xen_read_cr3(void)
-{
-	return x86_read_percpu(xen_cr3);
-}
-
-static void set_current_cr3(void *v)
-{
-	x86_write_percpu(xen_current_cr3, (unsigned long)v);
-}
-
-static void __xen_write_cr3(bool kernel, unsigned long cr3)
-{
-	struct mmuext_op *op;
-	struct multicall_space mcs;
-	unsigned long mfn;
-
-	if (cr3)
-		mfn = pfn_to_mfn(PFN_DOWN(cr3));
-	else
-		mfn = 0;
-
-	WARN_ON(mfn == 0 && kernel);
-
-	mcs = __xen_mc_entry(sizeof(*op));
-
-	op = mcs.args;
-	op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
-	op->arg1.mfn = mfn;
-
-	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
-	if (kernel) {
-		x86_write_percpu(xen_cr3, cr3);
-
-		/* Update xen_current_cr3 once the batch has actually
-		   been submitted. */
-		xen_mc_callback(set_current_cr3, (void *)cr3);
-	}
-}
-
-static void xen_write_cr3(unsigned long cr3)
-{
-	BUG_ON(preemptible());
-
-	xen_mc_batch();	/* disables interrupts */
-
-	/* Update while interrupts are disabled, so its atomic with
-	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
-
-	__xen_write_cr3(true, cr3);
-
-#ifdef CONFIG_X86_64
-	{
-		pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
-		if (user_pgd)
-			__xen_write_cr3(false, __pa(user_pgd));
-		else
-			__xen_write_cr3(false, 0);
-	}
-#endif
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
-}
-
 static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 {
 	int ret;
@@ -829,185 +638,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 	return ret;
 }
 
-/* Early in boot, while setting up the initial pagetable, assume
-   everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
-{
-#ifdef CONFIG_FLATMEM
-	BUG_ON(mem_map);	/* should only be used early */
-#endif
-	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-}
-
-/* Early release_pte assumes that all pts are pinned, since there's
-   only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(unsigned long pfn)
-{
-	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
-}
-
-static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
-{
-	struct mmuext_op op;
-	op.cmd = cmd;
-	op.arg1.mfn = pfn_to_mfn(pfn);
-	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
-		BUG();
-}
-
-/* This needs to make sure the new pte page is pinned iff its being
-   attached to a pinned pagetable. */
-static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
-{
-	struct page *page = pfn_to_page(pfn);
-
-	if (PagePinned(virt_to_page(mm->pgd))) {
-		SetPagePinned(page);
-
-		vm_unmap_aliases();
-		if (!PageHighMem(page)) {
-			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
-			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
-		} else {
-			/* make sure there are no stray mappings of
-			   this page */
-			kmap_flush_unused();
-		}
-	}
-}
-
-static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
-{
-	xen_alloc_ptpage(mm, pfn, PT_PTE);
-}
-
-static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
-{
-	xen_alloc_ptpage(mm, pfn, PT_PMD);
-}
-
-static int xen_pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = mm->pgd;
-	int ret = 0;
-
-	BUG_ON(PagePinned(virt_to_page(pgd)));
-
-#ifdef CONFIG_X86_64
-	{
-		struct page *page = virt_to_page(pgd);
-		pgd_t *user_pgd;
-
-		BUG_ON(page->private != 0);
-
-		ret = -ENOMEM;
-
-		user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-		page->private = (unsigned long)user_pgd;
-
-		if (user_pgd != NULL) {
-			user_pgd[pgd_index(VSYSCALL_START)] =
-				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
-			ret = 0;
-		}
-
-		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
-	}
-#endif
-
-	return ret;
-}
-
-static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-#ifdef CONFIG_X86_64
-	pgd_t *user_pgd = xen_get_user_pgd(pgd);
-
-	if (user_pgd)
-		free_page((unsigned long)user_pgd);
-#endif
-}
-
-/* This should never happen until we're OK to use struct page */
-static void xen_release_ptpage(unsigned long pfn, unsigned level)
-{
-	struct page *page = pfn_to_page(pfn);
-
-	if (PagePinned(page)) {
-		if (!PageHighMem(page)) {
-			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-				pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
-			make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
-		}
-		ClearPagePinned(page);
-	}
-}
-
-static void xen_release_pte(unsigned long pfn)
-{
-	xen_release_ptpage(pfn, PT_PTE);
-}
-
-static void xen_release_pmd(unsigned long pfn)
-{
-	xen_release_ptpage(pfn, PT_PMD);
-}
-
-#if PAGETABLE_LEVELS == 4
-static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
-{
-	xen_alloc_ptpage(mm, pfn, PT_PUD);
-}
-
-static void xen_release_pud(unsigned long pfn)
-{
-	xen_release_ptpage(pfn, PT_PUD);
-}
-#endif
-
-#ifdef CONFIG_HIGHPTE
-static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
-{
-	pgprot_t prot = PAGE_KERNEL;
-
-	if (PagePinned(page))
-		prot = PAGE_KERNEL_RO;
-
-	if (0 && PageHighMem(page))
-		printk("mapping highpte %lx type %d prot %s\n",
-		       page_to_pfn(page), type,
-		       (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
-
-	return kmap_atomic_prot(page, type, prot);
-}
-#endif
-
-#ifdef CONFIG_X86_32
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
-{
-	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
-	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
-		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
-			       pte_val_ma(pte));
-
-	return pte;
-}
-
-/* Init-time set_pte while constructing initial pagetables, which
-   doesn't allow RO pagetable pages to be remapped RW */
-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
-{
-	pte = mask_rw_pte(ptep, pte);
-
-	xen_set_pte(ptep, pte);
-}
-#endif
-
-static __init void xen_pagetable_setup_start(pgd_t *base)
-{
-}
-
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1028,37 +658,6 @@ void xen_setup_shared_info(void)
 	xen_setup_mfn_list_list();
 }
 
-static __init void xen_pagetable_setup_done(pgd_t *base)
-{
-	xen_setup_shared_info();
-}
-
-static __init void xen_post_allocator_init(void)
-{
-	pv_mmu_ops.set_pte = xen_set_pte;
-	pv_mmu_ops.set_pmd = xen_set_pmd;
-	pv_mmu_ops.set_pud = xen_set_pud;
-#if PAGETABLE_LEVELS == 4
-	pv_mmu_ops.set_pgd = xen_set_pgd;
-#endif
-
-	/* This will work as long as patching hasn't happened yet
-	   (which it hasn't) */
-	pv_mmu_ops.alloc_pte = xen_alloc_pte;
-	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
-	pv_mmu_ops.release_pte = xen_release_pte;
-	pv_mmu_ops.release_pmd = xen_release_pmd;
-#if PAGETABLE_LEVELS == 4
-	pv_mmu_ops.alloc_pud = xen_alloc_pud;
-	pv_mmu_ops.release_pud = xen_release_pud;
-#endif
-
-#ifdef CONFIG_X86_64
-	SetPagePinned(virt_to_page(level3_user_vsyscall));
-#endif
-	xen_mark_init_mm_pinned();
-}
-
 /* This is called once we have the cpu_possible_map */
 void xen_setup_vcpu_info_placement(void)
 {
@@ -1072,10 +671,10 @@ void xen_setup_vcpu_info_placement(void)
 	if (have_vcpu_info_placement) {
 		printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
-		pv_irq_ops.save_fl = xen_save_fl_direct;
-		pv_irq_ops.restore_fl = xen_restore_fl_direct;
-		pv_irq_ops.irq_disable = xen_irq_disable_direct;
-		pv_irq_ops.irq_enable = xen_irq_enable_direct;
+		pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+		pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
+		pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+		pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
 		pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 	}
 }
@@ -1133,49 +732,6 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 	return ret;
 }
 
-static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
-{
-	pte_t pte;
-
-	phys >>= PAGE_SHIFT;
-
-	switch (idx) {
-	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-#ifdef CONFIG_X86_F00F_BUG
-	case FIX_F00F_IDT:
-#endif
-#ifdef CONFIG_X86_32
-	case FIX_WP_TEST:
-	case FIX_VDSO:
-# ifdef CONFIG_HIGHMEM
-	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
-# endif
-#else
-	case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-	case FIX_APIC_BASE:	/* maps dummy local APIC */
-#endif
-		pte = pfn_pte(phys, prot);
-		break;
-
-	default:
-		pte = mfn_pte(phys, prot);
-		break;
-	}
-
-	__native_set_fixmap(idx, pte);
-
-#ifdef CONFIG_X86_64
-	/* Replicate changes to map the vsyscall page into the user
-	   pagetable vsyscall mapping. */
-	if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
-		unsigned long vaddr = __fix_to_virt(idx);
-		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
-	}
-#endif
-}
-
 static const struct pv_info xen_info __initdata = {
 	.paravirt_enabled = 1,
 	.shared_kernel_pmd = 0,
@@ -1271,87 +827,6 @@ static const struct pv_apic_ops xen_apic_ops __initdata = {
 #endif
 };
 
-static const struct pv_mmu_ops xen_mmu_ops __initdata = {
-	.pagetable_setup_start = xen_pagetable_setup_start,
-	.pagetable_setup_done = xen_pagetable_setup_done,
-
-	.read_cr2 = xen_read_cr2,
-	.write_cr2 = xen_write_cr2,
-
-	.read_cr3 = xen_read_cr3,
-	.write_cr3 = xen_write_cr3,
-
-	.flush_tlb_user = xen_flush_tlb,
-	.flush_tlb_kernel = xen_flush_tlb,
-	.flush_tlb_single = xen_flush_tlb_single,
-	.flush_tlb_others = xen_flush_tlb_others,
-
-	.pte_update = paravirt_nop,
-	.pte_update_defer = paravirt_nop,
-
-	.pgd_alloc = xen_pgd_alloc,
-	.pgd_free = xen_pgd_free,
-
-	.alloc_pte = xen_alloc_pte_init,
-	.release_pte = xen_release_pte_init,
-	.alloc_pmd = xen_alloc_pte_init,
-	.alloc_pmd_clone = paravirt_nop,
-	.release_pmd = xen_release_pte_init,
-
-#ifdef CONFIG_HIGHPTE
-	.kmap_atomic_pte = xen_kmap_atomic_pte,
-#endif
-
-#ifdef CONFIG_X86_64
-	.set_pte = xen_set_pte,
-#else
-	.set_pte = xen_set_pte_init,
-#endif
-	.set_pte_at = xen_set_pte_at,
-	.set_pmd = xen_set_pmd_hyper,
-
-	.ptep_modify_prot_start = __ptep_modify_prot_start,
-	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
-
-	.pte_val = xen_pte_val,
-	.pte_flags = native_pte_flags,
-	.pgd_val = xen_pgd_val,
-
-	.make_pte = xen_make_pte,
-	.make_pgd = xen_make_pgd,
-
-#ifdef CONFIG_X86_PAE
-	.set_pte_atomic = xen_set_pte_atomic,
-	.set_pte_present = xen_set_pte_at,
-	.pte_clear = xen_pte_clear,
-	.pmd_clear = xen_pmd_clear,
-#endif /* CONFIG_X86_PAE */
-	.set_pud = xen_set_pud_hyper,
-
-	.make_pmd = xen_make_pmd,
-	.pmd_val = xen_pmd_val,
-
-#if PAGETABLE_LEVELS == 4
-	.pud_val = xen_pud_val,
-	.make_pud = xen_make_pud,
-	.set_pgd = xen_set_pgd_hyper,
-
-	.alloc_pud = xen_alloc_pte_init,
-	.release_pud = xen_release_pte_init,
-#endif /* PAGETABLE_LEVELS == 4 */
-
-	.activate_mm = xen_activate_mm,
-	.dup_mmap = xen_dup_mmap,
-	.exit_mmap = xen_exit_mmap,
-
-	.lazy_mode = {
-		.enter = paravirt_enter_lazy_mmu,
-		.leave = xen_leave_lazy,
-	},
-
-	.set_fixmap = xen_set_fixmap,
-};
-
 static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
@@ -1394,223 +869,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
 };
 
 
-static void __init xen_reserve_top(void)
-{
-#ifdef CONFIG_X86_32
-	unsigned long top = HYPERVISOR_VIRT_START;
-	struct xen_platform_parameters pp;
-
-	if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
-		top = pp.virt_start;
-
-	reserve_top_address(-top);
-#endif /* CONFIG_X86_32 */
-}
-
-/*
- * Like __va(), but returns address in the kernel mapping (which is
- * all we have until the physical memory mapping has been set up.
- */
-static void *__ka(phys_addr_t paddr)
-{
-#ifdef CONFIG_X86_64
-	return (void *)(paddr + __START_KERNEL_map);
-#else
-	return __va(paddr);
-#endif
-}
-
-/* Convert a machine address to physical address */
-static unsigned long m2p(phys_addr_t maddr)
-{
-	phys_addr_t paddr;
-
-	maddr &= PTE_PFN_MASK;
-	paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	return paddr;
-}
-
-/* Convert a machine address to kernel virtual */
-static void *m2v(phys_addr_t maddr)
-{
-	return __ka(m2p(maddr));
-}
-
-static void set_page_prot(void *addr, pgprot_t prot)
-{
-	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
-	pte_t pte = pfn_pte(pfn, prot);
-
-	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
-		BUG();
-}
-
-static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
-{
-	unsigned pmdidx, pteidx;
-	unsigned ident_pte;
-	unsigned long pfn;
-
-	ident_pte = 0;
-	pfn = 0;
-	for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
-		pte_t *pte_page;
-
-		/* Reuse or allocate a page of ptes */
-		if (pmd_present(pmd[pmdidx]))
-			pte_page = m2v(pmd[pmdidx].pmd);
-		else {
-			/* Check for free pte pages */
-			if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
-				break;
-
-			pte_page = &level1_ident_pgt[ident_pte];
-			ident_pte += PTRS_PER_PTE;
-
-			pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
-		}
-
-		/* Install mappings */
-		for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
-			pte_t pte;
-
-			if (pfn > max_pfn_mapped)
-				max_pfn_mapped = pfn;
-
-			if (!pte_none(pte_page[pteidx]))
-				continue;
-
-			pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
-			pte_page[pteidx] = pte;
-		}
-	}
-
-	for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
-		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
-
-	set_page_prot(pmd, PAGE_KERNEL_RO);
-}
-
-#ifdef CONFIG_X86_64
-static void convert_pfn_mfn(void *v)
-{
-	pte_t *pte = v;
-	int i;
-
-	/* All levels are converted the same way, so just treat them
-	   as ptes. */
-	for (i = 0; i < PTRS_PER_PTE; i++)
-		pte[i] = xen_make_pte(pte[i].pte);
-}
-
-/*
- * Set up the inital kernel pagetable.
- *
- * We can construct this by grafting the Xen provided pagetable into
- * head_64.S's preconstructed pagetables. We copy the Xen L2's into
- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
- * means that only the kernel has a physical mapping to start with -
- * but that's enough to get __va working. We need to fill in the rest
- * of the physical mapping once some sort of allocator has been set
- * up.
- */
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
-						unsigned long max_pfn)
-{
-	pud_t *l3;
-	pmd_t *l2;
-
-	/* Zap identity mapping */
-	init_level4_pgt[0] = __pgd(0);
-
-	/* Pre-constructed entries are in pfn, so convert to mfn */
-	convert_pfn_mfn(init_level4_pgt);
-	convert_pfn_mfn(level3_ident_pgt);
-	convert_pfn_mfn(level3_kernel_pgt);
-
-	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
-	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
-
-	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
-	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
-	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
-	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
-	/* Set up identity map */
-	xen_map_identity_early(level2_ident_pgt, max_pfn);
-
-	/* Make pagetable pieces RO */
-	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
-	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-
-	/* Pin down new L4 */
-	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
-			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
-
-	/* Unpin Xen-provided one */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
-	/* Switch over */
-	pgd = init_level4_pgt;
-
-	/*
-	 * At this stage there can be no user pgd, and no page
-	 * structure to attach it to, so make sure we just set kernel
-	 * pgd.
-	 */
-	xen_mc_batch();
-	__xen_write_cr3(true, __pa(pgd));
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-
-	reserve_early(__pa(xen_start_info->pt_base),
-		      __pa(xen_start_info->pt_base +
-			   xen_start_info->nr_pt_frames * PAGE_SIZE),
-		      "XEN PAGETABLES");
-
-	return pgd;
-}
-#else /* !CONFIG_X86_64 */
-static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
-
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
-						unsigned long max_pfn)
-{
-	pmd_t *kernel_pmd;
-
-	init_pg_tables_start = __pa(pgd);
-	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
-
-	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
-	memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
-
-	xen_map_identity_early(level2_kernel_pgt, max_pfn);
-
-	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
-	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
-			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
-
-	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
-	set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
-
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
-	xen_write_cr3(__pa(swapper_pg_dir));
-
-	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
-
-	return swapper_pg_dir;
-}
-#endif /* CONFIG_X86_64 */
-
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -1650,10 +908,18 @@ asmlinkage void __init xen_start_kernel(void)
 	machine_ops = xen_machine_ops;
 
 #ifdef CONFIG_X86_64
-	/* Disable until direct per-cpu data access. */
-	have_vcpu_info_placement = 0;
-	x86_64_init_pda();
+	/*
+	 * Setup percpu state. We only need to do this for 64-bit
+	 * because 32-bit already has %fs set properly.
+	 */
+	load_percpu_segment(0);
 #endif
+	/*
+	 * The only reliable way to retain the initial address of the
+	 * percpu gdt_page is to remember it here, so we can go and
+	 * mark it RW later, when the initial percpu area is freed.
+	 */
+	xen_initial_gdt = &per_cpu(gdt_page, 0);
 
 	xen_smp_init();
 