diff options
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Kconfig | 13 | ||||
-rw-r--r-- | arch/x86/xen/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 184 | ||||
-rw-r--r-- | arch/x86/xen/manage.c | 143 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 345 | ||||
-rw-r--r-- | arch/x86/xen/mmu.h | 34 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.c | 40 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.h | 12 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 5 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 8 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 45 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 158 | ||||
-rw-r--r-- | arch/x86/xen/xen-head.S | 11 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 11 |
14 files changed, 585 insertions, 426 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 2e641be2737e..c2cc99580871 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -5,9 +5,20 @@ | |||
5 | config XEN | 5 | config XEN |
6 | bool "Xen guest support" | 6 | bool "Xen guest support" |
7 | select PARAVIRT | 7 | select PARAVIRT |
8 | select PARAVIRT_CLOCK | ||
8 | depends on X86_32 | 9 | depends on X86_32 |
9 | depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) | 10 | depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) |
10 | help | 11 | help |
11 | This is the Linux Xen port. Enabling this will allow the | 12 | This is the Linux Xen port. Enabling this will allow the |
12 | kernel to boot in a paravirtualized environment under the | 13 | kernel to boot in a paravirtualized environment under the |
13 | Xen hypervisor. | 14 | Xen hypervisor. |
15 | |||
16 | config XEN_MAX_DOMAIN_MEMORY | ||
17 | int "Maximum allowed size of a domain in gigabytes" | ||
18 | default 8 | ||
19 | depends on XEN | ||
20 | help | ||
21 | The pseudo-physical to machine address array is sized | ||
22 | according to the maximum possible memory size of a Xen | ||
23 | domain. This array uses 1 page per gigabyte, so there's no | ||
24 | need to be too stingy here. \ No newline at end of file | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3d8df981d5fd..2ba2d1649131 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ | 1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ |
2 | time.o manage.o xen-asm.o grant-table.o | 2 | time.o xen-asm.o grant-table.o suspend.o |
3 | 3 | ||
4 | obj-$(CONFIG_SMP) += smp.o | 4 | obj-$(CONFIG_SMP) += smp.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8a56e457d61..bd74229081c3 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -75,13 +75,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ | |||
75 | struct start_info *xen_start_info; | 75 | struct start_info *xen_start_info; |
76 | EXPORT_SYMBOL_GPL(xen_start_info); | 76 | EXPORT_SYMBOL_GPL(xen_start_info); |
77 | 77 | ||
78 | static /* __initdata */ struct shared_info dummy_shared_info; | 78 | struct shared_info xen_dummy_shared_info; |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Point at some empty memory to start with. We map the real shared_info | 81 | * Point at some empty memory to start with. We map the real shared_info |
82 | * page as soon as fixmap is up and running. | 82 | * page as soon as fixmap is up and running. |
83 | */ | 83 | */ |
84 | struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; | 84 | struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Flag to determine whether vcpu info placement is available on all | 87 | * Flag to determine whether vcpu info placement is available on all |
@@ -98,13 +98,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; | |||
98 | */ | 98 | */ |
99 | static int have_vcpu_info_placement = 1; | 99 | static int have_vcpu_info_placement = 1; |
100 | 100 | ||
101 | static void __init xen_vcpu_setup(int cpu) | 101 | static void xen_vcpu_setup(int cpu) |
102 | { | 102 | { |
103 | struct vcpu_register_vcpu_info info; | 103 | struct vcpu_register_vcpu_info info; |
104 | int err; | 104 | int err; |
105 | struct vcpu_info *vcpup; | 105 | struct vcpu_info *vcpup; |
106 | 106 | ||
107 | BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); | 107 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
108 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 108 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
109 | 109 | ||
110 | if (!have_vcpu_info_placement) | 110 | if (!have_vcpu_info_placement) |
@@ -136,11 +136,41 @@ static void __init xen_vcpu_setup(int cpu) | |||
136 | } | 136 | } |
137 | } | 137 | } |
138 | 138 | ||
139 | /* | ||
140 | * On restore, set the vcpu placement up again. | ||
141 | * If it fails, then we're in a bad state, since | ||
142 | * we can't back out from using it... | ||
143 | */ | ||
144 | void xen_vcpu_restore(void) | ||
145 | { | ||
146 | if (have_vcpu_info_placement) { | ||
147 | int cpu; | ||
148 | |||
149 | for_each_online_cpu(cpu) { | ||
150 | bool other_cpu = (cpu != smp_processor_id()); | ||
151 | |||
152 | if (other_cpu && | ||
153 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) | ||
154 | BUG(); | ||
155 | |||
156 | xen_vcpu_setup(cpu); | ||
157 | |||
158 | if (other_cpu && | ||
159 | HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) | ||
160 | BUG(); | ||
161 | } | ||
162 | |||
163 | BUG_ON(!have_vcpu_info_placement); | ||
164 | } | ||
165 | } | ||
166 | |||
139 | static void __init xen_banner(void) | 167 | static void __init xen_banner(void) |
140 | { | 168 | { |
141 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 169 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
142 | pv_info.name); | 170 | pv_info.name); |
143 | printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); | 171 | printk(KERN_INFO "Hypervisor signature: %s%s\n", |
172 | xen_start_info->magic, | ||
173 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | ||
144 | } | 174 | } |
145 | 175 | ||
146 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, | 176 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, |
@@ -235,13 +265,13 @@ static void xen_irq_enable(void) | |||
235 | { | 265 | { |
236 | struct vcpu_info *vcpu; | 266 | struct vcpu_info *vcpu; |
237 | 267 | ||
238 | /* There's a one instruction preempt window here. We need to | 268 | /* We don't need to worry about being preempted here, since |
239 | make sure we're don't switch CPUs between getting the vcpu | 269 | either a) interrupts are disabled, so no preemption, or b) |
240 | pointer and updating the mask. */ | 270 | the caller is confused and is trying to re-enable interrupts |
241 | preempt_disable(); | 271 | on an indeterminate processor. */ |
272 | |||
242 | vcpu = x86_read_percpu(xen_vcpu); | 273 | vcpu = x86_read_percpu(xen_vcpu); |
243 | vcpu->evtchn_upcall_mask = 0; | 274 | vcpu->evtchn_upcall_mask = 0; |
244 | preempt_enable_no_resched(); | ||
245 | 275 | ||
246 | /* Doesn't matter if we get preempted here, because any | 276 | /* Doesn't matter if we get preempted here, because any |
247 | pending event will get dealt with anyway. */ | 277 | pending event will get dealt with anyway. */ |
@@ -254,7 +284,7 @@ static void xen_irq_enable(void) | |||
254 | static void xen_safe_halt(void) | 284 | static void xen_safe_halt(void) |
255 | { | 285 | { |
256 | /* Blocking includes an implicit local_irq_enable(). */ | 286 | /* Blocking includes an implicit local_irq_enable(). */ |
257 | if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) | 287 | if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) |
258 | BUG(); | 288 | BUG(); |
259 | } | 289 | } |
260 | 290 | ||
@@ -607,6 +637,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, | |||
607 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 637 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
608 | } | 638 | } |
609 | 639 | ||
640 | static void xen_clts(void) | ||
641 | { | ||
642 | struct multicall_space mcs; | ||
643 | |||
644 | mcs = xen_mc_entry(0); | ||
645 | |||
646 | MULTI_fpu_taskswitch(mcs.mc, 0); | ||
647 | |||
648 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
649 | } | ||
650 | |||
651 | static void xen_write_cr0(unsigned long cr0) | ||
652 | { | ||
653 | struct multicall_space mcs; | ||
654 | |||
655 | /* Only pay attention to cr0.TS; everything else is | ||
656 | ignored. */ | ||
657 | mcs = xen_mc_entry(0); | ||
658 | |||
659 | MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); | ||
660 | |||
661 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
662 | } | ||
663 | |||
610 | static void xen_write_cr2(unsigned long cr2) | 664 | static void xen_write_cr2(unsigned long cr2) |
611 | { | 665 | { |
612 | x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; | 666 | x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; |
@@ -624,8 +678,10 @@ static unsigned long xen_read_cr2_direct(void) | |||
624 | 678 | ||
625 | static void xen_write_cr4(unsigned long cr4) | 679 | static void xen_write_cr4(unsigned long cr4) |
626 | { | 680 | { |
627 | /* Just ignore cr4 changes; Xen doesn't allow us to do | 681 | cr4 &= ~X86_CR4_PGE; |
628 | anything anyway. */ | 682 | cr4 &= ~X86_CR4_PSE; |
683 | |||
684 | native_write_cr4(cr4); | ||
629 | } | 685 | } |
630 | 686 | ||
631 | static unsigned long xen_read_cr3(void) | 687 | static unsigned long xen_read_cr3(void) |
@@ -785,38 +841,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | |||
785 | static __init void xen_pagetable_setup_start(pgd_t *base) | 841 | static __init void xen_pagetable_setup_start(pgd_t *base) |
786 | { | 842 | { |
787 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; | 843 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; |
844 | int i; | ||
788 | 845 | ||
789 | /* special set_pte for pagetable initialization */ | 846 | /* special set_pte for pagetable initialization */ |
790 | pv_mmu_ops.set_pte = xen_set_pte_init; | 847 | pv_mmu_ops.set_pte = xen_set_pte_init; |
791 | 848 | ||
792 | init_mm.pgd = base; | 849 | init_mm.pgd = base; |
793 | /* | 850 | /* |
794 | * copy top-level of Xen-supplied pagetable into place. For | 851 | * copy top-level of Xen-supplied pagetable into place. This |
795 | * !PAE we can use this as-is, but for PAE it is a stand-in | 852 | * is a stand-in while we copy the pmd pages. |
796 | * while we copy the pmd pages. | ||
797 | */ | 853 | */ |
798 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); | 854 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); |
799 | 855 | ||
800 | if (PTRS_PER_PMD > 1) { | 856 | /* |
801 | int i; | 857 | * For PAE, need to allocate new pmds, rather than |
802 | /* | 858 | * share Xen's, since Xen doesn't like pmd's being |
803 | * For PAE, need to allocate new pmds, rather than | 859 | * shared between address spaces. |
804 | * share Xen's, since Xen doesn't like pmd's being | 860 | */ |
805 | * shared between address spaces. | 861 | for (i = 0; i < PTRS_PER_PGD; i++) { |
806 | */ | 862 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { |
807 | for (i = 0; i < PTRS_PER_PGD; i++) { | 863 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
808 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { | ||
809 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | ||
810 | 864 | ||
811 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), | 865 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), |
812 | PAGE_SIZE); | 866 | PAGE_SIZE); |
813 | 867 | ||
814 | make_lowmem_page_readonly(pmd); | 868 | make_lowmem_page_readonly(pmd); |
815 | 869 | ||
816 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); | 870 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); |
817 | } else | 871 | } else |
818 | pgd_clear(&base[i]); | 872 | pgd_clear(&base[i]); |
819 | } | ||
820 | } | 873 | } |
821 | 874 | ||
822 | /* make sure zero_page is mapped RO so we can use it in pagetables */ | 875 | /* make sure zero_page is mapped RO so we can use it in pagetables */ |
@@ -834,7 +887,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) | |||
834 | PFN_DOWN(__pa(xen_start_info->pt_base))); | 887 | PFN_DOWN(__pa(xen_start_info->pt_base))); |
835 | } | 888 | } |
836 | 889 | ||
837 | static __init void setup_shared_info(void) | 890 | void xen_setup_shared_info(void) |
838 | { | 891 | { |
839 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 892 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
840 | unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); | 893 | unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); |
@@ -857,6 +910,8 @@ static __init void setup_shared_info(void) | |||
857 | /* In UP this is as good a place as any to set up shared info */ | 910 | /* In UP this is as good a place as any to set up shared info */ |
858 | xen_setup_vcpu_info_placement(); | 911 | xen_setup_vcpu_info_placement(); |
859 | #endif | 912 | #endif |
913 | |||
914 | xen_setup_mfn_list_list(); | ||
860 | } | 915 | } |
861 | 916 | ||
862 | static __init void xen_pagetable_setup_done(pgd_t *base) | 917 | static __init void xen_pagetable_setup_done(pgd_t *base) |
@@ -869,25 +924,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base) | |||
869 | pv_mmu_ops.release_pmd = xen_release_pmd; | 924 | pv_mmu_ops.release_pmd = xen_release_pmd; |
870 | pv_mmu_ops.set_pte = xen_set_pte; | 925 | pv_mmu_ops.set_pte = xen_set_pte; |
871 | 926 | ||
872 | setup_shared_info(); | 927 | xen_setup_shared_info(); |
873 | 928 | ||
874 | /* Actually pin the pagetable down, but we can't set PG_pinned | 929 | /* Actually pin the pagetable down, but we can't set PG_pinned |
875 | yet because the page structures don't exist yet. */ | 930 | yet because the page structures don't exist yet. */ |
876 | { | 931 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); |
877 | unsigned level; | 932 | } |
878 | 933 | ||
879 | #ifdef CONFIG_X86_PAE | 934 | static __init void xen_post_allocator_init(void) |
880 | level = MMUEXT_PIN_L3_TABLE; | 935 | { |
881 | #else | 936 | pv_mmu_ops.set_pmd = xen_set_pmd; |
882 | level = MMUEXT_PIN_L2_TABLE; | 937 | pv_mmu_ops.set_pud = xen_set_pud; |
883 | #endif | ||
884 | 938 | ||
885 | pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); | 939 | xen_mark_init_mm_pinned(); |
886 | } | ||
887 | } | 940 | } |
888 | 941 | ||
889 | /* This is called once we have the cpu_possible_map */ | 942 | /* This is called once we have the cpu_possible_map */ |
890 | void __init xen_setup_vcpu_info_placement(void) | 943 | void xen_setup_vcpu_info_placement(void) |
891 | { | 944 | { |
892 | int cpu; | 945 | int cpu; |
893 | 946 | ||
@@ -973,7 +1026,7 @@ static const struct pv_init_ops xen_init_ops __initdata = { | |||
973 | .banner = xen_banner, | 1026 | .banner = xen_banner, |
974 | .memory_setup = xen_memory_setup, | 1027 | .memory_setup = xen_memory_setup, |
975 | .arch_setup = xen_arch_setup, | 1028 | .arch_setup = xen_arch_setup, |
976 | .post_allocator_init = xen_mark_init_mm_pinned, | 1029 | .post_allocator_init = xen_post_allocator_init, |
977 | }; | 1030 | }; |
978 | 1031 | ||
979 | static const struct pv_time_ops xen_time_ops __initdata = { | 1032 | static const struct pv_time_ops xen_time_ops __initdata = { |
@@ -991,10 +1044,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
991 | .set_debugreg = xen_set_debugreg, | 1044 | .set_debugreg = xen_set_debugreg, |
992 | .get_debugreg = xen_get_debugreg, | 1045 | .get_debugreg = xen_get_debugreg, |
993 | 1046 | ||
994 | .clts = native_clts, | 1047 | .clts = xen_clts, |
995 | 1048 | ||
996 | .read_cr0 = native_read_cr0, | 1049 | .read_cr0 = native_read_cr0, |
997 | .write_cr0 = native_write_cr0, | 1050 | .write_cr0 = xen_write_cr0, |
998 | 1051 | ||
999 | .read_cr4 = native_read_cr4, | 1052 | .read_cr4 = native_read_cr4, |
1000 | .read_cr4_safe = native_read_cr4_safe, | 1053 | .read_cr4_safe = native_read_cr4_safe, |
@@ -1085,24 +1138,26 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1085 | 1138 | ||
1086 | .set_pte = NULL, /* see xen_pagetable_setup_* */ | 1139 | .set_pte = NULL, /* see xen_pagetable_setup_* */ |
1087 | .set_pte_at = xen_set_pte_at, | 1140 | .set_pte_at = xen_set_pte_at, |
1088 | .set_pmd = xen_set_pmd, | 1141 | .set_pmd = xen_set_pmd_hyper, |
1142 | |||
1143 | .ptep_modify_prot_start = __ptep_modify_prot_start, | ||
1144 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | ||
1089 | 1145 | ||
1090 | .pte_val = xen_pte_val, | 1146 | .pte_val = xen_pte_val, |
1147 | .pte_flags = native_pte_val, | ||
1091 | .pgd_val = xen_pgd_val, | 1148 | .pgd_val = xen_pgd_val, |
1092 | 1149 | ||
1093 | .make_pte = xen_make_pte, | 1150 | .make_pte = xen_make_pte, |
1094 | .make_pgd = xen_make_pgd, | 1151 | .make_pgd = xen_make_pgd, |
1095 | 1152 | ||
1096 | #ifdef CONFIG_X86_PAE | ||
1097 | .set_pte_atomic = xen_set_pte_atomic, | 1153 | .set_pte_atomic = xen_set_pte_atomic, |
1098 | .set_pte_present = xen_set_pte_at, | 1154 | .set_pte_present = xen_set_pte_at, |
1099 | .set_pud = xen_set_pud, | 1155 | .set_pud = xen_set_pud_hyper, |
1100 | .pte_clear = xen_pte_clear, | 1156 | .pte_clear = xen_pte_clear, |
1101 | .pmd_clear = xen_pmd_clear, | 1157 | .pmd_clear = xen_pmd_clear, |
1102 | 1158 | ||
1103 | .make_pmd = xen_make_pmd, | 1159 | .make_pmd = xen_make_pmd, |
1104 | .pmd_val = xen_pmd_val, | 1160 | .pmd_val = xen_pmd_val, |
1105 | #endif /* PAE */ | ||
1106 | 1161 | ||
1107 | .activate_mm = xen_activate_mm, | 1162 | .activate_mm = xen_activate_mm, |
1108 | .dup_mmap = xen_dup_mmap, | 1163 | .dup_mmap = xen_dup_mmap, |
@@ -1129,11 +1184,13 @@ static const struct smp_ops xen_smp_ops __initdata = { | |||
1129 | 1184 | ||
1130 | static void xen_reboot(int reason) | 1185 | static void xen_reboot(int reason) |
1131 | { | 1186 | { |
1187 | struct sched_shutdown r = { .reason = reason }; | ||
1188 | |||
1132 | #ifdef CONFIG_SMP | 1189 | #ifdef CONFIG_SMP |
1133 | smp_send_stop(); | 1190 | smp_send_stop(); |
1134 | #endif | 1191 | #endif |
1135 | 1192 | ||
1136 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) | 1193 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) |
1137 | BUG(); | 1194 | BUG(); |
1138 | } | 1195 | } |
1139 | 1196 | ||
@@ -1188,6 +1245,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1188 | 1245 | ||
1189 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); | 1246 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); |
1190 | 1247 | ||
1248 | xen_setup_features(); | ||
1249 | |||
1191 | /* Install Xen paravirt ops */ | 1250 | /* Install Xen paravirt ops */ |
1192 | pv_info = xen_info; | 1251 | pv_info = xen_info; |
1193 | pv_init_ops = xen_init_ops; | 1252 | pv_init_ops = xen_init_ops; |
@@ -1197,17 +1256,20 @@ asmlinkage void __init xen_start_kernel(void) | |||
1197 | pv_apic_ops = xen_apic_ops; | 1256 | pv_apic_ops = xen_apic_ops; |
1198 | pv_mmu_ops = xen_mmu_ops; | 1257 | pv_mmu_ops = xen_mmu_ops; |
1199 | 1258 | ||
1259 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { | ||
1260 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; | ||
1261 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; | ||
1262 | } | ||
1263 | |||
1200 | machine_ops = xen_machine_ops; | 1264 | machine_ops = xen_machine_ops; |
1201 | 1265 | ||
1202 | #ifdef CONFIG_SMP | 1266 | #ifdef CONFIG_SMP |
1203 | smp_ops = xen_smp_ops; | 1267 | smp_ops = xen_smp_ops; |
1204 | #endif | 1268 | #endif |
1205 | 1269 | ||
1206 | xen_setup_features(); | ||
1207 | |||
1208 | /* Get mfn list */ | 1270 | /* Get mfn list */ |
1209 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 1271 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
1210 | phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; | 1272 | xen_build_dynamic_phys_to_machine(); |
1211 | 1273 | ||
1212 | pgd = (pgd_t *)xen_start_info->pt_base; | 1274 | pgd = (pgd_t *)xen_start_info->pt_base; |
1213 | 1275 | ||
@@ -1228,6 +1290,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1228 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | 1290 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
1229 | pv_info.kernel_rpl = 0; | 1291 | pv_info.kernel_rpl = 0; |
1230 | 1292 | ||
1293 | /* Prevent unwanted bits from being set in PTEs. */ | ||
1294 | __supported_pte_mask &= ~_PAGE_GLOBAL; | ||
1295 | if (!is_initial_xendomain()) | ||
1296 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | ||
1297 | |||
1231 | /* set the limit of our address space */ | 1298 | /* set the limit of our address space */ |
1232 | xen_reserve_top(); | 1299 | xen_reserve_top(); |
1233 | 1300 | ||
@@ -1242,8 +1309,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1242 | ? __pa(xen_start_info->mod_start) : 0; | 1309 | ? __pa(xen_start_info->mod_start) : 0; |
1243 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; | 1310 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; |
1244 | 1311 | ||
1245 | if (!is_initial_xendomain()) | 1312 | if (!is_initial_xendomain()) { |
1313 | add_preferred_console("xenboot", 0, NULL); | ||
1314 | add_preferred_console("tty", 0, NULL); | ||
1246 | add_preferred_console("hvc", 0, NULL); | 1315 | add_preferred_console("hvc", 0, NULL); |
1316 | } | ||
1247 | 1317 | ||
1248 | /* Start the world */ | 1318 | /* Start the world */ |
1249 | start_kernel(); | 1319 | start_kernel(); |
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c deleted file mode 100644 index aa7af9e6abc0..000000000000 --- a/arch/x86/xen/manage.c +++ /dev/null | |||
@@ -1,143 +0,0 @@ | |||
1 | /* | ||
2 | * Handle extern requests for shutdown, reboot and sysrq | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/err.h> | ||
6 | #include <linux/reboot.h> | ||
7 | #include <linux/sysrq.h> | ||
8 | |||
9 | #include <xen/xenbus.h> | ||
10 | |||
11 | #define SHUTDOWN_INVALID -1 | ||
12 | #define SHUTDOWN_POWEROFF 0 | ||
13 | #define SHUTDOWN_SUSPEND 2 | ||
14 | /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only | ||
15 | * report a crash, not be instructed to crash! | ||
16 | * HALT is the same as POWEROFF, as far as we're concerned. The tools use | ||
17 | * the distinction when we return the reason code to them. | ||
18 | */ | ||
19 | #define SHUTDOWN_HALT 4 | ||
20 | |||
21 | /* Ignore multiple shutdown requests. */ | ||
22 | static int shutting_down = SHUTDOWN_INVALID; | ||
23 | |||
24 | static void shutdown_handler(struct xenbus_watch *watch, | ||
25 | const char **vec, unsigned int len) | ||
26 | { | ||
27 | char *str; | ||
28 | struct xenbus_transaction xbt; | ||
29 | int err; | ||
30 | |||
31 | if (shutting_down != SHUTDOWN_INVALID) | ||
32 | return; | ||
33 | |||
34 | again: | ||
35 | err = xenbus_transaction_start(&xbt); | ||
36 | if (err) | ||
37 | return; | ||
38 | |||
39 | str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); | ||
40 | /* Ignore read errors and empty reads. */ | ||
41 | if (XENBUS_IS_ERR_READ(str)) { | ||
42 | xenbus_transaction_end(xbt, 1); | ||
43 | return; | ||
44 | } | ||
45 | |||
46 | xenbus_write(xbt, "control", "shutdown", ""); | ||
47 | |||
48 | err = xenbus_transaction_end(xbt, 0); | ||
49 | if (err == -EAGAIN) { | ||
50 | kfree(str); | ||
51 | goto again; | ||
52 | } | ||
53 | |||
54 | if (strcmp(str, "poweroff") == 0 || | ||
55 | strcmp(str, "halt") == 0) | ||
56 | orderly_poweroff(false); | ||
57 | else if (strcmp(str, "reboot") == 0) | ||
58 | ctrl_alt_del(); | ||
59 | else { | ||
60 | printk(KERN_INFO "Ignoring shutdown request: %s\n", str); | ||
61 | shutting_down = SHUTDOWN_INVALID; | ||
62 | } | ||
63 | |||
64 | kfree(str); | ||
65 | } | ||
66 | |||
67 | static void sysrq_handler(struct xenbus_watch *watch, const char **vec, | ||
68 | unsigned int len) | ||
69 | { | ||
70 | char sysrq_key = '\0'; | ||
71 | struct xenbus_transaction xbt; | ||
72 | int err; | ||
73 | |||
74 | again: | ||
75 | err = xenbus_transaction_start(&xbt); | ||
76 | if (err) | ||
77 | return; | ||
78 | if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { | ||
79 | printk(KERN_ERR "Unable to read sysrq code in " | ||
80 | "control/sysrq\n"); | ||
81 | xenbus_transaction_end(xbt, 1); | ||
82 | return; | ||
83 | } | ||
84 | |||
85 | if (sysrq_key != '\0') | ||
86 | xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); | ||
87 | |||
88 | err = xenbus_transaction_end(xbt, 0); | ||
89 | if (err == -EAGAIN) | ||
90 | goto again; | ||
91 | |||
92 | if (sysrq_key != '\0') | ||
93 | handle_sysrq(sysrq_key, NULL); | ||
94 | } | ||
95 | |||
96 | static struct xenbus_watch shutdown_watch = { | ||
97 | .node = "control/shutdown", | ||
98 | .callback = shutdown_handler | ||
99 | }; | ||
100 | |||
101 | static struct xenbus_watch sysrq_watch = { | ||
102 | .node = "control/sysrq", | ||
103 | .callback = sysrq_handler | ||
104 | }; | ||
105 | |||
106 | static int setup_shutdown_watcher(void) | ||
107 | { | ||
108 | int err; | ||
109 | |||
110 | err = register_xenbus_watch(&shutdown_watch); | ||
111 | if (err) { | ||
112 | printk(KERN_ERR "Failed to set shutdown watcher\n"); | ||
113 | return err; | ||
114 | } | ||
115 | |||
116 | err = register_xenbus_watch(&sysrq_watch); | ||
117 | if (err) { | ||
118 | printk(KERN_ERR "Failed to set sysrq watcher\n"); | ||
119 | return err; | ||
120 | } | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static int shutdown_event(struct notifier_block *notifier, | ||
126 | unsigned long event, | ||
127 | void *data) | ||
128 | { | ||
129 | setup_shutdown_watcher(); | ||
130 | return NOTIFY_DONE; | ||
131 | } | ||
132 | |||
133 | static int __init setup_shutdown_event(void) | ||
134 | { | ||
135 | static struct notifier_block xenstore_notifier = { | ||
136 | .notifier_call = shutdown_event | ||
137 | }; | ||
138 | register_xenstore_notifier(&xenstore_notifier); | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | subsys_initcall(setup_shutdown_event); | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 126766d43aea..42b3b9ed641d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -56,11 +56,136 @@ | |||
56 | #include "multicalls.h" | 56 | #include "multicalls.h" |
57 | #include "mmu.h" | 57 | #include "mmu.h" |
58 | 58 | ||
59 | #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
60 | #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) | ||
61 | |||
62 | /* Placeholder for holes in the address space */ | ||
63 | static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] | ||
64 | __attribute__((section(".data.page_aligned"))) = | ||
65 | { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; | ||
66 | |||
67 | /* Array of pointers to pages containing p2m entries */ | ||
68 | static unsigned long *p2m_top[TOP_ENTRIES] | ||
69 | __attribute__((section(".data.page_aligned"))) = | ||
70 | { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; | ||
71 | |||
72 | /* Arrays of p2m arrays expressed in mfns used for save/restore */ | ||
73 | static unsigned long p2m_top_mfn[TOP_ENTRIES] | ||
74 | __attribute__((section(".bss.page_aligned"))); | ||
75 | |||
76 | static unsigned long p2m_top_mfn_list[ | ||
77 | PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] | ||
78 | __attribute__((section(".bss.page_aligned"))); | ||
79 | |||
80 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
81 | { | ||
82 | BUG_ON(pfn >= MAX_DOMAIN_PAGES); | ||
83 | return pfn / P2M_ENTRIES_PER_PAGE; | ||
84 | } | ||
85 | |||
86 | static inline unsigned p2m_index(unsigned long pfn) | ||
87 | { | ||
88 | return pfn % P2M_ENTRIES_PER_PAGE; | ||
89 | } | ||
90 | |||
91 | /* Build the parallel p2m_top_mfn structures */ | ||
92 | void xen_setup_mfn_list_list(void) | ||
93 | { | ||
94 | unsigned pfn, idx; | ||
95 | |||
96 | for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { | ||
97 | unsigned topidx = p2m_top_index(pfn); | ||
98 | |||
99 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); | ||
100 | } | ||
101 | |||
102 | for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { | ||
103 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; | ||
104 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); | ||
105 | } | ||
106 | |||
107 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
108 | |||
109 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
110 | virt_to_mfn(p2m_top_mfn_list); | ||
111 | HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; | ||
112 | } | ||
113 | |||
114 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
115 | void __init xen_build_dynamic_phys_to_machine(void) | ||
116 | { | ||
117 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
118 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
119 | unsigned pfn; | ||
120 | |||
121 | for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { | ||
122 | unsigned topidx = p2m_top_index(pfn); | ||
123 | |||
124 | p2m_top[topidx] = &mfn_list[pfn]; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
129 | { | ||
130 | unsigned topidx, idx; | ||
131 | |||
132 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) | ||
133 | return INVALID_P2M_ENTRY; | ||
134 | |||
135 | topidx = p2m_top_index(pfn); | ||
136 | idx = p2m_index(pfn); | ||
137 | return p2m_top[topidx][idx]; | ||
138 | } | ||
139 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
140 | |||
141 | static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) | ||
142 | { | ||
143 | unsigned long *p; | ||
144 | unsigned i; | ||
145 | |||
146 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); | ||
147 | BUG_ON(p == NULL); | ||
148 | |||
149 | for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) | ||
150 | p[i] = INVALID_P2M_ENTRY; | ||
151 | |||
152 | if (cmpxchg(pp, p2m_missing, p) != p2m_missing) | ||
153 | free_page((unsigned long)p); | ||
154 | else | ||
155 | *mfnp = virt_to_mfn(p); | ||
156 | } | ||
157 | |||
158 | void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
159 | { | ||
160 | unsigned topidx, idx; | ||
161 | |||
162 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
163 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
164 | return; | ||
165 | } | ||
166 | |||
167 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { | ||
168 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
169 | return; | ||
170 | } | ||
171 | |||
172 | topidx = p2m_top_index(pfn); | ||
173 | if (p2m_top[topidx] == p2m_missing) { | ||
174 | /* no need to allocate a page to store an invalid entry */ | ||
175 | if (mfn == INVALID_P2M_ENTRY) | ||
176 | return; | ||
177 | alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); | ||
178 | } | ||
179 | |||
180 | idx = p2m_index(pfn); | ||
181 | p2m_top[topidx][idx] = mfn; | ||
182 | } | ||
183 | |||
59 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) | 184 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) |
60 | { | 185 | { |
61 | unsigned int level; | 186 | unsigned int level; |
62 | pte_t *pte = lookup_address(address, &level); | 187 | pte_t *pte = lookup_address(address, &level); |
63 | unsigned offset = address & PAGE_MASK; | 188 | unsigned offset = address & ~PAGE_MASK; |
64 | 189 | ||
65 | BUG_ON(pte == NULL); | 190 | BUG_ON(pte == NULL); |
66 | 191 | ||
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
98 | } | 223 | } |
99 | 224 | ||
100 | 225 | ||
101 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | 226 | static bool page_pinned(void *ptr) |
227 | { | ||
228 | struct page *page = virt_to_page(ptr); | ||
229 | |||
230 | return PagePinned(page); | ||
231 | } | ||
232 | |||
233 | static void extend_mmu_update(const struct mmu_update *update) | ||
102 | { | 234 | { |
103 | struct multicall_space mcs; | 235 | struct multicall_space mcs; |
104 | struct mmu_update *u; | 236 | struct mmu_update *u; |
105 | 237 | ||
106 | preempt_disable(); | 238 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); |
239 | |||
240 | if (mcs.mc != NULL) | ||
241 | mcs.mc->args[1]++; | ||
242 | else { | ||
243 | mcs = __xen_mc_entry(sizeof(*u)); | ||
244 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | ||
245 | } | ||
107 | 246 | ||
108 | mcs = xen_mc_entry(sizeof(*u)); | ||
109 | u = mcs.args; | 247 | u = mcs.args; |
110 | u->ptr = virt_to_machine(ptr).maddr; | 248 | *u = *update; |
111 | u->val = pmd_val_ma(val); | 249 | } |
112 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | 250 | |
251 | void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | ||
252 | { | ||
253 | struct mmu_update u; | ||
254 | |||
255 | preempt_disable(); | ||
256 | |||
257 | xen_mc_batch(); | ||
258 | |||
259 | u.ptr = virt_to_machine(ptr).maddr; | ||
260 | u.val = pmd_val_ma(val); | ||
261 | extend_mmu_update(&u); | ||
113 | 262 | ||
114 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 263 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
115 | 264 | ||
116 | preempt_enable(); | 265 | preempt_enable(); |
117 | } | 266 | } |
118 | 267 | ||
268 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | ||
269 | { | ||
270 | /* If page is not pinned, we can just update the entry | ||
271 | directly */ | ||
272 | if (!page_pinned(ptr)) { | ||
273 | *ptr = val; | ||
274 | return; | ||
275 | } | ||
276 | |||
277 | xen_set_pmd_hyper(ptr, val); | ||
278 | } | ||
279 | |||
119 | /* | 280 | /* |
120 | * Associate a virtual page frame with a given physical page frame | 281 | * Associate a virtual page frame with a given physical page frame |
121 | * and protection flags for that frame. | 282 | * and protection flags for that frame. |
@@ -179,68 +340,105 @@ out: | |||
179 | preempt_enable(); | 340 | preempt_enable(); |
180 | } | 341 | } |
181 | 342 | ||
182 | pteval_t xen_pte_val(pte_t pte) | 343 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
183 | { | 344 | { |
184 | pteval_t ret = pte.pte; | 345 | /* Just return the pte as-is. We preserve the bits on commit */ |
346 | return *ptep; | ||
347 | } | ||
348 | |||
349 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | ||
350 | pte_t *ptep, pte_t pte) | ||
351 | { | ||
352 | struct mmu_update u; | ||
353 | |||
354 | xen_mc_batch(); | ||
185 | 355 | ||
186 | if (ret & _PAGE_PRESENT) | 356 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
187 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | 357 | u.val = pte_val_ma(pte); |
358 | extend_mmu_update(&u); | ||
188 | 359 | ||
189 | return ret; | 360 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
190 | } | 361 | } |
191 | 362 | ||
192 | pgdval_t xen_pgd_val(pgd_t pgd) | 363 | /* Assume pteval_t is equivalent to all the other *val_t types. */ |
364 | static pteval_t pte_mfn_to_pfn(pteval_t val) | ||
193 | { | 365 | { |
194 | pgdval_t ret = pgd.pgd; | 366 | if (val & _PAGE_PRESENT) { |
195 | if (ret & _PAGE_PRESENT) | 367 | unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; |
196 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | 368 | pteval_t flags = val & ~PTE_MASK; |
197 | return ret; | 369 | val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; |
370 | } | ||
371 | |||
372 | return val; | ||
198 | } | 373 | } |
199 | 374 | ||
200 | pte_t xen_make_pte(pteval_t pte) | 375 | static pteval_t pte_pfn_to_mfn(pteval_t val) |
201 | { | 376 | { |
202 | if (pte & _PAGE_PRESENT) { | 377 | if (val & _PAGE_PRESENT) { |
203 | pte = phys_to_machine(XPADDR(pte)).maddr; | 378 | unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; |
204 | pte &= ~(_PAGE_PCD | _PAGE_PWT); | 379 | pteval_t flags = val & ~PTE_MASK; |
380 | val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; | ||
205 | } | 381 | } |
206 | 382 | ||
207 | return (pte_t){ .pte = pte }; | 383 | return val; |
208 | } | 384 | } |
209 | 385 | ||
210 | pgd_t xen_make_pgd(pgdval_t pgd) | 386 | pteval_t xen_pte_val(pte_t pte) |
211 | { | 387 | { |
212 | if (pgd & _PAGE_PRESENT) | 388 | return pte_mfn_to_pfn(pte.pte); |
213 | pgd = phys_to_machine(XPADDR(pgd)).maddr; | 389 | } |
214 | 390 | ||
215 | return (pgd_t){ pgd }; | 391 | pgdval_t xen_pgd_val(pgd_t pgd) |
392 | { | ||
393 | return pte_mfn_to_pfn(pgd.pgd); | ||
394 | } | ||
395 | |||
396 | pte_t xen_make_pte(pteval_t pte) | ||
397 | { | ||
398 | pte = pte_pfn_to_mfn(pte); | ||
399 | return native_make_pte(pte); | ||
400 | } | ||
401 | |||
402 | pgd_t xen_make_pgd(pgdval_t pgd) | ||
403 | { | ||
404 | pgd = pte_pfn_to_mfn(pgd); | ||
405 | return native_make_pgd(pgd); | ||
216 | } | 406 | } |
217 | 407 | ||
218 | pmdval_t xen_pmd_val(pmd_t pmd) | 408 | pmdval_t xen_pmd_val(pmd_t pmd) |
219 | { | 409 | { |
220 | pmdval_t ret = native_pmd_val(pmd); | 410 | return pte_mfn_to_pfn(pmd.pmd); |
221 | if (ret & _PAGE_PRESENT) | ||
222 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | ||
223 | return ret; | ||
224 | } | 411 | } |
225 | #ifdef CONFIG_X86_PAE | 412 | |
226 | void xen_set_pud(pud_t *ptr, pud_t val) | 413 | void xen_set_pud_hyper(pud_t *ptr, pud_t val) |
227 | { | 414 | { |
228 | struct multicall_space mcs; | 415 | struct mmu_update u; |
229 | struct mmu_update *u; | ||
230 | 416 | ||
231 | preempt_disable(); | 417 | preempt_disable(); |
232 | 418 | ||
233 | mcs = xen_mc_entry(sizeof(*u)); | 419 | xen_mc_batch(); |
234 | u = mcs.args; | 420 | |
235 | u->ptr = virt_to_machine(ptr).maddr; | 421 | u.ptr = virt_to_machine(ptr).maddr; |
236 | u->val = pud_val_ma(val); | 422 | u.val = pud_val_ma(val); |
237 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | 423 | extend_mmu_update(&u); |
238 | 424 | ||
239 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 425 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
240 | 426 | ||
241 | preempt_enable(); | 427 | preempt_enable(); |
242 | } | 428 | } |
243 | 429 | ||
430 | void xen_set_pud(pud_t *ptr, pud_t val) | ||
431 | { | ||
432 | /* If page is not pinned, we can just update the entry | ||
433 | directly */ | ||
434 | if (!page_pinned(ptr)) { | ||
435 | *ptr = val; | ||
436 | return; | ||
437 | } | ||
438 | |||
439 | xen_set_pud_hyper(ptr, val); | ||
440 | } | ||
441 | |||
244 | void xen_set_pte(pte_t *ptep, pte_t pte) | 442 | void xen_set_pte(pte_t *ptep, pte_t pte) |
245 | { | 443 | { |
246 | ptep->pte_high = pte.pte_high; | 444 | ptep->pte_high = pte.pte_high; |
@@ -262,22 +460,14 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |||
262 | 460 | ||
263 | void xen_pmd_clear(pmd_t *pmdp) | 461 | void xen_pmd_clear(pmd_t *pmdp) |
264 | { | 462 | { |
265 | xen_set_pmd(pmdp, __pmd(0)); | 463 | set_pmd(pmdp, __pmd(0)); |
266 | } | 464 | } |
267 | 465 | ||
268 | pmd_t xen_make_pmd(pmdval_t pmd) | 466 | pmd_t xen_make_pmd(pmdval_t pmd) |
269 | { | 467 | { |
270 | if (pmd & _PAGE_PRESENT) | 468 | pmd = pte_pfn_to_mfn(pmd); |
271 | pmd = phys_to_machine(XPADDR(pmd)).maddr; | ||
272 | |||
273 | return native_make_pmd(pmd); | 469 | return native_make_pmd(pmd); |
274 | } | 470 | } |
275 | #else /* !PAE */ | ||
276 | void xen_set_pte(pte_t *ptep, pte_t pte) | ||
277 | { | ||
278 | *ptep = pte; | ||
279 | } | ||
280 | #endif /* CONFIG_X86_PAE */ | ||
281 | 471 | ||
282 | /* | 472 | /* |
283 | (Yet another) pagetable walker. This one is intended for pinning a | 473 | (Yet another) pagetable walker. This one is intended for pinning a |
@@ -430,8 +620,6 @@ static int pin_page(struct page *page, enum pt_level level) | |||
430 | read-only, and can be pinned. */ | 620 | read-only, and can be pinned. */ |
431 | void xen_pgd_pin(pgd_t *pgd) | 621 | void xen_pgd_pin(pgd_t *pgd) |
432 | { | 622 | { |
433 | unsigned level; | ||
434 | |||
435 | xen_mc_batch(); | 623 | xen_mc_batch(); |
436 | 624 | ||
437 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { | 625 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { |
@@ -441,15 +629,31 @@ void xen_pgd_pin(pgd_t *pgd) | |||
441 | xen_mc_batch(); | 629 | xen_mc_batch(); |
442 | } | 630 | } |
443 | 631 | ||
444 | #ifdef CONFIG_X86_PAE | 632 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); |
445 | level = MMUEXT_PIN_L3_TABLE; | 633 | xen_mc_issue(0); |
446 | #else | 634 | } |
447 | level = MMUEXT_PIN_L2_TABLE; | 635 | |
448 | #endif | 636 | /* |
637 | * On save, we need to pin all pagetables to make sure they get their | ||
638 | * mfns turned into pfns. Search the list for any unpinned pgds and pin | ||
639 | * them (unpinned pgds are not currently in use, probably because the | ||
640 | * process is under construction or destruction). | ||
641 | */ | ||
642 | void xen_mm_pin_all(void) | ||
643 | { | ||
644 | unsigned long flags; | ||
645 | struct page *page; | ||
449 | 646 | ||
450 | xen_do_pin(level, PFN_DOWN(__pa(pgd))); | 647 | spin_lock_irqsave(&pgd_lock, flags); |
451 | 648 | ||
452 | xen_mc_issue(0); | 649 | list_for_each_entry(page, &pgd_list, lru) { |
650 | if (!PagePinned(page)) { | ||
651 | xen_pgd_pin((pgd_t *)page_address(page)); | ||
652 | SetPageSavePinned(page); | ||
653 | } | ||
654 | } | ||
655 | |||
656 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
453 | } | 657 | } |
454 | 658 | ||
455 | /* The init_mm pagetable is really pinned as soon as its created, but | 659 | /* The init_mm pagetable is really pinned as soon as its created, but |
@@ -509,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd) | |||
509 | xen_mc_issue(0); | 713 | xen_mc_issue(0); |
510 | } | 714 | } |
511 | 715 | ||
716 | /* | ||
717 | * On resume, undo any pinning done at save, so that the rest of the | ||
718 | * kernel doesn't see any unexpected pinned pagetables. | ||
719 | */ | ||
720 | void xen_mm_unpin_all(void) | ||
721 | { | ||
722 | unsigned long flags; | ||
723 | struct page *page; | ||
724 | |||
725 | spin_lock_irqsave(&pgd_lock, flags); | ||
726 | |||
727 | list_for_each_entry(page, &pgd_list, lru) { | ||
728 | if (PageSavePinned(page)) { | ||
729 | BUG_ON(!PagePinned(page)); | ||
730 | printk("unpinning pinned %p\n", page_address(page)); | ||
731 | xen_pgd_unpin((pgd_t *)page_address(page)); | ||
732 | ClearPageSavePinned(page); | ||
733 | } | ||
734 | } | ||
735 | |||
736 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
737 | } | ||
738 | |||
512 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) | 739 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) |
513 | { | 740 | { |
514 | spin_lock(&next->page_table_lock); | 741 | spin_lock(&next->page_table_lock); |
@@ -602,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm) | |||
602 | spin_lock(&mm->page_table_lock); | 829 | spin_lock(&mm->page_table_lock); |
603 | 830 | ||
604 | /* pgd may not be pinned in the error exit path of execve */ | 831 | /* pgd may not be pinned in the error exit path of execve */ |
605 | if (PagePinned(virt_to_page(mm->pgd))) | 832 | if (page_pinned(mm->pgd)) |
606 | xen_pgd_unpin(mm->pgd); | 833 | xen_pgd_unpin(mm->pgd); |
607 | 834 | ||
608 | spin_unlock(&mm->page_table_lock); | 835 | spin_unlock(&mm->page_table_lock); |
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index b5e189b1519d..297bf9f5b8bc 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -25,10 +25,6 @@ enum pt_level { | |||
25 | 25 | ||
26 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 26 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
27 | 27 | ||
28 | void xen_set_pte(pte_t *ptep, pte_t pteval); | ||
29 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
30 | pte_t *ptep, pte_t pteval); | ||
31 | void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); | ||
32 | 28 | ||
33 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); | 29 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); |
34 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | 30 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); |
@@ -37,31 +33,27 @@ void xen_exit_mmap(struct mm_struct *mm); | |||
37 | void xen_pgd_pin(pgd_t *pgd); | 33 | void xen_pgd_pin(pgd_t *pgd); |
38 | //void xen_pgd_unpin(pgd_t *pgd); | 34 | //void xen_pgd_unpin(pgd_t *pgd); |
39 | 35 | ||
40 | #ifdef CONFIG_X86_PAE | 36 | pteval_t xen_pte_val(pte_t); |
41 | unsigned long long xen_pte_val(pte_t); | 37 | pmdval_t xen_pmd_val(pmd_t); |
42 | unsigned long long xen_pmd_val(pmd_t); | 38 | pgdval_t xen_pgd_val(pgd_t); |
43 | unsigned long long xen_pgd_val(pgd_t); | ||
44 | 39 | ||
45 | pte_t xen_make_pte(unsigned long long); | 40 | pte_t xen_make_pte(pteval_t); |
46 | pmd_t xen_make_pmd(unsigned long long); | 41 | pmd_t xen_make_pmd(pmdval_t); |
47 | pgd_t xen_make_pgd(unsigned long long); | 42 | pgd_t xen_make_pgd(pgdval_t); |
48 | 43 | ||
44 | void xen_set_pte(pte_t *ptep, pte_t pteval); | ||
49 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 45 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
50 | pte_t *ptep, pte_t pteval); | 46 | pte_t *ptep, pte_t pteval); |
51 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte); | 47 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte); |
48 | void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); | ||
52 | void xen_set_pud(pud_t *ptr, pud_t val); | 49 | void xen_set_pud(pud_t *ptr, pud_t val); |
50 | void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); | ||
51 | void xen_set_pud_hyper(pud_t *ptr, pud_t val); | ||
53 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | 52 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); |
54 | void xen_pmd_clear(pmd_t *pmdp); | 53 | void xen_pmd_clear(pmd_t *pmdp); |
55 | 54 | ||
56 | 55 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | |
57 | #else | 56 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, |
58 | unsigned long xen_pte_val(pte_t); | 57 | pte_t *ptep, pte_t pte); |
59 | unsigned long xen_pmd_val(pmd_t); | ||
60 | unsigned long xen_pgd_val(pgd_t); | ||
61 | |||
62 | pte_t xen_make_pte(unsigned long); | ||
63 | pmd_t xen_make_pmd(unsigned long); | ||
64 | pgd_t xen_make_pgd(unsigned long); | ||
65 | #endif | ||
66 | 58 | ||
67 | #endif /* _XEN_MMU_H */ | 59 | #endif /* _XEN_MMU_H */ |
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 5791eb2e3750..3c63c4da7ed1 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -29,14 +29,14 @@ | |||
29 | #define MC_DEBUG 1 | 29 | #define MC_DEBUG 1 |
30 | 30 | ||
31 | #define MC_BATCH 32 | 31 | #define MC_BATCH 32 |
32 | #define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) | 32 | #define MC_ARGS (MC_BATCH * 16) |
33 | 33 | ||
34 | struct mc_buffer { | 34 | struct mc_buffer { |
35 | struct multicall_entry entries[MC_BATCH]; | 35 | struct multicall_entry entries[MC_BATCH]; |
36 | #if MC_DEBUG | 36 | #if MC_DEBUG |
37 | struct multicall_entry debug[MC_BATCH]; | 37 | struct multicall_entry debug[MC_BATCH]; |
38 | #endif | 38 | #endif |
39 | u64 args[MC_ARGS]; | 39 | unsigned char args[MC_ARGS]; |
40 | struct callback { | 40 | struct callback { |
41 | void (*fn)(void *); | 41 | void (*fn)(void *); |
42 | void *data; | 42 | void *data; |
@@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args) | |||
107 | { | 107 | { |
108 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 108 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
109 | struct multicall_space ret; | 109 | struct multicall_space ret; |
110 | unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); | 110 | unsigned argidx = roundup(b->argidx, sizeof(u64)); |
111 | 111 | ||
112 | BUG_ON(preemptible()); | 112 | BUG_ON(preemptible()); |
113 | BUG_ON(argspace > MC_ARGS); | 113 | BUG_ON(b->argidx > MC_ARGS); |
114 | 114 | ||
115 | if (b->mcidx == MC_BATCH || | 115 | if (b->mcidx == MC_BATCH || |
116 | (b->argidx + argspace) > MC_ARGS) | 116 | (argidx + args) > MC_ARGS) { |
117 | xen_mc_flush(); | 117 | xen_mc_flush(); |
118 | argidx = roundup(b->argidx, sizeof(u64)); | ||
119 | } | ||
118 | 120 | ||
119 | ret.mc = &b->entries[b->mcidx]; | 121 | ret.mc = &b->entries[b->mcidx]; |
120 | b->mcidx++; | 122 | b->mcidx++; |
123 | ret.args = &b->args[argidx]; | ||
124 | b->argidx = argidx + args; | ||
125 | |||
126 | BUG_ON(b->argidx > MC_ARGS); | ||
127 | return ret; | ||
128 | } | ||
129 | |||
130 | struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) | ||
131 | { | ||
132 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | ||
133 | struct multicall_space ret = { NULL, NULL }; | ||
134 | |||
135 | BUG_ON(preemptible()); | ||
136 | BUG_ON(b->argidx > MC_ARGS); | ||
137 | |||
138 | if (b->mcidx == 0) | ||
139 | return ret; | ||
140 | |||
141 | if (b->entries[b->mcidx - 1].op != op) | ||
142 | return ret; | ||
143 | |||
144 | if ((b->argidx + size) > MC_ARGS) | ||
145 | return ret; | ||
146 | |||
147 | ret.mc = &b->entries[b->mcidx - 1]; | ||
121 | ret.args = &b->args[b->argidx]; | 148 | ret.args = &b->args[b->argidx]; |
122 | b->argidx += argspace; | 149 | b->argidx += size; |
123 | 150 | ||
151 | BUG_ON(b->argidx > MC_ARGS); | ||
124 | return ret; | 152 | return ret; |
125 | } | 153 | } |
126 | 154 | ||
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 8bae996d99a3..858938241616 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h | |||
@@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode) | |||
45 | /* Set up a callback to be called when the current batch is flushed */ | 45 | /* Set up a callback to be called when the current batch is flushed */ |
46 | void xen_mc_callback(void (*fn)(void *), void *data); | 46 | void xen_mc_callback(void (*fn)(void *), void *data); |
47 | 47 | ||
48 | /* | ||
49 | * Try to extend the arguments of the previous multicall command. The | ||
50 | * previous command's op must match. If it does, then it attempts to | ||
51 | * extend the argument space allocated to the multicall entry by | ||
52 | * arg_size bytes. | ||
53 | * | ||
54 | * The returned multicall_space will return with mc pointing to the | ||
55 | * command on success, or NULL on failure, and args pointing to the | ||
56 | * newly allocated space. | ||
57 | */ | ||
58 | struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); | ||
59 | |||
48 | #endif /* _XEN_MULTICALLS_H */ | 60 | #endif /* _XEN_MULTICALLS_H */ |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 82517e4a752a..488447878a9d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/xen/hypervisor.h> | 16 | #include <asm/xen/hypervisor.h> |
17 | #include <asm/xen/hypercall.h> | 17 | #include <asm/xen/hypercall.h> |
18 | 18 | ||
19 | #include <xen/page.h> | ||
19 | #include <xen/interface/callback.h> | 20 | #include <xen/interface/callback.h> |
20 | #include <xen/interface/physdev.h> | 21 | #include <xen/interface/physdev.h> |
21 | #include <xen/features.h> | 22 | #include <xen/features.h> |
@@ -27,8 +28,6 @@ | |||
27 | extern const char xen_hypervisor_callback[]; | 28 | extern const char xen_hypervisor_callback[]; |
28 | extern const char xen_failsafe_callback[]; | 29 | extern const char xen_failsafe_callback[]; |
29 | 30 | ||
30 | unsigned long *phys_to_machine_mapping; | ||
31 | EXPORT_SYMBOL(phys_to_machine_mapping); | ||
32 | 31 | ||
33 | /** | 32 | /** |
34 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 33 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
@@ -38,6 +37,8 @@ char * __init xen_memory_setup(void) | |||
38 | { | 37 | { |
39 | unsigned long max_pfn = xen_start_info->nr_pages; | 38 | unsigned long max_pfn = xen_start_info->nr_pages; |
40 | 39 | ||
40 | max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); | ||
41 | |||
41 | e820.nr_map = 0; | 42 | e820.nr_map = 0; |
42 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); | 43 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); |
43 | add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); | 44 | add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 94e69000f982..d2e3c20127d7 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -35,7 +35,7 @@ | |||
35 | #include "xen-ops.h" | 35 | #include "xen-ops.h" |
36 | #include "mmu.h" | 36 | #include "mmu.h" |
37 | 37 | ||
38 | static cpumask_t xen_cpu_initialized_map; | 38 | cpumask_t xen_cpu_initialized_map; |
39 | static DEFINE_PER_CPU(int, resched_irq) = -1; | 39 | static DEFINE_PER_CPU(int, resched_irq) = -1; |
40 | static DEFINE_PER_CPU(int, callfunc_irq) = -1; | 40 | static DEFINE_PER_CPU(int, callfunc_irq) = -1; |
41 | static DEFINE_PER_CPU(int, debug_irq) = -1; | 41 | static DEFINE_PER_CPU(int, debug_irq) = -1; |
@@ -65,6 +65,12 @@ static struct call_data_struct *call_data; | |||
65 | */ | 65 | */ |
66 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | 66 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) |
67 | { | 67 | { |
68 | #ifdef CONFIG_X86_32 | ||
69 | __get_cpu_var(irq_stat).irq_resched_count++; | ||
70 | #else | ||
71 | add_pda(irq_resched_count, 1); | ||
72 | #endif | ||
73 | |||
68 | return IRQ_HANDLED; | 74 | return IRQ_HANDLED; |
69 | } | 75 | } |
70 | 76 | ||
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 000000000000..251669a932d4 --- /dev/null +++ b/arch/x86/xen/suspend.c | |||
@@ -0,0 +1,45 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include <xen/interface/xen.h> | ||
4 | #include <xen/grant_table.h> | ||
5 | #include <xen/events.h> | ||
6 | |||
7 | #include <asm/xen/hypercall.h> | ||
8 | #include <asm/xen/page.h> | ||
9 | |||
10 | #include "xen-ops.h" | ||
11 | #include "mmu.h" | ||
12 | |||
13 | void xen_pre_suspend(void) | ||
14 | { | ||
15 | xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); | ||
16 | xen_start_info->console.domU.mfn = | ||
17 | mfn_to_pfn(xen_start_info->console.domU.mfn); | ||
18 | |||
19 | BUG_ON(!irqs_disabled()); | ||
20 | |||
21 | HYPERVISOR_shared_info = &xen_dummy_shared_info; | ||
22 | if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), | ||
23 | __pte_ma(0), 0)) | ||
24 | BUG(); | ||
25 | } | ||
26 | |||
27 | void xen_post_suspend(int suspend_cancelled) | ||
28 | { | ||
29 | xen_setup_shared_info(); | ||
30 | |||
31 | if (suspend_cancelled) { | ||
32 | xen_start_info->store_mfn = | ||
33 | pfn_to_mfn(xen_start_info->store_mfn); | ||
34 | xen_start_info->console.domU.mfn = | ||
35 | pfn_to_mfn(xen_start_info->console.domU.mfn); | ||
36 | } else { | ||
37 | #ifdef CONFIG_SMP | ||
38 | xen_cpu_initialized_map = cpu_online_map; | ||
39 | #endif | ||
40 | xen_vcpu_restore(); | ||
41 | xen_timer_resume(); | ||
42 | } | ||
43 | |||
44 | } | ||
45 | |||
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa241..64f0038b9558 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -12,7 +12,9 @@ | |||
12 | #include <linux/clocksource.h> | 12 | #include <linux/clocksource.h> |
13 | #include <linux/clockchips.h> | 13 | #include <linux/clockchips.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/math64.h> | ||
15 | 16 | ||
17 | #include <asm/pvclock.h> | ||
16 | #include <asm/xen/hypervisor.h> | 18 | #include <asm/xen/hypervisor.h> |
17 | #include <asm/xen/hypercall.h> | 19 | #include <asm/xen/hypercall.h> |
18 | 20 | ||
@@ -30,17 +32,6 @@ | |||
30 | 32 | ||
31 | static cycle_t xen_clocksource_read(void); | 33 | static cycle_t xen_clocksource_read(void); |
32 | 34 | ||
33 | /* These are perodically updated in shared_info, and then copied here. */ | ||
34 | struct shadow_time_info { | ||
35 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | ||
36 | u64 system_timestamp; /* Time, in nanosecs, since boot. */ | ||
37 | u32 tsc_to_nsec_mul; | ||
38 | int tsc_shift; | ||
39 | u32 version; | ||
40 | }; | ||
41 | |||
42 | static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); | ||
43 | |||
44 | /* runstate info updated by Xen */ | 35 | /* runstate info updated by Xen */ |
45 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); | 36 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); |
46 | 37 | ||
@@ -150,11 +141,7 @@ static void do_stolen_accounting(void) | |||
150 | if (stolen < 0) | 141 | if (stolen < 0) |
151 | stolen = 0; | 142 | stolen = 0; |
152 | 143 | ||
153 | ticks = 0; | 144 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); |
154 | while (stolen >= NS_PER_TICK) { | ||
155 | ticks++; | ||
156 | stolen -= NS_PER_TICK; | ||
157 | } | ||
158 | __get_cpu_var(residual_stolen) = stolen; | 145 | __get_cpu_var(residual_stolen) = stolen; |
159 | account_steal_time(NULL, ticks); | 146 | account_steal_time(NULL, ticks); |
160 | 147 | ||
@@ -166,11 +153,7 @@ static void do_stolen_accounting(void) | |||
166 | if (blocked < 0) | 153 | if (blocked < 0) |
167 | blocked = 0; | 154 | blocked = 0; |
168 | 155 | ||
169 | ticks = 0; | 156 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); |
170 | while (blocked >= NS_PER_TICK) { | ||
171 | ticks++; | ||
172 | blocked -= NS_PER_TICK; | ||
173 | } | ||
174 | __get_cpu_var(residual_blocked) = blocked; | 157 | __get_cpu_var(residual_blocked) = blocked; |
175 | account_steal_time(idle_task(smp_processor_id()), ticks); | 158 | account_steal_time(idle_task(smp_processor_id()), ticks); |
176 | } | 159 | } |
@@ -218,7 +201,7 @@ unsigned long long xen_sched_clock(void) | |||
218 | unsigned long xen_cpu_khz(void) | 201 | unsigned long xen_cpu_khz(void) |
219 | { | 202 | { |
220 | u64 xen_khz = 1000000ULL << 32; | 203 | u64 xen_khz = 1000000ULL << 32; |
221 | const struct vcpu_time_info *info = | 204 | const struct pvclock_vcpu_time_info *info = |
222 | &HYPERVISOR_shared_info->vcpu_info[0].time; | 205 | &HYPERVISOR_shared_info->vcpu_info[0].time; |
223 | 206 | ||
224 | do_div(xen_khz, info->tsc_to_system_mul); | 207 | do_div(xen_khz, info->tsc_to_system_mul); |
@@ -230,121 +213,26 @@ unsigned long xen_cpu_khz(void) | |||
230 | return xen_khz; | 213 | return xen_khz; |
231 | } | 214 | } |
232 | 215 | ||
233 | /* | ||
234 | * Reads a consistent set of time-base values from Xen, into a shadow data | ||
235 | * area. | ||
236 | */ | ||
237 | static unsigned get_time_values_from_xen(void) | ||
238 | { | ||
239 | struct vcpu_time_info *src; | ||
240 | struct shadow_time_info *dst; | ||
241 | |||
242 | /* src is shared memory with the hypervisor, so we need to | ||
243 | make sure we get a consistent snapshot, even in the face of | ||
244 | being preempted. */ | ||
245 | src = &__get_cpu_var(xen_vcpu)->time; | ||
246 | dst = &__get_cpu_var(shadow_time); | ||
247 | |||
248 | do { | ||
249 | dst->version = src->version; | ||
250 | rmb(); /* fetch version before data */ | ||
251 | dst->tsc_timestamp = src->tsc_timestamp; | ||
252 | dst->system_timestamp = src->system_time; | ||
253 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | ||
254 | dst->tsc_shift = src->tsc_shift; | ||
255 | rmb(); /* test version after fetching data */ | ||
256 | } while ((src->version & 1) | (dst->version ^ src->version)); | ||
257 | |||
258 | return dst->version; | ||
259 | } | ||
260 | |||
261 | /* | ||
262 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | ||
263 | * yielding a 64-bit result. | ||
264 | */ | ||
265 | static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) | ||
266 | { | ||
267 | u64 product; | ||
268 | #ifdef __i386__ | ||
269 | u32 tmp1, tmp2; | ||
270 | #endif | ||
271 | |||
272 | if (shift < 0) | ||
273 | delta >>= -shift; | ||
274 | else | ||
275 | delta <<= shift; | ||
276 | |||
277 | #ifdef __i386__ | ||
278 | __asm__ ( | ||
279 | "mul %5 ; " | ||
280 | "mov %4,%%eax ; " | ||
281 | "mov %%edx,%4 ; " | ||
282 | "mul %5 ; " | ||
283 | "xor %5,%5 ; " | ||
284 | "add %4,%%eax ; " | ||
285 | "adc %5,%%edx ; " | ||
286 | : "=A" (product), "=r" (tmp1), "=r" (tmp2) | ||
287 | : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); | ||
288 | #elif __x86_64__ | ||
289 | __asm__ ( | ||
290 | "mul %%rdx ; shrd $32,%%rdx,%%rax" | ||
291 | : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); | ||
292 | #else | ||
293 | #error implement me! | ||
294 | #endif | ||
295 | |||
296 | return product; | ||
297 | } | ||
298 | |||
299 | static u64 get_nsec_offset(struct shadow_time_info *shadow) | ||
300 | { | ||
301 | u64 now, delta; | ||
302 | now = native_read_tsc(); | ||
303 | delta = now - shadow->tsc_timestamp; | ||
304 | return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); | ||
305 | } | ||
306 | |||
307 | static cycle_t xen_clocksource_read(void) | 216 | static cycle_t xen_clocksource_read(void) |
308 | { | 217 | { |
309 | struct shadow_time_info *shadow = &get_cpu_var(shadow_time); | 218 | struct pvclock_vcpu_time_info *src; |
310 | cycle_t ret; | 219 | cycle_t ret; |
311 | unsigned version; | ||
312 | |||
313 | do { | ||
314 | version = get_time_values_from_xen(); | ||
315 | barrier(); | ||
316 | ret = shadow->system_timestamp + get_nsec_offset(shadow); | ||
317 | barrier(); | ||
318 | } while (version != __get_cpu_var(xen_vcpu)->time.version); | ||
319 | |||
320 | put_cpu_var(shadow_time); | ||
321 | 220 | ||
221 | src = &get_cpu_var(xen_vcpu)->time; | ||
222 | ret = pvclock_clocksource_read(src); | ||
223 | put_cpu_var(xen_vcpu); | ||
322 | return ret; | 224 | return ret; |
323 | } | 225 | } |
324 | 226 | ||
325 | static void xen_read_wallclock(struct timespec *ts) | 227 | static void xen_read_wallclock(struct timespec *ts) |
326 | { | 228 | { |
327 | const struct shared_info *s = HYPERVISOR_shared_info; | 229 | struct shared_info *s = HYPERVISOR_shared_info; |
328 | u32 version; | 230 | struct pvclock_wall_clock *wall_clock = &(s->wc); |
329 | u64 delta; | 231 | struct pvclock_vcpu_time_info *vcpu_time; |
330 | struct timespec now; | ||
331 | |||
332 | /* get wallclock at system boot */ | ||
333 | do { | ||
334 | version = s->wc_version; | ||
335 | rmb(); /* fetch version before time */ | ||
336 | now.tv_sec = s->wc_sec; | ||
337 | now.tv_nsec = s->wc_nsec; | ||
338 | rmb(); /* fetch time before checking version */ | ||
339 | } while ((s->wc_version & 1) | (version ^ s->wc_version)); | ||
340 | |||
341 | delta = xen_clocksource_read(); /* time since system boot */ | ||
342 | delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; | ||
343 | |||
344 | now.tv_nsec = do_div(delta, NSEC_PER_SEC); | ||
345 | now.tv_sec = delta; | ||
346 | 232 | ||
347 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | 233 | vcpu_time = &get_cpu_var(xen_vcpu)->time; |
234 | pvclock_read_wallclock(wall_clock, vcpu_time, ts); | ||
235 | put_cpu_var(xen_vcpu); | ||
348 | } | 236 | } |
349 | 237 | ||
350 | unsigned long xen_get_wallclock(void) | 238 | unsigned long xen_get_wallclock(void) |
@@ -352,7 +240,6 @@ unsigned long xen_get_wallclock(void) | |||
352 | struct timespec ts; | 240 | struct timespec ts; |
353 | 241 | ||
354 | xen_read_wallclock(&ts); | 242 | xen_read_wallclock(&ts); |
355 | |||
356 | return ts.tv_sec; | 243 | return ts.tv_sec; |
357 | } | 244 | } |
358 | 245 | ||
@@ -572,12 +459,23 @@ void xen_setup_cpu_clockevents(void) | |||
572 | clockevents_register_device(&__get_cpu_var(xen_clock_events)); | 459 | clockevents_register_device(&__get_cpu_var(xen_clock_events)); |
573 | } | 460 | } |
574 | 461 | ||
462 | void xen_timer_resume(void) | ||
463 | { | ||
464 | int cpu; | ||
465 | |||
466 | if (xen_clockevent != &xen_vcpuop_clockevent) | ||
467 | return; | ||
468 | |||
469 | for_each_online_cpu(cpu) { | ||
470 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) | ||
471 | BUG(); | ||
472 | } | ||
473 | } | ||
474 | |||
575 | __init void xen_time_init(void) | 475 | __init void xen_time_init(void) |
576 | { | 476 | { |
577 | int cpu = smp_processor_id(); | 477 | int cpu = smp_processor_id(); |
578 | 478 | ||
579 | get_time_values_from_xen(); | ||
580 | |||
581 | clocksource_register(&xen_clocksource); | 479 | clocksource_register(&xen_clocksource); |
582 | 480 | ||
583 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { | 481 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 288d587ce73c..7c0cf6320a0a 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <asm/boot.h> | 8 | #include <asm/boot.h> |
9 | #include <xen/interface/elfnote.h> | 9 | #include <xen/interface/elfnote.h> |
10 | #include <asm/xen/interface.h> | ||
10 | 11 | ||
11 | __INIT | 12 | __INIT |
12 | ENTRY(startup_xen) | 13 | ENTRY(startup_xen) |
@@ -17,7 +18,7 @@ ENTRY(startup_xen) | |||
17 | 18 | ||
18 | __FINIT | 19 | __FINIT |
19 | 20 | ||
20 | .pushsection .bss.page_aligned | 21 | .pushsection .text |
21 | .align PAGE_SIZE_asm | 22 | .align PAGE_SIZE_asm |
22 | ENTRY(hypercall_page) | 23 | ENTRY(hypercall_page) |
23 | .skip 0x1000 | 24 | .skip 0x1000 |
@@ -30,11 +31,11 @@ ENTRY(hypercall_page) | |||
30 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) | 31 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) |
31 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) | 32 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) |
32 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 33 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") |
33 | #ifdef CONFIG_X86_PAE | ||
34 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 34 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
35 | #else | ||
36 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") | ||
37 | #endif | ||
38 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 35 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
36 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, | ||
37 | .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) | ||
38 | ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) | ||
39 | ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) | ||
39 | 40 | ||
40 | #endif /*CONFIG_XEN */ | 41 | #endif /*CONFIG_XEN */ |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f1063ae08037..9a055592a307 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -9,18 +9,26 @@ | |||
9 | extern const char xen_hypervisor_callback[]; | 9 | extern const char xen_hypervisor_callback[]; |
10 | extern const char xen_failsafe_callback[]; | 10 | extern const char xen_failsafe_callback[]; |
11 | 11 | ||
12 | struct trap_info; | ||
12 | void xen_copy_trap_info(struct trap_info *traps); | 13 | void xen_copy_trap_info(struct trap_info *traps); |
13 | 14 | ||
14 | DECLARE_PER_CPU(unsigned long, xen_cr3); | 15 | DECLARE_PER_CPU(unsigned long, xen_cr3); |
15 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); | 16 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); |
16 | 17 | ||
17 | extern struct start_info *xen_start_info; | 18 | extern struct start_info *xen_start_info; |
19 | extern struct shared_info xen_dummy_shared_info; | ||
18 | extern struct shared_info *HYPERVISOR_shared_info; | 20 | extern struct shared_info *HYPERVISOR_shared_info; |
19 | 21 | ||
22 | void xen_setup_mfn_list_list(void); | ||
23 | void xen_setup_shared_info(void); | ||
24 | |||
20 | char * __init xen_memory_setup(void); | 25 | char * __init xen_memory_setup(void); |
21 | void __init xen_arch_setup(void); | 26 | void __init xen_arch_setup(void); |
22 | void __init xen_init_IRQ(void); | 27 | void __init xen_init_IRQ(void); |
23 | void xen_enable_sysenter(void); | 28 | void xen_enable_sysenter(void); |
29 | void xen_vcpu_restore(void); | ||
30 | |||
31 | void __init xen_build_dynamic_phys_to_machine(void); | ||
24 | 32 | ||
25 | void xen_setup_timer(int cpu); | 33 | void xen_setup_timer(int cpu); |
26 | void xen_setup_cpu_clockevents(void); | 34 | void xen_setup_cpu_clockevents(void); |
@@ -29,6 +37,7 @@ void __init xen_time_init(void); | |||
29 | unsigned long xen_get_wallclock(void); | 37 | unsigned long xen_get_wallclock(void); |
30 | int xen_set_wallclock(unsigned long time); | 38 | int xen_set_wallclock(unsigned long time); |
31 | unsigned long long xen_sched_clock(void); | 39 | unsigned long long xen_sched_clock(void); |
40 | void xen_timer_resume(void); | ||
32 | 41 | ||
33 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); | 42 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); |
34 | 43 | ||
@@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, | |||
54 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), | 63 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), |
55 | void *info, int wait); | 64 | void *info, int wait); |
56 | 65 | ||
66 | extern cpumask_t xen_cpu_initialized_map; | ||
67 | |||
57 | 68 | ||
58 | /* Declare an asm function, along with symbols needed to make it | 69 | /* Declare an asm function, along with symbols needed to make it |
59 | inlineable */ | 70 | inlineable */ |