diff options
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Kconfig | 10 | ||||
-rw-r--r-- | arch/x86/xen/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 132 | ||||
-rw-r--r-- | arch/x86/xen/manage.c | 143 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 274 | ||||
-rw-r--r-- | arch/x86/xen/mmu.h | 12 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.c | 40 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.h | 12 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 5 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 8 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 45 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 13 | ||||
-rw-r--r-- | arch/x86/xen/xen-head.S | 5 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 11 |
14 files changed, 511 insertions, 201 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 6c388e593bc8..c2cc99580871 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -12,3 +12,13 @@ config XEN | |||
12 | This is the Linux Xen port. Enabling this will allow the | 12 | This is the Linux Xen port. Enabling this will allow the |
13 | kernel to boot in a paravirtualized environment under the | 13 | kernel to boot in a paravirtualized environment under the |
14 | Xen hypervisor. | 14 | Xen hypervisor. |
15 | |||
16 | config XEN_MAX_DOMAIN_MEMORY | ||
17 | int "Maximum allowed size of a domain in gigabytes" | ||
18 | default 8 | ||
19 | depends on XEN | ||
20 | help | ||
21 | The pseudo-physical to machine address array is sized | ||
22 | according to the maximum possible memory size of a Xen | ||
23 | domain. This array uses 1 page per gigabyte, so there's no | ||
24 | need to be too stingy here. \ No newline at end of file | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3d8df981d5fd..2ba2d1649131 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ | 1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ |
2 | time.o manage.o xen-asm.o grant-table.o | 2 | time.o xen-asm.o grant-table.o suspend.o |
3 | 3 | ||
4 | obj-$(CONFIG_SMP) += smp.o | 4 | obj-$(CONFIG_SMP) += smp.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f09c1c69c37a..bd74229081c3 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -75,13 +75,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ | |||
75 | struct start_info *xen_start_info; | 75 | struct start_info *xen_start_info; |
76 | EXPORT_SYMBOL_GPL(xen_start_info); | 76 | EXPORT_SYMBOL_GPL(xen_start_info); |
77 | 77 | ||
78 | static /* __initdata */ struct shared_info dummy_shared_info; | 78 | struct shared_info xen_dummy_shared_info; |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Point at some empty memory to start with. We map the real shared_info | 81 | * Point at some empty memory to start with. We map the real shared_info |
82 | * page as soon as fixmap is up and running. | 82 | * page as soon as fixmap is up and running. |
83 | */ | 83 | */ |
84 | struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; | 84 | struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Flag to determine whether vcpu info placement is available on all | 87 | * Flag to determine whether vcpu info placement is available on all |
@@ -98,13 +98,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; | |||
98 | */ | 98 | */ |
99 | static int have_vcpu_info_placement = 1; | 99 | static int have_vcpu_info_placement = 1; |
100 | 100 | ||
101 | static void __init xen_vcpu_setup(int cpu) | 101 | static void xen_vcpu_setup(int cpu) |
102 | { | 102 | { |
103 | struct vcpu_register_vcpu_info info; | 103 | struct vcpu_register_vcpu_info info; |
104 | int err; | 104 | int err; |
105 | struct vcpu_info *vcpup; | 105 | struct vcpu_info *vcpup; |
106 | 106 | ||
107 | BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); | 107 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
108 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 108 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
109 | 109 | ||
110 | if (!have_vcpu_info_placement) | 110 | if (!have_vcpu_info_placement) |
@@ -136,11 +136,41 @@ static void __init xen_vcpu_setup(int cpu) | |||
136 | } | 136 | } |
137 | } | 137 | } |
138 | 138 | ||
139 | /* | ||
140 | * On restore, set the vcpu placement up again. | ||
141 | * If it fails, then we're in a bad state, since | ||
142 | * we can't back out from using it... | ||
143 | */ | ||
144 | void xen_vcpu_restore(void) | ||
145 | { | ||
146 | if (have_vcpu_info_placement) { | ||
147 | int cpu; | ||
148 | |||
149 | for_each_online_cpu(cpu) { | ||
150 | bool other_cpu = (cpu != smp_processor_id()); | ||
151 | |||
152 | if (other_cpu && | ||
153 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) | ||
154 | BUG(); | ||
155 | |||
156 | xen_vcpu_setup(cpu); | ||
157 | |||
158 | if (other_cpu && | ||
159 | HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) | ||
160 | BUG(); | ||
161 | } | ||
162 | |||
163 | BUG_ON(!have_vcpu_info_placement); | ||
164 | } | ||
165 | } | ||
166 | |||
139 | static void __init xen_banner(void) | 167 | static void __init xen_banner(void) |
140 | { | 168 | { |
141 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 169 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
142 | pv_info.name); | 170 | pv_info.name); |
143 | printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); | 171 | printk(KERN_INFO "Hypervisor signature: %s%s\n", |
172 | xen_start_info->magic, | ||
173 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | ||
144 | } | 174 | } |
145 | 175 | ||
146 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, | 176 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, |
@@ -235,13 +265,13 @@ static void xen_irq_enable(void) | |||
235 | { | 265 | { |
236 | struct vcpu_info *vcpu; | 266 | struct vcpu_info *vcpu; |
237 | 267 | ||
238 | /* There's a one instruction preempt window here. We need to | 268 | /* We don't need to worry about being preempted here, since |
239 | make sure we're don't switch CPUs between getting the vcpu | 269 | either a) interrupts are disabled, so no preemption, or b) |
240 | pointer and updating the mask. */ | 270 | the caller is confused and is trying to re-enable interrupts |
241 | preempt_disable(); | 271 | on an indeterminate processor. */ |
272 | |||
242 | vcpu = x86_read_percpu(xen_vcpu); | 273 | vcpu = x86_read_percpu(xen_vcpu); |
243 | vcpu->evtchn_upcall_mask = 0; | 274 | vcpu->evtchn_upcall_mask = 0; |
244 | preempt_enable_no_resched(); | ||
245 | 275 | ||
246 | /* Doesn't matter if we get preempted here, because any | 276 | /* Doesn't matter if we get preempted here, because any |
247 | pending event will get dealt with anyway. */ | 277 | pending event will get dealt with anyway. */ |
@@ -254,7 +284,7 @@ static void xen_irq_enable(void) | |||
254 | static void xen_safe_halt(void) | 284 | static void xen_safe_halt(void) |
255 | { | 285 | { |
256 | /* Blocking includes an implicit local_irq_enable(). */ | 286 | /* Blocking includes an implicit local_irq_enable(). */ |
257 | if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) | 287 | if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) |
258 | BUG(); | 288 | BUG(); |
259 | } | 289 | } |
260 | 290 | ||
@@ -607,6 +637,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, | |||
607 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 637 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
608 | } | 638 | } |
609 | 639 | ||
640 | static void xen_clts(void) | ||
641 | { | ||
642 | struct multicall_space mcs; | ||
643 | |||
644 | mcs = xen_mc_entry(0); | ||
645 | |||
646 | MULTI_fpu_taskswitch(mcs.mc, 0); | ||
647 | |||
648 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
649 | } | ||
650 | |||
651 | static void xen_write_cr0(unsigned long cr0) | ||
652 | { | ||
653 | struct multicall_space mcs; | ||
654 | |||
655 | /* Only pay attention to cr0.TS; everything else is | ||
656 | ignored. */ | ||
657 | mcs = xen_mc_entry(0); | ||
658 | |||
659 | MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); | ||
660 | |||
661 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
662 | } | ||
663 | |||
610 | static void xen_write_cr2(unsigned long cr2) | 664 | static void xen_write_cr2(unsigned long cr2) |
611 | { | 665 | { |
612 | x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; | 666 | x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; |
@@ -624,8 +678,10 @@ static unsigned long xen_read_cr2_direct(void) | |||
624 | 678 | ||
625 | static void xen_write_cr4(unsigned long cr4) | 679 | static void xen_write_cr4(unsigned long cr4) |
626 | { | 680 | { |
627 | /* Just ignore cr4 changes; Xen doesn't allow us to do | 681 | cr4 &= ~X86_CR4_PGE; |
628 | anything anyway. */ | 682 | cr4 &= ~X86_CR4_PSE; |
683 | |||
684 | native_write_cr4(cr4); | ||
629 | } | 685 | } |
630 | 686 | ||
631 | static unsigned long xen_read_cr3(void) | 687 | static unsigned long xen_read_cr3(void) |
@@ -831,7 +887,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) | |||
831 | PFN_DOWN(__pa(xen_start_info->pt_base))); | 887 | PFN_DOWN(__pa(xen_start_info->pt_base))); |
832 | } | 888 | } |
833 | 889 | ||
834 | static __init void setup_shared_info(void) | 890 | void xen_setup_shared_info(void) |
835 | { | 891 | { |
836 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 892 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
837 | unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); | 893 | unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); |
@@ -854,6 +910,8 @@ static __init void setup_shared_info(void) | |||
854 | /* In UP this is as good a place as any to set up shared info */ | 910 | /* In UP this is as good a place as any to set up shared info */ |
855 | xen_setup_vcpu_info_placement(); | 911 | xen_setup_vcpu_info_placement(); |
856 | #endif | 912 | #endif |
913 | |||
914 | xen_setup_mfn_list_list(); | ||
857 | } | 915 | } |
858 | 916 | ||
859 | static __init void xen_pagetable_setup_done(pgd_t *base) | 917 | static __init void xen_pagetable_setup_done(pgd_t *base) |
@@ -866,15 +924,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base) | |||
866 | pv_mmu_ops.release_pmd = xen_release_pmd; | 924 | pv_mmu_ops.release_pmd = xen_release_pmd; |
867 | pv_mmu_ops.set_pte = xen_set_pte; | 925 | pv_mmu_ops.set_pte = xen_set_pte; |
868 | 926 | ||
869 | setup_shared_info(); | 927 | xen_setup_shared_info(); |
870 | 928 | ||
871 | /* Actually pin the pagetable down, but we can't set PG_pinned | 929 | /* Actually pin the pagetable down, but we can't set PG_pinned |
872 | yet because the page structures don't exist yet. */ | 930 | yet because the page structures don't exist yet. */ |
873 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); | 931 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); |
874 | } | 932 | } |
875 | 933 | ||
934 | static __init void xen_post_allocator_init(void) | ||
935 | { | ||
936 | pv_mmu_ops.set_pmd = xen_set_pmd; | ||
937 | pv_mmu_ops.set_pud = xen_set_pud; | ||
938 | |||
939 | xen_mark_init_mm_pinned(); | ||
940 | } | ||
941 | |||
876 | /* This is called once we have the cpu_possible_map */ | 942 | /* This is called once we have the cpu_possible_map */ |
877 | void __init xen_setup_vcpu_info_placement(void) | 943 | void xen_setup_vcpu_info_placement(void) |
878 | { | 944 | { |
879 | int cpu; | 945 | int cpu; |
880 | 946 | ||
@@ -960,7 +1026,7 @@ static const struct pv_init_ops xen_init_ops __initdata = { | |||
960 | .banner = xen_banner, | 1026 | .banner = xen_banner, |
961 | .memory_setup = xen_memory_setup, | 1027 | .memory_setup = xen_memory_setup, |
962 | .arch_setup = xen_arch_setup, | 1028 | .arch_setup = xen_arch_setup, |
963 | .post_allocator_init = xen_mark_init_mm_pinned, | 1029 | .post_allocator_init = xen_post_allocator_init, |
964 | }; | 1030 | }; |
965 | 1031 | ||
966 | static const struct pv_time_ops xen_time_ops __initdata = { | 1032 | static const struct pv_time_ops xen_time_ops __initdata = { |
@@ -978,10 +1044,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
978 | .set_debugreg = xen_set_debugreg, | 1044 | .set_debugreg = xen_set_debugreg, |
979 | .get_debugreg = xen_get_debugreg, | 1045 | .get_debugreg = xen_get_debugreg, |
980 | 1046 | ||
981 | .clts = native_clts, | 1047 | .clts = xen_clts, |
982 | 1048 | ||
983 | .read_cr0 = native_read_cr0, | 1049 | .read_cr0 = native_read_cr0, |
984 | .write_cr0 = native_write_cr0, | 1050 | .write_cr0 = xen_write_cr0, |
985 | 1051 | ||
986 | .read_cr4 = native_read_cr4, | 1052 | .read_cr4 = native_read_cr4, |
987 | .read_cr4_safe = native_read_cr4_safe, | 1053 | .read_cr4_safe = native_read_cr4_safe, |
@@ -1072,9 +1138,13 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1072 | 1138 | ||
1073 | .set_pte = NULL, /* see xen_pagetable_setup_* */ | 1139 | .set_pte = NULL, /* see xen_pagetable_setup_* */ |
1074 | .set_pte_at = xen_set_pte_at, | 1140 | .set_pte_at = xen_set_pte_at, |
1075 | .set_pmd = xen_set_pmd, | 1141 | .set_pmd = xen_set_pmd_hyper, |
1142 | |||
1143 | .ptep_modify_prot_start = __ptep_modify_prot_start, | ||
1144 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | ||
1076 | 1145 | ||
1077 | .pte_val = xen_pte_val, | 1146 | .pte_val = xen_pte_val, |
1147 | .pte_flags = native_pte_val, | ||
1078 | .pgd_val = xen_pgd_val, | 1148 | .pgd_val = xen_pgd_val, |
1079 | 1149 | ||
1080 | .make_pte = xen_make_pte, | 1150 | .make_pte = xen_make_pte, |
@@ -1082,7 +1152,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1082 | 1152 | ||
1083 | .set_pte_atomic = xen_set_pte_atomic, | 1153 | .set_pte_atomic = xen_set_pte_atomic, |
1084 | .set_pte_present = xen_set_pte_at, | 1154 | .set_pte_present = xen_set_pte_at, |
1085 | .set_pud = xen_set_pud, | 1155 | .set_pud = xen_set_pud_hyper, |
1086 | .pte_clear = xen_pte_clear, | 1156 | .pte_clear = xen_pte_clear, |
1087 | .pmd_clear = xen_pmd_clear, | 1157 | .pmd_clear = xen_pmd_clear, |
1088 | 1158 | ||
@@ -1114,11 +1184,13 @@ static const struct smp_ops xen_smp_ops __initdata = { | |||
1114 | 1184 | ||
1115 | static void xen_reboot(int reason) | 1185 | static void xen_reboot(int reason) |
1116 | { | 1186 | { |
1187 | struct sched_shutdown r = { .reason = reason }; | ||
1188 | |||
1117 | #ifdef CONFIG_SMP | 1189 | #ifdef CONFIG_SMP |
1118 | smp_send_stop(); | 1190 | smp_send_stop(); |
1119 | #endif | 1191 | #endif |
1120 | 1192 | ||
1121 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) | 1193 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) |
1122 | BUG(); | 1194 | BUG(); |
1123 | } | 1195 | } |
1124 | 1196 | ||
@@ -1173,6 +1245,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1173 | 1245 | ||
1174 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); | 1246 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); |
1175 | 1247 | ||
1248 | xen_setup_features(); | ||
1249 | |||
1176 | /* Install Xen paravirt ops */ | 1250 | /* Install Xen paravirt ops */ |
1177 | pv_info = xen_info; | 1251 | pv_info = xen_info; |
1178 | pv_init_ops = xen_init_ops; | 1252 | pv_init_ops = xen_init_ops; |
@@ -1182,17 +1256,20 @@ asmlinkage void __init xen_start_kernel(void) | |||
1182 | pv_apic_ops = xen_apic_ops; | 1256 | pv_apic_ops = xen_apic_ops; |
1183 | pv_mmu_ops = xen_mmu_ops; | 1257 | pv_mmu_ops = xen_mmu_ops; |
1184 | 1258 | ||
1259 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { | ||
1260 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; | ||
1261 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; | ||
1262 | } | ||
1263 | |||
1185 | machine_ops = xen_machine_ops; | 1264 | machine_ops = xen_machine_ops; |
1186 | 1265 | ||
1187 | #ifdef CONFIG_SMP | 1266 | #ifdef CONFIG_SMP |
1188 | smp_ops = xen_smp_ops; | 1267 | smp_ops = xen_smp_ops; |
1189 | #endif | 1268 | #endif |
1190 | 1269 | ||
1191 | xen_setup_features(); | ||
1192 | |||
1193 | /* Get mfn list */ | 1270 | /* Get mfn list */ |
1194 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 1271 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
1195 | phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; | 1272 | xen_build_dynamic_phys_to_machine(); |
1196 | 1273 | ||
1197 | pgd = (pgd_t *)xen_start_info->pt_base; | 1274 | pgd = (pgd_t *)xen_start_info->pt_base; |
1198 | 1275 | ||
@@ -1232,8 +1309,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1232 | ? __pa(xen_start_info->mod_start) : 0; | 1309 | ? __pa(xen_start_info->mod_start) : 0; |
1233 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; | 1310 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; |
1234 | 1311 | ||
1235 | if (!is_initial_xendomain()) | 1312 | if (!is_initial_xendomain()) { |
1313 | add_preferred_console("xenboot", 0, NULL); | ||
1314 | add_preferred_console("tty", 0, NULL); | ||
1236 | add_preferred_console("hvc", 0, NULL); | 1315 | add_preferred_console("hvc", 0, NULL); |
1316 | } | ||
1237 | 1317 | ||
1238 | /* Start the world */ | 1318 | /* Start the world */ |
1239 | start_kernel(); | 1319 | start_kernel(); |
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c deleted file mode 100644 index aa7af9e6abc0..000000000000 --- a/arch/x86/xen/manage.c +++ /dev/null | |||
@@ -1,143 +0,0 @@ | |||
1 | /* | ||
2 | * Handle extern requests for shutdown, reboot and sysrq | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/err.h> | ||
6 | #include <linux/reboot.h> | ||
7 | #include <linux/sysrq.h> | ||
8 | |||
9 | #include <xen/xenbus.h> | ||
10 | |||
11 | #define SHUTDOWN_INVALID -1 | ||
12 | #define SHUTDOWN_POWEROFF 0 | ||
13 | #define SHUTDOWN_SUSPEND 2 | ||
14 | /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only | ||
15 | * report a crash, not be instructed to crash! | ||
16 | * HALT is the same as POWEROFF, as far as we're concerned. The tools use | ||
17 | * the distinction when we return the reason code to them. | ||
18 | */ | ||
19 | #define SHUTDOWN_HALT 4 | ||
20 | |||
21 | /* Ignore multiple shutdown requests. */ | ||
22 | static int shutting_down = SHUTDOWN_INVALID; | ||
23 | |||
24 | static void shutdown_handler(struct xenbus_watch *watch, | ||
25 | const char **vec, unsigned int len) | ||
26 | { | ||
27 | char *str; | ||
28 | struct xenbus_transaction xbt; | ||
29 | int err; | ||
30 | |||
31 | if (shutting_down != SHUTDOWN_INVALID) | ||
32 | return; | ||
33 | |||
34 | again: | ||
35 | err = xenbus_transaction_start(&xbt); | ||
36 | if (err) | ||
37 | return; | ||
38 | |||
39 | str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); | ||
40 | /* Ignore read errors and empty reads. */ | ||
41 | if (XENBUS_IS_ERR_READ(str)) { | ||
42 | xenbus_transaction_end(xbt, 1); | ||
43 | return; | ||
44 | } | ||
45 | |||
46 | xenbus_write(xbt, "control", "shutdown", ""); | ||
47 | |||
48 | err = xenbus_transaction_end(xbt, 0); | ||
49 | if (err == -EAGAIN) { | ||
50 | kfree(str); | ||
51 | goto again; | ||
52 | } | ||
53 | |||
54 | if (strcmp(str, "poweroff") == 0 || | ||
55 | strcmp(str, "halt") == 0) | ||
56 | orderly_poweroff(false); | ||
57 | else if (strcmp(str, "reboot") == 0) | ||
58 | ctrl_alt_del(); | ||
59 | else { | ||
60 | printk(KERN_INFO "Ignoring shutdown request: %s\n", str); | ||
61 | shutting_down = SHUTDOWN_INVALID; | ||
62 | } | ||
63 | |||
64 | kfree(str); | ||
65 | } | ||
66 | |||
67 | static void sysrq_handler(struct xenbus_watch *watch, const char **vec, | ||
68 | unsigned int len) | ||
69 | { | ||
70 | char sysrq_key = '\0'; | ||
71 | struct xenbus_transaction xbt; | ||
72 | int err; | ||
73 | |||
74 | again: | ||
75 | err = xenbus_transaction_start(&xbt); | ||
76 | if (err) | ||
77 | return; | ||
78 | if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { | ||
79 | printk(KERN_ERR "Unable to read sysrq code in " | ||
80 | "control/sysrq\n"); | ||
81 | xenbus_transaction_end(xbt, 1); | ||
82 | return; | ||
83 | } | ||
84 | |||
85 | if (sysrq_key != '\0') | ||
86 | xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); | ||
87 | |||
88 | err = xenbus_transaction_end(xbt, 0); | ||
89 | if (err == -EAGAIN) | ||
90 | goto again; | ||
91 | |||
92 | if (sysrq_key != '\0') | ||
93 | handle_sysrq(sysrq_key, NULL); | ||
94 | } | ||
95 | |||
96 | static struct xenbus_watch shutdown_watch = { | ||
97 | .node = "control/shutdown", | ||
98 | .callback = shutdown_handler | ||
99 | }; | ||
100 | |||
101 | static struct xenbus_watch sysrq_watch = { | ||
102 | .node = "control/sysrq", | ||
103 | .callback = sysrq_handler | ||
104 | }; | ||
105 | |||
106 | static int setup_shutdown_watcher(void) | ||
107 | { | ||
108 | int err; | ||
109 | |||
110 | err = register_xenbus_watch(&shutdown_watch); | ||
111 | if (err) { | ||
112 | printk(KERN_ERR "Failed to set shutdown watcher\n"); | ||
113 | return err; | ||
114 | } | ||
115 | |||
116 | err = register_xenbus_watch(&sysrq_watch); | ||
117 | if (err) { | ||
118 | printk(KERN_ERR "Failed to set sysrq watcher\n"); | ||
119 | return err; | ||
120 | } | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static int shutdown_event(struct notifier_block *notifier, | ||
126 | unsigned long event, | ||
127 | void *data) | ||
128 | { | ||
129 | setup_shutdown_watcher(); | ||
130 | return NOTIFY_DONE; | ||
131 | } | ||
132 | |||
133 | static int __init setup_shutdown_event(void) | ||
134 | { | ||
135 | static struct notifier_block xenstore_notifier = { | ||
136 | .notifier_call = shutdown_event | ||
137 | }; | ||
138 | register_xenstore_notifier(&xenstore_notifier); | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | subsys_initcall(setup_shutdown_event); | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index df40bf74ea75..42b3b9ed641d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -56,6 +56,131 @@ | |||
56 | #include "multicalls.h" | 56 | #include "multicalls.h" |
57 | #include "mmu.h" | 57 | #include "mmu.h" |
58 | 58 | ||
59 | #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
60 | #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) | ||
61 | |||
62 | /* Placeholder for holes in the address space */ | ||
63 | static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] | ||
64 | __attribute__((section(".data.page_aligned"))) = | ||
65 | { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; | ||
66 | |||
67 | /* Array of pointers to pages containing p2m entries */ | ||
68 | static unsigned long *p2m_top[TOP_ENTRIES] | ||
69 | __attribute__((section(".data.page_aligned"))) = | ||
70 | { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; | ||
71 | |||
72 | /* Arrays of p2m arrays expressed in mfns used for save/restore */ | ||
73 | static unsigned long p2m_top_mfn[TOP_ENTRIES] | ||
74 | __attribute__((section(".bss.page_aligned"))); | ||
75 | |||
76 | static unsigned long p2m_top_mfn_list[ | ||
77 | PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] | ||
78 | __attribute__((section(".bss.page_aligned"))); | ||
79 | |||
80 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
81 | { | ||
82 | BUG_ON(pfn >= MAX_DOMAIN_PAGES); | ||
83 | return pfn / P2M_ENTRIES_PER_PAGE; | ||
84 | } | ||
85 | |||
86 | static inline unsigned p2m_index(unsigned long pfn) | ||
87 | { | ||
88 | return pfn % P2M_ENTRIES_PER_PAGE; | ||
89 | } | ||
90 | |||
91 | /* Build the parallel p2m_top_mfn structures */ | ||
92 | void xen_setup_mfn_list_list(void) | ||
93 | { | ||
94 | unsigned pfn, idx; | ||
95 | |||
96 | for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { | ||
97 | unsigned topidx = p2m_top_index(pfn); | ||
98 | |||
99 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); | ||
100 | } | ||
101 | |||
102 | for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { | ||
103 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; | ||
104 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); | ||
105 | } | ||
106 | |||
107 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
108 | |||
109 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
110 | virt_to_mfn(p2m_top_mfn_list); | ||
111 | HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; | ||
112 | } | ||
113 | |||
114 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
115 | void __init xen_build_dynamic_phys_to_machine(void) | ||
116 | { | ||
117 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
118 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
119 | unsigned pfn; | ||
120 | |||
121 | for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { | ||
122 | unsigned topidx = p2m_top_index(pfn); | ||
123 | |||
124 | p2m_top[topidx] = &mfn_list[pfn]; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
129 | { | ||
130 | unsigned topidx, idx; | ||
131 | |||
132 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) | ||
133 | return INVALID_P2M_ENTRY; | ||
134 | |||
135 | topidx = p2m_top_index(pfn); | ||
136 | idx = p2m_index(pfn); | ||
137 | return p2m_top[topidx][idx]; | ||
138 | } | ||
139 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
140 | |||
141 | static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) | ||
142 | { | ||
143 | unsigned long *p; | ||
144 | unsigned i; | ||
145 | |||
146 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); | ||
147 | BUG_ON(p == NULL); | ||
148 | |||
149 | for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) | ||
150 | p[i] = INVALID_P2M_ENTRY; | ||
151 | |||
152 | if (cmpxchg(pp, p2m_missing, p) != p2m_missing) | ||
153 | free_page((unsigned long)p); | ||
154 | else | ||
155 | *mfnp = virt_to_mfn(p); | ||
156 | } | ||
157 | |||
158 | void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
159 | { | ||
160 | unsigned topidx, idx; | ||
161 | |||
162 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
163 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
164 | return; | ||
165 | } | ||
166 | |||
167 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { | ||
168 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
169 | return; | ||
170 | } | ||
171 | |||
172 | topidx = p2m_top_index(pfn); | ||
173 | if (p2m_top[topidx] == p2m_missing) { | ||
174 | /* no need to allocate a page to store an invalid entry */ | ||
175 | if (mfn == INVALID_P2M_ENTRY) | ||
176 | return; | ||
177 | alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); | ||
178 | } | ||
179 | |||
180 | idx = p2m_index(pfn); | ||
181 | p2m_top[topidx][idx] = mfn; | ||
182 | } | ||
183 | |||
59 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) | 184 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) |
60 | { | 185 | { |
61 | unsigned int level; | 186 | unsigned int level; |
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
98 | } | 223 | } |
99 | 224 | ||
100 | 225 | ||
101 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | 226 | static bool page_pinned(void *ptr) |
227 | { | ||
228 | struct page *page = virt_to_page(ptr); | ||
229 | |||
230 | return PagePinned(page); | ||
231 | } | ||
232 | |||
233 | static void extend_mmu_update(const struct mmu_update *update) | ||
102 | { | 234 | { |
103 | struct multicall_space mcs; | 235 | struct multicall_space mcs; |
104 | struct mmu_update *u; | 236 | struct mmu_update *u; |
105 | 237 | ||
106 | preempt_disable(); | 238 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); |
239 | |||
240 | if (mcs.mc != NULL) | ||
241 | mcs.mc->args[1]++; | ||
242 | else { | ||
243 | mcs = __xen_mc_entry(sizeof(*u)); | ||
244 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | ||
245 | } | ||
107 | 246 | ||
108 | mcs = xen_mc_entry(sizeof(*u)); | ||
109 | u = mcs.args; | 247 | u = mcs.args; |
110 | u->ptr = virt_to_machine(ptr).maddr; | 248 | *u = *update; |
111 | u->val = pmd_val_ma(val); | 249 | } |
112 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | 250 | |
251 | void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | ||
252 | { | ||
253 | struct mmu_update u; | ||
254 | |||
255 | preempt_disable(); | ||
256 | |||
257 | xen_mc_batch(); | ||
258 | |||
259 | u.ptr = virt_to_machine(ptr).maddr; | ||
260 | u.val = pmd_val_ma(val); | ||
261 | extend_mmu_update(&u); | ||
113 | 262 | ||
114 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 263 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
115 | 264 | ||
116 | preempt_enable(); | 265 | preempt_enable(); |
117 | } | 266 | } |
118 | 267 | ||
268 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | ||
269 | { | ||
270 | /* If page is not pinned, we can just update the entry | ||
271 | directly */ | ||
272 | if (!page_pinned(ptr)) { | ||
273 | *ptr = val; | ||
274 | return; | ||
275 | } | ||
276 | |||
277 | xen_set_pmd_hyper(ptr, val); | ||
278 | } | ||
279 | |||
119 | /* | 280 | /* |
120 | * Associate a virtual page frame with a given physical page frame | 281 | * Associate a virtual page frame with a given physical page frame |
121 | * and protection flags for that frame. | 282 | * and protection flags for that frame. |
@@ -179,13 +340,33 @@ out: | |||
179 | preempt_enable(); | 340 | preempt_enable(); |
180 | } | 341 | } |
181 | 342 | ||
343 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
344 | { | ||
345 | /* Just return the pte as-is. We preserve the bits on commit */ | ||
346 | return *ptep; | ||
347 | } | ||
348 | |||
349 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | ||
350 | pte_t *ptep, pte_t pte) | ||
351 | { | ||
352 | struct mmu_update u; | ||
353 | |||
354 | xen_mc_batch(); | ||
355 | |||
356 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; | ||
357 | u.val = pte_val_ma(pte); | ||
358 | extend_mmu_update(&u); | ||
359 | |||
360 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
361 | } | ||
362 | |||
182 | /* Assume pteval_t is equivalent to all the other *val_t types. */ | 363 | /* Assume pteval_t is equivalent to all the other *val_t types. */ |
183 | static pteval_t pte_mfn_to_pfn(pteval_t val) | 364 | static pteval_t pte_mfn_to_pfn(pteval_t val) |
184 | { | 365 | { |
185 | if (val & _PAGE_PRESENT) { | 366 | if (val & _PAGE_PRESENT) { |
186 | unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; | 367 | unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; |
187 | pteval_t flags = val & ~PTE_MASK; | 368 | pteval_t flags = val & ~PTE_MASK; |
188 | val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; | 369 | val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; |
189 | } | 370 | } |
190 | 371 | ||
191 | return val; | 372 | return val; |
@@ -196,7 +377,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) | |||
196 | if (val & _PAGE_PRESENT) { | 377 | if (val & _PAGE_PRESENT) { |
197 | unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; | 378 | unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; |
198 | pteval_t flags = val & ~PTE_MASK; | 379 | pteval_t flags = val & ~PTE_MASK; |
199 | val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; | 380 | val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; |
200 | } | 381 | } |
201 | 382 | ||
202 | return val; | 383 | return val; |
@@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd) | |||
229 | return pte_mfn_to_pfn(pmd.pmd); | 410 | return pte_mfn_to_pfn(pmd.pmd); |
230 | } | 411 | } |
231 | 412 | ||
232 | void xen_set_pud(pud_t *ptr, pud_t val) | 413 | void xen_set_pud_hyper(pud_t *ptr, pud_t val) |
233 | { | 414 | { |
234 | struct multicall_space mcs; | 415 | struct mmu_update u; |
235 | struct mmu_update *u; | ||
236 | 416 | ||
237 | preempt_disable(); | 417 | preempt_disable(); |
238 | 418 | ||
239 | mcs = xen_mc_entry(sizeof(*u)); | 419 | xen_mc_batch(); |
240 | u = mcs.args; | 420 | |
241 | u->ptr = virt_to_machine(ptr).maddr; | 421 | u.ptr = virt_to_machine(ptr).maddr; |
242 | u->val = pud_val_ma(val); | 422 | u.val = pud_val_ma(val); |
243 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | 423 | extend_mmu_update(&u); |
244 | 424 | ||
245 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 425 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
246 | 426 | ||
247 | preempt_enable(); | 427 | preempt_enable(); |
248 | } | 428 | } |
249 | 429 | ||
430 | void xen_set_pud(pud_t *ptr, pud_t val) | ||
431 | { | ||
432 | /* If page is not pinned, we can just update the entry | ||
433 | directly */ | ||
434 | if (!page_pinned(ptr)) { | ||
435 | *ptr = val; | ||
436 | return; | ||
437 | } | ||
438 | |||
439 | xen_set_pud_hyper(ptr, val); | ||
440 | } | ||
441 | |||
250 | void xen_set_pte(pte_t *ptep, pte_t pte) | 442 | void xen_set_pte(pte_t *ptep, pte_t pte) |
251 | { | 443 | { |
252 | ptep->pte_high = pte.pte_high; | 444 | ptep->pte_high = pte.pte_high; |
@@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |||
268 | 460 | ||
269 | void xen_pmd_clear(pmd_t *pmdp) | 461 | void xen_pmd_clear(pmd_t *pmdp) |
270 | { | 462 | { |
271 | xen_set_pmd(pmdp, __pmd(0)); | 463 | set_pmd(pmdp, __pmd(0)); |
272 | } | 464 | } |
273 | 465 | ||
274 | pmd_t xen_make_pmd(pmdval_t pmd) | 466 | pmd_t xen_make_pmd(pmdval_t pmd) |
@@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd) | |||
441 | xen_mc_issue(0); | 633 | xen_mc_issue(0); |
442 | } | 634 | } |
443 | 635 | ||
636 | /* | ||
637 | * On save, we need to pin all pagetables to make sure they get their | ||
638 | * mfns turned into pfns. Search the list for any unpinned pgds and pin | ||
639 | * them (unpinned pgds are not currently in use, probably because the | ||
640 | * process is under construction or destruction). | ||
641 | */ | ||
642 | void xen_mm_pin_all(void) | ||
643 | { | ||
644 | unsigned long flags; | ||
645 | struct page *page; | ||
646 | |||
647 | spin_lock_irqsave(&pgd_lock, flags); | ||
648 | |||
649 | list_for_each_entry(page, &pgd_list, lru) { | ||
650 | if (!PagePinned(page)) { | ||
651 | xen_pgd_pin((pgd_t *)page_address(page)); | ||
652 | SetPageSavePinned(page); | ||
653 | } | ||
654 | } | ||
655 | |||
656 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
657 | } | ||
658 | |||
444 | /* The init_mm pagetable is really pinned as soon as its created, but | 659 | /* The init_mm pagetable is really pinned as soon as its created, but |
445 | that's before we have page structures to store the bits. So do all | 660 | that's before we have page structures to store the bits. So do all |
446 | the book-keeping now. */ | 661 | the book-keeping now. */ |
@@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd) | |||
498 | xen_mc_issue(0); | 713 | xen_mc_issue(0); |
499 | } | 714 | } |
500 | 715 | ||
716 | /* | ||
717 | * On resume, undo any pinning done at save, so that the rest of the | ||
718 | * kernel doesn't see any unexpected pinned pagetables. | ||
719 | */ | ||
720 | void xen_mm_unpin_all(void) | ||
721 | { | ||
722 | unsigned long flags; | ||
723 | struct page *page; | ||
724 | |||
725 | spin_lock_irqsave(&pgd_lock, flags); | ||
726 | |||
727 | list_for_each_entry(page, &pgd_list, lru) { | ||
728 | if (PageSavePinned(page)) { | ||
729 | BUG_ON(!PagePinned(page)); | ||
730 | printk("unpinning pinned %p\n", page_address(page)); | ||
731 | xen_pgd_unpin((pgd_t *)page_address(page)); | ||
732 | ClearPageSavePinned(page); | ||
733 | } | ||
734 | } | ||
735 | |||
736 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
737 | } | ||
738 | |||
501 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) | 739 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) |
502 | { | 740 | { |
503 | spin_lock(&next->page_table_lock); | 741 | spin_lock(&next->page_table_lock); |
@@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm) | |||
591 | spin_lock(&mm->page_table_lock); | 829 | spin_lock(&mm->page_table_lock); |
592 | 830 | ||
593 | /* pgd may not be pinned in the error exit path of execve */ | 831 | /* pgd may not be pinned in the error exit path of execve */ |
594 | if (PagePinned(virt_to_page(mm->pgd))) | 832 | if (page_pinned(mm->pgd)) |
595 | xen_pgd_unpin(mm->pgd); | 833 | xen_pgd_unpin(mm->pgd); |
596 | 834 | ||
597 | spin_unlock(&mm->page_table_lock); | 835 | spin_unlock(&mm->page_table_lock); |
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 5fe961caffd4..297bf9f5b8bc 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -25,10 +25,6 @@ enum pt_level { | |||
25 | 25 | ||
26 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 26 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
27 | 27 | ||
28 | void xen_set_pte(pte_t *ptep, pte_t pteval); | ||
29 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
30 | pte_t *ptep, pte_t pteval); | ||
31 | void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); | ||
32 | 28 | ||
33 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); | 29 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); |
34 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | 30 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); |
@@ -45,11 +41,19 @@ pte_t xen_make_pte(pteval_t); | |||
45 | pmd_t xen_make_pmd(pmdval_t); | 41 | pmd_t xen_make_pmd(pmdval_t); |
46 | pgd_t xen_make_pgd(pgdval_t); | 42 | pgd_t xen_make_pgd(pgdval_t); |
47 | 43 | ||
44 | void xen_set_pte(pte_t *ptep, pte_t pteval); | ||
48 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 45 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
49 | pte_t *ptep, pte_t pteval); | 46 | pte_t *ptep, pte_t pteval); |
50 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte); | 47 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte); |
48 | void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); | ||
51 | void xen_set_pud(pud_t *ptr, pud_t val); | 49 | void xen_set_pud(pud_t *ptr, pud_t val); |
50 | void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); | ||
51 | void xen_set_pud_hyper(pud_t *ptr, pud_t val); | ||
52 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | 52 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); |
53 | void xen_pmd_clear(pmd_t *pmdp); | 53 | void xen_pmd_clear(pmd_t *pmdp); |
54 | 54 | ||
55 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | ||
56 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | ||
57 | pte_t *ptep, pte_t pte); | ||
58 | |||
55 | #endif /* _XEN_MMU_H */ | 59 | #endif /* _XEN_MMU_H */ |
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 5791eb2e3750..3c63c4da7ed1 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -29,14 +29,14 @@ | |||
29 | #define MC_DEBUG 1 | 29 | #define MC_DEBUG 1 |
30 | 30 | ||
31 | #define MC_BATCH 32 | 31 | #define MC_BATCH 32 |
32 | #define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) | 32 | #define MC_ARGS (MC_BATCH * 16) |
33 | 33 | ||
34 | struct mc_buffer { | 34 | struct mc_buffer { |
35 | struct multicall_entry entries[MC_BATCH]; | 35 | struct multicall_entry entries[MC_BATCH]; |
36 | #if MC_DEBUG | 36 | #if MC_DEBUG |
37 | struct multicall_entry debug[MC_BATCH]; | 37 | struct multicall_entry debug[MC_BATCH]; |
38 | #endif | 38 | #endif |
39 | u64 args[MC_ARGS]; | 39 | unsigned char args[MC_ARGS]; |
40 | struct callback { | 40 | struct callback { |
41 | void (*fn)(void *); | 41 | void (*fn)(void *); |
42 | void *data; | 42 | void *data; |
@@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args) | |||
107 | { | 107 | { |
108 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 108 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
109 | struct multicall_space ret; | 109 | struct multicall_space ret; |
110 | unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); | 110 | unsigned argidx = roundup(b->argidx, sizeof(u64)); |
111 | 111 | ||
112 | BUG_ON(preemptible()); | 112 | BUG_ON(preemptible()); |
113 | BUG_ON(argspace > MC_ARGS); | 113 | BUG_ON(b->argidx > MC_ARGS); |
114 | 114 | ||
115 | if (b->mcidx == MC_BATCH || | 115 | if (b->mcidx == MC_BATCH || |
116 | (b->argidx + argspace) > MC_ARGS) | 116 | (argidx + args) > MC_ARGS) { |
117 | xen_mc_flush(); | 117 | xen_mc_flush(); |
118 | argidx = roundup(b->argidx, sizeof(u64)); | ||
119 | } | ||
118 | 120 | ||
119 | ret.mc = &b->entries[b->mcidx]; | 121 | ret.mc = &b->entries[b->mcidx]; |
120 | b->mcidx++; | 122 | b->mcidx++; |
123 | ret.args = &b->args[argidx]; | ||
124 | b->argidx = argidx + args; | ||
125 | |||
126 | BUG_ON(b->argidx > MC_ARGS); | ||
127 | return ret; | ||
128 | } | ||
129 | |||
130 | struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) | ||
131 | { | ||
132 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | ||
133 | struct multicall_space ret = { NULL, NULL }; | ||
134 | |||
135 | BUG_ON(preemptible()); | ||
136 | BUG_ON(b->argidx > MC_ARGS); | ||
137 | |||
138 | if (b->mcidx == 0) | ||
139 | return ret; | ||
140 | |||
141 | if (b->entries[b->mcidx - 1].op != op) | ||
142 | return ret; | ||
143 | |||
144 | if ((b->argidx + size) > MC_ARGS) | ||
145 | return ret; | ||
146 | |||
147 | ret.mc = &b->entries[b->mcidx - 1]; | ||
121 | ret.args = &b->args[b->argidx]; | 148 | ret.args = &b->args[b->argidx]; |
122 | b->argidx += argspace; | 149 | b->argidx += size; |
123 | 150 | ||
151 | BUG_ON(b->argidx > MC_ARGS); | ||
124 | return ret; | 152 | return ret; |
125 | } | 153 | } |
126 | 154 | ||
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 8bae996d99a3..858938241616 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h | |||
@@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode) | |||
45 | /* Set up a callback to be called when the current batch is flushed */ | 45 | /* Set up a callback to be called when the current batch is flushed */ |
46 | void xen_mc_callback(void (*fn)(void *), void *data); | 46 | void xen_mc_callback(void (*fn)(void *), void *data); |
47 | 47 | ||
48 | /* | ||
49 | * Try to extend the arguments of the previous multicall command. The | ||
50 | * previous command's op must match. If it does, then it attempts to | ||
51 | * extend the argument space allocated to the multicall entry by | ||
52 | * arg_size bytes. | ||
53 | * | ||
54 | * The returned multicall_space will return with mc pointing to the | ||
55 | * command on success, or NULL on failure, and args pointing to the | ||
56 | * newly allocated space. | ||
57 | */ | ||
58 | struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); | ||
59 | |||
48 | #endif /* _XEN_MULTICALLS_H */ | 60 | #endif /* _XEN_MULTICALLS_H */ |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 82517e4a752a..488447878a9d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/xen/hypervisor.h> | 16 | #include <asm/xen/hypervisor.h> |
17 | #include <asm/xen/hypercall.h> | 17 | #include <asm/xen/hypercall.h> |
18 | 18 | ||
19 | #include <xen/page.h> | ||
19 | #include <xen/interface/callback.h> | 20 | #include <xen/interface/callback.h> |
20 | #include <xen/interface/physdev.h> | 21 | #include <xen/interface/physdev.h> |
21 | #include <xen/features.h> | 22 | #include <xen/features.h> |
@@ -27,8 +28,6 @@ | |||
27 | extern const char xen_hypervisor_callback[]; | 28 | extern const char xen_hypervisor_callback[]; |
28 | extern const char xen_failsafe_callback[]; | 29 | extern const char xen_failsafe_callback[]; |
29 | 30 | ||
30 | unsigned long *phys_to_machine_mapping; | ||
31 | EXPORT_SYMBOL(phys_to_machine_mapping); | ||
32 | 31 | ||
33 | /** | 32 | /** |
34 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 33 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
@@ -38,6 +37,8 @@ char * __init xen_memory_setup(void) | |||
38 | { | 37 | { |
39 | unsigned long max_pfn = xen_start_info->nr_pages; | 38 | unsigned long max_pfn = xen_start_info->nr_pages; |
40 | 39 | ||
40 | max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); | ||
41 | |||
41 | e820.nr_map = 0; | 42 | e820.nr_map = 0; |
42 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); | 43 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); |
43 | add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); | 44 | add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 94e69000f982..d2e3c20127d7 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -35,7 +35,7 @@ | |||
35 | #include "xen-ops.h" | 35 | #include "xen-ops.h" |
36 | #include "mmu.h" | 36 | #include "mmu.h" |
37 | 37 | ||
38 | static cpumask_t xen_cpu_initialized_map; | 38 | cpumask_t xen_cpu_initialized_map; |
39 | static DEFINE_PER_CPU(int, resched_irq) = -1; | 39 | static DEFINE_PER_CPU(int, resched_irq) = -1; |
40 | static DEFINE_PER_CPU(int, callfunc_irq) = -1; | 40 | static DEFINE_PER_CPU(int, callfunc_irq) = -1; |
41 | static DEFINE_PER_CPU(int, debug_irq) = -1; | 41 | static DEFINE_PER_CPU(int, debug_irq) = -1; |
@@ -65,6 +65,12 @@ static struct call_data_struct *call_data; | |||
65 | */ | 65 | */ |
66 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | 66 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) |
67 | { | 67 | { |
68 | #ifdef CONFIG_X86_32 | ||
69 | __get_cpu_var(irq_stat).irq_resched_count++; | ||
70 | #else | ||
71 | add_pda(irq_resched_count, 1); | ||
72 | #endif | ||
73 | |||
68 | return IRQ_HANDLED; | 74 | return IRQ_HANDLED; |
69 | } | 75 | } |
70 | 76 | ||
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 000000000000..251669a932d4 --- /dev/null +++ b/arch/x86/xen/suspend.c | |||
@@ -0,0 +1,45 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include <xen/interface/xen.h> | ||
4 | #include <xen/grant_table.h> | ||
5 | #include <xen/events.h> | ||
6 | |||
7 | #include <asm/xen/hypercall.h> | ||
8 | #include <asm/xen/page.h> | ||
9 | |||
10 | #include "xen-ops.h" | ||
11 | #include "mmu.h" | ||
12 | |||
13 | void xen_pre_suspend(void) | ||
14 | { | ||
15 | xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); | ||
16 | xen_start_info->console.domU.mfn = | ||
17 | mfn_to_pfn(xen_start_info->console.domU.mfn); | ||
18 | |||
19 | BUG_ON(!irqs_disabled()); | ||
20 | |||
21 | HYPERVISOR_shared_info = &xen_dummy_shared_info; | ||
22 | if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), | ||
23 | __pte_ma(0), 0)) | ||
24 | BUG(); | ||
25 | } | ||
26 | |||
27 | void xen_post_suspend(int suspend_cancelled) | ||
28 | { | ||
29 | xen_setup_shared_info(); | ||
30 | |||
31 | if (suspend_cancelled) { | ||
32 | xen_start_info->store_mfn = | ||
33 | pfn_to_mfn(xen_start_info->store_mfn); | ||
34 | xen_start_info->console.domU.mfn = | ||
35 | pfn_to_mfn(xen_start_info->console.domU.mfn); | ||
36 | } else { | ||
37 | #ifdef CONFIG_SMP | ||
38 | xen_cpu_initialized_map = cpu_online_map; | ||
39 | #endif | ||
40 | xen_vcpu_restore(); | ||
41 | xen_timer_resume(); | ||
42 | } | ||
43 | |||
44 | } | ||
45 | |||
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 41e217503c96..64f0038b9558 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void) | |||
459 | clockevents_register_device(&__get_cpu_var(xen_clock_events)); | 459 | clockevents_register_device(&__get_cpu_var(xen_clock_events)); |
460 | } | 460 | } |
461 | 461 | ||
462 | void xen_timer_resume(void) | ||
463 | { | ||
464 | int cpu; | ||
465 | |||
466 | if (xen_clockevent != &xen_vcpuop_clockevent) | ||
467 | return; | ||
468 | |||
469 | for_each_online_cpu(cpu) { | ||
470 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) | ||
471 | BUG(); | ||
472 | } | ||
473 | } | ||
474 | |||
462 | __init void xen_time_init(void) | 475 | __init void xen_time_init(void) |
463 | { | 476 | { |
464 | int cpu = smp_processor_id(); | 477 | int cpu = smp_processor_id(); |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 6ec3b4f7719b..7c0cf6320a0a 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <asm/boot.h> | 8 | #include <asm/boot.h> |
9 | #include <xen/interface/elfnote.h> | 9 | #include <xen/interface/elfnote.h> |
10 | #include <asm/xen/interface.h> | ||
10 | 11 | ||
11 | __INIT | 12 | __INIT |
12 | ENTRY(startup_xen) | 13 | ENTRY(startup_xen) |
@@ -32,5 +33,9 @@ ENTRY(hypercall_page) | |||
32 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 33 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") |
33 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 34 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
34 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 35 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
36 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, | ||
37 | .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) | ||
38 | ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) | ||
39 | ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) | ||
35 | 40 | ||
36 | #endif /*CONFIG_XEN */ | 41 | #endif /*CONFIG_XEN */ |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f1063ae08037..9a055592a307 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -9,18 +9,26 @@ | |||
9 | extern const char xen_hypervisor_callback[]; | 9 | extern const char xen_hypervisor_callback[]; |
10 | extern const char xen_failsafe_callback[]; | 10 | extern const char xen_failsafe_callback[]; |
11 | 11 | ||
12 | struct trap_info; | ||
12 | void xen_copy_trap_info(struct trap_info *traps); | 13 | void xen_copy_trap_info(struct trap_info *traps); |
13 | 14 | ||
14 | DECLARE_PER_CPU(unsigned long, xen_cr3); | 15 | DECLARE_PER_CPU(unsigned long, xen_cr3); |
15 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); | 16 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); |
16 | 17 | ||
17 | extern struct start_info *xen_start_info; | 18 | extern struct start_info *xen_start_info; |
19 | extern struct shared_info xen_dummy_shared_info; | ||
18 | extern struct shared_info *HYPERVISOR_shared_info; | 20 | extern struct shared_info *HYPERVISOR_shared_info; |
19 | 21 | ||
22 | void xen_setup_mfn_list_list(void); | ||
23 | void xen_setup_shared_info(void); | ||
24 | |||
20 | char * __init xen_memory_setup(void); | 25 | char * __init xen_memory_setup(void); |
21 | void __init xen_arch_setup(void); | 26 | void __init xen_arch_setup(void); |
22 | void __init xen_init_IRQ(void); | 27 | void __init xen_init_IRQ(void); |
23 | void xen_enable_sysenter(void); | 28 | void xen_enable_sysenter(void); |
29 | void xen_vcpu_restore(void); | ||
30 | |||
31 | void __init xen_build_dynamic_phys_to_machine(void); | ||
24 | 32 | ||
25 | void xen_setup_timer(int cpu); | 33 | void xen_setup_timer(int cpu); |
26 | void xen_setup_cpu_clockevents(void); | 34 | void xen_setup_cpu_clockevents(void); |
@@ -29,6 +37,7 @@ void __init xen_time_init(void); | |||
29 | unsigned long xen_get_wallclock(void); | 37 | unsigned long xen_get_wallclock(void); |
30 | int xen_set_wallclock(unsigned long time); | 38 | int xen_set_wallclock(unsigned long time); |
31 | unsigned long long xen_sched_clock(void); | 39 | unsigned long long xen_sched_clock(void); |
40 | void xen_timer_resume(void); | ||
32 | 41 | ||
33 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); | 42 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); |
34 | 43 | ||
@@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, | |||
54 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), | 63 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), |
55 | void *info, int wait); | 64 | void *info, int wait); |
56 | 65 | ||
66 | extern cpumask_t xen_cpu_initialized_map; | ||
67 | |||
57 | 68 | ||
58 | /* Declare an asm function, along with symbols needed to make it | 69 | /* Declare an asm function, along with symbols needed to make it |
59 | inlineable */ | 70 | inlineable */ |