aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/Kconfig10
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/enlighten.c132
-rw-r--r--arch/x86/xen/manage.c143
-rw-r--r--arch/x86/xen/mmu.c274
-rw-r--r--arch/x86/xen/mmu.h12
-rw-r--r--arch/x86/xen/multicalls.c40
-rw-r--r--arch/x86/xen/multicalls.h12
-rw-r--r--arch/x86/xen/setup.c5
-rw-r--r--arch/x86/xen/smp.c8
-rw-r--r--arch/x86/xen/suspend.c45
-rw-r--r--arch/x86/xen/time.c13
-rw-r--r--arch/x86/xen/xen-head.S5
-rw-r--r--arch/x86/xen/xen-ops.h11
14 files changed, 511 insertions, 201 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 6c388e593bc8..c2cc99580871 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -12,3 +12,13 @@ config XEN
12 This is the Linux Xen port. Enabling this will allow the 12 This is the Linux Xen port. Enabling this will allow the
13 kernel to boot in a paravirtualized environment under the 13 kernel to boot in a paravirtualized environment under the
14 Xen hypervisor. 14 Xen hypervisor.
15
16config XEN_MAX_DOMAIN_MEMORY
17 int "Maximum allowed size of a domain in gigabytes"
18 default 8
19 depends on XEN
20 help
21 The pseudo-physical to machine address array is sized
22 according to the maximum possible memory size of a Xen
23 domain. This array uses 1 page per gigabyte, so there's no
24 need to be too stingy here. \ No newline at end of file
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3d8df981d5fd..2ba2d1649131 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
1obj-y := enlighten.o setup.o multicalls.o mmu.o \ 1obj-y := enlighten.o setup.o multicalls.o mmu.o \
2 time.o manage.o xen-asm.o grant-table.o 2 time.o xen-asm.o grant-table.o suspend.o
3 3
4obj-$(CONFIG_SMP) += smp.o 4obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09c1c69c37a..bd74229081c3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,13 +75,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
75struct start_info *xen_start_info; 75struct start_info *xen_start_info;
76EXPORT_SYMBOL_GPL(xen_start_info); 76EXPORT_SYMBOL_GPL(xen_start_info);
77 77
78static /* __initdata */ struct shared_info dummy_shared_info; 78struct shared_info xen_dummy_shared_info;
79 79
80/* 80/*
81 * Point at some empty memory to start with. We map the real shared_info 81 * Point at some empty memory to start with. We map the real shared_info
82 * page as soon as fixmap is up and running. 82 * page as soon as fixmap is up and running.
83 */ 83 */
84struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; 84struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
85 85
86/* 86/*
87 * Flag to determine whether vcpu info placement is available on all 87 * Flag to determine whether vcpu info placement is available on all
@@ -98,13 +98,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
98 */ 98 */
99static int have_vcpu_info_placement = 1; 99static int have_vcpu_info_placement = 1;
100 100
101static void __init xen_vcpu_setup(int cpu) 101static void xen_vcpu_setup(int cpu)
102{ 102{
103 struct vcpu_register_vcpu_info info; 103 struct vcpu_register_vcpu_info info;
104 int err; 104 int err;
105 struct vcpu_info *vcpup; 105 struct vcpu_info *vcpup;
106 106
107 BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); 107 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
108 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 108 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
109 109
110 if (!have_vcpu_info_placement) 110 if (!have_vcpu_info_placement)
@@ -136,11 +136,41 @@ static void __init xen_vcpu_setup(int cpu)
136 } 136 }
137} 137}
138 138
139/*
140 * On restore, set the vcpu placement up again.
141 * If it fails, then we're in a bad state, since
142 * we can't back out from using it...
143 */
144void xen_vcpu_restore(void)
145{
146 if (have_vcpu_info_placement) {
147 int cpu;
148
149 for_each_online_cpu(cpu) {
150 bool other_cpu = (cpu != smp_processor_id());
151
152 if (other_cpu &&
153 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
154 BUG();
155
156 xen_vcpu_setup(cpu);
157
158 if (other_cpu &&
159 HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
160 BUG();
161 }
162
163 BUG_ON(!have_vcpu_info_placement);
164 }
165}
166
139static void __init xen_banner(void) 167static void __init xen_banner(void)
140{ 168{
141 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 169 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
142 pv_info.name); 170 pv_info.name);
143 printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); 171 printk(KERN_INFO "Hypervisor signature: %s%s\n",
172 xen_start_info->magic,
173 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
144} 174}
145 175
146static void xen_cpuid(unsigned int *ax, unsigned int *bx, 176static void xen_cpuid(unsigned int *ax, unsigned int *bx,
@@ -235,13 +265,13 @@ static void xen_irq_enable(void)
235{ 265{
236 struct vcpu_info *vcpu; 266 struct vcpu_info *vcpu;
237 267
238 /* There's a one instruction preempt window here. We need to 268 /* We don't need to worry about being preempted here, since
239 make sure we're don't switch CPUs between getting the vcpu 269 either a) interrupts are disabled, so no preemption, or b)
240 pointer and updating the mask. */ 270 the caller is confused and is trying to re-enable interrupts
241 preempt_disable(); 271 on an indeterminate processor. */
272
242 vcpu = x86_read_percpu(xen_vcpu); 273 vcpu = x86_read_percpu(xen_vcpu);
243 vcpu->evtchn_upcall_mask = 0; 274 vcpu->evtchn_upcall_mask = 0;
244 preempt_enable_no_resched();
245 275
246 /* Doesn't matter if we get preempted here, because any 276 /* Doesn't matter if we get preempted here, because any
247 pending event will get dealt with anyway. */ 277 pending event will get dealt with anyway. */
@@ -254,7 +284,7 @@ static void xen_irq_enable(void)
254static void xen_safe_halt(void) 284static void xen_safe_halt(void)
255{ 285{
256 /* Blocking includes an implicit local_irq_enable(). */ 286 /* Blocking includes an implicit local_irq_enable(). */
257 if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) 287 if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
258 BUG(); 288 BUG();
259} 289}
260 290
@@ -607,6 +637,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
607 xen_mc_issue(PARAVIRT_LAZY_MMU); 637 xen_mc_issue(PARAVIRT_LAZY_MMU);
608} 638}
609 639
640static void xen_clts(void)
641{
642 struct multicall_space mcs;
643
644 mcs = xen_mc_entry(0);
645
646 MULTI_fpu_taskswitch(mcs.mc, 0);
647
648 xen_mc_issue(PARAVIRT_LAZY_CPU);
649}
650
651static void xen_write_cr0(unsigned long cr0)
652{
653 struct multicall_space mcs;
654
655 /* Only pay attention to cr0.TS; everything else is
656 ignored. */
657 mcs = xen_mc_entry(0);
658
659 MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
660
661 xen_mc_issue(PARAVIRT_LAZY_CPU);
662}
663
610static void xen_write_cr2(unsigned long cr2) 664static void xen_write_cr2(unsigned long cr2)
611{ 665{
612 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; 666 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@ -624,8 +678,10 @@ static unsigned long xen_read_cr2_direct(void)
624 678
625static void xen_write_cr4(unsigned long cr4) 679static void xen_write_cr4(unsigned long cr4)
626{ 680{
627 /* Just ignore cr4 changes; Xen doesn't allow us to do 681 cr4 &= ~X86_CR4_PGE;
628 anything anyway. */ 682 cr4 &= ~X86_CR4_PSE;
683
684 native_write_cr4(cr4);
629} 685}
630 686
631static unsigned long xen_read_cr3(void) 687static unsigned long xen_read_cr3(void)
@@ -831,7 +887,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
831 PFN_DOWN(__pa(xen_start_info->pt_base))); 887 PFN_DOWN(__pa(xen_start_info->pt_base)));
832} 888}
833 889
834static __init void setup_shared_info(void) 890void xen_setup_shared_info(void)
835{ 891{
836 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 892 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
837 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); 893 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
@@ -854,6 +910,8 @@ static __init void setup_shared_info(void)
854 /* In UP this is as good a place as any to set up shared info */ 910 /* In UP this is as good a place as any to set up shared info */
855 xen_setup_vcpu_info_placement(); 911 xen_setup_vcpu_info_placement();
856#endif 912#endif
913
914 xen_setup_mfn_list_list();
857} 915}
858 916
859static __init void xen_pagetable_setup_done(pgd_t *base) 917static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -866,15 +924,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
866 pv_mmu_ops.release_pmd = xen_release_pmd; 924 pv_mmu_ops.release_pmd = xen_release_pmd;
867 pv_mmu_ops.set_pte = xen_set_pte; 925 pv_mmu_ops.set_pte = xen_set_pte;
868 926
869 setup_shared_info(); 927 xen_setup_shared_info();
870 928
871 /* Actually pin the pagetable down, but we can't set PG_pinned 929 /* Actually pin the pagetable down, but we can't set PG_pinned
872 yet because the page structures don't exist yet. */ 930 yet because the page structures don't exist yet. */
873 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); 931 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
874} 932}
875 933
934static __init void xen_post_allocator_init(void)
935{
936 pv_mmu_ops.set_pmd = xen_set_pmd;
937 pv_mmu_ops.set_pud = xen_set_pud;
938
939 xen_mark_init_mm_pinned();
940}
941
876/* This is called once we have the cpu_possible_map */ 942/* This is called once we have the cpu_possible_map */
877void __init xen_setup_vcpu_info_placement(void) 943void xen_setup_vcpu_info_placement(void)
878{ 944{
879 int cpu; 945 int cpu;
880 946
@@ -960,7 +1026,7 @@ static const struct pv_init_ops xen_init_ops __initdata = {
960 .banner = xen_banner, 1026 .banner = xen_banner,
961 .memory_setup = xen_memory_setup, 1027 .memory_setup = xen_memory_setup,
962 .arch_setup = xen_arch_setup, 1028 .arch_setup = xen_arch_setup,
963 .post_allocator_init = xen_mark_init_mm_pinned, 1029 .post_allocator_init = xen_post_allocator_init,
964}; 1030};
965 1031
966static const struct pv_time_ops xen_time_ops __initdata = { 1032static const struct pv_time_ops xen_time_ops __initdata = {
@@ -978,10 +1044,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
978 .set_debugreg = xen_set_debugreg, 1044 .set_debugreg = xen_set_debugreg,
979 .get_debugreg = xen_get_debugreg, 1045 .get_debugreg = xen_get_debugreg,
980 1046
981 .clts = native_clts, 1047 .clts = xen_clts,
982 1048
983 .read_cr0 = native_read_cr0, 1049 .read_cr0 = native_read_cr0,
984 .write_cr0 = native_write_cr0, 1050 .write_cr0 = xen_write_cr0,
985 1051
986 .read_cr4 = native_read_cr4, 1052 .read_cr4 = native_read_cr4,
987 .read_cr4_safe = native_read_cr4_safe, 1053 .read_cr4_safe = native_read_cr4_safe,
@@ -1072,9 +1138,13 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1072 1138
1073 .set_pte = NULL, /* see xen_pagetable_setup_* */ 1139 .set_pte = NULL, /* see xen_pagetable_setup_* */
1074 .set_pte_at = xen_set_pte_at, 1140 .set_pte_at = xen_set_pte_at,
1075 .set_pmd = xen_set_pmd, 1141 .set_pmd = xen_set_pmd_hyper,
1142
1143 .ptep_modify_prot_start = __ptep_modify_prot_start,
1144 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
1076 1145
1077 .pte_val = xen_pte_val, 1146 .pte_val = xen_pte_val,
1147 .pte_flags = native_pte_val,
1078 .pgd_val = xen_pgd_val, 1148 .pgd_val = xen_pgd_val,
1079 1149
1080 .make_pte = xen_make_pte, 1150 .make_pte = xen_make_pte,
@@ -1082,7 +1152,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1082 1152
1083 .set_pte_atomic = xen_set_pte_atomic, 1153 .set_pte_atomic = xen_set_pte_atomic,
1084 .set_pte_present = xen_set_pte_at, 1154 .set_pte_present = xen_set_pte_at,
1085 .set_pud = xen_set_pud, 1155 .set_pud = xen_set_pud_hyper,
1086 .pte_clear = xen_pte_clear, 1156 .pte_clear = xen_pte_clear,
1087 .pmd_clear = xen_pmd_clear, 1157 .pmd_clear = xen_pmd_clear,
1088 1158
@@ -1114,11 +1184,13 @@ static const struct smp_ops xen_smp_ops __initdata = {
1114 1184
1115static void xen_reboot(int reason) 1185static void xen_reboot(int reason)
1116{ 1186{
1187 struct sched_shutdown r = { .reason = reason };
1188
1117#ifdef CONFIG_SMP 1189#ifdef CONFIG_SMP
1118 smp_send_stop(); 1190 smp_send_stop();
1119#endif 1191#endif
1120 1192
1121 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) 1193 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
1122 BUG(); 1194 BUG();
1123} 1195}
1124 1196
@@ -1173,6 +1245,8 @@ asmlinkage void __init xen_start_kernel(void)
1173 1245
1174 BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); 1246 BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
1175 1247
1248 xen_setup_features();
1249
1176 /* Install Xen paravirt ops */ 1250 /* Install Xen paravirt ops */
1177 pv_info = xen_info; 1251 pv_info = xen_info;
1178 pv_init_ops = xen_init_ops; 1252 pv_init_ops = xen_init_ops;
@@ -1182,17 +1256,20 @@ asmlinkage void __init xen_start_kernel(void)
1182 pv_apic_ops = xen_apic_ops; 1256 pv_apic_ops = xen_apic_ops;
1183 pv_mmu_ops = xen_mmu_ops; 1257 pv_mmu_ops = xen_mmu_ops;
1184 1258
1259 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1260 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1261 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
1262 }
1263
1185 machine_ops = xen_machine_ops; 1264 machine_ops = xen_machine_ops;
1186 1265
1187#ifdef CONFIG_SMP 1266#ifdef CONFIG_SMP
1188 smp_ops = xen_smp_ops; 1267 smp_ops = xen_smp_ops;
1189#endif 1268#endif
1190 1269
1191 xen_setup_features();
1192
1193 /* Get mfn list */ 1270 /* Get mfn list */
1194 if (!xen_feature(XENFEAT_auto_translated_physmap)) 1271 if (!xen_feature(XENFEAT_auto_translated_physmap))
1195 phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; 1272 xen_build_dynamic_phys_to_machine();
1196 1273
1197 pgd = (pgd_t *)xen_start_info->pt_base; 1274 pgd = (pgd_t *)xen_start_info->pt_base;
1198 1275
@@ -1232,8 +1309,11 @@ asmlinkage void __init xen_start_kernel(void)
1232 ? __pa(xen_start_info->mod_start) : 0; 1309 ? __pa(xen_start_info->mod_start) : 0;
1233 boot_params.hdr.ramdisk_size = xen_start_info->mod_len; 1310 boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
1234 1311
1235 if (!is_initial_xendomain()) 1312 if (!is_initial_xendomain()) {
1313 add_preferred_console("xenboot", 0, NULL);
1314 add_preferred_console("tty", 0, NULL);
1236 add_preferred_console("hvc", 0, NULL); 1315 add_preferred_console("hvc", 0, NULL);
1316 }
1237 1317
1238 /* Start the world */ 1318 /* Start the world */
1239 start_kernel(); 1319 start_kernel();
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c
deleted file mode 100644
index aa7af9e6abc0..000000000000
--- a/arch/x86/xen/manage.c
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * Handle extern requests for shutdown, reboot and sysrq
3 */
4#include <linux/kernel.h>
5#include <linux/err.h>
6#include <linux/reboot.h>
7#include <linux/sysrq.h>
8
9#include <xen/xenbus.h>
10
11#define SHUTDOWN_INVALID -1
12#define SHUTDOWN_POWEROFF 0
13#define SHUTDOWN_SUSPEND 2
14/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
15 * report a crash, not be instructed to crash!
16 * HALT is the same as POWEROFF, as far as we're concerned. The tools use
17 * the distinction when we return the reason code to them.
18 */
19#define SHUTDOWN_HALT 4
20
21/* Ignore multiple shutdown requests. */
22static int shutting_down = SHUTDOWN_INVALID;
23
24static void shutdown_handler(struct xenbus_watch *watch,
25 const char **vec, unsigned int len)
26{
27 char *str;
28 struct xenbus_transaction xbt;
29 int err;
30
31 if (shutting_down != SHUTDOWN_INVALID)
32 return;
33
34 again:
35 err = xenbus_transaction_start(&xbt);
36 if (err)
37 return;
38
39 str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
40 /* Ignore read errors and empty reads. */
41 if (XENBUS_IS_ERR_READ(str)) {
42 xenbus_transaction_end(xbt, 1);
43 return;
44 }
45
46 xenbus_write(xbt, "control", "shutdown", "");
47
48 err = xenbus_transaction_end(xbt, 0);
49 if (err == -EAGAIN) {
50 kfree(str);
51 goto again;
52 }
53
54 if (strcmp(str, "poweroff") == 0 ||
55 strcmp(str, "halt") == 0)
56 orderly_poweroff(false);
57 else if (strcmp(str, "reboot") == 0)
58 ctrl_alt_del();
59 else {
60 printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
61 shutting_down = SHUTDOWN_INVALID;
62 }
63
64 kfree(str);
65}
66
67static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
68 unsigned int len)
69{
70 char sysrq_key = '\0';
71 struct xenbus_transaction xbt;
72 int err;
73
74 again:
75 err = xenbus_transaction_start(&xbt);
76 if (err)
77 return;
78 if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
79 printk(KERN_ERR "Unable to read sysrq code in "
80 "control/sysrq\n");
81 xenbus_transaction_end(xbt, 1);
82 return;
83 }
84
85 if (sysrq_key != '\0')
86 xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
87
88 err = xenbus_transaction_end(xbt, 0);
89 if (err == -EAGAIN)
90 goto again;
91
92 if (sysrq_key != '\0')
93 handle_sysrq(sysrq_key, NULL);
94}
95
96static struct xenbus_watch shutdown_watch = {
97 .node = "control/shutdown",
98 .callback = shutdown_handler
99};
100
101static struct xenbus_watch sysrq_watch = {
102 .node = "control/sysrq",
103 .callback = sysrq_handler
104};
105
106static int setup_shutdown_watcher(void)
107{
108 int err;
109
110 err = register_xenbus_watch(&shutdown_watch);
111 if (err) {
112 printk(KERN_ERR "Failed to set shutdown watcher\n");
113 return err;
114 }
115
116 err = register_xenbus_watch(&sysrq_watch);
117 if (err) {
118 printk(KERN_ERR "Failed to set sysrq watcher\n");
119 return err;
120 }
121
122 return 0;
123}
124
125static int shutdown_event(struct notifier_block *notifier,
126 unsigned long event,
127 void *data)
128{
129 setup_shutdown_watcher();
130 return NOTIFY_DONE;
131}
132
133static int __init setup_shutdown_event(void)
134{
135 static struct notifier_block xenstore_notifier = {
136 .notifier_call = shutdown_event
137 };
138 register_xenstore_notifier(&xenstore_notifier);
139
140 return 0;
141}
142
143subsys_initcall(setup_shutdown_event);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index df40bf74ea75..42b3b9ed641d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,6 +56,131 @@
56#include "multicalls.h" 56#include "multicalls.h"
57#include "mmu.h" 57#include "mmu.h"
58 58
59#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
60#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
61
62/* Placeholder for holes in the address space */
63static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
64 __attribute__((section(".data.page_aligned"))) =
65 { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
66
67 /* Array of pointers to pages containing p2m entries */
68static unsigned long *p2m_top[TOP_ENTRIES]
69 __attribute__((section(".data.page_aligned"))) =
70 { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
71
72/* Arrays of p2m arrays expressed in mfns used for save/restore */
73static unsigned long p2m_top_mfn[TOP_ENTRIES]
74 __attribute__((section(".bss.page_aligned")));
75
76static unsigned long p2m_top_mfn_list[
77 PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
78 __attribute__((section(".bss.page_aligned")));
79
80static inline unsigned p2m_top_index(unsigned long pfn)
81{
82 BUG_ON(pfn >= MAX_DOMAIN_PAGES);
83 return pfn / P2M_ENTRIES_PER_PAGE;
84}
85
86static inline unsigned p2m_index(unsigned long pfn)
87{
88 return pfn % P2M_ENTRIES_PER_PAGE;
89}
90
91/* Build the parallel p2m_top_mfn structures */
92void xen_setup_mfn_list_list(void)
93{
94 unsigned pfn, idx;
95
96 for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
97 unsigned topidx = p2m_top_index(pfn);
98
99 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
100 }
101
102 for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
103 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
104 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
105 }
106
107 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
108
109 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
110 virt_to_mfn(p2m_top_mfn_list);
111 HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
112}
113
114/* Set up p2m_top to point to the domain-builder provided p2m pages */
115void __init xen_build_dynamic_phys_to_machine(void)
116{
117 unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
118 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
119 unsigned pfn;
120
121 for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
122 unsigned topidx = p2m_top_index(pfn);
123
124 p2m_top[topidx] = &mfn_list[pfn];
125 }
126}
127
128unsigned long get_phys_to_machine(unsigned long pfn)
129{
130 unsigned topidx, idx;
131
132 if (unlikely(pfn >= MAX_DOMAIN_PAGES))
133 return INVALID_P2M_ENTRY;
134
135 topidx = p2m_top_index(pfn);
136 idx = p2m_index(pfn);
137 return p2m_top[topidx][idx];
138}
139EXPORT_SYMBOL_GPL(get_phys_to_machine);
140
141static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
142{
143 unsigned long *p;
144 unsigned i;
145
146 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
147 BUG_ON(p == NULL);
148
149 for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
150 p[i] = INVALID_P2M_ENTRY;
151
152 if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
153 free_page((unsigned long)p);
154 else
155 *mfnp = virt_to_mfn(p);
156}
157
158void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
159{
160 unsigned topidx, idx;
161
162 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
163 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
164 return;
165 }
166
167 if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
168 BUG_ON(mfn != INVALID_P2M_ENTRY);
169 return;
170 }
171
172 topidx = p2m_top_index(pfn);
173 if (p2m_top[topidx] == p2m_missing) {
174 /* no need to allocate a page to store an invalid entry */
175 if (mfn == INVALID_P2M_ENTRY)
176 return;
177 alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
178 }
179
180 idx = p2m_index(pfn);
181 p2m_top[topidx][idx] = mfn;
182}
183
59xmaddr_t arbitrary_virt_to_machine(unsigned long address) 184xmaddr_t arbitrary_virt_to_machine(unsigned long address)
60{ 185{
61 unsigned int level; 186 unsigned int level;
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr)
98} 223}
99 224
100 225
101void xen_set_pmd(pmd_t *ptr, pmd_t val) 226static bool page_pinned(void *ptr)
227{
228 struct page *page = virt_to_page(ptr);
229
230 return PagePinned(page);
231}
232
233static void extend_mmu_update(const struct mmu_update *update)
102{ 234{
103 struct multicall_space mcs; 235 struct multicall_space mcs;
104 struct mmu_update *u; 236 struct mmu_update *u;
105 237
106 preempt_disable(); 238 mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
239
240 if (mcs.mc != NULL)
241 mcs.mc->args[1]++;
242 else {
243 mcs = __xen_mc_entry(sizeof(*u));
244 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
245 }
107 246
108 mcs = xen_mc_entry(sizeof(*u));
109 u = mcs.args; 247 u = mcs.args;
110 u->ptr = virt_to_machine(ptr).maddr; 248 *u = *update;
111 u->val = pmd_val_ma(val); 249}
112 MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); 250
251void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
252{
253 struct mmu_update u;
254
255 preempt_disable();
256
257 xen_mc_batch();
258
259 u.ptr = virt_to_machine(ptr).maddr;
260 u.val = pmd_val_ma(val);
261 extend_mmu_update(&u);
113 262
114 xen_mc_issue(PARAVIRT_LAZY_MMU); 263 xen_mc_issue(PARAVIRT_LAZY_MMU);
115 264
116 preempt_enable(); 265 preempt_enable();
117} 266}
118 267
268void xen_set_pmd(pmd_t *ptr, pmd_t val)
269{
270 /* If page is not pinned, we can just update the entry
271 directly */
272 if (!page_pinned(ptr)) {
273 *ptr = val;
274 return;
275 }
276
277 xen_set_pmd_hyper(ptr, val);
278}
279
119/* 280/*
120 * Associate a virtual page frame with a given physical page frame 281 * Associate a virtual page frame with a given physical page frame
121 * and protection flags for that frame. 282 * and protection flags for that frame.
@@ -179,13 +340,33 @@ out:
179 preempt_enable(); 340 preempt_enable();
180} 341}
181 342
343pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
344{
345 /* Just return the pte as-is. We preserve the bits on commit */
346 return *ptep;
347}
348
349void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
350 pte_t *ptep, pte_t pte)
351{
352 struct mmu_update u;
353
354 xen_mc_batch();
355
356 u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
357 u.val = pte_val_ma(pte);
358 extend_mmu_update(&u);
359
360 xen_mc_issue(PARAVIRT_LAZY_MMU);
361}
362
182/* Assume pteval_t is equivalent to all the other *val_t types. */ 363/* Assume pteval_t is equivalent to all the other *val_t types. */
183static pteval_t pte_mfn_to_pfn(pteval_t val) 364static pteval_t pte_mfn_to_pfn(pteval_t val)
184{ 365{
185 if (val & _PAGE_PRESENT) { 366 if (val & _PAGE_PRESENT) {
186 unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; 367 unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
187 pteval_t flags = val & ~PTE_MASK; 368 pteval_t flags = val & ~PTE_MASK;
188 val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; 369 val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
189 } 370 }
190 371
191 return val; 372 return val;
@@ -196,7 +377,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
196 if (val & _PAGE_PRESENT) { 377 if (val & _PAGE_PRESENT) {
197 unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; 378 unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
198 pteval_t flags = val & ~PTE_MASK; 379 pteval_t flags = val & ~PTE_MASK;
199 val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; 380 val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
200 } 381 }
201 382
202 return val; 383 return val;
@@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd)
229 return pte_mfn_to_pfn(pmd.pmd); 410 return pte_mfn_to_pfn(pmd.pmd);
230} 411}
231 412
232void xen_set_pud(pud_t *ptr, pud_t val) 413void xen_set_pud_hyper(pud_t *ptr, pud_t val)
233{ 414{
234 struct multicall_space mcs; 415 struct mmu_update u;
235 struct mmu_update *u;
236 416
237 preempt_disable(); 417 preempt_disable();
238 418
239 mcs = xen_mc_entry(sizeof(*u)); 419 xen_mc_batch();
240 u = mcs.args; 420
241 u->ptr = virt_to_machine(ptr).maddr; 421 u.ptr = virt_to_machine(ptr).maddr;
242 u->val = pud_val_ma(val); 422 u.val = pud_val_ma(val);
243 MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); 423 extend_mmu_update(&u);
244 424
245 xen_mc_issue(PARAVIRT_LAZY_MMU); 425 xen_mc_issue(PARAVIRT_LAZY_MMU);
246 426
247 preempt_enable(); 427 preempt_enable();
248} 428}
249 429
430void xen_set_pud(pud_t *ptr, pud_t val)
431{
432 /* If page is not pinned, we can just update the entry
433 directly */
434 if (!page_pinned(ptr)) {
435 *ptr = val;
436 return;
437 }
438
439 xen_set_pud_hyper(ptr, val);
440}
441
250void xen_set_pte(pte_t *ptep, pte_t pte) 442void xen_set_pte(pte_t *ptep, pte_t pte)
251{ 443{
252 ptep->pte_high = pte.pte_high; 444 ptep->pte_high = pte.pte_high;
@@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
268 460
269void xen_pmd_clear(pmd_t *pmdp) 461void xen_pmd_clear(pmd_t *pmdp)
270{ 462{
271 xen_set_pmd(pmdp, __pmd(0)); 463 set_pmd(pmdp, __pmd(0));
272} 464}
273 465
274pmd_t xen_make_pmd(pmdval_t pmd) 466pmd_t xen_make_pmd(pmdval_t pmd)
@@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd)
441 xen_mc_issue(0); 633 xen_mc_issue(0);
442} 634}
443 635
636/*
637 * On save, we need to pin all pagetables to make sure they get their
638 * mfns turned into pfns. Search the list for any unpinned pgds and pin
639 * them (unpinned pgds are not currently in use, probably because the
640 * process is under construction or destruction).
641 */
642void xen_mm_pin_all(void)
643{
644 unsigned long flags;
645 struct page *page;
646
647 spin_lock_irqsave(&pgd_lock, flags);
648
649 list_for_each_entry(page, &pgd_list, lru) {
650 if (!PagePinned(page)) {
651 xen_pgd_pin((pgd_t *)page_address(page));
652 SetPageSavePinned(page);
653 }
654 }
655
656 spin_unlock_irqrestore(&pgd_lock, flags);
657}
658
444/* The init_mm pagetable is really pinned as soon as its created, but 659/* The init_mm pagetable is really pinned as soon as its created, but
445 that's before we have page structures to store the bits. So do all 660 that's before we have page structures to store the bits. So do all
446 the book-keeping now. */ 661 the book-keeping now. */
@@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
498 xen_mc_issue(0); 713 xen_mc_issue(0);
499} 714}
500 715
716/*
717 * On resume, undo any pinning done at save, so that the rest of the
718 * kernel doesn't see any unexpected pinned pagetables.
719 */
720void xen_mm_unpin_all(void)
721{
722 unsigned long flags;
723 struct page *page;
724
725 spin_lock_irqsave(&pgd_lock, flags);
726
727 list_for_each_entry(page, &pgd_list, lru) {
728 if (PageSavePinned(page)) {
729 BUG_ON(!PagePinned(page));
730 printk("unpinning pinned %p\n", page_address(page));
731 xen_pgd_unpin((pgd_t *)page_address(page));
732 ClearPageSavePinned(page);
733 }
734 }
735
736 spin_unlock_irqrestore(&pgd_lock, flags);
737}
738
501void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) 739void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
502{ 740{
503 spin_lock(&next->page_table_lock); 741 spin_lock(&next->page_table_lock);
@@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm)
591 spin_lock(&mm->page_table_lock); 829 spin_lock(&mm->page_table_lock);
592 830
593 /* pgd may not be pinned in the error exit path of execve */ 831 /* pgd may not be pinned in the error exit path of execve */
594 if (PagePinned(virt_to_page(mm->pgd))) 832 if (page_pinned(mm->pgd))
595 xen_pgd_unpin(mm->pgd); 833 xen_pgd_unpin(mm->pgd);
596 834
597 spin_unlock(&mm->page_table_lock); 835 spin_unlock(&mm->page_table_lock);
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 5fe961caffd4..297bf9f5b8bc 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -25,10 +25,6 @@ enum pt_level {
25 25
26void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 26void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
27 27
28void xen_set_pte(pte_t *ptep, pte_t pteval);
29void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
30 pte_t *ptep, pte_t pteval);
31void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
32 28
33void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); 29void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
34void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); 30void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
@@ -45,11 +41,19 @@ pte_t xen_make_pte(pteval_t);
45pmd_t xen_make_pmd(pmdval_t); 41pmd_t xen_make_pmd(pmdval_t);
46pgd_t xen_make_pgd(pgdval_t); 42pgd_t xen_make_pgd(pgdval_t);
47 43
44void xen_set_pte(pte_t *ptep, pte_t pteval);
48void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 45void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
49 pte_t *ptep, pte_t pteval); 46 pte_t *ptep, pte_t pteval);
50void xen_set_pte_atomic(pte_t *ptep, pte_t pte); 47void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
48void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
51void xen_set_pud(pud_t *ptr, pud_t val); 49void xen_set_pud(pud_t *ptr, pud_t val);
50void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
51void xen_set_pud_hyper(pud_t *ptr, pud_t val);
52void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 52void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
53void xen_pmd_clear(pmd_t *pmdp); 53void xen_pmd_clear(pmd_t *pmdp);
54 54
55pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
56void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
57 pte_t *ptep, pte_t pte);
58
55#endif /* _XEN_MMU_H */ 59#endif /* _XEN_MMU_H */
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 5791eb2e3750..3c63c4da7ed1 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -29,14 +29,14 @@
29#define MC_DEBUG 1 29#define MC_DEBUG 1
30 30
31#define MC_BATCH 32 31#define MC_BATCH 32
32#define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) 32#define MC_ARGS (MC_BATCH * 16)
33 33
34struct mc_buffer { 34struct mc_buffer {
35 struct multicall_entry entries[MC_BATCH]; 35 struct multicall_entry entries[MC_BATCH];
36#if MC_DEBUG 36#if MC_DEBUG
37 struct multicall_entry debug[MC_BATCH]; 37 struct multicall_entry debug[MC_BATCH];
38#endif 38#endif
39 u64 args[MC_ARGS]; 39 unsigned char args[MC_ARGS];
40 struct callback { 40 struct callback {
41 void (*fn)(void *); 41 void (*fn)(void *);
42 void *data; 42 void *data;
@@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args)
107{ 107{
108 struct mc_buffer *b = &__get_cpu_var(mc_buffer); 108 struct mc_buffer *b = &__get_cpu_var(mc_buffer);
109 struct multicall_space ret; 109 struct multicall_space ret;
110 unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); 110 unsigned argidx = roundup(b->argidx, sizeof(u64));
111 111
112 BUG_ON(preemptible()); 112 BUG_ON(preemptible());
113 BUG_ON(argspace > MC_ARGS); 113 BUG_ON(b->argidx > MC_ARGS);
114 114
115 if (b->mcidx == MC_BATCH || 115 if (b->mcidx == MC_BATCH ||
116 (b->argidx + argspace) > MC_ARGS) 116 (argidx + args) > MC_ARGS) {
117 xen_mc_flush(); 117 xen_mc_flush();
118 argidx = roundup(b->argidx, sizeof(u64));
119 }
118 120
119 ret.mc = &b->entries[b->mcidx]; 121 ret.mc = &b->entries[b->mcidx];
120 b->mcidx++; 122 b->mcidx++;
123 ret.args = &b->args[argidx];
124 b->argidx = argidx + args;
125
126 BUG_ON(b->argidx > MC_ARGS);
127 return ret;
128}
129
130struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
131{
132 struct mc_buffer *b = &__get_cpu_var(mc_buffer);
133 struct multicall_space ret = { NULL, NULL };
134
135 BUG_ON(preemptible());
136 BUG_ON(b->argidx > MC_ARGS);
137
138 if (b->mcidx == 0)
139 return ret;
140
141 if (b->entries[b->mcidx - 1].op != op)
142 return ret;
143
144 if ((b->argidx + size) > MC_ARGS)
145 return ret;
146
147 ret.mc = &b->entries[b->mcidx - 1];
121 ret.args = &b->args[b->argidx]; 148 ret.args = &b->args[b->argidx];
122 b->argidx += argspace; 149 b->argidx += size;
123 150
151 BUG_ON(b->argidx > MC_ARGS);
124 return ret; 152 return ret;
125} 153}
126 154
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 8bae996d99a3..858938241616 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode)
45/* Set up a callback to be called when the current batch is flushed */ 45/* Set up a callback to be called when the current batch is flushed */
46void xen_mc_callback(void (*fn)(void *), void *data); 46void xen_mc_callback(void (*fn)(void *), void *data);
47 47
48/*
49 * Try to extend the arguments of the previous multicall command. The
50 * previous command's op must match. If it does, then it attempts to
51 * extend the argument space allocated to the multicall entry by
52 * arg_size bytes.
53 *
54 * The returned multicall_space will return with mc pointing to the
55 * command on success, or NULL on failure, and args pointing to the
56 * newly allocated space.
57 */
58struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size);
59
48#endif /* _XEN_MULTICALLS_H */ 60#endif /* _XEN_MULTICALLS_H */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 82517e4a752a..488447878a9d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
16#include <asm/xen/hypervisor.h> 16#include <asm/xen/hypervisor.h>
17#include <asm/xen/hypercall.h> 17#include <asm/xen/hypercall.h>
18 18
19#include <xen/page.h>
19#include <xen/interface/callback.h> 20#include <xen/interface/callback.h>
20#include <xen/interface/physdev.h> 21#include <xen/interface/physdev.h>
21#include <xen/features.h> 22#include <xen/features.h>
@@ -27,8 +28,6 @@
27extern const char xen_hypervisor_callback[]; 28extern const char xen_hypervisor_callback[];
28extern const char xen_failsafe_callback[]; 29extern const char xen_failsafe_callback[];
29 30
30unsigned long *phys_to_machine_mapping;
31EXPORT_SYMBOL(phys_to_machine_mapping);
32 31
33/** 32/**
34 * machine_specific_memory_setup - Hook for machine specific memory setup. 33 * machine_specific_memory_setup - Hook for machine specific memory setup.
@@ -38,6 +37,8 @@ char * __init xen_memory_setup(void)
38{ 37{
39 unsigned long max_pfn = xen_start_info->nr_pages; 38 unsigned long max_pfn = xen_start_info->nr_pages;
40 39
40 max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
41
41 e820.nr_map = 0; 42 e820.nr_map = 0;
42 add_memory_region(0, LOWMEMSIZE(), E820_RAM); 43 add_memory_region(0, LOWMEMSIZE(), E820_RAM);
43 add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); 44 add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 94e69000f982..d2e3c20127d7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -35,7 +35,7 @@
35#include "xen-ops.h" 35#include "xen-ops.h"
36#include "mmu.h" 36#include "mmu.h"
37 37
38static cpumask_t xen_cpu_initialized_map; 38cpumask_t xen_cpu_initialized_map;
39static DEFINE_PER_CPU(int, resched_irq) = -1; 39static DEFINE_PER_CPU(int, resched_irq) = -1;
40static DEFINE_PER_CPU(int, callfunc_irq) = -1; 40static DEFINE_PER_CPU(int, callfunc_irq) = -1;
41static DEFINE_PER_CPU(int, debug_irq) = -1; 41static DEFINE_PER_CPU(int, debug_irq) = -1;
@@ -65,6 +65,12 @@ static struct call_data_struct *call_data;
65 */ 65 */
66static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) 66static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
67{ 67{
68#ifdef CONFIG_X86_32
69 __get_cpu_var(irq_stat).irq_resched_count++;
70#else
71 add_pda(irq_resched_count, 1);
72#endif
73
68 return IRQ_HANDLED; 74 return IRQ_HANDLED;
69} 75}
70 76
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
new file mode 100644
index 000000000000..251669a932d4
--- /dev/null
+++ b/arch/x86/xen/suspend.c
@@ -0,0 +1,45 @@
1#include <linux/types.h>
2
3#include <xen/interface/xen.h>
4#include <xen/grant_table.h>
5#include <xen/events.h>
6
7#include <asm/xen/hypercall.h>
8#include <asm/xen/page.h>
9
10#include "xen-ops.h"
11#include "mmu.h"
12
13void xen_pre_suspend(void)
14{
15 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
16 xen_start_info->console.domU.mfn =
17 mfn_to_pfn(xen_start_info->console.domU.mfn);
18
19 BUG_ON(!irqs_disabled());
20
21 HYPERVISOR_shared_info = &xen_dummy_shared_info;
22 if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
23 __pte_ma(0), 0))
24 BUG();
25}
26
27void xen_post_suspend(int suspend_cancelled)
28{
29 xen_setup_shared_info();
30
31 if (suspend_cancelled) {
32 xen_start_info->store_mfn =
33 pfn_to_mfn(xen_start_info->store_mfn);
34 xen_start_info->console.domU.mfn =
35 pfn_to_mfn(xen_start_info->console.domU.mfn);
36 } else {
37#ifdef CONFIG_SMP
38 xen_cpu_initialized_map = cpu_online_map;
39#endif
40 xen_vcpu_restore();
41 xen_timer_resume();
42 }
43
44}
45
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 41e217503c96..64f0038b9558 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void)
459 clockevents_register_device(&__get_cpu_var(xen_clock_events)); 459 clockevents_register_device(&__get_cpu_var(xen_clock_events));
460} 460}
461 461
462void xen_timer_resume(void)
463{
464 int cpu;
465
466 if (xen_clockevent != &xen_vcpuop_clockevent)
467 return;
468
469 for_each_online_cpu(cpu) {
470 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
471 BUG();
472 }
473}
474
462__init void xen_time_init(void) 475__init void xen_time_init(void)
463{ 476{
464 int cpu = smp_processor_id(); 477 int cpu = smp_processor_id();
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 6ec3b4f7719b..7c0cf6320a0a 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -7,6 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <asm/boot.h> 8#include <asm/boot.h>
9#include <xen/interface/elfnote.h> 9#include <xen/interface/elfnote.h>
10#include <asm/xen/interface.h>
10 11
11 __INIT 12 __INIT
12ENTRY(startup_xen) 13ENTRY(startup_xen)
@@ -32,5 +33,9 @@ ENTRY(hypercall_page)
32 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") 33 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
33 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") 34 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
34 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") 35 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
36 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
37 .quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
38 ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
39 ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START)
35 40
36#endif /*CONFIG_XEN */ 41#endif /*CONFIG_XEN */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f1063ae08037..9a055592a307 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -9,18 +9,26 @@
9extern const char xen_hypervisor_callback[]; 9extern const char xen_hypervisor_callback[];
10extern const char xen_failsafe_callback[]; 10extern const char xen_failsafe_callback[];
11 11
12struct trap_info;
12void xen_copy_trap_info(struct trap_info *traps); 13void xen_copy_trap_info(struct trap_info *traps);
13 14
14DECLARE_PER_CPU(unsigned long, xen_cr3); 15DECLARE_PER_CPU(unsigned long, xen_cr3);
15DECLARE_PER_CPU(unsigned long, xen_current_cr3); 16DECLARE_PER_CPU(unsigned long, xen_current_cr3);
16 17
17extern struct start_info *xen_start_info; 18extern struct start_info *xen_start_info;
19extern struct shared_info xen_dummy_shared_info;
18extern struct shared_info *HYPERVISOR_shared_info; 20extern struct shared_info *HYPERVISOR_shared_info;
19 21
22void xen_setup_mfn_list_list(void);
23void xen_setup_shared_info(void);
24
20char * __init xen_memory_setup(void); 25char * __init xen_memory_setup(void);
21void __init xen_arch_setup(void); 26void __init xen_arch_setup(void);
22void __init xen_init_IRQ(void); 27void __init xen_init_IRQ(void);
23void xen_enable_sysenter(void); 28void xen_enable_sysenter(void);
29void xen_vcpu_restore(void);
30
31void __init xen_build_dynamic_phys_to_machine(void);
24 32
25void xen_setup_timer(int cpu); 33void xen_setup_timer(int cpu);
26void xen_setup_cpu_clockevents(void); 34void xen_setup_cpu_clockevents(void);
@@ -29,6 +37,7 @@ void __init xen_time_init(void);
29unsigned long xen_get_wallclock(void); 37unsigned long xen_get_wallclock(void);
30int xen_set_wallclock(unsigned long time); 38int xen_set_wallclock(unsigned long time);
31unsigned long long xen_sched_clock(void); 39unsigned long long xen_sched_clock(void);
40void xen_timer_resume(void);
32 41
33irqreturn_t xen_debug_interrupt(int irq, void *dev_id); 42irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
34 43
@@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
54int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), 63int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
55 void *info, int wait); 64 void *info, int wait);
56 65
66extern cpumask_t xen_cpu_initialized_map;
67
57 68
58/* Declare an asm function, along with symbols needed to make it 69/* Declare an asm function, along with symbols needed to make it
59 inlineable */ 70 inlineable */