aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/xen/Kconfig10
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/enlighten.c116
-rw-r--r--arch/x86/xen/manage.c143
-rw-r--r--arch/x86/xen/mmu.c210
-rw-r--r--arch/x86/xen/mmu.h8
-rw-r--r--arch/x86/xen/setup.c5
-rw-r--r--arch/x86/xen/smp.c8
-rw-r--r--arch/x86/xen/suspend.c45
-rw-r--r--arch/x86/xen/time.c13
-rw-r--r--arch/x86/xen/xen-head.S5
-rw-r--r--arch/x86/xen/xen-ops.h11
13 files changed, 399 insertions, 178 deletions
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 74f0c5ea2a03..c98d54688180 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -403,6 +403,7 @@ struct pv_mmu_ops pv_mmu_ops = {
403#endif /* PAGETABLE_LEVELS >= 3 */ 403#endif /* PAGETABLE_LEVELS >= 3 */
404 404
405 .pte_val = native_pte_val, 405 .pte_val = native_pte_val,
406 .pte_flags = native_pte_val,
406 .pgd_val = native_pgd_val, 407 .pgd_val = native_pgd_val,
407 408
408 .make_pte = native_make_pte, 409 .make_pte = native_make_pte,
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 6c388e593bc8..c2cc99580871 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -12,3 +12,13 @@ config XEN
12 This is the Linux Xen port. Enabling this will allow the 12 This is the Linux Xen port. Enabling this will allow the
13 kernel to boot in a paravirtualized environment under the 13 kernel to boot in a paravirtualized environment under the
14 Xen hypervisor. 14 Xen hypervisor.
15
16config XEN_MAX_DOMAIN_MEMORY
17 int "Maximum allowed size of a domain in gigabytes"
18 default 8
19 depends on XEN
20 help
21 The pseudo-physical to machine address array is sized
22 according to the maximum possible memory size of a Xen
23 domain. This array uses 1 page per gigabyte, so there's no
24 need to be too stingy here. \ No newline at end of file
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3d8df981d5fd..2ba2d1649131 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
1obj-y := enlighten.o setup.o multicalls.o mmu.o \ 1obj-y := enlighten.o setup.o multicalls.o mmu.o \
2 time.o manage.o xen-asm.o grant-table.o 2 time.o xen-asm.o grant-table.o suspend.o
3 3
4obj-$(CONFIG_SMP) += smp.o 4obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09c1c69c37a..73fb0c4c150a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,13 +75,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
75struct start_info *xen_start_info; 75struct start_info *xen_start_info;
76EXPORT_SYMBOL_GPL(xen_start_info); 76EXPORT_SYMBOL_GPL(xen_start_info);
77 77
78static /* __initdata */ struct shared_info dummy_shared_info; 78struct shared_info xen_dummy_shared_info;
79 79
80/* 80/*
81 * Point at some empty memory to start with. We map the real shared_info 81 * Point at some empty memory to start with. We map the real shared_info
82 * page as soon as fixmap is up and running. 82 * page as soon as fixmap is up and running.
83 */ 83 */
84struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; 84struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
85 85
86/* 86/*
87 * Flag to determine whether vcpu info placement is available on all 87 * Flag to determine whether vcpu info placement is available on all
@@ -98,13 +98,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
98 */ 98 */
99static int have_vcpu_info_placement = 1; 99static int have_vcpu_info_placement = 1;
100 100
101static void __init xen_vcpu_setup(int cpu) 101static void xen_vcpu_setup(int cpu)
102{ 102{
103 struct vcpu_register_vcpu_info info; 103 struct vcpu_register_vcpu_info info;
104 int err; 104 int err;
105 struct vcpu_info *vcpup; 105 struct vcpu_info *vcpup;
106 106
107 BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); 107 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
108 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 108 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
109 109
110 if (!have_vcpu_info_placement) 110 if (!have_vcpu_info_placement)
@@ -136,6 +136,34 @@ static void __init xen_vcpu_setup(int cpu)
136 } 136 }
137} 137}
138 138
139/*
140 * On restore, set the vcpu placement up again.
141 * If it fails, then we're in a bad state, since
142 * we can't back out from using it...
143 */
144void xen_vcpu_restore(void)
145{
146 if (have_vcpu_info_placement) {
147 int cpu;
148
149 for_each_online_cpu(cpu) {
150 bool other_cpu = (cpu != smp_processor_id());
151
152 if (other_cpu &&
153 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
154 BUG();
155
156 xen_vcpu_setup(cpu);
157
158 if (other_cpu &&
159 HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
160 BUG();
161 }
162
163 BUG_ON(!have_vcpu_info_placement);
164 }
165}
166
139static void __init xen_banner(void) 167static void __init xen_banner(void)
140{ 168{
141 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 169 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -235,13 +263,13 @@ static void xen_irq_enable(void)
235{ 263{
236 struct vcpu_info *vcpu; 264 struct vcpu_info *vcpu;
237 265
238 /* There's a one instruction preempt window here. We need to 266 /* We don't need to worry about being preempted here, since
239 make sure we're don't switch CPUs between getting the vcpu 267 either a) interrupts are disabled, so no preemption, or b)
240 pointer and updating the mask. */ 268 the caller is confused and is trying to re-enable interrupts
241 preempt_disable(); 269 on an indeterminate processor. */
270
242 vcpu = x86_read_percpu(xen_vcpu); 271 vcpu = x86_read_percpu(xen_vcpu);
243 vcpu->evtchn_upcall_mask = 0; 272 vcpu->evtchn_upcall_mask = 0;
244 preempt_enable_no_resched();
245 273
246 /* Doesn't matter if we get preempted here, because any 274 /* Doesn't matter if we get preempted here, because any
247 pending event will get dealt with anyway. */ 275 pending event will get dealt with anyway. */
@@ -254,7 +282,7 @@ static void xen_irq_enable(void)
254static void xen_safe_halt(void) 282static void xen_safe_halt(void)
255{ 283{
256 /* Blocking includes an implicit local_irq_enable(). */ 284 /* Blocking includes an implicit local_irq_enable(). */
257 if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) 285 if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
258 BUG(); 286 BUG();
259} 287}
260 288
@@ -607,6 +635,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
607 xen_mc_issue(PARAVIRT_LAZY_MMU); 635 xen_mc_issue(PARAVIRT_LAZY_MMU);
608} 636}
609 637
638static void xen_clts(void)
639{
640 struct multicall_space mcs;
641
642 mcs = xen_mc_entry(0);
643
644 MULTI_fpu_taskswitch(mcs.mc, 0);
645
646 xen_mc_issue(PARAVIRT_LAZY_CPU);
647}
648
649static void xen_write_cr0(unsigned long cr0)
650{
651 struct multicall_space mcs;
652
653 /* Only pay attention to cr0.TS; everything else is
654 ignored. */
655 mcs = xen_mc_entry(0);
656
657 MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
658
659 xen_mc_issue(PARAVIRT_LAZY_CPU);
660}
661
610static void xen_write_cr2(unsigned long cr2) 662static void xen_write_cr2(unsigned long cr2)
611{ 663{
612 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; 664 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@ -624,8 +676,10 @@ static unsigned long xen_read_cr2_direct(void)
624 676
625static void xen_write_cr4(unsigned long cr4) 677static void xen_write_cr4(unsigned long cr4)
626{ 678{
627 /* Just ignore cr4 changes; Xen doesn't allow us to do 679 cr4 &= ~X86_CR4_PGE;
628 anything anyway. */ 680 cr4 &= ~X86_CR4_PSE;
681
682 native_write_cr4(cr4);
629} 683}
630 684
631static unsigned long xen_read_cr3(void) 685static unsigned long xen_read_cr3(void)
@@ -831,7 +885,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
831 PFN_DOWN(__pa(xen_start_info->pt_base))); 885 PFN_DOWN(__pa(xen_start_info->pt_base)));
832} 886}
833 887
834static __init void setup_shared_info(void) 888void xen_setup_shared_info(void)
835{ 889{
836 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 890 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
837 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); 891 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
@@ -854,6 +908,8 @@ static __init void setup_shared_info(void)
854 /* In UP this is as good a place as any to set up shared info */ 908 /* In UP this is as good a place as any to set up shared info */
855 xen_setup_vcpu_info_placement(); 909 xen_setup_vcpu_info_placement();
856#endif 910#endif
911
912 xen_setup_mfn_list_list();
857} 913}
858 914
859static __init void xen_pagetable_setup_done(pgd_t *base) 915static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -866,15 +922,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
866 pv_mmu_ops.release_pmd = xen_release_pmd; 922 pv_mmu_ops.release_pmd = xen_release_pmd;
867 pv_mmu_ops.set_pte = xen_set_pte; 923 pv_mmu_ops.set_pte = xen_set_pte;
868 924
869 setup_shared_info(); 925 xen_setup_shared_info();
870 926
871 /* Actually pin the pagetable down, but we can't set PG_pinned 927 /* Actually pin the pagetable down, but we can't set PG_pinned
872 yet because the page structures don't exist yet. */ 928 yet because the page structures don't exist yet. */
873 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); 929 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
874} 930}
875 931
932static __init void xen_post_allocator_init(void)
933{
934 pv_mmu_ops.set_pmd = xen_set_pmd;
935 pv_mmu_ops.set_pud = xen_set_pud;
936
937 xen_mark_init_mm_pinned();
938}
939
876/* This is called once we have the cpu_possible_map */ 940/* This is called once we have the cpu_possible_map */
877void __init xen_setup_vcpu_info_placement(void) 941void xen_setup_vcpu_info_placement(void)
878{ 942{
879 int cpu; 943 int cpu;
880 944
@@ -960,7 +1024,7 @@ static const struct pv_init_ops xen_init_ops __initdata = {
960 .banner = xen_banner, 1024 .banner = xen_banner,
961 .memory_setup = xen_memory_setup, 1025 .memory_setup = xen_memory_setup,
962 .arch_setup = xen_arch_setup, 1026 .arch_setup = xen_arch_setup,
963 .post_allocator_init = xen_mark_init_mm_pinned, 1027 .post_allocator_init = xen_post_allocator_init,
964}; 1028};
965 1029
966static const struct pv_time_ops xen_time_ops __initdata = { 1030static const struct pv_time_ops xen_time_ops __initdata = {
@@ -978,10 +1042,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
978 .set_debugreg = xen_set_debugreg, 1042 .set_debugreg = xen_set_debugreg,
979 .get_debugreg = xen_get_debugreg, 1043 .get_debugreg = xen_get_debugreg,
980 1044
981 .clts = native_clts, 1045 .clts = xen_clts,
982 1046
983 .read_cr0 = native_read_cr0, 1047 .read_cr0 = native_read_cr0,
984 .write_cr0 = native_write_cr0, 1048 .write_cr0 = xen_write_cr0,
985 1049
986 .read_cr4 = native_read_cr4, 1050 .read_cr4 = native_read_cr4,
987 .read_cr4_safe = native_read_cr4_safe, 1051 .read_cr4_safe = native_read_cr4_safe,
@@ -1072,9 +1136,10 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1072 1136
1073 .set_pte = NULL, /* see xen_pagetable_setup_* */ 1137 .set_pte = NULL, /* see xen_pagetable_setup_* */
1074 .set_pte_at = xen_set_pte_at, 1138 .set_pte_at = xen_set_pte_at,
1075 .set_pmd = xen_set_pmd, 1139 .set_pmd = xen_set_pmd_hyper,
1076 1140
1077 .pte_val = xen_pte_val, 1141 .pte_val = xen_pte_val,
1142 .pte_flags = native_pte_val,
1078 .pgd_val = xen_pgd_val, 1143 .pgd_val = xen_pgd_val,
1079 1144
1080 .make_pte = xen_make_pte, 1145 .make_pte = xen_make_pte,
@@ -1082,7 +1147,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1082 1147
1083 .set_pte_atomic = xen_set_pte_atomic, 1148 .set_pte_atomic = xen_set_pte_atomic,
1084 .set_pte_present = xen_set_pte_at, 1149 .set_pte_present = xen_set_pte_at,
1085 .set_pud = xen_set_pud, 1150 .set_pud = xen_set_pud_hyper,
1086 .pte_clear = xen_pte_clear, 1151 .pte_clear = xen_pte_clear,
1087 .pmd_clear = xen_pmd_clear, 1152 .pmd_clear = xen_pmd_clear,
1088 1153
@@ -1114,11 +1179,13 @@ static const struct smp_ops xen_smp_ops __initdata = {
1114 1179
1115static void xen_reboot(int reason) 1180static void xen_reboot(int reason)
1116{ 1181{
1182 struct sched_shutdown r = { .reason = reason };
1183
1117#ifdef CONFIG_SMP 1184#ifdef CONFIG_SMP
1118 smp_send_stop(); 1185 smp_send_stop();
1119#endif 1186#endif
1120 1187
1121 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) 1188 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
1122 BUG(); 1189 BUG();
1123} 1190}
1124 1191
@@ -1192,7 +1259,7 @@ asmlinkage void __init xen_start_kernel(void)
1192 1259
1193 /* Get mfn list */ 1260 /* Get mfn list */
1194 if (!xen_feature(XENFEAT_auto_translated_physmap)) 1261 if (!xen_feature(XENFEAT_auto_translated_physmap))
1195 phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; 1262 xen_build_dynamic_phys_to_machine();
1196 1263
1197 pgd = (pgd_t *)xen_start_info->pt_base; 1264 pgd = (pgd_t *)xen_start_info->pt_base;
1198 1265
@@ -1232,8 +1299,11 @@ asmlinkage void __init xen_start_kernel(void)
1232 ? __pa(xen_start_info->mod_start) : 0; 1299 ? __pa(xen_start_info->mod_start) : 0;
1233 boot_params.hdr.ramdisk_size = xen_start_info->mod_len; 1300 boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
1234 1301
1235 if (!is_initial_xendomain()) 1302 if (!is_initial_xendomain()) {
1303 add_preferred_console("xenboot", 0, NULL);
1304 add_preferred_console("tty", 0, NULL);
1236 add_preferred_console("hvc", 0, NULL); 1305 add_preferred_console("hvc", 0, NULL);
1306 }
1237 1307
1238 /* Start the world */ 1308 /* Start the world */
1239 start_kernel(); 1309 start_kernel();
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c
deleted file mode 100644
index aa7af9e6abc0..000000000000
--- a/arch/x86/xen/manage.c
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * Handle extern requests for shutdown, reboot and sysrq
3 */
4#include <linux/kernel.h>
5#include <linux/err.h>
6#include <linux/reboot.h>
7#include <linux/sysrq.h>
8
9#include <xen/xenbus.h>
10
11#define SHUTDOWN_INVALID -1
12#define SHUTDOWN_POWEROFF 0
13#define SHUTDOWN_SUSPEND 2
14/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
15 * report a crash, not be instructed to crash!
16 * HALT is the same as POWEROFF, as far as we're concerned. The tools use
17 * the distinction when we return the reason code to them.
18 */
19#define SHUTDOWN_HALT 4
20
21/* Ignore multiple shutdown requests. */
22static int shutting_down = SHUTDOWN_INVALID;
23
24static void shutdown_handler(struct xenbus_watch *watch,
25 const char **vec, unsigned int len)
26{
27 char *str;
28 struct xenbus_transaction xbt;
29 int err;
30
31 if (shutting_down != SHUTDOWN_INVALID)
32 return;
33
34 again:
35 err = xenbus_transaction_start(&xbt);
36 if (err)
37 return;
38
39 str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
40 /* Ignore read errors and empty reads. */
41 if (XENBUS_IS_ERR_READ(str)) {
42 xenbus_transaction_end(xbt, 1);
43 return;
44 }
45
46 xenbus_write(xbt, "control", "shutdown", "");
47
48 err = xenbus_transaction_end(xbt, 0);
49 if (err == -EAGAIN) {
50 kfree(str);
51 goto again;
52 }
53
54 if (strcmp(str, "poweroff") == 0 ||
55 strcmp(str, "halt") == 0)
56 orderly_poweroff(false);
57 else if (strcmp(str, "reboot") == 0)
58 ctrl_alt_del();
59 else {
60 printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
61 shutting_down = SHUTDOWN_INVALID;
62 }
63
64 kfree(str);
65}
66
67static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
68 unsigned int len)
69{
70 char sysrq_key = '\0';
71 struct xenbus_transaction xbt;
72 int err;
73
74 again:
75 err = xenbus_transaction_start(&xbt);
76 if (err)
77 return;
78 if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
79 printk(KERN_ERR "Unable to read sysrq code in "
80 "control/sysrq\n");
81 xenbus_transaction_end(xbt, 1);
82 return;
83 }
84
85 if (sysrq_key != '\0')
86 xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
87
88 err = xenbus_transaction_end(xbt, 0);
89 if (err == -EAGAIN)
90 goto again;
91
92 if (sysrq_key != '\0')
93 handle_sysrq(sysrq_key, NULL);
94}
95
96static struct xenbus_watch shutdown_watch = {
97 .node = "control/shutdown",
98 .callback = shutdown_handler
99};
100
101static struct xenbus_watch sysrq_watch = {
102 .node = "control/sysrq",
103 .callback = sysrq_handler
104};
105
106static int setup_shutdown_watcher(void)
107{
108 int err;
109
110 err = register_xenbus_watch(&shutdown_watch);
111 if (err) {
112 printk(KERN_ERR "Failed to set shutdown watcher\n");
113 return err;
114 }
115
116 err = register_xenbus_watch(&sysrq_watch);
117 if (err) {
118 printk(KERN_ERR "Failed to set sysrq watcher\n");
119 return err;
120 }
121
122 return 0;
123}
124
125static int shutdown_event(struct notifier_block *notifier,
126 unsigned long event,
127 void *data)
128{
129 setup_shutdown_watcher();
130 return NOTIFY_DONE;
131}
132
133static int __init setup_shutdown_event(void)
134{
135 static struct notifier_block xenstore_notifier = {
136 .notifier_call = shutdown_event
137 };
138 register_xenstore_notifier(&xenstore_notifier);
139
140 return 0;
141}
142
143subsys_initcall(setup_shutdown_event);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index df40bf74ea75..8132aa8c5d49 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,6 +56,131 @@
56#include "multicalls.h" 56#include "multicalls.h"
57#include "mmu.h" 57#include "mmu.h"
58 58
59#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
60#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
61
62/* Placeholder for holes in the address space */
63static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
64 __attribute__((section(".data.page_aligned"))) =
65 { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
66
67 /* Array of pointers to pages containing p2m entries */
68static unsigned long *p2m_top[TOP_ENTRIES]
69 __attribute__((section(".data.page_aligned"))) =
70 { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
71
72/* Arrays of p2m arrays expressed in mfns used for save/restore */
73static unsigned long p2m_top_mfn[TOP_ENTRIES]
74 __attribute__((section(".bss.page_aligned")));
75
76static unsigned long p2m_top_mfn_list[
77 PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
78 __attribute__((section(".bss.page_aligned")));
79
80static inline unsigned p2m_top_index(unsigned long pfn)
81{
82 BUG_ON(pfn >= MAX_DOMAIN_PAGES);
83 return pfn / P2M_ENTRIES_PER_PAGE;
84}
85
86static inline unsigned p2m_index(unsigned long pfn)
87{
88 return pfn % P2M_ENTRIES_PER_PAGE;
89}
90
91/* Build the parallel p2m_top_mfn structures */
92void xen_setup_mfn_list_list(void)
93{
94 unsigned pfn, idx;
95
96 for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
97 unsigned topidx = p2m_top_index(pfn);
98
99 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
100 }
101
102 for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
103 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
104 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
105 }
106
107 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
108
109 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
110 virt_to_mfn(p2m_top_mfn_list);
111 HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
112}
113
114/* Set up p2m_top to point to the domain-builder provided p2m pages */
115void __init xen_build_dynamic_phys_to_machine(void)
116{
117 unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
118 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
119 unsigned pfn;
120
121 for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
122 unsigned topidx = p2m_top_index(pfn);
123
124 p2m_top[topidx] = &mfn_list[pfn];
125 }
126}
127
128unsigned long get_phys_to_machine(unsigned long pfn)
129{
130 unsigned topidx, idx;
131
132 if (unlikely(pfn >= MAX_DOMAIN_PAGES))
133 return INVALID_P2M_ENTRY;
134
135 topidx = p2m_top_index(pfn);
136 idx = p2m_index(pfn);
137 return p2m_top[topidx][idx];
138}
139EXPORT_SYMBOL_GPL(get_phys_to_machine);
140
141static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
142{
143 unsigned long *p;
144 unsigned i;
145
146 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
147 BUG_ON(p == NULL);
148
149 for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
150 p[i] = INVALID_P2M_ENTRY;
151
152 if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
153 free_page((unsigned long)p);
154 else
155 *mfnp = virt_to_mfn(p);
156}
157
158void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
159{
160 unsigned topidx, idx;
161
162 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
163 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
164 return;
165 }
166
167 if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
168 BUG_ON(mfn != INVALID_P2M_ENTRY);
169 return;
170 }
171
172 topidx = p2m_top_index(pfn);
173 if (p2m_top[topidx] == p2m_missing) {
174 /* no need to allocate a page to store an invalid entry */
175 if (mfn == INVALID_P2M_ENTRY)
176 return;
177 alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
178 }
179
180 idx = p2m_index(pfn);
181 p2m_top[topidx][idx] = mfn;
182}
183
59xmaddr_t arbitrary_virt_to_machine(unsigned long address) 184xmaddr_t arbitrary_virt_to_machine(unsigned long address)
60{ 185{
61 unsigned int level; 186 unsigned int level;
@@ -98,7 +223,14 @@ void make_lowmem_page_readwrite(void *vaddr)
98} 223}
99 224
100 225
101void xen_set_pmd(pmd_t *ptr, pmd_t val) 226static bool page_pinned(void *ptr)
227{
228 struct page *page = virt_to_page(ptr);
229
230 return PagePinned(page);
231}
232
233void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
102{ 234{
103 struct multicall_space mcs; 235 struct multicall_space mcs;
104 struct mmu_update *u; 236 struct mmu_update *u;
@@ -116,6 +248,18 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
116 preempt_enable(); 248 preempt_enable();
117} 249}
118 250
251void xen_set_pmd(pmd_t *ptr, pmd_t val)
252{
253 /* If page is not pinned, we can just update the entry
254 directly */
255 if (!page_pinned(ptr)) {
256 *ptr = val;
257 return;
258 }
259
260 xen_set_pmd_hyper(ptr, val);
261}
262
119/* 263/*
120 * Associate a virtual page frame with a given physical page frame 264 * Associate a virtual page frame with a given physical page frame
121 * and protection flags for that frame. 265 * and protection flags for that frame.
@@ -229,7 +373,7 @@ pmdval_t xen_pmd_val(pmd_t pmd)
229 return pte_mfn_to_pfn(pmd.pmd); 373 return pte_mfn_to_pfn(pmd.pmd);
230} 374}
231 375
232void xen_set_pud(pud_t *ptr, pud_t val) 376void xen_set_pud_hyper(pud_t *ptr, pud_t val)
233{ 377{
234 struct multicall_space mcs; 378 struct multicall_space mcs;
235 struct mmu_update *u; 379 struct mmu_update *u;
@@ -247,6 +391,18 @@ void xen_set_pud(pud_t *ptr, pud_t val)
247 preempt_enable(); 391 preempt_enable();
248} 392}
249 393
394void xen_set_pud(pud_t *ptr, pud_t val)
395{
396 /* If page is not pinned, we can just update the entry
397 directly */
398 if (!page_pinned(ptr)) {
399 *ptr = val;
400 return;
401 }
402
403 xen_set_pud_hyper(ptr, val);
404}
405
250void xen_set_pte(pte_t *ptep, pte_t pte) 406void xen_set_pte(pte_t *ptep, pte_t pte)
251{ 407{
252 ptep->pte_high = pte.pte_high; 408 ptep->pte_high = pte.pte_high;
@@ -268,7 +424,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
268 424
269void xen_pmd_clear(pmd_t *pmdp) 425void xen_pmd_clear(pmd_t *pmdp)
270{ 426{
271 xen_set_pmd(pmdp, __pmd(0)); 427 set_pmd(pmdp, __pmd(0));
272} 428}
273 429
274pmd_t xen_make_pmd(pmdval_t pmd) 430pmd_t xen_make_pmd(pmdval_t pmd)
@@ -441,6 +597,29 @@ void xen_pgd_pin(pgd_t *pgd)
441 xen_mc_issue(0); 597 xen_mc_issue(0);
442} 598}
443 599
600/*
601 * On save, we need to pin all pagetables to make sure they get their
602 * mfns turned into pfns. Search the list for any unpinned pgds and pin
603 * them (unpinned pgds are not currently in use, probably because the
604 * process is under construction or destruction).
605 */
606void xen_mm_pin_all(void)
607{
608 unsigned long flags;
609 struct page *page;
610
611 spin_lock_irqsave(&pgd_lock, flags);
612
613 list_for_each_entry(page, &pgd_list, lru) {
614 if (!PagePinned(page)) {
615 xen_pgd_pin((pgd_t *)page_address(page));
616 SetPageSavePinned(page);
617 }
618 }
619
620 spin_unlock_irqrestore(&pgd_lock, flags);
621}
622
444/* The init_mm pagetable is really pinned as soon as its created, but 623/* The init_mm pagetable is really pinned as soon as its created, but
445 that's before we have page structures to store the bits. So do all 624 that's before we have page structures to store the bits. So do all
446 the book-keeping now. */ 625 the book-keeping now. */
@@ -498,6 +677,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
498 xen_mc_issue(0); 677 xen_mc_issue(0);
499} 678}
500 679
680/*
681 * On resume, undo any pinning done at save, so that the rest of the
682 * kernel doesn't see any unexpected pinned pagetables.
683 */
684void xen_mm_unpin_all(void)
685{
686 unsigned long flags;
687 struct page *page;
688
689 spin_lock_irqsave(&pgd_lock, flags);
690
691 list_for_each_entry(page, &pgd_list, lru) {
692 if (PageSavePinned(page)) {
693 BUG_ON(!PagePinned(page));
694 printk("unpinning pinned %p\n", page_address(page));
695 xen_pgd_unpin((pgd_t *)page_address(page));
696 ClearPageSavePinned(page);
697 }
698 }
699
700 spin_unlock_irqrestore(&pgd_lock, flags);
701}
702
501void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) 703void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
502{ 704{
503 spin_lock(&next->page_table_lock); 705 spin_lock(&next->page_table_lock);
@@ -591,7 +793,7 @@ void xen_exit_mmap(struct mm_struct *mm)
591 spin_lock(&mm->page_table_lock); 793 spin_lock(&mm->page_table_lock);
592 794
593 /* pgd may not be pinned in the error exit path of execve */ 795 /* pgd may not be pinned in the error exit path of execve */
594 if (PagePinned(virt_to_page(mm->pgd))) 796 if (page_pinned(mm->pgd))
595 xen_pgd_unpin(mm->pgd); 797 xen_pgd_unpin(mm->pgd);
596 798
597 spin_unlock(&mm->page_table_lock); 799 spin_unlock(&mm->page_table_lock);
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 5fe961caffd4..e3dd09e25c63 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -25,10 +25,6 @@ enum pt_level {
25 25
26void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 26void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
27 27
28void xen_set_pte(pte_t *ptep, pte_t pteval);
29void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
30 pte_t *ptep, pte_t pteval);
31void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
32 28
33void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); 29void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
34void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); 30void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
@@ -45,10 +41,14 @@ pte_t xen_make_pte(pteval_t);
45pmd_t xen_make_pmd(pmdval_t); 41pmd_t xen_make_pmd(pmdval_t);
46pgd_t xen_make_pgd(pgdval_t); 42pgd_t xen_make_pgd(pgdval_t);
47 43
44void xen_set_pte(pte_t *ptep, pte_t pteval);
48void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 45void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
49 pte_t *ptep, pte_t pteval); 46 pte_t *ptep, pte_t pteval);
50void xen_set_pte_atomic(pte_t *ptep, pte_t pte); 47void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
48void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
51void xen_set_pud(pud_t *ptr, pud_t val); 49void xen_set_pud(pud_t *ptr, pud_t val);
50void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
51void xen_set_pud_hyper(pud_t *ptr, pud_t val);
52void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 52void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
53void xen_pmd_clear(pmd_t *pmdp); 53void xen_pmd_clear(pmd_t *pmdp);
54 54
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 82517e4a752a..488447878a9d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
16#include <asm/xen/hypervisor.h> 16#include <asm/xen/hypervisor.h>
17#include <asm/xen/hypercall.h> 17#include <asm/xen/hypercall.h>
18 18
19#include <xen/page.h>
19#include <xen/interface/callback.h> 20#include <xen/interface/callback.h>
20#include <xen/interface/physdev.h> 21#include <xen/interface/physdev.h>
21#include <xen/features.h> 22#include <xen/features.h>
@@ -27,8 +28,6 @@
27extern const char xen_hypervisor_callback[]; 28extern const char xen_hypervisor_callback[];
28extern const char xen_failsafe_callback[]; 29extern const char xen_failsafe_callback[];
29 30
30unsigned long *phys_to_machine_mapping;
31EXPORT_SYMBOL(phys_to_machine_mapping);
32 31
33/** 32/**
34 * machine_specific_memory_setup - Hook for machine specific memory setup. 33 * machine_specific_memory_setup - Hook for machine specific memory setup.
@@ -38,6 +37,8 @@ char * __init xen_memory_setup(void)
38{ 37{
39 unsigned long max_pfn = xen_start_info->nr_pages; 38 unsigned long max_pfn = xen_start_info->nr_pages;
40 39
40 max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
41
41 e820.nr_map = 0; 42 e820.nr_map = 0;
42 add_memory_region(0, LOWMEMSIZE(), E820_RAM); 43 add_memory_region(0, LOWMEMSIZE(), E820_RAM);
43 add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); 44 add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 94e69000f982..d2e3c20127d7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -35,7 +35,7 @@
35#include "xen-ops.h" 35#include "xen-ops.h"
36#include "mmu.h" 36#include "mmu.h"
37 37
38static cpumask_t xen_cpu_initialized_map; 38cpumask_t xen_cpu_initialized_map;
39static DEFINE_PER_CPU(int, resched_irq) = -1; 39static DEFINE_PER_CPU(int, resched_irq) = -1;
40static DEFINE_PER_CPU(int, callfunc_irq) = -1; 40static DEFINE_PER_CPU(int, callfunc_irq) = -1;
41static DEFINE_PER_CPU(int, debug_irq) = -1; 41static DEFINE_PER_CPU(int, debug_irq) = -1;
@@ -65,6 +65,12 @@ static struct call_data_struct *call_data;
65 */ 65 */
66static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) 66static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
67{ 67{
68#ifdef CONFIG_X86_32
69 __get_cpu_var(irq_stat).irq_resched_count++;
70#else
71 add_pda(irq_resched_count, 1);
72#endif
73
68 return IRQ_HANDLED; 74 return IRQ_HANDLED;
69} 75}
70 76
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
new file mode 100644
index 000000000000..251669a932d4
--- /dev/null
+++ b/arch/x86/xen/suspend.c
@@ -0,0 +1,45 @@
1#include <linux/types.h>
2
3#include <xen/interface/xen.h>
4#include <xen/grant_table.h>
5#include <xen/events.h>
6
7#include <asm/xen/hypercall.h>
8#include <asm/xen/page.h>
9
10#include "xen-ops.h"
11#include "mmu.h"
12
13void xen_pre_suspend(void)
14{
15 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
16 xen_start_info->console.domU.mfn =
17 mfn_to_pfn(xen_start_info->console.domU.mfn);
18
19 BUG_ON(!irqs_disabled());
20
21 HYPERVISOR_shared_info = &xen_dummy_shared_info;
22 if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
23 __pte_ma(0), 0))
24 BUG();
25}
26
27void xen_post_suspend(int suspend_cancelled)
28{
29 xen_setup_shared_info();
30
31 if (suspend_cancelled) {
32 xen_start_info->store_mfn =
33 pfn_to_mfn(xen_start_info->store_mfn);
34 xen_start_info->console.domU.mfn =
35 pfn_to_mfn(xen_start_info->console.domU.mfn);
36 } else {
37#ifdef CONFIG_SMP
38 xen_cpu_initialized_map = cpu_online_map;
39#endif
40 xen_vcpu_restore();
41 xen_timer_resume();
42 }
43
44}
45
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 41e217503c96..64f0038b9558 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void)
459 clockevents_register_device(&__get_cpu_var(xen_clock_events)); 459 clockevents_register_device(&__get_cpu_var(xen_clock_events));
460} 460}
461 461
462void xen_timer_resume(void)
463{
464 int cpu;
465
466 if (xen_clockevent != &xen_vcpuop_clockevent)
467 return;
468
469 for_each_online_cpu(cpu) {
470 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
471 BUG();
472 }
473}
474
462__init void xen_time_init(void) 475__init void xen_time_init(void)
463{ 476{
464 int cpu = smp_processor_id(); 477 int cpu = smp_processor_id();
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 6ec3b4f7719b..7c0cf6320a0a 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -7,6 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <asm/boot.h> 8#include <asm/boot.h>
9#include <xen/interface/elfnote.h> 9#include <xen/interface/elfnote.h>
10#include <asm/xen/interface.h>
10 11
11 __INIT 12 __INIT
12ENTRY(startup_xen) 13ENTRY(startup_xen)
@@ -32,5 +33,9 @@ ENTRY(hypercall_page)
32 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") 33 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
33 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") 34 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
34 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") 35 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
36 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
37 .quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
38 ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
39 ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START)
35 40
36#endif /*CONFIG_XEN */ 41#endif /*CONFIG_XEN */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f1063ae08037..9a055592a307 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -9,18 +9,26 @@
9extern const char xen_hypervisor_callback[]; 9extern const char xen_hypervisor_callback[];
10extern const char xen_failsafe_callback[]; 10extern const char xen_failsafe_callback[];
11 11
12struct trap_info;
12void xen_copy_trap_info(struct trap_info *traps); 13void xen_copy_trap_info(struct trap_info *traps);
13 14
14DECLARE_PER_CPU(unsigned long, xen_cr3); 15DECLARE_PER_CPU(unsigned long, xen_cr3);
15DECLARE_PER_CPU(unsigned long, xen_current_cr3); 16DECLARE_PER_CPU(unsigned long, xen_current_cr3);
16 17
17extern struct start_info *xen_start_info; 18extern struct start_info *xen_start_info;
19extern struct shared_info xen_dummy_shared_info;
18extern struct shared_info *HYPERVISOR_shared_info; 20extern struct shared_info *HYPERVISOR_shared_info;
19 21
22void xen_setup_mfn_list_list(void);
23void xen_setup_shared_info(void);
24
20char * __init xen_memory_setup(void); 25char * __init xen_memory_setup(void);
21void __init xen_arch_setup(void); 26void __init xen_arch_setup(void);
22void __init xen_init_IRQ(void); 27void __init xen_init_IRQ(void);
23void xen_enable_sysenter(void); 28void xen_enable_sysenter(void);
29void xen_vcpu_restore(void);
30
31void __init xen_build_dynamic_phys_to_machine(void);
24 32
25void xen_setup_timer(int cpu); 33void xen_setup_timer(int cpu);
26void xen_setup_cpu_clockevents(void); 34void xen_setup_cpu_clockevents(void);
@@ -29,6 +37,7 @@ void __init xen_time_init(void);
29unsigned long xen_get_wallclock(void); 37unsigned long xen_get_wallclock(void);
30int xen_set_wallclock(unsigned long time); 38int xen_set_wallclock(unsigned long time);
31unsigned long long xen_sched_clock(void); 39unsigned long long xen_sched_clock(void);
40void xen_timer_resume(void);
32 41
33irqreturn_t xen_debug_interrupt(int irq, void *dev_id); 42irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
34 43
@@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
54int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), 63int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
55 void *info, int wait); 64 void *info, int wait);
56 65
66extern cpumask_t xen_cpu_initialized_map;
67
57 68
58/* Declare an asm function, along with symbols needed to make it 69/* Declare an asm function, along with symbols needed to make it
59 inlineable */ 70 inlineable */