author    Ingo Molnar <mingo@elte.hu>  2008-07-15 18:29:07 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-07-15 18:29:07 -0400
commit    82638844d9a8581bbf33201cc209a14876eca167 (patch)
tree      961d7f9360194421a71aa644a9d0c176a960ce49 /arch/x86/xen
parent    9982fbface82893e77d211fbabfbd229da6bdde6 (diff)
parent    63cf13b77ab785e87c867defa8545e6d4a989774 (diff)
Merge branch 'linus' into cpus4096
Conflicts:
        arch/x86/xen/smp.c
        kernel/sched_rt.c
        net/iucv/iucv.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--  arch/x86/xen/Kconfig      |  10
-rw-r--r--  arch/x86/xen/Makefile     |   2
-rw-r--r--  arch/x86/xen/enlighten.c  | 180
-rw-r--r--  arch/x86/xen/manage.c     | 143
-rw-r--r--  arch/x86/xen/mmu.c        | 272
-rw-r--r--  arch/x86/xen/mmu.h        |  12
-rw-r--r--  arch/x86/xen/multicalls.c |  40
-rw-r--r--  arch/x86/xen/multicalls.h |  12
-rw-r--r--  arch/x86/xen/setup.c      |  30
-rw-r--r--  arch/x86/xen/smp.c        | 143
-rw-r--r--  arch/x86/xen/suspend.c    |  45
-rw-r--r--  arch/x86/xen/time.c       |  17
-rw-r--r--  arch/x86/xen/xen-head.S   |   5
-rw-r--r--  arch/x86/xen/xen-ops.h    |  20
14 files changed, 628 insertions, 303 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 6c388e593bc8..c2cc99580871 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -12,3 +12,13 @@ config XEN
          This is the Linux Xen port.  Enabling this will allow the
          kernel to boot in a paravirtualized environment under the
          Xen hypervisor.
+
+config XEN_MAX_DOMAIN_MEMORY
+        int "Maximum allowed size of a domain in gigabytes"
+        default 8
+        depends on XEN
+        help
+          The pseudo-physical to machine address array is sized
+          according to the maximum possible memory size of a Xen
+          domain. This array uses 1 page per gigabyte, so there's no
+          need to be too stingy here.
\ No newline at end of file
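
The new option presumably feeds the MAX_DOMAIN_PAGES limit that the p2m code added to mmu.c (below) sizes its arrays against. A plausible sketch of that derivation, assuming the constant lives in a header outside this diff (the name MAX_DOMAIN_PAGES does appear in the mmu.c hunks; the exact definition here is an assumption):

    /* Hypothetical -- not part of this diff; shown only to connect the
     * Kconfig option to the MAX_DOMAIN_PAGES limit used in mmu.c. */
    #define MAX_DOMAIN_PAGES \
            ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))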
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3d8df981d5fd..2ba2d1649131 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
 obj-y           := enlighten.o setup.o multicalls.o mmu.o \
-                        time.o manage.o xen-asm.o grant-table.o
+                        time.o xen-asm.o grant-table.o suspend.o
 
 obj-$(CONFIG_SMP)       += smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09c1c69c37a..bb508456ef52 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
+#include <asm/pgalloc.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -75,13 +76,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
 
-static /* __initdata */ struct shared_info dummy_shared_info;
+struct shared_info xen_dummy_shared_info;
 
 /*
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
  */
-struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
+struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
 
 /*
  * Flag to determine whether vcpu info placement is available on all
@@ -98,13 +99,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
  */
 static int have_vcpu_info_placement = 1;
 
-static void __init xen_vcpu_setup(int cpu)
+static void xen_vcpu_setup(int cpu)
 {
         struct vcpu_register_vcpu_info info;
         int err;
         struct vcpu_info *vcpup;
 
-        BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info);
+        BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
         per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 
         if (!have_vcpu_info_placement)
@@ -136,11 +137,41 @@ static void __init xen_vcpu_setup(int cpu)
         }
 }
 
+/*
+ * On restore, set the vcpu placement up again.
+ * If it fails, then we're in a bad state, since
+ * we can't back out from using it...
+ */
+void xen_vcpu_restore(void)
+{
+        if (have_vcpu_info_placement) {
+                int cpu;
+
+                for_each_online_cpu(cpu) {
+                        bool other_cpu = (cpu != smp_processor_id());
+
+                        if (other_cpu &&
+                            HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+                                BUG();
+
+                        xen_vcpu_setup(cpu);
+
+                        if (other_cpu &&
+                            HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+                                BUG();
+                }
+
+                BUG_ON(!have_vcpu_info_placement);
+        }
+}
+
 static void __init xen_banner(void)
 {
         printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
                pv_info.name);
-        printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
+        printk(KERN_INFO "Hypervisor signature: %s%s\n",
+               xen_start_info->magic,
+               xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
@@ -235,13 +266,13 @@ static void xen_irq_enable(void)
 {
         struct vcpu_info *vcpu;
 
-        /* There's a one instruction preempt window here.  We need to
-           make sure we're don't switch CPUs between getting the vcpu
-           pointer and updating the mask. */
-        preempt_disable();
+        /* We don't need to worry about being preempted here, since
+           either a) interrupts are disabled, so no preemption, or b)
+           the caller is confused and is trying to re-enable interrupts
+           on an indeterminate processor. */
+
         vcpu = x86_read_percpu(xen_vcpu);
         vcpu->evtchn_upcall_mask = 0;
-        preempt_enable_no_resched();
 
         /* Doesn't matter if we get preempted here, because any
            pending event will get dealt with anyway. */
@@ -254,7 +285,7 @@ static void xen_irq_enable(void)
 static void xen_safe_halt(void)
 {
         /* Blocking includes an implicit local_irq_enable(). */
-        if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
+        if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
                 BUG();
 }
 
@@ -607,6 +638,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
         xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
+static void xen_clts(void)
+{
+        struct multicall_space mcs;
+
+        mcs = xen_mc_entry(0);
+
+        MULTI_fpu_taskswitch(mcs.mc, 0);
+
+        xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+static void xen_write_cr0(unsigned long cr0)
+{
+        struct multicall_space mcs;
+
+        /* Only pay attention to cr0.TS; everything else is
+           ignored. */
+        mcs = xen_mc_entry(0);
+
+        MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
+
+        xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
 static void xen_write_cr2(unsigned long cr2)
 {
         x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@ -624,8 +679,10 @@ static unsigned long xen_read_cr2_direct(void)
 
 static void xen_write_cr4(unsigned long cr4)
 {
-        /* Just ignore cr4 changes; Xen doesn't allow us to do
-           anything anyway. */
+        cr4 &= ~X86_CR4_PGE;
+        cr4 &= ~X86_CR4_PSE;
+
+        native_write_cr4(cr4);
 }
 
 static unsigned long xen_read_cr3(void)
@@ -831,7 +888,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
                           PFN_DOWN(__pa(xen_start_info->pt_base)));
 }
 
-static __init void setup_shared_info(void)
+void xen_setup_shared_info(void)
 {
         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
@@ -854,6 +911,8 @@ static __init void setup_shared_info(void)
         /* In UP this is as good a place as any to set up shared info */
         xen_setup_vcpu_info_placement();
 #endif
+
+        xen_setup_mfn_list_list();
 }
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -866,15 +925,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
         pv_mmu_ops.release_pmd = xen_release_pmd;
         pv_mmu_ops.set_pte = xen_set_pte;
 
-        setup_shared_info();
+        xen_setup_shared_info();
 
         /* Actually pin the pagetable down, but we can't set PG_pinned
            yet because the page structures don't exist yet. */
         pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
 }
 
+static __init void xen_post_allocator_init(void)
+{
+        pv_mmu_ops.set_pmd = xen_set_pmd;
+        pv_mmu_ops.set_pud = xen_set_pud;
+
+        xen_mark_init_mm_pinned();
+}
+
 /* This is called once we have the cpu_possible_map */
-void __init xen_setup_vcpu_info_placement(void)
+void xen_setup_vcpu_info_placement(void)
 {
         int cpu;
 
@@ -947,6 +1014,33 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
         return ret;
 }
 
+static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
+{
+        pte_t pte;
+
+        phys >>= PAGE_SHIFT;
+
+        switch (idx) {
+        case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
+#ifdef CONFIG_X86_F00F_BUG
+        case FIX_F00F_IDT:
+#endif
+        case FIX_WP_TEST:
+        case FIX_VDSO:
+#ifdef CONFIG_X86_LOCAL_APIC
+        case FIX_APIC_BASE:     /* maps dummy local APIC */
+#endif
+                pte = pfn_pte(phys, prot);
+                break;
+
+        default:
+                pte = mfn_pte(phys, prot);
+                break;
+        }
+
+        __native_set_fixmap(idx, pte);
+}
+
 static const struct pv_info xen_info __initdata = {
         .paravirt_enabled = 1,
         .shared_kernel_pmd = 0,
@@ -960,7 +1054,7 @@ static const struct pv_init_ops xen_init_ops __initdata = {
         .banner = xen_banner,
         .memory_setup = xen_memory_setup,
         .arch_setup = xen_arch_setup,
-        .post_allocator_init = xen_mark_init_mm_pinned,
+        .post_allocator_init = xen_post_allocator_init,
 };
 
 static const struct pv_time_ops xen_time_ops __initdata = {
@@ -968,7 +1062,7 @@ static const struct pv_time_ops xen_time_ops __initdata = {
 
         .set_wallclock = xen_set_wallclock,
         .get_wallclock = xen_get_wallclock,
-        .get_cpu_khz = xen_cpu_khz,
+        .get_tsc_khz = xen_tsc_khz,
         .sched_clock = xen_sched_clock,
 };
 
@@ -978,10 +1072,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
         .set_debugreg = xen_set_debugreg,
         .get_debugreg = xen_get_debugreg,
 
-        .clts = native_clts,
+        .clts = xen_clts,
 
         .read_cr0 = native_read_cr0,
-        .write_cr0 = native_write_cr0,
+        .write_cr0 = xen_write_cr0,
 
         .read_cr4 = native_read_cr4,
         .read_cr4_safe = native_read_cr4_safe,
@@ -995,7 +1089,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
         .read_pmc = native_read_pmc,
 
         .iret = xen_iret,
-        .irq_enable_syscall_ret = xen_sysexit,
+        .irq_enable_sysexit = xen_sysexit,
 
         .load_tr_desc = paravirt_nop,
         .set_ldt = xen_set_ldt,
@@ -1029,6 +1123,9 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
         .irq_enable = xen_irq_enable,
         .safe_halt = xen_safe_halt,
         .halt = xen_halt,
+#ifdef CONFIG_X86_64
+        .adjust_exception_frame = paravirt_nop,
+#endif
 };
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
@@ -1060,6 +1157,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
         .pte_update = paravirt_nop,
         .pte_update_defer = paravirt_nop,
 
+        .pgd_alloc = __paravirt_pgd_alloc,
+        .pgd_free = paravirt_nop,
+
         .alloc_pte = xen_alloc_pte_init,
         .release_pte = xen_release_pte_init,
         .alloc_pmd = xen_alloc_pte_init,
@@ -1072,9 +1172,13 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
         .set_pte = NULL,        /* see xen_pagetable_setup_* */
         .set_pte_at = xen_set_pte_at,
-        .set_pmd = xen_set_pmd,
+        .set_pmd = xen_set_pmd_hyper,
+
+        .ptep_modify_prot_start = __ptep_modify_prot_start,
+        .ptep_modify_prot_commit = __ptep_modify_prot_commit,
 
         .pte_val = xen_pte_val,
+        .pte_flags = native_pte_val,
         .pgd_val = xen_pgd_val,
 
         .make_pte = xen_make_pte,
@@ -1082,7 +1186,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
         .set_pte_atomic = xen_set_pte_atomic,
         .set_pte_present = xen_set_pte_at,
-        .set_pud = xen_set_pud,
+        .set_pud = xen_set_pud_hyper,
         .pte_clear = xen_pte_clear,
         .pmd_clear = xen_pmd_clear,
 
@@ -1097,6 +1201,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
                 .enter = paravirt_enter_lazy_mmu,
                 .leave = xen_leave_lazy,
         },
+
+        .set_fixmap = xen_set_fixmap,
 };
 
 #ifdef CONFIG_SMP
@@ -1108,17 +1214,21 @@ static const struct smp_ops xen_smp_ops __initdata = {
 
         .smp_send_stop = xen_smp_send_stop,
         .smp_send_reschedule = xen_smp_send_reschedule,
-        .smp_call_function_mask = xen_smp_call_function_mask,
+
+        .send_call_func_ipi = xen_smp_send_call_function_ipi,
+        .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
 };
 #endif  /* CONFIG_SMP */
 
 static void xen_reboot(int reason)
 {
+        struct sched_shutdown r = { .reason = reason };
+
 #ifdef CONFIG_SMP
         smp_send_stop();
 #endif
 
-        if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason))
+        if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
                 BUG();
 }
 
@@ -1173,6 +1283,8 @@ asmlinkage void __init xen_start_kernel(void)
 
         BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
 
+        xen_setup_features();
+
         /* Install Xen paravirt ops */
         pv_info = xen_info;
         pv_init_ops = xen_init_ops;
@@ -1182,21 +1294,26 @@ asmlinkage void __init xen_start_kernel(void)
         pv_apic_ops = xen_apic_ops;
         pv_mmu_ops = xen_mmu_ops;
 
+        if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
+                pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
+                pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
+        }
+
         machine_ops = xen_machine_ops;
 
 #ifdef CONFIG_SMP
         smp_ops = xen_smp_ops;
 #endif
 
-        xen_setup_features();
-
         /* Get mfn list */
         if (!xen_feature(XENFEAT_auto_translated_physmap))
-                phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
+                xen_build_dynamic_phys_to_machine();
 
         pgd = (pgd_t *)xen_start_info->pt_base;
 
+        init_pg_tables_start = __pa(pgd);
         init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+        max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
 
         init_mm.pgd = pgd;      /* use the Xen pagetables to start */
 
@@ -1232,9 +1349,12 @@ asmlinkage void __init xen_start_kernel(void)
                 ? __pa(xen_start_info->mod_start) : 0;
         boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 
-        if (!is_initial_xendomain())
+        if (!is_initial_xendomain()) {
+                add_preferred_console("xenboot", 0, NULL);
+                add_preferred_console("tty", 0, NULL);
                 add_preferred_console("hvc", 0, NULL);
+        }
 
         /* Start the world */
-        start_kernel();
+        i386_start_kernel();
 }
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c
deleted file mode 100644
index aa7af9e6abc0..000000000000
--- a/arch/x86/xen/manage.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Handle extern requests for shutdown, reboot and sysrq
- */
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/reboot.h>
-#include <linux/sysrq.h>
-
-#include <xen/xenbus.h>
-
-#define SHUTDOWN_INVALID  -1
-#define SHUTDOWN_POWEROFF  0
-#define SHUTDOWN_SUSPEND   2
-/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
- * report a crash, not be instructed to crash!
- * HALT is the same as POWEROFF, as far as we're concerned.  The tools use
- * the distinction when we return the reason code to them.
- */
-#define SHUTDOWN_HALT      4
-
-/* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
-
-static void shutdown_handler(struct xenbus_watch *watch,
-                             const char **vec, unsigned int len)
-{
-        char *str;
-        struct xenbus_transaction xbt;
-        int err;
-
-        if (shutting_down != SHUTDOWN_INVALID)
-                return;
-
- again:
-        err = xenbus_transaction_start(&xbt);
-        if (err)
-                return;
-
-        str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
-        /* Ignore read errors and empty reads. */
-        if (XENBUS_IS_ERR_READ(str)) {
-                xenbus_transaction_end(xbt, 1);
-                return;
-        }
-
-        xenbus_write(xbt, "control", "shutdown", "");
-
-        err = xenbus_transaction_end(xbt, 0);
-        if (err == -EAGAIN) {
-                kfree(str);
-                goto again;
-        }
-
-        if (strcmp(str, "poweroff") == 0 ||
-            strcmp(str, "halt") == 0)
-                orderly_poweroff(false);
-        else if (strcmp(str, "reboot") == 0)
-                ctrl_alt_del();
-        else {
-                printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
-                shutting_down = SHUTDOWN_INVALID;
-        }
-
-        kfree(str);
-}
-
-static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
-                          unsigned int len)
-{
-        char sysrq_key = '\0';
-        struct xenbus_transaction xbt;
-        int err;
-
- again:
-        err = xenbus_transaction_start(&xbt);
-        if (err)
-                return;
-        if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
-                printk(KERN_ERR "Unable to read sysrq code in "
-                       "control/sysrq\n");
-                xenbus_transaction_end(xbt, 1);
-                return;
-        }
-
-        if (sysrq_key != '\0')
-                xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
-
-        err = xenbus_transaction_end(xbt, 0);
-        if (err == -EAGAIN)
-                goto again;
-
-        if (sysrq_key != '\0')
-                handle_sysrq(sysrq_key, NULL);
-}
-
-static struct xenbus_watch shutdown_watch = {
-        .node = "control/shutdown",
-        .callback = shutdown_handler
-};
-
-static struct xenbus_watch sysrq_watch = {
-        .node = "control/sysrq",
-        .callback = sysrq_handler
-};
-
-static int setup_shutdown_watcher(void)
-{
-        int err;
-
-        err = register_xenbus_watch(&shutdown_watch);
-        if (err) {
-                printk(KERN_ERR "Failed to set shutdown watcher\n");
-                return err;
-        }
-
-        err = register_xenbus_watch(&sysrq_watch);
-        if (err) {
-                printk(KERN_ERR "Failed to set sysrq watcher\n");
-                return err;
-        }
-
-        return 0;
-}
-
-static int shutdown_event(struct notifier_block *notifier,
-                          unsigned long event,
-                          void *data)
-{
-        setup_shutdown_watcher();
-        return NOTIFY_DONE;
-}
-
-static int __init setup_shutdown_event(void)
-{
-        static struct notifier_block xenstore_notifier = {
-                .notifier_call = shutdown_event
-        };
-        register_xenstore_notifier(&xenstore_notifier);
-
-        return 0;
-}
-
-subsys_initcall(setup_shutdown_event);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 4e527e7893a8..ff0aa74afaa1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,6 +56,131 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+#define P2M_ENTRIES_PER_PAGE    (PAGE_SIZE / sizeof(unsigned long))
+#define TOP_ENTRIES             (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
+
+/* Placeholder for holes in the address space */
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
+        __attribute__((section(".data.page_aligned"))) =
+                { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+
+ /* Array of pointers to pages containing p2m entries */
+static unsigned long *p2m_top[TOP_ENTRIES]
+        __attribute__((section(".data.page_aligned"))) =
+                { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+
+/* Arrays of p2m arrays expressed in mfns used for save/restore */
+static unsigned long p2m_top_mfn[TOP_ENTRIES]
+        __attribute__((section(".bss.page_aligned")));
+
+static unsigned long p2m_top_mfn_list[
+                        PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
+        __attribute__((section(".bss.page_aligned")));
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+        BUG_ON(pfn >= MAX_DOMAIN_PAGES);
+        return pfn / P2M_ENTRIES_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+        return pfn % P2M_ENTRIES_PER_PAGE;
+}
+
+/* Build the parallel p2m_top_mfn structures */
+void xen_setup_mfn_list_list(void)
+{
+        unsigned pfn, idx;
+
+        for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+                unsigned topidx = p2m_top_index(pfn);
+
+                p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+        }
+
+        for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+                unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
+                p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+        }
+
+        BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+                virt_to_mfn(p2m_top_mfn_list);
+        HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages */
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+        unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+        unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+        unsigned pfn;
+
+        for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+                unsigned topidx = p2m_top_index(pfn);
+
+                p2m_top[topidx] = &mfn_list[pfn];
+        }
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+        unsigned topidx, idx;
+
+        if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+                return INVALID_P2M_ENTRY;
+
+        topidx = p2m_top_index(pfn);
+        idx = p2m_index(pfn);
+        return p2m_top[topidx][idx];
+}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
+
+static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+{
+        unsigned long *p;
+        unsigned i;
+
+        p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+        BUG_ON(p == NULL);
+
+        for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+                p[i] = INVALID_P2M_ENTRY;
+
+        if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
+                free_page((unsigned long)p);
+        else
+                *mfnp = virt_to_mfn(p);
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+        unsigned topidx, idx;
+
+        if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+                BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+                return;
+        }
+
+        if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+                BUG_ON(mfn != INVALID_P2M_ENTRY);
+                return;
+        }
+
+        topidx = p2m_top_index(pfn);
+        if (p2m_top[topidx] == p2m_missing) {
+                /* no need to allocate a page to store an invalid entry */
+                if (mfn == INVALID_P2M_ENTRY)
+                        return;
+                alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+        }
+
+        idx = p2m_index(pfn);
+        p2m_top[topidx][idx] = mfn;
+}
+
 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 {
         unsigned int level;
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static bool page_pinned(void *ptr)
+{
+        struct page *page = virt_to_page(ptr);
+
+        return PagePinned(page);
+}
+
+static void extend_mmu_update(const struct mmu_update *update)
 {
         struct multicall_space mcs;
         struct mmu_update *u;
 
-        preempt_disable();
+        mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
+
+        if (mcs.mc != NULL)
+                mcs.mc->args[1]++;
+        else {
+                mcs = __xen_mc_entry(sizeof(*u));
+                MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+        }
 
-        mcs = xen_mc_entry(sizeof(*u));
         u = mcs.args;
-        u->ptr = virt_to_machine(ptr).maddr;
-        u->val = pmd_val_ma(val);
-        MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+        *u = *update;
+}
+
+void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+{
+        struct mmu_update u;
+
+        preempt_disable();
+
+        xen_mc_batch();
+
+        u.ptr = virt_to_machine(ptr).maddr;
+        u.val = pmd_val_ma(val);
+        extend_mmu_update(&u);
 
         xen_mc_issue(PARAVIRT_LAZY_MMU);
 
         preempt_enable();
 }
 
+void xen_set_pmd(pmd_t *ptr, pmd_t val)
+{
+        /* If page is not pinned, we can just update the entry
+           directly */
+        if (!page_pinned(ptr)) {
+                *ptr = val;
+                return;
+        }
+
+        xen_set_pmd_hyper(ptr, val);
+}
+
 /*
  * Associate a virtual page frame with a given physical page frame
  * and protection flags for that frame.
@@ -179,6 +340,26 @@ out:
         preempt_enable();
 }
 
+pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+        /* Just return the pte as-is.  We preserve the bits on commit */
+        return *ptep;
+}
+
+void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+                                 pte_t *ptep, pte_t pte)
+{
+        struct mmu_update u;
+
+        xen_mc_batch();
+
+        u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+        u.val = pte_val_ma(pte);
+        extend_mmu_update(&u);
+
+        xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
 /* Assume pteval_t is equivalent to all the other *val_t types. */
 static pteval_t pte_mfn_to_pfn(pteval_t val)
 {
@@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd)
         return pte_mfn_to_pfn(pmd.pmd);
 }
 
-void xen_set_pud(pud_t *ptr, pud_t val)
+void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 {
-        struct multicall_space mcs;
-        struct mmu_update *u;
+        struct mmu_update u;
 
         preempt_disable();
 
-        mcs = xen_mc_entry(sizeof(*u));
-        u = mcs.args;
-        u->ptr = virt_to_machine(ptr).maddr;
-        u->val = pud_val_ma(val);
-        MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+        xen_mc_batch();
+
+        u.ptr = virt_to_machine(ptr).maddr;
+        u.val = pud_val_ma(val);
+        extend_mmu_update(&u);
 
         xen_mc_issue(PARAVIRT_LAZY_MMU);
 
         preempt_enable();
 }
 
+void xen_set_pud(pud_t *ptr, pud_t val)
+{
+        /* If page is not pinned, we can just update the entry
+           directly */
+        if (!page_pinned(ptr)) {
+                *ptr = val;
+                return;
+        }
+
+        xen_set_pud_hyper(ptr, val);
+}
+
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
         ptep->pte_high = pte.pte_high;
@@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 void xen_pmd_clear(pmd_t *pmdp)
 {
-        xen_set_pmd(pmdp, __pmd(0));
+        set_pmd(pmdp, __pmd(0));
 }
 
 pmd_t xen_make_pmd(pmdval_t pmd)
@@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd)
         xen_mc_issue(0);
 }
 
+/*
+ * On save, we need to pin all pagetables to make sure they get their
+ * mfns turned into pfns.  Search the list for any unpinned pgds and pin
+ * them (unpinned pgds are not currently in use, probably because the
+ * process is under construction or destruction).
+ */
+void xen_mm_pin_all(void)
+{
+        unsigned long flags;
+        struct page *page;
+
+        spin_lock_irqsave(&pgd_lock, flags);
+
+        list_for_each_entry(page, &pgd_list, lru) {
+                if (!PagePinned(page)) {
+                        xen_pgd_pin((pgd_t *)page_address(page));
+                        SetPageSavePinned(page);
+                }
+        }
+
+        spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 /* The init_mm pagetable is really pinned as soon as its created, but
    that's before we have page structures to store the bits.  So do all
    the book-keeping now. */
@@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
         xen_mc_issue(0);
 }
 
+/*
+ * On resume, undo any pinning done at save, so that the rest of the
+ * kernel doesn't see any unexpected pinned pagetables.
+ */
+void xen_mm_unpin_all(void)
+{
+        unsigned long flags;
+        struct page *page;
+
+        spin_lock_irqsave(&pgd_lock, flags);
+
+        list_for_each_entry(page, &pgd_list, lru) {
+                if (PageSavePinned(page)) {
+                        BUG_ON(!PagePinned(page));
+                        printk("unpinning pinned %p\n", page_address(page));
+                        xen_pgd_unpin((pgd_t *)page_address(page));
+                        ClearPageSavePinned(page);
+                }
+        }
+
+        spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
         spin_lock(&next->page_table_lock);
@@ -558,7 +796,7 @@ static void drop_mm_ref(struct mm_struct *mm)
         }
 
         if (!cpus_empty(mask))
-                xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+                smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
 }
 #else
 static void drop_mm_ref(struct mm_struct *mm)
@@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm)
         spin_lock(&mm->page_table_lock);
 
         /* pgd may not be pinned in the error exit path of execve */
-        if (PagePinned(virt_to_page(mm->pgd)))
+        if (page_pinned(mm->pgd))
                 xen_pgd_unpin(mm->pgd);
 
         spin_unlock(&mm->page_table_lock);
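
The p2m rework above replaces the flat phys_to_machine_mapping array with a two-level structure: p2m_top holds one pointer per chunk of P2M_ENTRIES_PER_PAGE frames, every hole shares the single p2m_missing page of invalid entries, and a real leaf page is only allocated on the first set_phys_to_machine() into a chunk. A self-contained userspace model of that lookup/allocate path (toy sizes, malloc in place of __get_free_page(), no cmpxchg since the model is single-threaded):

    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SIZE            4096UL
    #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
    #define MAX_PAGES            (4 * P2M_ENTRIES_PER_PAGE)  /* toy domain */
    #define TOP_ENTRIES          (MAX_PAGES / P2M_ENTRIES_PER_PAGE)
    #define INVALID_P2M_ENTRY    (~0UL)

    /* One shared page of invalid entries stands in for every hole. */
    static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE];
    static unsigned long *p2m_top[TOP_ENTRIES];

    static unsigned long get_p2m(unsigned long pfn)
    {
            return p2m_top[pfn / P2M_ENTRIES_PER_PAGE][pfn % P2M_ENTRIES_PER_PAGE];
    }

    static void set_p2m(unsigned long pfn, unsigned long mfn)
    {
            unsigned long topidx = pfn / P2M_ENTRIES_PER_PAGE;

            if (p2m_top[topidx] == p2m_missing) {
                    /* First real entry in this chunk: allocate a leaf,
                     * as alloc_p2m() does with __get_free_page(). */
                    unsigned long *p = malloc(PAGE_SIZE);
                    if (!p)
                            abort();
                    for (unsigned long i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
                            p[i] = INVALID_P2M_ENTRY;
                    p2m_top[topidx] = p;
            }
            p2m_top[topidx][pfn % P2M_ENTRIES_PER_PAGE] = mfn;
    }

    int main(void)
    {
            unsigned long hole = 3 * P2M_ENTRIES_PER_PAGE + 1; /* untouched chunk */
            unsigned long i;

            for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
                    p2m_missing[i] = INVALID_P2M_ENTRY;
            for (i = 0; i < TOP_ENTRIES; i++)
                    p2m_top[i] = p2m_missing;

            set_p2m(42, 0x1234);
            printf("pfn 42 -> mfn %#lx\n", get_p2m(42));              /* 0x1234 */
            printf("pfn %lu -> %#lx (hole)\n", hole, get_p2m(hole));  /* ~0UL  */
            return 0;
    }

The kernel version additionally mirrors the structure into p2m_top_mfn/p2m_top_mfn_list so the save/restore tools can walk it by mfn.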
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 5fe961caffd4..297bf9f5b8bc 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -25,10 +25,6 @@ enum pt_level {
 
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-void xen_set_pte(pte_t *ptep, pte_t pteval);
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
-                    pte_t *ptep, pte_t pteval);
-void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
@@ -45,11 +41,19 @@ pte_t xen_make_pte(pteval_t);
 pmd_t xen_make_pmd(pmdval_t);
 pgd_t xen_make_pgd(pgdval_t);
 
+void xen_set_pte(pte_t *ptep, pte_t pteval);
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pteval);
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
+void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 void xen_set_pud(pud_t *ptr, pud_t val);
+void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
+void xen_set_pud_hyper(pud_t *ptr, pud_t val);
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void xen_pmd_clear(pmd_t *pmdp);
 
+pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+                                 pte_t *ptep, pte_t pte);
+
 #endif  /* _XEN_MMU_H */
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 5791eb2e3750..3c63c4da7ed1 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -29,14 +29,14 @@
 #define MC_DEBUG        1
 
 #define MC_BATCH        32
-#define MC_ARGS         (MC_BATCH * 16 / sizeof(u64))
+#define MC_ARGS         (MC_BATCH * 16)
 
 struct mc_buffer {
         struct multicall_entry entries[MC_BATCH];
 #if MC_DEBUG
         struct multicall_entry debug[MC_BATCH];
 #endif
-        u64 args[MC_ARGS];
+        unsigned char args[MC_ARGS];
         struct callback {
                 void (*fn)(void *);
                 void *data;
@@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args)
 {
         struct mc_buffer *b = &__get_cpu_var(mc_buffer);
         struct multicall_space ret;
-        unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
+        unsigned argidx = roundup(b->argidx, sizeof(u64));
 
         BUG_ON(preemptible());
-        BUG_ON(argspace > MC_ARGS);
+        BUG_ON(b->argidx > MC_ARGS);
 
         if (b->mcidx == MC_BATCH ||
-            (b->argidx + argspace) > MC_ARGS)
+            (argidx + args) > MC_ARGS) {
                 xen_mc_flush();
+                argidx = roundup(b->argidx, sizeof(u64));
+        }
 
         ret.mc = &b->entries[b->mcidx];
         b->mcidx++;
+        ret.args = &b->args[argidx];
+        b->argidx = argidx + args;
+
+        BUG_ON(b->argidx > MC_ARGS);
+        return ret;
+}
+
+struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
+{
+        struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+        struct multicall_space ret = { NULL, NULL };
+
+        BUG_ON(preemptible());
+        BUG_ON(b->argidx > MC_ARGS);
+
+        if (b->mcidx == 0)
+                return ret;
+
+        if (b->entries[b->mcidx - 1].op != op)
+                return ret;
+
+        if ((b->argidx + size) > MC_ARGS)
+                return ret;
+
+        ret.mc = &b->entries[b->mcidx - 1];
         ret.args = &b->args[b->argidx];
-        b->argidx += argspace;
+        b->argidx += size;
 
+        BUG_ON(b->argidx > MC_ARGS);
         return ret;
 }
 
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 8bae996d99a3..858938241616 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode)
 /* Set up a callback to be called when the current batch is flushed */
 void xen_mc_callback(void (*fn)(void *), void *data);
 
+/*
+ * Try to extend the arguments of the previous multicall command.  The
+ * previous command's op must match.  If it does, then it attempts to
+ * extend the argument space allocated to the multicall entry by
+ * arg_size bytes.
+ *
+ * The returned multicall_space will return with mc pointing to the
+ * command on success, or NULL on failure, and args pointing to the
+ * newly allocated space.
+ */
+struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size);
+
 #endif /* _XEN_MULTICALLS_H */
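
The intended calling pattern for xen_mc_extend_args() is the one extend_mmu_update() in this series' mmu.c follows: try to piggy-back on the previous batch entry, and fall back to opening a fresh one. Restated as a sketch (same logic as the mmu.c hunk above, not a new API):

    static void queue_one_mmu_update(const struct mmu_update *update)
    {
            struct multicall_space mcs;
            struct mmu_update *u;

            /* If the last queued multicall is also an mmu_update, grow
             * its argument array and bump its count argument... */
            mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
            if (mcs.mc != NULL)
                    mcs.mc->args[1]++;
            else {
                    /* ...otherwise start a new entry for one update. */
                    mcs = __xen_mc_entry(sizeof(*u));
                    MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
            }

            u = mcs.args;
            *u = *update;
    }

Coalescing consecutive updates into one mmu_update hypercall is what keeps the new set_pte/set_pmd paths cheap under PARAVIRT_LAZY_MMU.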
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 82517e4a752a..e0a39595bde3 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -13,9 +13,11 @@
 #include <asm/vdso.h>
 #include <asm/e820.h>
 #include <asm/setup.h>
+#include <asm/acpi.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/page.h>
 #include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
@@ -27,8 +29,6 @@
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
 
-unsigned long *phys_to_machine_mapping;
-EXPORT_SYMBOL(phys_to_machine_mapping);
 
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
@@ -38,9 +38,31 @@ char * __init xen_memory_setup(void)
 {
         unsigned long max_pfn = xen_start_info->nr_pages;
 
+        max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
+
         e820.nr_map = 0;
-        add_memory_region(0, LOWMEMSIZE(), E820_RAM);
-        add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
+
+        e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM);
+
+        /*
+         * Even though this is normal, usable memory under Xen, reserve
+         * ISA memory anyway because too many things think they can poke
+         * about in there.
+         */
+        e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
+                        E820_RESERVED);
+
+        /*
+         * Reserve Xen bits:
+         *  - mfn_list
+         *  - xen_start_info
+         * See comment above "struct start_info" in <xen/interface/xen.h>
+         */
+        e820_add_region(__pa(xen_start_info->mfn_list),
+                        xen_start_info->pt_base - xen_start_info->mfn_list,
+                        E820_RESERVED);
+
+        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
         return "Xen";
 }
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7a70638797ed..463adecc5cba 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -35,28 +35,15 @@
 #include "xen-ops.h"
 #include "mmu.h"
 
-static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, callfunc_irq) = -1;
-static DEFINE_PER_CPU(int, debug_irq) = -1;
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
+cpumask_t xen_cpu_initialized_map;
 
-struct call_data_struct {
-        void (*func) (void *info);
-        void *info;
-        atomic_t started;
-        atomic_t finished;
-        int wait;
-};
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, callfuncsingle_irq);
+static DEFINE_PER_CPU(int, debug_irq) = -1;
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-
-static struct call_data_struct *call_data;
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
 
 /*
  * Reschedule call back. Nothing to do,
@@ -65,6 +52,12 @@ static struct call_data_struct *call_data;
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
+#ifdef CONFIG_X86_32
+        __get_cpu_var(irq_stat).irq_resched_count++;
+#else
+        add_pda(irq_resched_count, 1);
+#endif
+
         return IRQ_HANDLED;
 }
 
@@ -122,6 +115,17 @@ static int xen_smp_intr_init(unsigned int cpu)
                 goto fail;
         per_cpu(debug_irq, cpu) = rc;
 
+        callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+                                    cpu,
+                                    xen_call_function_single_interrupt,
+                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+                                    callfunc_name,
+                                    NULL);
+        if (rc < 0)
+                goto fail;
+        per_cpu(callfuncsingle_irq, cpu) = rc;
+
         return 0;
 
  fail:
@@ -131,6 +135,9 @@ static int xen_smp_intr_init(unsigned int cpu)
                 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
         if (per_cpu(debug_irq, cpu) >= 0)
                 unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+        if (per_cpu(callfuncsingle_irq, cpu) >= 0)
+                unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+
         return rc;
 }
 
@@ -330,7 +337,7 @@ static void stop_self(void *v)
 
 void xen_smp_send_stop(void)
 {
-        smp_call_function(stop_self, NULL, 0, 0);
+        smp_call_function(stop_self, NULL, 0);
 }
 
 void xen_smp_send_reschedule(int cpu)
@@ -338,7 +345,6 @@ void xen_smp_send_reschedule(int cpu)
         xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-
 static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
 {
         unsigned cpu;
@@ -349,83 +355,42 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
                 xen_send_IPI_one(cpu, vector);
 }
 
+void xen_smp_send_call_function_ipi(cpumask_t mask)
+{
+        int cpu;
+
+        xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+        /* Make sure other vcpus get a chance to run if they need to. */
+        for_each_cpu_mask_nr(cpu, mask) {
+                if (xen_vcpu_stolen(cpu)) {
+                        HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+                        break;
+                }
+        }
+}
+
+void xen_smp_send_call_function_single_ipi(int cpu)
+{
+        xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+}
+
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
-        void (*func) (void *info) = call_data->func;
-        void *info = call_data->info;
-        int wait = call_data->wait;
-
-        /*
-         * Notify initiating CPU that I've grabbed the data and am
-         * about to execute the function
-         */
-        mb();
-        atomic_inc(&call_data->started);
-        /*
-         * At this point the info structure may be out of scope unless wait==1
-         */
         irq_enter();
-        (*func)(info);
+        generic_smp_call_function_interrupt();
         __get_cpu_var(irq_stat).irq_call_count++;
         irq_exit();
 
-        if (wait) {
-                mb();           /* commit everything before setting finished */
-                atomic_inc(&call_data->finished);
-        }
-
         return IRQ_HANDLED;
 }
 
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-                               void *info, int wait)
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 {
-        struct call_data_struct data;
-        int cpus, cpu;
-        bool yield;
-
-        /* Holding any lock stops cpus from going down. */
-        spin_lock(&call_lock);
-
-        cpu_clear(smp_processor_id(), mask);
-
-        cpus = cpus_weight(mask);
-        if (!cpus) {
-                spin_unlock(&call_lock);
-                return 0;
-        }
-
-        /* Can deadlock when called with interrupts disabled */
-        WARN_ON(irqs_disabled());
-
-        data.func = func;
-        data.info = info;
-        atomic_set(&data.started, 0);
-        data.wait = wait;
-        if (wait)
-                atomic_set(&data.finished, 0);
-
-        call_data = &data;
-        mb();                   /* write everything before IPI */
-
-        /* Send a message to other CPUs and wait for them to respond */
-        xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
-
-        /* Make sure other vcpus get a chance to run if they need to. */
-        yield = false;
-        for_each_cpu_mask_nr(cpu, mask)
-                if (xen_vcpu_stolen(cpu))
-                        yield = true;
-
-        if (yield)
-                HYPERVISOR_sched_op(SCHEDOP_yield, 0);
-
-        /* Wait for response */
-        while (atomic_read(&data.started) != cpus ||
-               (wait && atomic_read(&data.finished) != cpus))
-                cpu_relax();
-
-        spin_unlock(&call_lock);
+        irq_enter();
+        generic_smp_call_function_single_interrupt();
+        __get_cpu_var(irq_stat).irq_call_count++;
+        irq_exit();
 
-        return 0;
+        return IRQ_HANDLED;
 }
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
new file mode 100644
index 000000000000..251669a932d4
--- /dev/null
+++ b/arch/x86/xen/suspend.c
@@ -0,0 +1,45 @@
+#include <linux/types.h>
+
+#include <xen/interface/xen.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+#include "xen-ops.h"
+#include "mmu.h"
+
+void xen_pre_suspend(void)
+{
+        xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+        xen_start_info->console.domU.mfn =
+                mfn_to_pfn(xen_start_info->console.domU.mfn);
+
+        BUG_ON(!irqs_disabled());
+
+        HYPERVISOR_shared_info = &xen_dummy_shared_info;
+        if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
+                                         __pte_ma(0), 0))
+                BUG();
+}
+
+void xen_post_suspend(int suspend_cancelled)
+{
+        xen_setup_shared_info();
+
+        if (suspend_cancelled) {
+                xen_start_info->store_mfn =
+                        pfn_to_mfn(xen_start_info->store_mfn);
+                xen_start_info->console.domU.mfn =
+                        pfn_to_mfn(xen_start_info->console.domU.mfn);
+        } else {
+#ifdef CONFIG_SMP
+                xen_cpu_initialized_map = cpu_online_map;
+#endif
+                xen_vcpu_restore();
+                xen_timer_resume();
+        }
+
+}
+
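
suspend.c only provides the arch half; the driver that triggers it lives outside this diff (the xenbus "control/shutdown" handling that used to be in the deleted manage.c). A hypothetical caller sequence, to show how the new hooks across this patch fit together (the function order is grounded in the code above; HYPERVISOR_suspend and its argument are assumptions about the out-of-tree half):

    static int xen_suspend_sketch(void)
    {
            int cancelled;

            xen_mm_pin_all();       /* mmu.c: pin stray pgds so their mfns
                                       get translated for the saved image */
            xen_pre_suspend();      /* detach shared_info, pfn-ify the
                                       store/console mfns */

            /* Assumed hypercall: returns nonzero if the suspend was
             * cancelled and the domain keeps running where it was. */
            cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));

            xen_post_suspend(cancelled);  /* reattach shared_info; on a real
                                             resume also vcpus and timers */
            xen_mm_unpin_all();     /* mmu.c: undo the save-time pinning */

            return cancelled;
    }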
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 41e217503c96..685b77470fc3 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -197,8 +197,8 @@ unsigned long long xen_sched_clock(void)
197} 197}
198 198
199 199
200/* Get the CPU speed from Xen */ 200/* Get the TSC speed from Xen */
201unsigned long xen_cpu_khz(void) 201unsigned long xen_tsc_khz(void)
202{ 202{
203 u64 xen_khz = 1000000ULL << 32; 203 u64 xen_khz = 1000000ULL << 32;
204 const struct pvclock_vcpu_time_info *info = 204 const struct pvclock_vcpu_time_info *info =
@@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void)
         clockevents_register_device(&__get_cpu_var(xen_clock_events));
 }
 
+void xen_timer_resume(void)
+{
+        int cpu;
+
+        if (xen_clockevent != &xen_vcpuop_clockevent)
+                return;
+
+        for_each_online_cpu(cpu) {
+                if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
+                        BUG();
+        }
+}
+
 __init void xen_time_init(void)
 {
         int cpu = smp_processor_id();
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 6ec3b4f7719b..7c0cf6320a0a 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -7,6 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <asm/boot.h> 8#include <asm/boot.h>
9#include <xen/interface/elfnote.h> 9#include <xen/interface/elfnote.h>
10#include <asm/xen/interface.h>
10 11
11 __INIT 12 __INIT
12ENTRY(startup_xen) 13ENTRY(startup_xen)
@@ -32,5 +33,9 @@ ENTRY(hypercall_page)
         ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
         ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
         ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
+        ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
+                .quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
+        ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
+        ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long __HYPERVISOR_VIRT_START)
 
 #endif /*CONFIG_XEN */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f1063ae08037..6f4b1045c1c2 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -9,26 +9,35 @@
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
 
+struct trap_info;
 void xen_copy_trap_info(struct trap_info *traps);
 
 DECLARE_PER_CPU(unsigned long, xen_cr3);
 DECLARE_PER_CPU(unsigned long, xen_current_cr3);
 
 extern struct start_info *xen_start_info;
+extern struct shared_info xen_dummy_shared_info;
 extern struct shared_info *HYPERVISOR_shared_info;
 
+void xen_setup_mfn_list_list(void);
+void xen_setup_shared_info(void);
+
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
 void xen_enable_sysenter(void);
+void xen_vcpu_restore(void);
+
+void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
-unsigned long xen_cpu_khz(void);
+unsigned long xen_tsc_khz(void);
 void __init xen_time_init(void);
 unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
 unsigned long long xen_sched_clock(void);
+void xen_timer_resume(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
@@ -46,13 +55,10 @@ void xen_smp_cpus_done(unsigned int max_cpus);
 
 void xen_smp_send_stop(void);
 void xen_smp_send_reschedule(int cpu);
-int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-                           int wait);
-int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-                                 int nonatomic, int wait);
+void xen_smp_send_call_function_ipi(cpumask_t mask);
+void xen_smp_send_call_function_single_ipi(int cpu);
 
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-                               void *info, int wait);
+extern cpumask_t xen_cpu_initialized_map;
 
 
 /* Declare an asm function, along with symbols needed to make it