Diffstat (limited to 'arch/x86/xen')
 -rw-r--r--  arch/x86/xen/Kconfig       |  10
 -rw-r--r--  arch/x86/xen/Makefile      |   2
 -rw-r--r--  arch/x86/xen/enlighten.c   | 180
 -rw-r--r--  arch/x86/xen/manage.c      | 143
 -rw-r--r--  arch/x86/xen/mmu.c         | 272
 -rw-r--r--  arch/x86/xen/mmu.h         |  12
 -rw-r--r--  arch/x86/xen/multicalls.c  |  40
 -rw-r--r--  arch/x86/xen/multicalls.h  |  12
 -rw-r--r--  arch/x86/xen/setup.c       |  30
 -rw-r--r--  arch/x86/xen/smp.c         | 143
 -rw-r--r--  arch/x86/xen/suspend.c     |  45
 -rw-r--r--  arch/x86/xen/time.c        |  17
 -rw-r--r--  arch/x86/xen/xen-head.S    |   5
 -rw-r--r--  arch/x86/xen/xen-ops.h     |  20
 14 files changed, 628 insertions, 303 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 6c388e593bc8..c2cc99580871 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -12,3 +12,13 @@ config XEN
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
 	  Xen hypervisor.
+
+config XEN_MAX_DOMAIN_MEMORY
+	int "Maximum allowed size of a domain in gigabytes"
+	default 8
+	depends on XEN
+	help
+	  The pseudo-physical to machine address array is sized
+	  according to the maximum possible memory size of a Xen
+	  domain.  This array uses 1 page per gigabyte, so there's no
+	  need to be too stingy here.
\ No newline at end of file
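Note (editorial, not part of the commit): the help text's sizing claim can be sanity-checked against the P2M_ENTRIES_PER_PAGE/TOP_ENTRIES constants this series adds to mmu.c below. A standalone back-of-envelope sketch, assuming the i386 target of this series (4-byte unsigned long, 4 KiB pages — both assumptions):

#include <stdio.h>

#define PAGE_SIZE            4096UL
#define P2M_ENTRY_SIZE       4UL    /* sizeof(unsigned long) on i386 */
#define PAGES_PER_GB         (1024UL * 1024 * 1024 / PAGE_SIZE)
#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / P2M_ENTRY_SIZE)

int main(void)
{
        unsigned long gb;

        for (gb = 1; gb <= 8; gb <<= 1) {
                unsigned long pages = gb * PAGES_PER_GB;
                unsigned long top = pages / P2M_ENTRIES_PER_PAGE;

                /* static cost of the p2m_top + p2m_top_mfn arrays */
                printf("%lu GB: %lu top entries, %lu KiB static\n",
                       gb, top, 2 * top * P2M_ENTRY_SIZE / 1024);
        }
        return 0;
}

The leaf p2m pages themselves come from the domain builder's mfn_list, so only the top-level arrays are statically sized by this option.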
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3d8df981d5fd..2ba2d1649131 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
-			time.o manage.o xen-asm.o grant-table.o
+			time.o xen-asm.o grant-table.o suspend.o
 
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09c1c69c37a..bb508456ef52 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
+#include <asm/pgalloc.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -75,13 +76,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
 
-static /* __initdata */ struct shared_info dummy_shared_info;
+struct shared_info xen_dummy_shared_info;
 
 /*
  * Point at some empty memory to start with.  We map the real shared_info
  * page as soon as fixmap is up and running.
  */
-struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
+struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
 
 /*
  * Flag to determine whether vcpu info placement is available on all
@@ -98,13 +99,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  */
 static int have_vcpu_info_placement = 1;
 
-static void __init xen_vcpu_setup(int cpu)
+static void xen_vcpu_setup(int cpu)
 {
        struct vcpu_register_vcpu_info info;
        int err;
        struct vcpu_info *vcpup;
 
-       BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info);
+       BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
        per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 
        if (!have_vcpu_info_placement)
@@ -136,11 +137,41 @@ static void xen_vcpu_setup(int cpu)
        }
 }
 
+/*
+ * On restore, set the vcpu placement up again.
+ * If it fails, then we're in a bad state, since
+ * we can't back out from using it...
+ */
+void xen_vcpu_restore(void)
+{
+       if (have_vcpu_info_placement) {
+               int cpu;
+
+               for_each_online_cpu(cpu) {
+                       bool other_cpu = (cpu != smp_processor_id());
+
+                       if (other_cpu &&
+                           HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+                               BUG();
+
+                       xen_vcpu_setup(cpu);
+
+                       if (other_cpu &&
+                           HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+                               BUG();
+               }
+
+               BUG_ON(!have_vcpu_info_placement);
+       }
+}
+
 static void __init xen_banner(void)
 {
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
-       printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
+       printk(KERN_INFO "Hypervisor signature: %s%s\n",
+              xen_start_info->magic,
+              xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
@@ -235,13 +266,13 @@ static void xen_irq_enable(void)
 {
        struct vcpu_info *vcpu;
 
-       /* There's a one instruction preempt window here.  We need to
-          make sure we're don't switch CPUs between getting the vcpu
-          pointer and updating the mask. */
-       preempt_disable();
+       /* We don't need to worry about being preempted here, since
+          either a) interrupts are disabled, so no preemption, or b)
+          the caller is confused and is trying to re-enable interrupts
+          on an indeterminate processor. */
+
        vcpu = x86_read_percpu(xen_vcpu);
        vcpu->evtchn_upcall_mask = 0;
-       preempt_enable_no_resched();
 
        /* Doesn't matter if we get preempted here, because any
           pending event will get dealt with anyway. */
@@ -254,7 +285,7 @@ static void xen_irq_enable(void)
 static void xen_safe_halt(void)
 {
        /* Blocking includes an implicit local_irq_enable(). */
-       if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
+       if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
                BUG();
 }
 
@@ -607,6 +638,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
        xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
+static void xen_clts(void)
+{
+       struct multicall_space mcs;
+
+       mcs = xen_mc_entry(0);
+
+       MULTI_fpu_taskswitch(mcs.mc, 0);
+
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+static void xen_write_cr0(unsigned long cr0)
+{
+       struct multicall_space mcs;
+
+       /* Only pay attention to cr0.TS; everything else is
+          ignored. */
+       mcs = xen_mc_entry(0);
+
+       MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
+
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
 static void xen_write_cr2(unsigned long cr2)
 {
        x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@ -624,8 +679,10 @@ static unsigned long xen_read_cr2_direct(void)
 
 static void xen_write_cr4(unsigned long cr4)
 {
-       /* Just ignore cr4 changes; Xen doesn't allow us to do
-          anything anyway. */
+       cr4 &= ~X86_CR4_PGE;
+       cr4 &= ~X86_CR4_PSE;
+
+       native_write_cr4(cr4);
 }
 
 static unsigned long xen_read_cr3(void)
@@ -831,7 +888,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
                          PFN_DOWN(__pa(xen_start_info->pt_base)));
 }
 
-static __init void setup_shared_info(void)
+void xen_setup_shared_info(void)
 {
        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
@@ -854,6 +911,8 @@ static __init void setup_shared_info(void)
        /* In UP this is as good a place as any to set up shared info */
        xen_setup_vcpu_info_placement();
 #endif
+
+       xen_setup_mfn_list_list();
 }
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -866,15 +925,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
        pv_mmu_ops.release_pmd = xen_release_pmd;
        pv_mmu_ops.set_pte = xen_set_pte;
 
-       setup_shared_info();
+       xen_setup_shared_info();
 
        /* Actually pin the pagetable down, but we can't set PG_pinned
           yet because the page structures don't exist yet. */
        pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
 }
 
+static __init void xen_post_allocator_init(void)
+{
+       pv_mmu_ops.set_pmd = xen_set_pmd;
+       pv_mmu_ops.set_pud = xen_set_pud;
+
+       xen_mark_init_mm_pinned();
+}
+
 /* This is called once we have the cpu_possible_map */
-void __init xen_setup_vcpu_info_placement(void)
+void xen_setup_vcpu_info_placement(void)
 {
        int cpu;
 
@@ -947,6 +1014,33 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
        return ret;
 }
 
+static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
+{
+       pte_t pte;
+
+       phys >>= PAGE_SHIFT;
+
+       switch (idx) {
+       case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
+#ifdef CONFIG_X86_F00F_BUG
+       case FIX_F00F_IDT:
+#endif
+       case FIX_WP_TEST:
+       case FIX_VDSO:
+#ifdef CONFIG_X86_LOCAL_APIC
+       case FIX_APIC_BASE:     /* maps dummy local APIC */
+#endif
+               pte = pfn_pte(phys, prot);
+               break;
+
+       default:
+               pte = mfn_pte(phys, prot);
+               break;
+       }
+
+       __native_set_fixmap(idx, pte);
+}
+
 static const struct pv_info xen_info __initdata = {
        .paravirt_enabled = 1,
        .shared_kernel_pmd = 0,
@@ -960,7 +1054,7 @@ static const struct pv_init_ops xen_init_ops __initdata = {
        .banner = xen_banner,
        .memory_setup = xen_memory_setup,
        .arch_setup = xen_arch_setup,
-       .post_allocator_init = xen_mark_init_mm_pinned,
+       .post_allocator_init = xen_post_allocator_init,
 };
 
 static const struct pv_time_ops xen_time_ops __initdata = {
@@ -968,7 +1062,7 @@ static const struct pv_time_ops xen_time_ops __initdata = {
 
        .set_wallclock = xen_set_wallclock,
        .get_wallclock = xen_get_wallclock,
-       .get_cpu_khz = xen_cpu_khz,
+       .get_tsc_khz = xen_tsc_khz,
        .sched_clock = xen_sched_clock,
 };
 
@@ -978,10 +1072,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        .set_debugreg = xen_set_debugreg,
        .get_debugreg = xen_get_debugreg,
 
-       .clts = native_clts,
+       .clts = xen_clts,
 
        .read_cr0 = native_read_cr0,
-       .write_cr0 = native_write_cr0,
+       .write_cr0 = xen_write_cr0,
 
        .read_cr4 = native_read_cr4,
        .read_cr4_safe = native_read_cr4_safe,
@@ -995,7 +1089,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        .read_pmc = native_read_pmc,
 
        .iret = xen_iret,
-       .irq_enable_syscall_ret = xen_sysexit,
+       .irq_enable_sysexit = xen_sysexit,
 
        .load_tr_desc = paravirt_nop,
        .set_ldt = xen_set_ldt,
@@ -1029,6 +1123,9 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
        .irq_enable = xen_irq_enable,
        .safe_halt = xen_safe_halt,
        .halt = xen_halt,
+#ifdef CONFIG_X86_64
+       .adjust_exception_frame = paravirt_nop,
+#endif
 };
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
@@ -1060,6 +1157,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
        .pte_update = paravirt_nop,
        .pte_update_defer = paravirt_nop,
 
+       .pgd_alloc = __paravirt_pgd_alloc,
+       .pgd_free = paravirt_nop,
+
        .alloc_pte = xen_alloc_pte_init,
        .release_pte = xen_release_pte_init,
        .alloc_pmd = xen_alloc_pte_init,
@@ -1072,9 +1172,13 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
        .set_pte = NULL,        /* see xen_pagetable_setup_* */
        .set_pte_at = xen_set_pte_at,
-       .set_pmd = xen_set_pmd,
+       .set_pmd = xen_set_pmd_hyper,
+
+       .ptep_modify_prot_start = __ptep_modify_prot_start,
+       .ptep_modify_prot_commit = __ptep_modify_prot_commit,
 
        .pte_val = xen_pte_val,
+       .pte_flags = native_pte_val,
        .pgd_val = xen_pgd_val,
 
        .make_pte = xen_make_pte,
@@ -1082,7 +1186,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
        .set_pte_atomic = xen_set_pte_atomic,
        .set_pte_present = xen_set_pte_at,
-       .set_pud = xen_set_pud,
+       .set_pud = xen_set_pud_hyper,
        .pte_clear = xen_pte_clear,
        .pmd_clear = xen_pmd_clear,
 
@@ -1097,6 +1201,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
                .enter = paravirt_enter_lazy_mmu,
                .leave = xen_leave_lazy,
        },
+
+       .set_fixmap = xen_set_fixmap,
 };
 
 #ifdef CONFIG_SMP
@@ -1108,17 +1214,21 @@ static const struct smp_ops xen_smp_ops __initdata = {
 
        .smp_send_stop = xen_smp_send_stop,
        .smp_send_reschedule = xen_smp_send_reschedule,
-       .smp_call_function_mask = xen_smp_call_function_mask,
+
+       .send_call_func_ipi = xen_smp_send_call_function_ipi,
+       .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
 };
 #endif /* CONFIG_SMP */
 
 static void xen_reboot(int reason)
 {
+       struct sched_shutdown r = { .reason = reason };
+
 #ifdef CONFIG_SMP
        smp_send_stop();
 #endif
 
-       if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason))
+       if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
                BUG();
 }
 
@@ -1173,6 +1283,8 @@ asmlinkage void __init xen_start_kernel(void)
 
        BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
 
+       xen_setup_features();
+
        /* Install Xen paravirt ops */
        pv_info = xen_info;
        pv_init_ops = xen_init_ops;
@@ -1182,21 +1294,26 @@ asmlinkage void __init xen_start_kernel(void)
        pv_apic_ops = xen_apic_ops;
        pv_mmu_ops = xen_mmu_ops;
 
+       if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
+               pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
+               pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
+       }
+
        machine_ops = xen_machine_ops;
 
 #ifdef CONFIG_SMP
        smp_ops = xen_smp_ops;
 #endif
 
-       xen_setup_features();
-
        /* Get mfn list */
        if (!xen_feature(XENFEAT_auto_translated_physmap))
-               phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
+               xen_build_dynamic_phys_to_machine();
 
        pgd = (pgd_t *)xen_start_info->pt_base;
 
+       init_pg_tables_start = __pa(pgd);
        init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+       max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
 
        init_mm.pgd = pgd;      /* use the Xen pagetables to start */
 
@@ -1232,9 +1349,12 @@ asmlinkage void __init xen_start_kernel(void)
                ? __pa(xen_start_info->mod_start) : 0;
        boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 
-       if (!is_initial_xendomain())
+       if (!is_initial_xendomain()) {
+               add_preferred_console("xenboot", 0, NULL);
+               add_preferred_console("tty", 0, NULL);
                add_preferred_console("hvc", 0, NULL);
+       }
 
        /* Start the world */
-       start_kernel();
+       i386_start_kernel();
 }
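Note (editorial, not part of the commit): the new .ptep_modify_prot_start/.ptep_modify_prot_commit hooks wrap a read-modify-write of a pte. The generic fallbacks installed here are, paraphrased from the x86 pgtable headers of the same series (check the tree — shown only for context), roughly:

static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
                                             unsigned long addr, pte_t *ptep)
{
        /* atomically clear the pte so hardware can't set the accessed/
           dirty bits behind our back mid-transaction */
        return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
                                             unsigned long addr,
                                             pte_t *ptep, pte_t pte)
{
        set_pte_at(mm, addr, ptep, pte);
}

When Xen advertises XENFEAT_mmu_pt_update_preserve_ad, these are swapped for the xen_ptep_modify_prot_* versions in mmu.c below, which skip the clear entirely and let the hypervisor preserve the A/D bits in one batched mmu_update.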
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c
deleted file mode 100644
index aa7af9e6abc0..000000000000
--- a/arch/x86/xen/manage.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Handle extern requests for shutdown, reboot and sysrq
- */
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/reboot.h>
-#include <linux/sysrq.h>
-
-#include <xen/xenbus.h>
-
-#define SHUTDOWN_INVALID  -1
-#define SHUTDOWN_POWEROFF  0
-#define SHUTDOWN_SUSPEND   2
-/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
- * report a crash, not be instructed to crash!
- * HALT is the same as POWEROFF, as far as we're concerned.  The tools use
- * the distinction when we return the reason code to them.
- */
-#define SHUTDOWN_HALT      4
-
-/* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
-
-static void shutdown_handler(struct xenbus_watch *watch,
-                             const char **vec, unsigned int len)
-{
-       char *str;
-       struct xenbus_transaction xbt;
-       int err;
-
-       if (shutting_down != SHUTDOWN_INVALID)
-               return;
-
- again:
-       err = xenbus_transaction_start(&xbt);
-       if (err)
-               return;
-
-       str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
-       /* Ignore read errors and empty reads. */
-       if (XENBUS_IS_ERR_READ(str)) {
-               xenbus_transaction_end(xbt, 1);
-               return;
-       }
-
-       xenbus_write(xbt, "control", "shutdown", "");
-
-       err = xenbus_transaction_end(xbt, 0);
-       if (err == -EAGAIN) {
-               kfree(str);
-               goto again;
-       }
-
-       if (strcmp(str, "poweroff") == 0 ||
-           strcmp(str, "halt") == 0)
-               orderly_poweroff(false);
-       else if (strcmp(str, "reboot") == 0)
-               ctrl_alt_del();
-       else {
-               printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
-               shutting_down = SHUTDOWN_INVALID;
-       }
-
-       kfree(str);
-}
-
-static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
-                          unsigned int len)
-{
-       char sysrq_key = '\0';
-       struct xenbus_transaction xbt;
-       int err;
-
- again:
-       err = xenbus_transaction_start(&xbt);
-       if (err)
-               return;
-       if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
-               printk(KERN_ERR "Unable to read sysrq code in "
-                      "control/sysrq\n");
-               xenbus_transaction_end(xbt, 1);
-               return;
-       }
-
-       if (sysrq_key != '\0')
-               xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
-
-       err = xenbus_transaction_end(xbt, 0);
-       if (err == -EAGAIN)
-               goto again;
-
-       if (sysrq_key != '\0')
-               handle_sysrq(sysrq_key, NULL);
-}
-
-static struct xenbus_watch shutdown_watch = {
-       .node = "control/shutdown",
-       .callback = shutdown_handler
-};
-
-static struct xenbus_watch sysrq_watch = {
-       .node = "control/sysrq",
-       .callback = sysrq_handler
-};
-
-static int setup_shutdown_watcher(void)
-{
-       int err;
-
-       err = register_xenbus_watch(&shutdown_watch);
-       if (err) {
-               printk(KERN_ERR "Failed to set shutdown watcher\n");
-               return err;
-       }
-
-       err = register_xenbus_watch(&sysrq_watch);
-       if (err) {
-               printk(KERN_ERR "Failed to set sysrq watcher\n");
-               return err;
-       }
-
-       return 0;
-}
-
-static int shutdown_event(struct notifier_block *notifier,
-                          unsigned long event,
-                          void *data)
-{
-       setup_shutdown_watcher();
-       return NOTIFY_DONE;
-}
-
-static int __init setup_shutdown_event(void)
-{
-       static struct notifier_block xenstore_notifier = {
-               .notifier_call = shutdown_event
-       };
-       register_xenstore_notifier(&xenstore_notifier);
-
-       return 0;
-}
-
-subsys_initcall(setup_shutdown_event);
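Note (editorial, not part of the commit): both deleted handlers follow the same xenbus transaction shape — start, read, clear, commit, and retry on -EAGAIN so a concurrent toolstack write restarts the transaction. Condensed to its skeleton (hypothetical helper name, error handling trimmed), using only calls that appear in the deleted file:

static char *read_and_clear(const char *dir, const char *node)
{
        struct xenbus_transaction xbt;
        char *val;
        int err;

 again:
        if (xenbus_transaction_start(&xbt))
                return NULL;

        val = (char *)xenbus_read(xbt, dir, node, NULL);
        if (XENBUS_IS_ERR_READ(val)) {
                xenbus_transaction_end(xbt, 1);   /* abort */
                return NULL;
        }

        xenbus_write(xbt, dir, node, "");         /* consume the request */

        err = xenbus_transaction_end(xbt, 0);     /* commit */
        if (err == -EAGAIN) {                     /* raced with a writer */
                kfree(val);
                goto again;
        }

        return val;
}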
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 4e527e7893a8..ff0aa74afaa1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,6 +56,131 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+#define P2M_ENTRIES_PER_PAGE   (PAGE_SIZE / sizeof(unsigned long))
+#define TOP_ENTRIES            (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
+
+/* Placeholder for holes in the address space */
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
+       __attribute__((section(".data.page_aligned"))) =
+               { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+
+/* Array of pointers to pages containing p2m entries */
+static unsigned long *p2m_top[TOP_ENTRIES]
+       __attribute__((section(".data.page_aligned"))) =
+               { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+
+/* Arrays of p2m arrays expressed in mfns used for save/restore */
+static unsigned long p2m_top_mfn[TOP_ENTRIES]
+       __attribute__((section(".bss.page_aligned")));
+
+static unsigned long p2m_top_mfn_list[
+                       PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
+       __attribute__((section(".bss.page_aligned")));
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+       BUG_ON(pfn >= MAX_DOMAIN_PAGES);
+       return pfn / P2M_ENTRIES_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+       return pfn % P2M_ENTRIES_PER_PAGE;
+}
+
+/* Build the parallel p2m_top_mfn structures */
+void xen_setup_mfn_list_list(void)
+{
+       unsigned pfn, idx;
+
+       for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+               unsigned topidx = p2m_top_index(pfn);
+
+               p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+       }
+
+       for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+               unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
+               p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+       }
+
+       BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+       HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+               virt_to_mfn(p2m_top_mfn_list);
+       HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages */
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+       unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+       unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+       unsigned pfn;
+
+       for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+               unsigned topidx = p2m_top_index(pfn);
+
+               p2m_top[topidx] = &mfn_list[pfn];
+       }
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+       unsigned topidx, idx;
+
+       if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+               return INVALID_P2M_ENTRY;
+
+       topidx = p2m_top_index(pfn);
+       idx = p2m_index(pfn);
+       return p2m_top[topidx][idx];
+}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
+
+static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+{
+       unsigned long *p;
+       unsigned i;
+
+       p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+       BUG_ON(p == NULL);
+
+       for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+               p[i] = INVALID_P2M_ENTRY;
+
+       if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
+               free_page((unsigned long)p);
+       else
+               *mfnp = virt_to_mfn(p);
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+       unsigned topidx, idx;
+
+       if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+               BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+               return;
+       }
+
+       if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+               BUG_ON(mfn != INVALID_P2M_ENTRY);
+               return;
+       }
+
+       topidx = p2m_top_index(pfn);
+       if (p2m_top[topidx] == p2m_missing) {
+               /* no need to allocate a page to store an invalid entry */
+               if (mfn == INVALID_P2M_ENTRY)
+                       return;
+               alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+       }
+
+       idx = p2m_index(pfn);
+       p2m_top[topidx][idx] = mfn;
+}
+
 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 {
        unsigned int level;
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static bool page_pinned(void *ptr)
+{
+       struct page *page = virt_to_page(ptr);
+
+       return PagePinned(page);
+}
+
+static void extend_mmu_update(const struct mmu_update *update)
 {
        struct multicall_space mcs;
        struct mmu_update *u;
 
-       preempt_disable();
+       mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
+
+       if (mcs.mc != NULL)
+               mcs.mc->args[1]++;
+       else {
+               mcs = __xen_mc_entry(sizeof(*u));
+               MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+       }
 
-       mcs = xen_mc_entry(sizeof(*u));
        u = mcs.args;
-       u->ptr = virt_to_machine(ptr).maddr;
-       u->val = pmd_val_ma(val);
-       MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+       *u = *update;
+}
+
+void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+{
+       struct mmu_update u;
+
+       preempt_disable();
+
+       xen_mc_batch();
+
+       u.ptr = virt_to_machine(ptr).maddr;
+       u.val = pmd_val_ma(val);
+       extend_mmu_update(&u);
 
        xen_mc_issue(PARAVIRT_LAZY_MMU);
 
        preempt_enable();
 }
 
+void xen_set_pmd(pmd_t *ptr, pmd_t val)
+{
+       /* If page is not pinned, we can just update the entry
+          directly */
+       if (!page_pinned(ptr)) {
+               *ptr = val;
+               return;
+       }
+
+       xen_set_pmd_hyper(ptr, val);
+}
+
 /*
  * Associate a virtual page frame with a given physical page frame
  * and protection flags for that frame.
@@ -179,6 +340,26 @@ out:
        preempt_enable();
 }
 
+pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+       /* Just return the pte as-is.  We preserve the bits on commit */
+       return *ptep;
+}
+
+void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+                                pte_t *ptep, pte_t pte)
+{
+       struct mmu_update u;
+
+       xen_mc_batch();
+
+       u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+       u.val = pte_val_ma(pte);
+       extend_mmu_update(&u);
+
+       xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
 /* Assume pteval_t is equivalent to all the other *val_t types. */
 static pteval_t pte_mfn_to_pfn(pteval_t val)
 {
@@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd)
        return pte_mfn_to_pfn(pmd.pmd);
 }
 
-void xen_set_pud(pud_t *ptr, pud_t val)
+void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 {
-       struct multicall_space mcs;
-       struct mmu_update *u;
+       struct mmu_update u;
 
        preempt_disable();
 
-       mcs = xen_mc_entry(sizeof(*u));
-       u = mcs.args;
-       u->ptr = virt_to_machine(ptr).maddr;
-       u->val = pud_val_ma(val);
-       MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+       xen_mc_batch();
+
+       u.ptr = virt_to_machine(ptr).maddr;
+       u.val = pud_val_ma(val);
+       extend_mmu_update(&u);
 
        xen_mc_issue(PARAVIRT_LAZY_MMU);
 
        preempt_enable();
 }
 
+void xen_set_pud(pud_t *ptr, pud_t val)
+{
+       /* If page is not pinned, we can just update the entry
+          directly */
+       if (!page_pinned(ptr)) {
+               *ptr = val;
+               return;
+       }
+
+       xen_set_pud_hyper(ptr, val);
+}
+
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
        ptep->pte_high = pte.pte_high;
@@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 void xen_pmd_clear(pmd_t *pmdp)
 {
-       xen_set_pmd(pmdp, __pmd(0));
+       set_pmd(pmdp, __pmd(0));
 }
 
 pmd_t xen_make_pmd(pmdval_t pmd)
@@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd)
        xen_mc_issue(0);
 }
 
+/*
+ * On save, we need to pin all pagetables to make sure they get their
+ * mfns turned into pfns.  Search the list for any unpinned pgds and pin
+ * them (unpinned pgds are not currently in use, probably because the
+ * process is under construction or destruction).
+ */
+void xen_mm_pin_all(void)
+{
+       unsigned long flags;
+       struct page *page;
+
+       spin_lock_irqsave(&pgd_lock, flags);
+
+       list_for_each_entry(page, &pgd_list, lru) {
+               if (!PagePinned(page)) {
+                       xen_pgd_pin((pgd_t *)page_address(page));
+                       SetPageSavePinned(page);
+               }
+       }
+
+       spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 /* The init_mm pagetable is really pinned as soon as its created, but
    that's before we have page structures to store the bits.  So do all
    the book-keeping now. */
@@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
        xen_mc_issue(0);
 }
 
+/*
+ * On resume, undo any pinning done at save, so that the rest of the
+ * kernel doesn't see any unexpected pinned pagetables.
+ */
+void xen_mm_unpin_all(void)
+{
+       unsigned long flags;
+       struct page *page;
+
+       spin_lock_irqsave(&pgd_lock, flags);
+
+       list_for_each_entry(page, &pgd_list, lru) {
+               if (PageSavePinned(page)) {
+                       BUG_ON(!PagePinned(page));
+                       printk("unpinning pinned %p\n", page_address(page));
+                       xen_pgd_unpin((pgd_t *)page_address(page));
+                       ClearPageSavePinned(page);
+               }
+       }
+
+       spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
        spin_lock(&next->page_table_lock);
@@ -558,7 +796,7 @@ static void drop_mm_ref(struct mm_struct *mm)
        }
 
        if (!cpus_empty(mask))
-               xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+               smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
 }
 #else
 static void drop_mm_ref(struct mm_struct *mm)
@@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm)
        spin_lock(&mm->page_table_lock);
 
        /* pgd may not be pinned in the error exit path of execve */
-       if (PagePinned(virt_to_page(mm->pgd)))
+       if (page_pinned(mm->pgd))
                xen_pgd_unpin(mm->pgd);
 
        spin_unlock(&mm->page_table_lock);
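Note (editorial, not part of the commit): the p2m code above is a two-level table where every pfn indexes p2m_top by pfn/1024 and the leaf page by pfn%1024 (on i386, with 4-byte entries), and every hole shares the single p2m_missing leaf so lookups never need NULL checks. A standalone userspace model of the same structure, with kernel specifics stripped out (malloc standing in for __get_free_page, no cmpxchg race handling):

#include <stdio.h>
#include <stdlib.h>

#define ENTRIES  1024UL          /* P2M_ENTRIES_PER_PAGE on i386 */
#define TOP      2048UL          /* TOP_ENTRIES for the 8 GB default */
#define INVALID  (~0UL)          /* INVALID_P2M_ENTRY */

static unsigned long missing[ENTRIES];  /* shared "hole" leaf */
static unsigned long *top[TOP];

static unsigned long lookup(unsigned long pfn)
{
        /* two-level walk; holes hit `missing`, so no NULL checks */
        return top[pfn / ENTRIES][pfn % ENTRIES];
}

static void set(unsigned long pfn, unsigned long mfn)
{
        unsigned long **slot = &top[pfn / ENTRIES];

        if (*slot == missing) {          /* populate a leaf on first write */
                *slot = malloc(ENTRIES * sizeof(unsigned long));
                for (unsigned long i = 0; i < ENTRIES; i++)
                        (*slot)[i] = INVALID;
        }
        (*slot)[pfn % ENTRIES] = mfn;
}

int main(void)
{
        for (unsigned long i = 0; i < ENTRIES; i++)
                missing[i] = INVALID;
        for (unsigned long i = 0; i < TOP; i++)
                top[i] = missing;

        set(0x1234, 0xabcd);
        printf("p2m(0x1234) = %#lx\n", lookup(0x1234));
        printf("p2m(0x9999) = %#lx (hole)\n", lookup(0x9999));
        return 0;
}

The kernel version additionally keeps the parallel p2m_top_mfn arrays so the hypervisor/tools can walk the same structure by machine frame number during save/restore.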
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 5fe961caffd4..297bf9f5b8bc 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -25,10 +25,6 @@ enum pt_level {
 
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-void xen_set_pte(pte_t *ptep, pte_t pteval);
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
-                   pte_t *ptep, pte_t pteval);
-void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
@@ -45,11 +41,19 @@ pte_t xen_make_pte(pteval_t);
 pmd_t xen_make_pmd(pmdval_t);
 pgd_t xen_make_pgd(pgdval_t);
 
+void xen_set_pte(pte_t *ptep, pte_t pteval);
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, pte_t pteval);
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
+void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 void xen_set_pud(pud_t *ptr, pud_t val);
+void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
+void xen_set_pud_hyper(pud_t *ptr, pud_t val);
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void xen_pmd_clear(pmd_t *pmdp);
 
+pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+                                pte_t *ptep, pte_t pte);
+
 #endif /* _XEN_MMU_H */
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 5791eb2e3750..3c63c4da7ed1 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -29,14 +29,14 @@
 #define MC_DEBUG	1
 
 #define MC_BATCH	32
-#define MC_ARGS		(MC_BATCH * 16 / sizeof(u64))
+#define MC_ARGS		(MC_BATCH * 16)
 
 struct mc_buffer {
 	struct multicall_entry entries[MC_BATCH];
 #if MC_DEBUG
 	struct multicall_entry debug[MC_BATCH];
 #endif
-	u64 args[MC_ARGS];
+	unsigned char args[MC_ARGS];
 	struct callback {
 		void (*fn)(void *);
 		void *data;
@@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args)
 {
        struct mc_buffer *b = &__get_cpu_var(mc_buffer);
        struct multicall_space ret;
-       unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
+       unsigned argidx = roundup(b->argidx, sizeof(u64));
 
        BUG_ON(preemptible());
-       BUG_ON(argspace > MC_ARGS);
+       BUG_ON(b->argidx > MC_ARGS);
 
        if (b->mcidx == MC_BATCH ||
-           (b->argidx + argspace) > MC_ARGS)
+           (argidx + args) > MC_ARGS) {
                xen_mc_flush();
+               argidx = roundup(b->argidx, sizeof(u64));
+       }
 
        ret.mc = &b->entries[b->mcidx];
        b->mcidx++;
+       ret.args = &b->args[argidx];
+       b->argidx = argidx + args;
+
+       BUG_ON(b->argidx > MC_ARGS);
+       return ret;
+}
+
+struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
+{
+       struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+       struct multicall_space ret = { NULL, NULL };
+
+       BUG_ON(preemptible());
+       BUG_ON(b->argidx > MC_ARGS);
+
+       if (b->mcidx == 0)
+               return ret;
+
+       if (b->entries[b->mcidx - 1].op != op)
+               return ret;
+
+       if ((b->argidx + size) > MC_ARGS)
+               return ret;
+
+       ret.mc = &b->entries[b->mcidx - 1];
        ret.args = &b->args[b->argidx];
-       b->argidx += argspace;
+       b->argidx += size;
 
+       BUG_ON(b->argidx > MC_ARGS);
        return ret;
 }
 
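Note (editorial, not part of the commit): the switch from u64-sized slots to a byte array relies on roundup() (as defined in linux/kernel.h) to restore 8-byte alignment only when a fresh entry is started; extensions pack on unaligned, which is what lets extend_mmu_update() append another struct mmu_update directly behind the last one. A quick standalone model of the index math:

#include <stdio.h>

#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

int main(void)
{
        size_t argidx = 0;

        /* new entry with a 10-byte payload: starts u64-aligned */
        size_t start = roundup(argidx, sizeof(unsigned long long));
        argidx = start + 10;
        printf("entry at %zu, argidx now %zu\n", start, argidx);

        /* extend the same entry by 10 bytes: packs unaligned */
        argidx += 10;
        printf("after extend: argidx %zu\n", argidx);

        /* the next fresh entry re-aligns to an 8-byte boundary */
        printf("next entry at %zu\n", roundup(argidx, 8UL));
        return 0;
}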
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 8bae996d99a3..858938241616 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode)
 /* Set up a callback to be called when the current batch is flushed */
 void xen_mc_callback(void (*fn)(void *), void *data);
 
+/*
+ * Try to extend the arguments of the previous multicall command.  The
+ * previous command's op must match.  If it does, then it attempts to
+ * extend the argument space allocated to the multicall entry by
+ * arg_size bytes.
+ *
+ * The returned multicall_space will return with mc pointing to the
+ * command on success, or NULL on failure, and args pointing to the
+ * newly allocated space.
+ */
+struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size);
+
 #endif /* _XEN_MULTICALLS_H */
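Note (editorial): the mmu.c hunk above contains the one in-tree user of this interface. Its try-extend/else-open-fresh shape, restated as a minimal template (name changed; otherwise the same code as extend_mmu_update):

static void queue_mmu_update(const struct mmu_update *update)
{
        struct multicall_space mcs;
        struct mmu_update *u;

        /* try to piggyback on a pending mmu_update multicall... */
        mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
        if (mcs.mc != NULL)
                mcs.mc->args[1]++;       /* one more update in the batch */
        else {
                /* ...otherwise open a fresh multicall entry */
                mcs = __xen_mc_entry(sizeof(*u));
                MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
        }

        u = mcs.args;
        *u = *update;
}

The payoff is that N consecutive pte updates collapse into a single mmu_update hypercall with an N-element argument array instead of N multicall entries.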
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 82517e4a752a..e0a39595bde3 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -13,9 +13,11 @@
 #include <asm/vdso.h>
 #include <asm/e820.h>
 #include <asm/setup.h>
+#include <asm/acpi.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/page.h>
 #include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
@@ -27,8 +29,6 @@
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
 
-unsigned long *phys_to_machine_mapping;
-EXPORT_SYMBOL(phys_to_machine_mapping);
 
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
@@ -38,9 +38,31 @@ char * __init xen_memory_setup(void)
 {
        unsigned long max_pfn = xen_start_info->nr_pages;
 
+       max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
+
        e820.nr_map = 0;
-       add_memory_region(0, LOWMEMSIZE(), E820_RAM);
-       add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
+
+       e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM);
+
+       /*
+        * Even though this is normal, usable memory under Xen, reserve
+        * ISA memory anyway because too many things think they can poke
+        * about in there.
+        */
+       e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
+                       E820_RESERVED);
+
+       /*
+        * Reserve Xen bits:
+        *  - mfn_list
+        *  - xen_start_info
+        * See comment above "struct start_info" in <xen/interface/xen.h>
+        */
+       e820_add_region(__pa(xen_start_info->mfn_list),
+                       xen_start_info->pt_base - xen_start_info->mfn_list,
+                       E820_RESERVED);
+
+       sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
        return "Xen";
 }
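Note (editorial, not part of the commit): the regions are added overlapping on purpose and sanitize_e820_map() resolves the overlaps, with the stricter type winning where ranges collide. A worked sketch with made-up numbers — a hypothetical 128 MB domU whose mfn_list sits at 1 MB and runs for 512 KB:

/*
 * Input regions (overlapping, hypothetical addresses):
 *
 *   e820_add_region(0x00000000, 0x08000000, E820_RAM);       whole domain
 *   e820_add_region(0x000a0000, 0x00060000, E820_RESERVED);  ISA hole
 *   e820_add_region(0x00100000, 0x00080000, E820_RESERVED);  Xen bits
 *
 * After sanitize_e820_map() splits and merges:
 *
 *   0x00000000 - 0x000a0000  RAM
 *   0x000a0000 - 0x00180000  RESERVED  (ISA hole + Xen bits, merged)
 *   0x00180000 - 0x08000000  RAM
 */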
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 94e69000f982..233156f39b7f 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -35,28 +35,15 @@ | |||
35 | #include "xen-ops.h" | 35 | #include "xen-ops.h" |
36 | #include "mmu.h" | 36 | #include "mmu.h" |
37 | 37 | ||
38 | static cpumask_t xen_cpu_initialized_map; | 38 | cpumask_t xen_cpu_initialized_map; |
39 | static DEFINE_PER_CPU(int, resched_irq) = -1; | ||
40 | static DEFINE_PER_CPU(int, callfunc_irq) = -1; | ||
41 | static DEFINE_PER_CPU(int, debug_irq) = -1; | ||
42 | |||
43 | /* | ||
44 | * Structure and data for smp_call_function(). This is designed to minimise | ||
45 | * static memory requirements. It also looks cleaner. | ||
46 | */ | ||
47 | static DEFINE_SPINLOCK(call_lock); | ||
48 | 39 | ||
49 | struct call_data_struct { | 40 | static DEFINE_PER_CPU(int, resched_irq); |
50 | void (*func) (void *info); | 41 | static DEFINE_PER_CPU(int, callfunc_irq); |
51 | void *info; | 42 | static DEFINE_PER_CPU(int, callfuncsingle_irq); |
52 | atomic_t started; | 43 | static DEFINE_PER_CPU(int, debug_irq) = -1; |
53 | atomic_t finished; | ||
54 | int wait; | ||
55 | }; | ||
56 | 44 | ||
57 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); | 45 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); |
58 | 46 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); | |
59 | static struct call_data_struct *call_data; | ||
60 | 47 | ||
61 | /* | 48 | /* |
62 | * Reschedule call back. Nothing to do, | 49 | * Reschedule call back. Nothing to do, |
@@ -65,6 +52,12 @@ static struct call_data_struct *call_data; | |||
65 | */ | 52 | */ |
66 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | 53 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) |
67 | { | 54 | { |
55 | #ifdef CONFIG_X86_32 | ||
56 | __get_cpu_var(irq_stat).irq_resched_count++; | ||
57 | #else | ||
58 | add_pda(irq_resched_count, 1); | ||
59 | #endif | ||
60 | |||
68 | return IRQ_HANDLED; | 61 | return IRQ_HANDLED; |
69 | } | 62 | } |
70 | 63 | ||
@@ -122,6 +115,17 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
122 | goto fail; | 115 | goto fail; |
123 | per_cpu(debug_irq, cpu) = rc; | 116 | per_cpu(debug_irq, cpu) = rc; |
124 | 117 | ||
118 | callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); | ||
119 | rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, | ||
120 | cpu, | ||
121 | xen_call_function_single_interrupt, | ||
122 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
123 | callfunc_name, | ||
124 | NULL); | ||
125 | if (rc < 0) | ||
126 | goto fail; | ||
127 | per_cpu(callfuncsingle_irq, cpu) = rc; | ||
128 | |||
125 | return 0; | 129 | return 0; |
126 | 130 | ||
127 | fail: | 131 | fail: |
@@ -131,6 +135,9 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
131 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | 135 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); |
132 | if (per_cpu(debug_irq, cpu) >= 0) | 136 | if (per_cpu(debug_irq, cpu) >= 0) |
133 | unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); | 137 | unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); |
138 | if (per_cpu(callfuncsingle_irq, cpu) >= 0) | ||
139 | unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); | ||
140 | |||
134 | return rc; | 141 | return rc; |
135 | } | 142 | } |
136 | 143 | ||
@@ -330,7 +337,7 @@ static void stop_self(void *v) | |||
330 | 337 | ||
331 | void xen_smp_send_stop(void) | 338 | void xen_smp_send_stop(void) |
332 | { | 339 | { |
333 | smp_call_function(stop_self, NULL, 0, 0); | 340 | smp_call_function(stop_self, NULL, 0); |
334 | } | 341 | } |
335 | 342 | ||
336 | void xen_smp_send_reschedule(int cpu) | 343 | void xen_smp_send_reschedule(int cpu) |
@@ -338,7 +345,6 @@ void xen_smp_send_reschedule(int cpu) | |||
338 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); | 345 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); |
339 | } | 346 | } |
340 | 347 | ||
341 | |||
342 | static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) | 348 | static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) |
343 | { | 349 | { |
344 | unsigned cpu; | 350 | unsigned cpu; |
@@ -349,83 +355,42 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) | |||
349 | xen_send_IPI_one(cpu, vector); | 355 | xen_send_IPI_one(cpu, vector); |
350 | } | 356 | } |
351 | 357 | ||
358 | void xen_smp_send_call_function_ipi(cpumask_t mask) | ||
359 | { | ||
360 | int cpu; | ||
361 | |||
362 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | ||
363 | |||
364 | /* Make sure other vcpus get a chance to run if they need to. */ | ||
365 | for_each_cpu_mask(cpu, mask) { | ||
366 | if (xen_vcpu_stolen(cpu)) { | ||
367 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | ||
368 | break; | ||
369 | } | ||
370 | } | ||
371 | } | ||
372 | |||
373 | void xen_smp_send_call_function_single_ipi(int cpu) | ||
374 | { | ||
375 | xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); | ||
376 | } | ||
377 | |||
352 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) | 378 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) |
353 | { | 379 | { |
354 | void (*func) (void *info) = call_data->func; | ||
355 | void *info = call_data->info; | ||
356 | int wait = call_data->wait; | ||
357 | |||
358 | /* | ||
359 | * Notify initiating CPU that I've grabbed the data and am | ||
360 | * about to execute the function | ||
361 | */ | ||
362 | mb(); | ||
363 | atomic_inc(&call_data->started); | ||
364 | /* | ||
365 | * At this point the info structure may be out of scope unless wait==1 | ||
366 | */ | ||
367 | irq_enter(); | 380 | irq_enter(); |
368 | (*func)(info); | 381 | generic_smp_call_function_interrupt(); |
369 | __get_cpu_var(irq_stat).irq_call_count++; | 382 | __get_cpu_var(irq_stat).irq_call_count++; |
370 | irq_exit(); | 383 | irq_exit(); |
371 | 384 | ||
372 | if (wait) { | ||
373 | mb(); /* commit everything before setting finished */ | ||
374 | atomic_inc(&call_data->finished); | ||
375 | } | ||
376 | |||
377 | return IRQ_HANDLED; | 385 | return IRQ_HANDLED; |
378 | } | 386 | } |
379 | 387 | ||
380 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), | 388 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) |
381 | void *info, int wait) | ||
382 | { | 389 | { |
383 | struct call_data_struct data; | 390 | irq_enter(); |
384 | int cpus, cpu; | 391 | generic_smp_call_function_single_interrupt(); |
385 | bool yield; | 392 | __get_cpu_var(irq_stat).irq_call_count++; |
386 | 393 | irq_exit(); | |
387 | /* Holding any lock stops cpus from going down. */ | ||
388 | spin_lock(&call_lock); | ||
389 | |||
390 | cpu_clear(smp_processor_id(), mask); | ||
391 | |||
392 | cpus = cpus_weight(mask); | ||
393 | if (!cpus) { | ||
394 | spin_unlock(&call_lock); | ||
395 | return 0; | ||
396 | } | ||
397 | |||
398 | /* Can deadlock when called with interrupts disabled */ | ||
399 | WARN_ON(irqs_disabled()); | ||
400 | |||
401 | data.func = func; | ||
402 | data.info = info; | ||
403 | atomic_set(&data.started, 0); | ||
404 | data.wait = wait; | ||
405 | if (wait) | ||
406 | atomic_set(&data.finished, 0); | ||
407 | |||
408 | call_data = &data; | ||
409 | mb(); /* write everything before IPI */ | ||
410 | |||
411 | /* Send a message to other CPUs and wait for them to respond */ | ||
412 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | ||
413 | |||
414 | /* Make sure other vcpus get a chance to run if they need to. */ | ||
415 | yield = false; | ||
416 | for_each_cpu_mask(cpu, mask) | ||
417 | if (xen_vcpu_stolen(cpu)) | ||
418 | yield = true; | ||
419 | |||
420 | if (yield) | ||
421 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | ||
422 | |||
423 | /* Wait for response */ | ||
424 | while (atomic_read(&data.started) != cpus || | ||
425 | (wait && atomic_read(&data.finished) != cpus)) | ||
426 | cpu_relax(); | ||
427 | |||
428 | spin_unlock(&call_lock); | ||
429 | 394 | ||
430 | return 0; | 395 | return IRQ_HANDLED; |
431 | } | 396 | } |
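
The smp.c hunk above is the heart of this change: Xen's private implementation of cross-CPU function calls (the call_data structure, call_lock, and the started/finished counters) is deleted in favour of the kernel's generic helpers in kernel/smp.c. Xen now only raises the IPIs, yielding to the hypervisor when a target vcpu's time has been stolen, and acknowledges them by calling generic_smp_call_function_interrupt() or generic_smp_call_function_single_interrupt(); queueing, memory barriers, and wait semantics all live in generic code. A sketch of how these hooks plug into the smp_ops table follows; the actual registration sits in a part of smp.c outside this hunk, so read the field names as indicative of the 2.6.26-era interface rather than as quoted from the patch:

	/* Sketch: wiring the Xen IPI hooks into the generic smp_ops
	 * table. Field names follow the 2.6.26-era struct smp_ops;
	 * the real registration is not shown in this hunk. */
	static const struct smp_ops xen_smp_ops __initdata = {
		.smp_send_stop		= xen_smp_send_stop,
		.smp_send_reschedule	= xen_smp_send_reschedule,

		/* Generic code calls these to raise the IPIs... */
		.send_call_func_ipi	= xen_smp_send_call_function_ipi,
		.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
		/* ...and the handlers above complete each call by
		 * invoking the generic interrupt helpers. */
	};
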
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 000000000000..251669a932d4 --- /dev/null +++ b/arch/x86/xen/suspend.c | |||
@@ -0,0 +1,45 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include <xen/interface/xen.h> | ||
4 | #include <xen/grant_table.h> | ||
5 | #include <xen/events.h> | ||
6 | |||
7 | #include <asm/xen/hypercall.h> | ||
8 | #include <asm/xen/page.h> | ||
9 | |||
10 | #include "xen-ops.h" | ||
11 | #include "mmu.h" | ||
12 | |||
13 | void xen_pre_suspend(void) | ||
14 | { | ||
15 | xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); | ||
16 | xen_start_info->console.domU.mfn = | ||
17 | mfn_to_pfn(xen_start_info->console.domU.mfn); | ||
18 | |||
19 | BUG_ON(!irqs_disabled()); | ||
20 | |||
21 | HYPERVISOR_shared_info = &xen_dummy_shared_info; | ||
22 | if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), | ||
23 | __pte_ma(0), 0)) | ||
24 | BUG(); | ||
25 | } | ||
26 | |||
27 | void xen_post_suspend(int suspend_cancelled) | ||
28 | { | ||
29 | xen_setup_shared_info(); | ||
30 | |||
31 | if (suspend_cancelled) { | ||
32 | xen_start_info->store_mfn = | ||
33 | pfn_to_mfn(xen_start_info->store_mfn); | ||
34 | xen_start_info->console.domU.mfn = | ||
35 | pfn_to_mfn(xen_start_info->console.domU.mfn); | ||
36 | } else { | ||
37 | #ifdef CONFIG_SMP | ||
38 | xen_cpu_initialized_map = cpu_online_map; | ||
39 | #endif | ||
40 | xen_vcpu_restore(); | ||
41 | xen_timer_resume(); | ||
42 | } | ||
43 | |||
44 | } | ||
45 | |||
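
suspend.c is new with this patch. The point of the pfn/mfn juggling: start_info references the xenstore and console rings by machine frame number, and MFNs are not preserved across save/restore, while pseudo-physical frame numbers are. So xen_pre_suspend() canonicalises the references to PFNs for the tools, and xen_post_suspend() either maps them to the new MFNs after a real restore, or, on a cancelled suspend, converts the original values back and skips the heavier vcpu and timer re-initialisation. A sketch of the call ordering this file assumes, loosely modelled on the suspend driver that lives outside arch/x86/xen; the function name and locals here are illustrative:

	/* Illustrative only: shows why xen_pre_suspend() must run with
	 * interrupts off and why xen_post_suspend() is told whether
	 * the suspend was cancelled. */
	static int xen_suspend_sketch(void *data)
	{
		int cancelled;

		BUG_ON(!irqs_disabled());

		xen_pre_suspend();	/* MFN -> PFN, unmap shared info */

		/* Returns 0 after a real save/restore, 1 if cancelled. */
		cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));

		xen_post_suspend(cancelled);	/* remap shared info; then
						 * PFN -> MFN on cancel, or
						 * vcpu/timer re-init after
						 * a restore */
		return cancelled;
	}
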
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 41e217503c96..685b77470fc3 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -197,8 +197,8 @@ unsigned long long xen_sched_clock(void) | |||
197 | } | 197 | } |
198 | 198 | ||
199 | 199 | ||
200 | /* Get the CPU speed from Xen */ | 200 | /* Get the TSC speed from Xen */ |
201 | unsigned long xen_cpu_khz(void) | 201 | unsigned long xen_tsc_khz(void) |
202 | { | 202 | { |
203 | u64 xen_khz = 1000000ULL << 32; | 203 | u64 xen_khz = 1000000ULL << 32; |
204 | const struct pvclock_vcpu_time_info *info = | 204 | const struct pvclock_vcpu_time_info *info = |
@@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void) | |||
459 | clockevents_register_device(&__get_cpu_var(xen_clock_events)); | 459 | clockevents_register_device(&__get_cpu_var(xen_clock_events)); |
460 | } | 460 | } |
461 | 461 | ||
462 | void xen_timer_resume(void) | ||
463 | { | ||
464 | int cpu; | ||
465 | |||
466 | if (xen_clockevent != &xen_vcpuop_clockevent) | ||
467 | return; | ||
468 | |||
469 | for_each_online_cpu(cpu) { | ||
470 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) | ||
471 | BUG(); | ||
472 | } | ||
473 | } | ||
474 | |||
462 | __init void xen_time_init(void) | 475 | __init void xen_time_init(void) |
463 | { | 476 | { |
464 | int cpu = smp_processor_id(); | 477 | int cpu = smp_processor_id(); |
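
xen_timer_resume() exists because a restored domain gets Xen's legacy 100Hz periodic tick turned back on for every vcpu; when the kernel is using the per-vcpu one-shot clockevent it has to shoot that tick down again on each online CPU, which is why the function is a no-op for the plain timer clockevent. The same hypercall is already issued once per vcpu when the clockevent is first switched to one-shot mode; a simplified sketch of that boot-time counterpart, condensed from the unchanged part of time.c with the other event modes omitted:

	/* Simplified: switching a vcpu's clockevent to one-shot mode
	 * stops Xen's periodic tick, which is exactly what
	 * xen_timer_resume() has to redo after a restore. */
	static void xen_vcpuop_set_mode_sketch(enum clock_event_mode mode,
					       struct clock_event_device *evt)
	{
		int cpu = smp_processor_id();

		if (mode == CLOCK_EVT_MODE_ONESHOT &&
		    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
	}
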
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 6ec3b4f7719b..7c0cf6320a0a 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <asm/boot.h> | 8 | #include <asm/boot.h> |
9 | #include <xen/interface/elfnote.h> | 9 | #include <xen/interface/elfnote.h> |
10 | #include <asm/xen/interface.h> | ||
10 | 11 | ||
11 | __INIT | 12 | __INIT |
12 | ENTRY(startup_xen) | 13 | ENTRY(startup_xen) |
@@ -32,5 +33,9 @@ ENTRY(hypercall_page) | |||
32 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 33 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") |
33 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 34 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
34 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 35 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
36 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, | ||
37 | .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) | ||
38 | ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) | ||
39 | ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) | ||
35 | 40 | ||
36 | #endif /*CONFIG_XEN */ | 41 | #endif /*CONFIG_XEN */ |
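
The three new ELF notes describe this kernel to the Xen tools: XEN_ELFNOTE_SUSPEND_CANCEL (value 1) advertises that the guest copes with HYPERVISOR_suspend returning without an intervening save/restore, which is what makes the suspend_cancelled path in xen_post_suspend() reachable; XEN_ELFNOTE_L1_MFN_VALID gives the domain builder a (mask, value) pair of PTE bits, here both _PAGE_PRESENT, identifying L1 entries whose frame field is a real MFN that must be canonicalised on save; XEN_ELFNOTE_HV_START_LOW reports the lowest hypervisor-reserved virtual address for 32-bit layouts. In C terms the L1_MFN_VALID predicate amounts to the following illustrative helper, not code from the patch:

	/* What the (mask, value) pair in XEN_ELFNOTE_L1_MFN_VALID
	 * means: an L1 PTE holds a translatable MFN iff
	 * (pte & mask) == value. With mask == value == _PAGE_PRESENT,
	 * that is simply "the entry is present". */
	static inline int pte_holds_mfn(unsigned long pte)
	{
		return (pte & _PAGE_PRESENT) == _PAGE_PRESENT;
	}
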
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f1063ae08037..6f4b1045c1c2 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -9,26 +9,35 @@ | |||
9 | extern const char xen_hypervisor_callback[]; | 9 | extern const char xen_hypervisor_callback[]; |
10 | extern const char xen_failsafe_callback[]; | 10 | extern const char xen_failsafe_callback[]; |
11 | 11 | ||
12 | struct trap_info; | ||
12 | void xen_copy_trap_info(struct trap_info *traps); | 13 | void xen_copy_trap_info(struct trap_info *traps); |
13 | 14 | ||
14 | DECLARE_PER_CPU(unsigned long, xen_cr3); | 15 | DECLARE_PER_CPU(unsigned long, xen_cr3); |
15 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); | 16 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); |
16 | 17 | ||
17 | extern struct start_info *xen_start_info; | 18 | extern struct start_info *xen_start_info; |
19 | extern struct shared_info xen_dummy_shared_info; | ||
18 | extern struct shared_info *HYPERVISOR_shared_info; | 20 | extern struct shared_info *HYPERVISOR_shared_info; |
19 | 21 | ||
22 | void xen_setup_mfn_list_list(void); | ||
23 | void xen_setup_shared_info(void); | ||
24 | |||
20 | char * __init xen_memory_setup(void); | 25 | char * __init xen_memory_setup(void); |
21 | void __init xen_arch_setup(void); | 26 | void __init xen_arch_setup(void); |
22 | void __init xen_init_IRQ(void); | 27 | void __init xen_init_IRQ(void); |
23 | void xen_enable_sysenter(void); | 28 | void xen_enable_sysenter(void); |
29 | void xen_vcpu_restore(void); | ||
30 | |||
31 | void __init xen_build_dynamic_phys_to_machine(void); | ||
24 | 32 | ||
25 | void xen_setup_timer(int cpu); | 33 | void xen_setup_timer(int cpu); |
26 | void xen_setup_cpu_clockevents(void); | 34 | void xen_setup_cpu_clockevents(void); |
27 | unsigned long xen_cpu_khz(void); | 35 | unsigned long xen_tsc_khz(void); |
28 | void __init xen_time_init(void); | 36 | void __init xen_time_init(void); |
29 | unsigned long xen_get_wallclock(void); | 37 | unsigned long xen_get_wallclock(void); |
30 | int xen_set_wallclock(unsigned long time); | 38 | int xen_set_wallclock(unsigned long time); |
31 | unsigned long long xen_sched_clock(void); | 39 | unsigned long long xen_sched_clock(void); |
40 | void xen_timer_resume(void); | ||
32 | 41 | ||
33 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); | 42 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); |
34 | 43 | ||
@@ -46,13 +55,10 @@ void xen_smp_cpus_done(unsigned int max_cpus); | |||
46 | 55 | ||
47 | void xen_smp_send_stop(void); | 56 | void xen_smp_send_stop(void); |
48 | void xen_smp_send_reschedule(int cpu); | 57 | void xen_smp_send_reschedule(int cpu); |
49 | int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic, | 58 | void xen_smp_send_call_function_ipi(cpumask_t mask); |
50 | int wait); | 59 | void xen_smp_send_call_function_single_ipi(int cpu); |
51 | int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
52 | int nonatomic, int wait); | ||
53 | 60 | ||
54 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), | 61 | extern cpumask_t xen_cpu_initialized_map; |
55 | void *info, int wait); | ||
56 | 62 | ||
57 | 63 | ||
58 | /* Declare an asm function, along with symbols needed to make it | 64 | /* Declare an asm function, along with symbols needed to make it |