diff options
author | Jeremy Fitzhardinge <jeremy@xensource.com> | 2007-07-17 21:37:07 -0400 |
---|---|---|
committer | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-07-18 11:47:45 -0400 |
commit | 60223a326fc8fa6e90e2c3fd28ae6de4a311d731 (patch) | |
tree | cf4e667a56402b846488373bfaf5bf840395e219 | |
parent | 3e2b8fbeec8f005672f2a2e862fb9c26a0bafedc (diff) |
xen: Place vcpu_info structure into per-cpu memory
An experimental patch for Xen allows guests to place their vcpu_info
structs anywhere. We try to use this to place the vcpu_info into the
PDA, which allows direct access.
If placement succeeds on every VCPU, then switch to using direct access
operations for irq_enable, irq_disable, save_fl, restore_fl and read_cr2.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Keir Fraser <keir@xensource.com>
-rw-r--r-- | arch/i386/xen/enlighten.c | 152 | ||||
-rw-r--r-- | arch/i386/xen/setup.c | 8 | ||||
-rw-r--r-- | arch/i386/xen/smp.c | 5 | ||||
-rw-r--r-- | arch/i386/xen/xen-ops.h | 2 | ||||
-rw-r--r-- | include/xen/interface/vcpu.h | 13 |
5 files changed, 164 insertions, 16 deletions
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c index 142e74891344..e33fa0990eda 100644 --- a/arch/i386/xen/enlighten.c +++ b/arch/i386/xen/enlighten.c | |||
@@ -61,9 +61,63 @@ DEFINE_PER_CPU(unsigned long, xen_cr3); | |||
61 | struct start_info *xen_start_info; | 61 | struct start_info *xen_start_info; |
62 | EXPORT_SYMBOL_GPL(xen_start_info); | 62 | EXPORT_SYMBOL_GPL(xen_start_info); |
63 | 63 | ||
64 | void xen_vcpu_setup(int cpu) | 64 | static /* __initdata */ struct shared_info dummy_shared_info; |
65 | |||
66 | /* | ||
67 | * Point at some empty memory to start with. We map the real shared_info | ||
68 | * page as soon as fixmap is up and running. | ||
69 | */ | ||
70 | struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; | ||
71 | |||
72 | /* | ||
73 | * Flag to determine whether vcpu info placement is available on all | ||
74 | * VCPUs. We assume it is to start with, and then set it to zero on | ||
75 | * the first failure. This is because it can succeed on some VCPUs | ||
76 | * and not others, since it can involve hypervisor memory allocation, | ||
77 | * or because the guest failed to guarantee all the appropriate | ||
78 | * constraints on all VCPUs (ie buffer can't cross a page boundary). | ||
79 | * | ||
80 | * Note that any particular CPU may be using a placed vcpu structure, | ||
81 | * but we can only optimise if they all are. | |
82 | * | ||
83 | * 0: not available, 1: available | ||
84 | */ | ||
85 | static int have_vcpu_info_placement = 1; | ||
86 | |||
87 | static void __init xen_vcpu_setup(int cpu) | ||
65 | { | 88 | { |
89 | struct vcpu_register_vcpu_info info; | ||
90 | int err; | ||
91 | struct vcpu_info *vcpup; | ||
92 | |||
66 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 93 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
94 | |||
95 | if (!have_vcpu_info_placement) | ||
96 | return; /* already tested, not available */ | ||
97 | |||
98 | vcpup = &per_cpu(xen_vcpu_info, cpu); | ||
99 | |||
100 | info.mfn = virt_to_mfn(vcpup); | ||
101 | info.offset = offset_in_page(vcpup); | ||
102 | |||
103 | printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n", | ||
104 | cpu, vcpup, info.mfn, info.offset); | ||
105 | |||
106 | /* Check to see if the hypervisor will put the vcpu_info | ||
107 | structure where we want it, which allows direct access via | ||
108 | a percpu-variable. */ | ||
109 | err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); | ||
110 | |||
111 | if (err) { | ||
112 | printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); | ||
113 | have_vcpu_info_placement = 0; | ||
114 | } else { | ||
115 | /* This cpu is using the registered vcpu info, even if | ||
116 | later ones fail to. */ | ||
117 | per_cpu(xen_vcpu, cpu) = vcpup; | ||
118 | printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n", | ||
119 | cpu, vcpup); | ||
120 | } | ||
67 | } | 121 | } |
68 | 122 | ||
69 | static void __init xen_banner(void) | 123 | static void __init xen_banner(void) |
@@ -123,6 +177,20 @@ static unsigned long xen_save_fl(void) | |||
123 | return (-flags) & X86_EFLAGS_IF; | 177 | return (-flags) & X86_EFLAGS_IF; |
124 | } | 178 | } |
125 | 179 | ||
180 | static unsigned long xen_save_fl_direct(void) | ||
181 | { | ||
182 | unsigned long flags; | ||
183 | |||
184 | /* flag has opposite sense of mask */ | ||
185 | flags = !x86_read_percpu(xen_vcpu_info.evtchn_upcall_mask); | ||
186 | |||
187 | /* convert to IF type flag | ||
188 | -0 -> 0x00000000 | ||
189 | -1 -> 0xffffffff | ||
190 | */ | ||
191 | return (-flags) & X86_EFLAGS_IF; | ||
192 | } | ||
193 | |||
126 | static void xen_restore_fl(unsigned long flags) | 194 | static void xen_restore_fl(unsigned long flags) |
127 | { | 195 | { |
128 | struct vcpu_info *vcpu; | 196 | struct vcpu_info *vcpu; |
@@ -149,6 +217,25 @@ static void xen_restore_fl(unsigned long flags) | |||
149 | } | 217 | } |
150 | } | 218 | } |
151 | 219 | ||
220 | static void xen_restore_fl_direct(unsigned long flags) | ||
221 | { | ||
222 | /* convert from IF type flag */ | ||
223 | flags = !(flags & X86_EFLAGS_IF); | ||
224 | |||
225 | /* This is an atomic update, so no need to worry about | ||
226 | preemption. */ | ||
227 | x86_write_percpu(xen_vcpu_info.evtchn_upcall_mask, flags); | ||
228 | |||
229 | /* If we get preempted here, then any pending event will be | ||
230 | handled anyway. */ | ||
231 | |||
232 | if (flags == 0) { | ||
233 | barrier(); /* unmask then check (avoid races) */ | ||
234 | if (unlikely(x86_read_percpu(xen_vcpu_info.evtchn_upcall_pending))) | ||
235 | force_evtchn_callback(); | ||
236 | } | ||
237 | } | ||
238 | |||
152 | static void xen_irq_disable(void) | 239 | static void xen_irq_disable(void) |
153 | { | 240 | { |
154 | /* There's a one instruction preempt window here. We need to | 241 | /* There's a one instruction preempt window here. We need to |
@@ -159,6 +246,12 @@ static void xen_irq_disable(void) | |||
159 | preempt_enable_no_resched(); | 246 | preempt_enable_no_resched(); |
160 | } | 247 | } |
161 | 248 | ||
249 | static void xen_irq_disable_direct(void) | ||
250 | { | ||
251 | /* Atomic update, so preemption not a concern. */ | ||
252 | x86_write_percpu(xen_vcpu_info.evtchn_upcall_mask, 1); | ||
253 | } | ||
254 | |||
162 | static void xen_irq_enable(void) | 255 | static void xen_irq_enable(void) |
163 | { | 256 | { |
164 | struct vcpu_info *vcpu; | 257 | struct vcpu_info *vcpu; |
@@ -179,6 +272,19 @@ static void xen_irq_enable(void) | |||
179 | force_evtchn_callback(); | 272 | force_evtchn_callback(); |
180 | } | 273 | } |
181 | 274 | ||
275 | static void xen_irq_enable_direct(void) | ||
276 | { | ||
277 | /* Atomic update, so preemption not a concern. */ | ||
278 | x86_write_percpu(xen_vcpu_info.evtchn_upcall_mask, 0); | ||
279 | |||
280 | /* Doesn't matter if we get preempted here, because any | ||
281 | pending event will get dealt with anyway. */ | ||
282 | |||
283 | barrier(); /* unmask then check (avoid races) */ | ||
284 | if (unlikely(x86_read_percpu(xen_vcpu_info.evtchn_upcall_pending))) | ||
285 | force_evtchn_callback(); | ||
286 | } | ||
287 | |||
182 | static void xen_safe_halt(void) | 288 | static void xen_safe_halt(void) |
183 | { | 289 | { |
184 | /* Blocking includes an implicit local_irq_enable(). */ | 290 | /* Blocking includes an implicit local_irq_enable(). */ |
@@ -551,11 +657,21 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, | |||
551 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 657 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
552 | } | 658 | } |
553 | 659 | ||
660 | static void xen_write_cr2(unsigned long cr2) | ||
661 | { | ||
662 | x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; | ||
663 | } | ||
664 | |||
554 | static unsigned long xen_read_cr2(void) | 665 | static unsigned long xen_read_cr2(void) |
555 | { | 666 | { |
556 | return x86_read_percpu(xen_vcpu)->arch.cr2; | 667 | return x86_read_percpu(xen_vcpu)->arch.cr2; |
557 | } | 668 | } |
558 | 669 | ||
670 | static unsigned long xen_read_cr2_direct(void) | ||
671 | { | ||
672 | return x86_read_percpu(xen_vcpu_info.arch.cr2); | ||
673 | } | ||
674 | |||
559 | static void xen_write_cr4(unsigned long cr4) | 675 | static void xen_write_cr4(unsigned long cr4) |
560 | { | 676 | { |
561 | /* never allow TSC to be disabled */ | 677 | /* never allow TSC to be disabled */ |
@@ -753,8 +869,27 @@ static __init void xen_pagetable_setup_done(pgd_t *base) | |||
753 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) | 869 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) |
754 | BUG(); | 870 | BUG(); |
755 | } | 871 | } |
872 | } | ||
756 | 873 | ||
757 | xen_vcpu_setup(smp_processor_id()); | 874 | /* This is called once we have the cpu_possible_map */ |
875 | void __init xen_setup_vcpu_info_placement(void) | ||
876 | { | ||
877 | int cpu; | ||
878 | |||
879 | for_each_possible_cpu(cpu) | ||
880 | xen_vcpu_setup(cpu); | ||
881 | |||
882 | /* xen_vcpu_setup managed to place the vcpu_info within the | ||
883 | percpu area for all cpus, so make use of it */ | ||
884 | if (have_vcpu_info_placement) { | ||
885 | printk(KERN_INFO "Xen: using vcpu_info placement\n"); | ||
886 | |||
887 | paravirt_ops.save_fl = xen_save_fl_direct; | ||
888 | paravirt_ops.restore_fl = xen_restore_fl_direct; | ||
889 | paravirt_ops.irq_disable = xen_irq_disable_direct; | ||
890 | paravirt_ops.irq_enable = xen_irq_enable_direct; | ||
891 | paravirt_ops.read_cr2 = xen_read_cr2_direct; | ||
892 | } | ||
758 | } | 893 | } |
759 | 894 | ||
760 | static const struct paravirt_ops xen_paravirt_ops __initdata = { | 895 | static const struct paravirt_ops xen_paravirt_ops __initdata = { |
@@ -788,7 +923,7 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { | |||
788 | .write_cr0 = native_write_cr0, | 923 | .write_cr0 = native_write_cr0, |
789 | 924 | ||
790 | .read_cr2 = xen_read_cr2, | 925 | .read_cr2 = xen_read_cr2, |
791 | .write_cr2 = native_write_cr2, | 926 | .write_cr2 = xen_write_cr2, |
792 | 927 | ||
793 | .read_cr3 = xen_read_cr3, | 928 | .read_cr3 = xen_read_cr3, |
794 | .write_cr3 = xen_write_cr3, | 929 | .write_cr3 = xen_write_cr3, |
@@ -974,7 +1109,16 @@ asmlinkage void __init xen_start_kernel(void) | |||
974 | /* keep using Xen gdt for now; no urgent need to change it */ | 1109 | /* keep using Xen gdt for now; no urgent need to change it */ |
975 | 1110 | ||
976 | x86_write_percpu(xen_cr3, __pa(pgd)); | 1111 | x86_write_percpu(xen_cr3, __pa(pgd)); |
977 | xen_vcpu_setup(0); | 1112 | |
1113 | #ifdef CONFIG_SMP | ||
1114 | /* Don't do the full vcpu_info placement stuff until we have a | ||
1115 | possible map. */ | ||
1116 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; | ||
1117 | #else | ||
1118 | /* May as well do it now, since there's no good time to call | ||
1119 | it later on UP. */ | ||
1120 | xen_setup_vcpu_info_placement(); | ||
1121 | #endif | ||
978 | 1122 | ||
979 | paravirt_ops.kernel_rpl = 1; | 1123 | paravirt_ops.kernel_rpl = 1; |
980 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | 1124 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
diff --git a/arch/i386/xen/setup.c b/arch/i386/xen/setup.c index 18a994d5a4c5..3f8684eba62b 100644 --- a/arch/i386/xen/setup.c +++ b/arch/i386/xen/setup.c | |||
@@ -24,14 +24,6 @@ | |||
24 | extern const char xen_hypervisor_callback[]; | 24 | extern const char xen_hypervisor_callback[]; |
25 | extern const char xen_failsafe_callback[]; | 25 | extern const char xen_failsafe_callback[]; |
26 | 26 | ||
27 | static __initdata struct shared_info init_shared; | ||
28 | |||
29 | /* | ||
30 | * Point at some empty memory to start with. We map the real shared_info | ||
31 | * page as soon as fixmap is up and running. | ||
32 | */ | ||
33 | struct shared_info *HYPERVISOR_shared_info = &init_shared; | ||
34 | |||
35 | unsigned long *phys_to_machine_mapping; | 27 | unsigned long *phys_to_machine_mapping; |
36 | EXPORT_SYMBOL(phys_to_machine_mapping); | 28 | EXPORT_SYMBOL(phys_to_machine_mapping); |
37 | 29 | ||
diff --git a/arch/i386/xen/smp.c b/arch/i386/xen/smp.c index a620918f87ee..557b8e24706a 100644 --- a/arch/i386/xen/smp.c +++ b/arch/i386/xen/smp.c | |||
@@ -142,8 +142,6 @@ void __init xen_smp_prepare_boot_cpu(void) | |||
142 | BUG_ON(smp_processor_id() != 0); | 142 | BUG_ON(smp_processor_id() != 0); |
143 | native_smp_prepare_boot_cpu(); | 143 | native_smp_prepare_boot_cpu(); |
144 | 144 | ||
145 | xen_vcpu_setup(0); | ||
146 | |||
147 | /* We've switched to the "real" per-cpu gdt, so make sure the | 145 | /* We've switched to the "real" per-cpu gdt, so make sure the |
148 | old memory can be recycled */ | 146 | old memory can be recycled */ |
149 | make_lowmem_page_readwrite(&per_cpu__gdt_page); | 147 | make_lowmem_page_readwrite(&per_cpu__gdt_page); |
@@ -152,6 +150,8 @@ void __init xen_smp_prepare_boot_cpu(void) | |||
152 | cpus_clear(cpu_sibling_map[cpu]); | 150 | cpus_clear(cpu_sibling_map[cpu]); |
153 | cpus_clear(cpu_core_map[cpu]); | 151 | cpus_clear(cpu_core_map[cpu]); |
154 | } | 152 | } |
153 | |||
154 | xen_setup_vcpu_info_placement(); | ||
155 | } | 155 | } |
156 | 156 | ||
157 | void __init xen_smp_prepare_cpus(unsigned int max_cpus) | 157 | void __init xen_smp_prepare_cpus(unsigned int max_cpus) |
@@ -262,7 +262,6 @@ int __cpuinit xen_cpu_up(unsigned int cpu) | |||
262 | 262 | ||
263 | init_gdt(cpu); | 263 | init_gdt(cpu); |
264 | per_cpu(current_task, cpu) = idle; | 264 | per_cpu(current_task, cpu) = idle; |
265 | xen_vcpu_setup(cpu); | ||
266 | irq_ctx_init(cpu); | 265 | irq_ctx_init(cpu); |
267 | xen_setup_timer(cpu); | 266 | xen_setup_timer(cpu); |
268 | 267 | ||
diff --git a/arch/i386/xen/xen-ops.h b/arch/i386/xen/xen-ops.h index 4069be8ba31f..5b56f7fecd19 100644 --- a/arch/i386/xen/xen-ops.h +++ b/arch/i386/xen/xen-ops.h | |||
@@ -38,7 +38,7 @@ static inline unsigned xen_get_lazy_mode(void) | |||
38 | 38 | ||
39 | void __init xen_fill_possible_map(void); | 39 | void __init xen_fill_possible_map(void); |
40 | 40 | ||
41 | void xen_vcpu_setup(int cpu); | 41 | void __init xen_setup_vcpu_info_placement(void); |
42 | void xen_smp_prepare_boot_cpu(void); | 42 | void xen_smp_prepare_boot_cpu(void); |
43 | void xen_smp_prepare_cpus(unsigned int max_cpus); | 43 | void xen_smp_prepare_cpus(unsigned int max_cpus); |
44 | int xen_cpu_up(unsigned int cpu); | 44 | int xen_cpu_up(unsigned int cpu); |
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index c6218f1ad3ca..ff61ea365997 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h | |||
@@ -151,4 +151,17 @@ struct vcpu_set_singleshot_timer { | |||
151 | #define _VCPU_SSHOTTMR_future (0) | 151 | #define _VCPU_SSHOTTMR_future (0) |
152 | #define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) | 152 | #define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) |
153 | 153 | ||
154 | /* | ||
155 | * Register a memory location in the guest address space for the | ||
156 | * vcpu_info structure. This allows the guest to place the vcpu_info | ||
157 | * structure in a convenient place, such as in a per-cpu data area. | ||
158 | * The pointer need not be page aligned, but the structure must not | ||
159 | * cross a page boundary. | ||
160 | */ | ||
161 | #define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_register_vcpu_info */ | |
162 | struct vcpu_register_vcpu_info { | ||
163 | uint32_t mfn; /* mfn of page to place vcpu_info */ | ||
164 | uint32_t offset; /* offset within page */ | ||
165 | }; | ||
166 | |||
154 | #endif /* __XEN_PUBLIC_VCPU_H__ */ | 167 | #endif /* __XEN_PUBLIC_VCPU_H__ */ |