diff options
author | Jeremy Fitzhardinge <jeremy@xensource.com> | 2007-10-16 14:51:29 -0400 |
---|---|---|
committer | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-10-16 14:51:29 -0400 |
commit | 93b1eab3d29e7ea32ee583de3362da84db06ded8 (patch) | |
tree | 8dc7eb61d4c65a48f9ce21a49e392f4967185cfd /arch/x86/kernel/paravirt_32.c | |
parent | ab9c232286c2b77be78441c2d8396500b045777e (diff) |
paravirt: refactor struct paravirt_ops into smaller pv_*_ops
This patch refactors the paravirt_ops structure into groups of
functionally related ops:
pv_info - random info, rather than function entrypoints
pv_init_ops - functions used at boot time (some for module_init too)
pv_misc_ops - lazy mode, which didn't fit well anywhere else
pv_time_ops - time-related functions
pv_cpu_ops - various privileged instruction ops
pv_irq_ops - operations for managing interrupt state
pv_apic_ops - APIC operations
pv_mmu_ops - operations for managing pagetables
There are several motivations for this:
1. Some of these ops will be general to all x86, and some will be
i386/x86-64 specific. This makes it easier to share common stuff
while allowing separate implementations where needed.
2. At the moment we must export all of paravirt_ops, but modules only
need selected parts of it. This allows us to export on a case by case
basis (and also choose which export license we want to apply).
3. Functional groupings make things a bit more readable.
Struct paravirt_ops is now only used as a template to generate
patch-site identifiers, and to extract function pointers for inserting
into jmp/calls when patching. It is only instantiated when needed.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Zach Amsden <zach@vmware.com>
Cc: Avi Kivity <avi@qumranet.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: "Glauber de Oliveira Costa" <glommer@gmail.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Diffstat (limited to 'arch/x86/kernel/paravirt_32.c')
-rw-r--r-- | arch/x86/kernel/paravirt_32.c | 174 |
1 files changed, 103 insertions, 71 deletions
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c index 739cfb207dd7..fa412515af79 100644 --- a/arch/x86/kernel/paravirt_32.c +++ b/arch/x86/kernel/paravirt_32.c | |||
@@ -42,32 +42,33 @@ void _paravirt_nop(void) | |||
42 | static void __init default_banner(void) | 42 | static void __init default_banner(void) |
43 | { | 43 | { |
44 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 44 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
45 | paravirt_ops.name); | 45 | pv_info.name); |
46 | } | 46 | } |
47 | 47 | ||
48 | char *memory_setup(void) | 48 | char *memory_setup(void) |
49 | { | 49 | { |
50 | return paravirt_ops.memory_setup(); | 50 | return pv_init_ops.memory_setup(); |
51 | } | 51 | } |
52 | 52 | ||
53 | /* Simple instruction patching code. */ | 53 | /* Simple instruction patching code. */ |
54 | #define DEF_NATIVE(name, code) \ | 54 | #define DEF_NATIVE(ops, name, code) \ |
55 | extern const char start_##name[], end_##name[]; \ | 55 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ |
56 | asm("start_" #name ": " code "; end_" #name ":") | 56 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") |
57 | 57 | ||
58 | DEF_NATIVE(irq_disable, "cli"); | 58 | DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); |
59 | DEF_NATIVE(irq_enable, "sti"); | 59 | DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); |
60 | DEF_NATIVE(restore_fl, "push %eax; popf"); | 60 | DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); |
61 | DEF_NATIVE(save_fl, "pushf; pop %eax"); | 61 | DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); |
62 | DEF_NATIVE(iret, "iret"); | 62 | DEF_NATIVE(pv_cpu_ops, iret, "iret"); |
63 | DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); | 63 | DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); |
64 | DEF_NATIVE(read_cr2, "mov %cr2, %eax"); | 64 | DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); |
65 | DEF_NATIVE(write_cr3, "mov %eax, %cr3"); | 65 | DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); |
66 | DEF_NATIVE(read_cr3, "mov %cr3, %eax"); | 66 | DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); |
67 | DEF_NATIVE(clts, "clts"); | 67 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); |
68 | DEF_NATIVE(read_tsc, "rdtsc"); | 68 | DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); |
69 | 69 | ||
70 | DEF_NATIVE(ud2a, "ud2a"); | 70 | /* Undefined instruction for dealing with missing ops pointers. */ |
71 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; | ||
71 | 72 | ||
72 | static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | 73 | static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, |
73 | unsigned long addr, unsigned len) | 74 | unsigned long addr, unsigned len) |
@@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | |||
76 | unsigned ret; | 77 | unsigned ret; |
77 | 78 | ||
78 | switch(type) { | 79 | switch(type) { |
79 | #define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site | 80 | #define SITE(ops, x) \ |
80 | SITE(irq_disable); | 81 | case PARAVIRT_PATCH(ops.x): \ |
81 | SITE(irq_enable); | 82 | start = start_##ops##_##x; \ |
82 | SITE(restore_fl); | 83 | end = end_##ops##_##x; \ |
83 | SITE(save_fl); | 84 | goto patch_site |
84 | SITE(iret); | 85 | |
85 | SITE(irq_enable_sysexit); | 86 | SITE(pv_irq_ops, irq_disable); |
86 | SITE(read_cr2); | 87 | SITE(pv_irq_ops, irq_enable); |
87 | SITE(read_cr3); | 88 | SITE(pv_irq_ops, restore_fl); |
88 | SITE(write_cr3); | 89 | SITE(pv_irq_ops, save_fl); |
89 | SITE(clts); | 90 | SITE(pv_cpu_ops, iret); |
90 | SITE(read_tsc); | 91 | SITE(pv_cpu_ops, irq_enable_sysexit); |
92 | SITE(pv_mmu_ops, read_cr2); | ||
93 | SITE(pv_mmu_ops, read_cr3); | ||
94 | SITE(pv_mmu_ops, write_cr3); | ||
95 | SITE(pv_cpu_ops, clts); | ||
96 | SITE(pv_cpu_ops, read_tsc); | ||
91 | #undef SITE | 97 | #undef SITE |
92 | 98 | ||
93 | patch_site: | 99 | patch_site: |
94 | ret = paravirt_patch_insns(ibuf, len, start, end); | 100 | ret = paravirt_patch_insns(ibuf, len, start, end); |
95 | break; | 101 | break; |
96 | 102 | ||
97 | case PARAVIRT_PATCH(make_pgd): | ||
98 | case PARAVIRT_PATCH(make_pte): | ||
99 | case PARAVIRT_PATCH(pgd_val): | ||
100 | case PARAVIRT_PATCH(pte_val): | ||
101 | #ifdef CONFIG_X86_PAE | ||
102 | case PARAVIRT_PATCH(make_pmd): | ||
103 | case PARAVIRT_PATCH(pmd_val): | ||
104 | #endif | ||
105 | /* These functions end up returning exactly what | ||
106 | they're passed, in the same registers. */ | ||
107 | ret = paravirt_patch_nop(); | ||
108 | break; | ||
109 | |||
110 | default: | 103 | default: |
111 | ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); | 104 | ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); |
112 | break; | 105 | break; |
@@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf, | |||
150 | return 5; | 143 | return 5; |
151 | } | 144 | } |
152 | 145 | ||
153 | unsigned paravirt_patch_jmp(const void *target, void *insnbuf, | 146 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, |
154 | unsigned long addr, unsigned len) | 147 | unsigned long addr, unsigned len) |
155 | { | 148 | { |
156 | struct branch *b = insnbuf; | 149 | struct branch *b = insnbuf; |
@@ -165,22 +158,38 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf, | |||
165 | return 5; | 158 | return 5; |
166 | } | 159 | } |
167 | 160 | ||
161 | /* Neat trick to map patch type back to the call within the | ||
162 | * corresponding structure. */ | ||
163 | static void *get_call_destination(u8 type) | ||
164 | { | ||
165 | struct paravirt_patch_template tmpl = { | ||
166 | .pv_init_ops = pv_init_ops, | ||
167 | .pv_misc_ops = pv_misc_ops, | ||
168 | .pv_time_ops = pv_time_ops, | ||
169 | .pv_cpu_ops = pv_cpu_ops, | ||
170 | .pv_irq_ops = pv_irq_ops, | ||
171 | .pv_apic_ops = pv_apic_ops, | ||
172 | .pv_mmu_ops = pv_mmu_ops, | ||
173 | }; | ||
174 | return *((void **)&tmpl + type); | ||
175 | } | ||
176 | |||
168 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | 177 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, |
169 | unsigned long addr, unsigned len) | 178 | unsigned long addr, unsigned len) |
170 | { | 179 | { |
171 | void *opfunc = *((void **)&paravirt_ops + type); | 180 | void *opfunc = get_call_destination(type); |
172 | unsigned ret; | 181 | unsigned ret; |
173 | 182 | ||
174 | if (opfunc == NULL) | 183 | if (opfunc == NULL) |
175 | /* If there's no function, patch it with a ud2a (BUG) */ | 184 | /* If there's no function, patch it with a ud2a (BUG) */ |
176 | ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); | 185 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); |
177 | else if (opfunc == paravirt_nop) | 186 | else if (opfunc == paravirt_nop) |
178 | /* If the operation is a nop, then nop the callsite */ | 187 | /* If the operation is a nop, then nop the callsite */ |
179 | ret = paravirt_patch_nop(); | 188 | ret = paravirt_patch_nop(); |
180 | else if (type == PARAVIRT_PATCH(iret) || | 189 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || |
181 | type == PARAVIRT_PATCH(irq_enable_sysexit)) | 190 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit)) |
182 | /* If operation requires a jmp, then jmp */ | 191 | /* If operation requires a jmp, then jmp */ |
183 | ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); | 192 | ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); |
184 | else | 193 | else |
185 | /* Otherwise call the function; assume target could | 194 | /* Otherwise call the function; assume target could |
186 | clobber any caller-save reg */ | 195 | clobber any caller-save reg */ |
@@ -205,7 +214,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | |||
205 | 214 | ||
206 | void init_IRQ(void) | 215 | void init_IRQ(void) |
207 | { | 216 | { |
208 | paravirt_ops.init_IRQ(); | 217 | pv_irq_ops.init_IRQ(); |
209 | } | 218 | } |
210 | 219 | ||
211 | static void native_flush_tlb(void) | 220 | static void native_flush_tlb(void) |
@@ -233,7 +242,7 @@ extern void native_irq_enable_sysexit(void); | |||
233 | 242 | ||
234 | static int __init print_banner(void) | 243 | static int __init print_banner(void) |
235 | { | 244 | { |
236 | paravirt_ops.banner(); | 245 | pv_init_ops.banner(); |
237 | return 0; | 246 | return 0; |
238 | } | 247 | } |
239 | core_initcall(print_banner); | 248 | core_initcall(print_banner); |
@@ -273,47 +282,53 @@ int paravirt_disable_iospace(void) | |||
273 | return ret; | 282 | return ret; |
274 | } | 283 | } |
275 | 284 | ||
276 | struct paravirt_ops paravirt_ops = { | 285 | struct pv_info pv_info = { |
277 | .name = "bare hardware", | 286 | .name = "bare hardware", |
278 | .paravirt_enabled = 0, | 287 | .paravirt_enabled = 0, |
279 | .kernel_rpl = 0, | 288 | .kernel_rpl = 0, |
280 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | 289 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
290 | }; | ||
281 | 291 | ||
282 | .patch = native_patch, | 292 | struct pv_init_ops pv_init_ops = { |
293 | .patch = native_patch, | ||
283 | .banner = default_banner, | 294 | .banner = default_banner, |
284 | .arch_setup = paravirt_nop, | 295 | .arch_setup = paravirt_nop, |
285 | .memory_setup = machine_specific_memory_setup, | 296 | .memory_setup = machine_specific_memory_setup, |
297 | }; | ||
298 | |||
299 | struct pv_time_ops pv_time_ops = { | ||
300 | .time_init = hpet_time_init, | ||
286 | .get_wallclock = native_get_wallclock, | 301 | .get_wallclock = native_get_wallclock, |
287 | .set_wallclock = native_set_wallclock, | 302 | .set_wallclock = native_set_wallclock, |
288 | .time_init = hpet_time_init, | 303 | .sched_clock = native_sched_clock, |
304 | .get_cpu_khz = native_calculate_cpu_khz, | ||
305 | }; | ||
306 | |||
307 | struct pv_irq_ops pv_irq_ops = { | ||
289 | .init_IRQ = native_init_IRQ, | 308 | .init_IRQ = native_init_IRQ, |
309 | .save_fl = native_save_fl, | ||
310 | .restore_fl = native_restore_fl, | ||
311 | .irq_disable = native_irq_disable, | ||
312 | .irq_enable = native_irq_enable, | ||
313 | .safe_halt = native_safe_halt, | ||
314 | .halt = native_halt, | ||
315 | }; | ||
290 | 316 | ||
317 | struct pv_cpu_ops pv_cpu_ops = { | ||
291 | .cpuid = native_cpuid, | 318 | .cpuid = native_cpuid, |
292 | .get_debugreg = native_get_debugreg, | 319 | .get_debugreg = native_get_debugreg, |
293 | .set_debugreg = native_set_debugreg, | 320 | .set_debugreg = native_set_debugreg, |
294 | .clts = native_clts, | 321 | .clts = native_clts, |
295 | .read_cr0 = native_read_cr0, | 322 | .read_cr0 = native_read_cr0, |
296 | .write_cr0 = native_write_cr0, | 323 | .write_cr0 = native_write_cr0, |
297 | .read_cr2 = native_read_cr2, | ||
298 | .write_cr2 = native_write_cr2, | ||
299 | .read_cr3 = native_read_cr3, | ||
300 | .write_cr3 = native_write_cr3, | ||
301 | .read_cr4 = native_read_cr4, | 324 | .read_cr4 = native_read_cr4, |
302 | .read_cr4_safe = native_read_cr4_safe, | 325 | .read_cr4_safe = native_read_cr4_safe, |
303 | .write_cr4 = native_write_cr4, | 326 | .write_cr4 = native_write_cr4, |
304 | .save_fl = native_save_fl, | ||
305 | .restore_fl = native_restore_fl, | ||
306 | .irq_disable = native_irq_disable, | ||
307 | .irq_enable = native_irq_enable, | ||
308 | .safe_halt = native_safe_halt, | ||
309 | .halt = native_halt, | ||
310 | .wbinvd = native_wbinvd, | 327 | .wbinvd = native_wbinvd, |
311 | .read_msr = native_read_msr_safe, | 328 | .read_msr = native_read_msr_safe, |
312 | .write_msr = native_write_msr_safe, | 329 | .write_msr = native_write_msr_safe, |
313 | .read_tsc = native_read_tsc, | 330 | .read_tsc = native_read_tsc, |
314 | .read_pmc = native_read_pmc, | 331 | .read_pmc = native_read_pmc, |
315 | .sched_clock = native_sched_clock, | ||
316 | .get_cpu_khz = native_calculate_cpu_khz, | ||
317 | .load_tr_desc = native_load_tr_desc, | 332 | .load_tr_desc = native_load_tr_desc, |
318 | .set_ldt = native_set_ldt, | 333 | .set_ldt = native_set_ldt, |
319 | .load_gdt = native_load_gdt, | 334 | .load_gdt = native_load_gdt, |
@@ -327,9 +342,14 @@ struct paravirt_ops paravirt_ops = { | |||
327 | .write_idt_entry = write_dt_entry, | 342 | .write_idt_entry = write_dt_entry, |
328 | .load_esp0 = native_load_esp0, | 343 | .load_esp0 = native_load_esp0, |
329 | 344 | ||
345 | .irq_enable_sysexit = native_irq_enable_sysexit, | ||
346 | .iret = native_iret, | ||
347 | |||
330 | .set_iopl_mask = native_set_iopl_mask, | 348 | .set_iopl_mask = native_set_iopl_mask, |
331 | .io_delay = native_io_delay, | 349 | .io_delay = native_io_delay, |
350 | }; | ||
332 | 351 | ||
352 | struct pv_apic_ops pv_apic_ops = { | ||
333 | #ifdef CONFIG_X86_LOCAL_APIC | 353 | #ifdef CONFIG_X86_LOCAL_APIC |
334 | .apic_write = native_apic_write, | 354 | .apic_write = native_apic_write, |
335 | .apic_write_atomic = native_apic_write_atomic, | 355 | .apic_write_atomic = native_apic_write_atomic, |
@@ -338,11 +358,21 @@ struct paravirt_ops paravirt_ops = { | |||
338 | .setup_secondary_clock = setup_secondary_APIC_clock, | 358 | .setup_secondary_clock = setup_secondary_APIC_clock, |
339 | .startup_ipi_hook = paravirt_nop, | 359 | .startup_ipi_hook = paravirt_nop, |
340 | #endif | 360 | #endif |
361 | }; | ||
362 | |||
363 | struct pv_misc_ops pv_misc_ops = { | ||
341 | .set_lazy_mode = paravirt_nop, | 364 | .set_lazy_mode = paravirt_nop, |
365 | }; | ||
342 | 366 | ||
367 | struct pv_mmu_ops pv_mmu_ops = { | ||
343 | .pagetable_setup_start = native_pagetable_setup_start, | 368 | .pagetable_setup_start = native_pagetable_setup_start, |
344 | .pagetable_setup_done = native_pagetable_setup_done, | 369 | .pagetable_setup_done = native_pagetable_setup_done, |
345 | 370 | ||
371 | .read_cr2 = native_read_cr2, | ||
372 | .write_cr2 = native_write_cr2, | ||
373 | .read_cr3 = native_read_cr3, | ||
374 | .write_cr3 = native_write_cr3, | ||
375 | |||
346 | .flush_tlb_user = native_flush_tlb, | 376 | .flush_tlb_user = native_flush_tlb, |
347 | .flush_tlb_kernel = native_flush_tlb_global, | 377 | .flush_tlb_kernel = native_flush_tlb_global, |
348 | .flush_tlb_single = native_flush_tlb_single, | 378 | .flush_tlb_single = native_flush_tlb_single, |
@@ -381,12 +411,14 @@ struct paravirt_ops paravirt_ops = { | |||
381 | .make_pte = native_make_pte, | 411 | .make_pte = native_make_pte, |
382 | .make_pgd = native_make_pgd, | 412 | .make_pgd = native_make_pgd, |
383 | 413 | ||
384 | .irq_enable_sysexit = native_irq_enable_sysexit, | ||
385 | .iret = native_iret, | ||
386 | |||
387 | .dup_mmap = paravirt_nop, | 414 | .dup_mmap = paravirt_nop, |
388 | .exit_mmap = paravirt_nop, | 415 | .exit_mmap = paravirt_nop, |
389 | .activate_mm = paravirt_nop, | 416 | .activate_mm = paravirt_nop, |
390 | }; | 417 | }; |
391 | 418 | ||
392 | EXPORT_SYMBOL(paravirt_ops); | 419 | EXPORT_SYMBOL_GPL(pv_time_ops); |
420 | EXPORT_SYMBOL_GPL(pv_cpu_ops); | ||
421 | EXPORT_SYMBOL_GPL(pv_mmu_ops); | ||
422 | EXPORT_SYMBOL_GPL(pv_apic_ops); | ||
423 | EXPORT_SYMBOL_GPL(pv_info); | ||
424 | EXPORT_SYMBOL (pv_irq_ops); | ||