diff options
Diffstat (limited to 'arch/x86/kernel/paravirt_32.c')
-rw-r--r-- | arch/x86/kernel/paravirt_32.c | 224 |
1 files changed, 152 insertions, 72 deletions
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c index 739cfb207dd7..6a80d67c2121 100644 --- a/arch/x86/kernel/paravirt_32.c +++ b/arch/x86/kernel/paravirt_32.c | |||
@@ -42,32 +42,33 @@ void _paravirt_nop(void) | |||
42 | static void __init default_banner(void) | 42 | static void __init default_banner(void) |
43 | { | 43 | { |
44 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 44 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
45 | paravirt_ops.name); | 45 | pv_info.name); |
46 | } | 46 | } |
47 | 47 | ||
48 | char *memory_setup(void) | 48 | char *memory_setup(void) |
49 | { | 49 | { |
50 | return paravirt_ops.memory_setup(); | 50 | return pv_init_ops.memory_setup(); |
51 | } | 51 | } |
52 | 52 | ||
53 | /* Simple instruction patching code. */ | 53 | /* Simple instruction patching code. */ |
54 | #define DEF_NATIVE(name, code) \ | 54 | #define DEF_NATIVE(ops, name, code) \ |
55 | extern const char start_##name[], end_##name[]; \ | 55 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ |
56 | asm("start_" #name ": " code "; end_" #name ":") | 56 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") |
57 | 57 | ||
58 | DEF_NATIVE(irq_disable, "cli"); | 58 | DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); |
59 | DEF_NATIVE(irq_enable, "sti"); | 59 | DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); |
60 | DEF_NATIVE(restore_fl, "push %eax; popf"); | 60 | DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); |
61 | DEF_NATIVE(save_fl, "pushf; pop %eax"); | 61 | DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); |
62 | DEF_NATIVE(iret, "iret"); | 62 | DEF_NATIVE(pv_cpu_ops, iret, "iret"); |
63 | DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); | 63 | DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); |
64 | DEF_NATIVE(read_cr2, "mov %cr2, %eax"); | 64 | DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); |
65 | DEF_NATIVE(write_cr3, "mov %eax, %cr3"); | 65 | DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); |
66 | DEF_NATIVE(read_cr3, "mov %cr3, %eax"); | 66 | DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); |
67 | DEF_NATIVE(clts, "clts"); | 67 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); |
68 | DEF_NATIVE(read_tsc, "rdtsc"); | 68 | DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); |
69 | 69 | ||
70 | DEF_NATIVE(ud2a, "ud2a"); | 70 | /* Undefined instruction for dealing with missing ops pointers. */ |
71 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; | ||
71 | 72 | ||
72 | static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | 73 | static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, |
73 | unsigned long addr, unsigned len) | 74 | unsigned long addr, unsigned len) |
@@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | |||
76 | unsigned ret; | 77 | unsigned ret; |
77 | 78 | ||
78 | switch(type) { | 79 | switch(type) { |
79 | #define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site | 80 | #define SITE(ops, x) \ |
80 | SITE(irq_disable); | 81 | case PARAVIRT_PATCH(ops.x): \ |
81 | SITE(irq_enable); | 82 | start = start_##ops##_##x; \ |
82 | SITE(restore_fl); | 83 | end = end_##ops##_##x; \ |
83 | SITE(save_fl); | 84 | goto patch_site |
84 | SITE(iret); | 85 | |
85 | SITE(irq_enable_sysexit); | 86 | SITE(pv_irq_ops, irq_disable); |
86 | SITE(read_cr2); | 87 | SITE(pv_irq_ops, irq_enable); |
87 | SITE(read_cr3); | 88 | SITE(pv_irq_ops, restore_fl); |
88 | SITE(write_cr3); | 89 | SITE(pv_irq_ops, save_fl); |
89 | SITE(clts); | 90 | SITE(pv_cpu_ops, iret); |
90 | SITE(read_tsc); | 91 | SITE(pv_cpu_ops, irq_enable_sysexit); |
92 | SITE(pv_mmu_ops, read_cr2); | ||
93 | SITE(pv_mmu_ops, read_cr3); | ||
94 | SITE(pv_mmu_ops, write_cr3); | ||
95 | SITE(pv_cpu_ops, clts); | ||
96 | SITE(pv_cpu_ops, read_tsc); | ||
91 | #undef SITE | 97 | #undef SITE |
92 | 98 | ||
93 | patch_site: | 99 | patch_site: |
94 | ret = paravirt_patch_insns(ibuf, len, start, end); | 100 | ret = paravirt_patch_insns(ibuf, len, start, end); |
95 | break; | 101 | break; |
96 | 102 | ||
97 | case PARAVIRT_PATCH(make_pgd): | ||
98 | case PARAVIRT_PATCH(make_pte): | ||
99 | case PARAVIRT_PATCH(pgd_val): | ||
100 | case PARAVIRT_PATCH(pte_val): | ||
101 | #ifdef CONFIG_X86_PAE | ||
102 | case PARAVIRT_PATCH(make_pmd): | ||
103 | case PARAVIRT_PATCH(pmd_val): | ||
104 | #endif | ||
105 | /* These functions end up returning exactly what | ||
106 | they're passed, in the same registers. */ | ||
107 | ret = paravirt_patch_nop(); | ||
108 | break; | ||
109 | |||
110 | default: | 103 | default: |
111 | ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); | 104 | ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); |
112 | break; | 105 | break; |
@@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf, | |||
150 | return 5; | 143 | return 5; |
151 | } | 144 | } |
152 | 145 | ||
153 | unsigned paravirt_patch_jmp(const void *target, void *insnbuf, | 146 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, |
154 | unsigned long addr, unsigned len) | 147 | unsigned long addr, unsigned len) |
155 | { | 148 | { |
156 | struct branch *b = insnbuf; | 149 | struct branch *b = insnbuf; |
@@ -165,22 +158,37 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf, | |||
165 | return 5; | 158 | return 5; |
166 | } | 159 | } |
167 | 160 | ||
161 | /* Neat trick to map patch type back to the call within the | ||
162 | * corresponding structure. */ | ||
163 | static void *get_call_destination(u8 type) | ||
164 | { | ||
165 | struct paravirt_patch_template tmpl = { | ||
166 | .pv_init_ops = pv_init_ops, | ||
167 | .pv_time_ops = pv_time_ops, | ||
168 | .pv_cpu_ops = pv_cpu_ops, | ||
169 | .pv_irq_ops = pv_irq_ops, | ||
170 | .pv_apic_ops = pv_apic_ops, | ||
171 | .pv_mmu_ops = pv_mmu_ops, | ||
172 | }; | ||
173 | return *((void **)&tmpl + type); | ||
174 | } | ||
175 | |||
168 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | 176 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, |
169 | unsigned long addr, unsigned len) | 177 | unsigned long addr, unsigned len) |
170 | { | 178 | { |
171 | void *opfunc = *((void **)&paravirt_ops + type); | 179 | void *opfunc = get_call_destination(type); |
172 | unsigned ret; | 180 | unsigned ret; |
173 | 181 | ||
174 | if (opfunc == NULL) | 182 | if (opfunc == NULL) |
175 | /* If there's no function, patch it with a ud2a (BUG) */ | 183 | /* If there's no function, patch it with a ud2a (BUG) */ |
176 | ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); | 184 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); |
177 | else if (opfunc == paravirt_nop) | 185 | else if (opfunc == paravirt_nop) |
178 | /* If the operation is a nop, then nop the callsite */ | 186 | /* If the operation is a nop, then nop the callsite */ |
179 | ret = paravirt_patch_nop(); | 187 | ret = paravirt_patch_nop(); |
180 | else if (type == PARAVIRT_PATCH(iret) || | 188 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || |
181 | type == PARAVIRT_PATCH(irq_enable_sysexit)) | 189 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit)) |
182 | /* If operation requires a jmp, then jmp */ | 190 | /* If operation requires a jmp, then jmp */ |
183 | ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); | 191 | ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); |
184 | else | 192 | else |
185 | /* Otherwise call the function; assume target could | 193 | /* Otherwise call the function; assume target could |
186 | clobber any caller-save reg */ | 194 | clobber any caller-save reg */ |
@@ -205,7 +213,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | |||
205 | 213 | ||
206 | void init_IRQ(void) | 214 | void init_IRQ(void) |
207 | { | 215 | { |
208 | paravirt_ops.init_IRQ(); | 216 | pv_irq_ops.init_IRQ(); |
209 | } | 217 | } |
210 | 218 | ||
211 | static void native_flush_tlb(void) | 219 | static void native_flush_tlb(void) |
@@ -233,7 +241,7 @@ extern void native_irq_enable_sysexit(void); | |||
233 | 241 | ||
234 | static int __init print_banner(void) | 242 | static int __init print_banner(void) |
235 | { | 243 | { |
236 | paravirt_ops.banner(); | 244 | pv_init_ops.banner(); |
237 | return 0; | 245 | return 0; |
238 | } | 246 | } |
239 | core_initcall(print_banner); | 247 | core_initcall(print_banner); |
@@ -273,47 +281,96 @@ int paravirt_disable_iospace(void) | |||
273 | return ret; | 281 | return ret; |
274 | } | 282 | } |
275 | 283 | ||
276 | struct paravirt_ops paravirt_ops = { | 284 | static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; |
285 | |||
286 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | ||
287 | { | ||
288 | BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); | ||
289 | BUG_ON(preemptible()); | ||
290 | |||
291 | x86_write_percpu(paravirt_lazy_mode, mode); | ||
292 | } | ||
293 | |||
294 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) | ||
295 | { | ||
296 | BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode); | ||
297 | BUG_ON(preemptible()); | ||
298 | |||
299 | x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); | ||
300 | } | ||
301 | |||
302 | void paravirt_enter_lazy_mmu(void) | ||
303 | { | ||
304 | enter_lazy(PARAVIRT_LAZY_MMU); | ||
305 | } | ||
306 | |||
307 | void paravirt_leave_lazy_mmu(void) | ||
308 | { | ||
309 | paravirt_leave_lazy(PARAVIRT_LAZY_MMU); | ||
310 | } | ||
311 | |||
312 | void paravirt_enter_lazy_cpu(void) | ||
313 | { | ||
314 | enter_lazy(PARAVIRT_LAZY_CPU); | ||
315 | } | ||
316 | |||
317 | void paravirt_leave_lazy_cpu(void) | ||
318 | { | ||
319 | paravirt_leave_lazy(PARAVIRT_LAZY_CPU); | ||
320 | } | ||
321 | |||
322 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | ||
323 | { | ||
324 | return x86_read_percpu(paravirt_lazy_mode); | ||
325 | } | ||
326 | |||
327 | struct pv_info pv_info = { | ||
277 | .name = "bare hardware", | 328 | .name = "bare hardware", |
278 | .paravirt_enabled = 0, | 329 | .paravirt_enabled = 0, |
279 | .kernel_rpl = 0, | 330 | .kernel_rpl = 0, |
280 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | 331 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
332 | }; | ||
281 | 333 | ||
282 | .patch = native_patch, | 334 | struct pv_init_ops pv_init_ops = { |
335 | .patch = native_patch, | ||
283 | .banner = default_banner, | 336 | .banner = default_banner, |
284 | .arch_setup = paravirt_nop, | 337 | .arch_setup = paravirt_nop, |
285 | .memory_setup = machine_specific_memory_setup, | 338 | .memory_setup = machine_specific_memory_setup, |
339 | }; | ||
340 | |||
341 | struct pv_time_ops pv_time_ops = { | ||
342 | .time_init = hpet_time_init, | ||
286 | .get_wallclock = native_get_wallclock, | 343 | .get_wallclock = native_get_wallclock, |
287 | .set_wallclock = native_set_wallclock, | 344 | .set_wallclock = native_set_wallclock, |
288 | .time_init = hpet_time_init, | 345 | .sched_clock = native_sched_clock, |
346 | .get_cpu_khz = native_calculate_cpu_khz, | ||
347 | }; | ||
348 | |||
349 | struct pv_irq_ops pv_irq_ops = { | ||
289 | .init_IRQ = native_init_IRQ, | 350 | .init_IRQ = native_init_IRQ, |
351 | .save_fl = native_save_fl, | ||
352 | .restore_fl = native_restore_fl, | ||
353 | .irq_disable = native_irq_disable, | ||
354 | .irq_enable = native_irq_enable, | ||
355 | .safe_halt = native_safe_halt, | ||
356 | .halt = native_halt, | ||
357 | }; | ||
290 | 358 | ||
359 | struct pv_cpu_ops pv_cpu_ops = { | ||
291 | .cpuid = native_cpuid, | 360 | .cpuid = native_cpuid, |
292 | .get_debugreg = native_get_debugreg, | 361 | .get_debugreg = native_get_debugreg, |
293 | .set_debugreg = native_set_debugreg, | 362 | .set_debugreg = native_set_debugreg, |
294 | .clts = native_clts, | 363 | .clts = native_clts, |
295 | .read_cr0 = native_read_cr0, | 364 | .read_cr0 = native_read_cr0, |
296 | .write_cr0 = native_write_cr0, | 365 | .write_cr0 = native_write_cr0, |
297 | .read_cr2 = native_read_cr2, | ||
298 | .write_cr2 = native_write_cr2, | ||
299 | .read_cr3 = native_read_cr3, | ||
300 | .write_cr3 = native_write_cr3, | ||
301 | .read_cr4 = native_read_cr4, | 366 | .read_cr4 = native_read_cr4, |
302 | .read_cr4_safe = native_read_cr4_safe, | 367 | .read_cr4_safe = native_read_cr4_safe, |
303 | .write_cr4 = native_write_cr4, | 368 | .write_cr4 = native_write_cr4, |
304 | .save_fl = native_save_fl, | ||
305 | .restore_fl = native_restore_fl, | ||
306 | .irq_disable = native_irq_disable, | ||
307 | .irq_enable = native_irq_enable, | ||
308 | .safe_halt = native_safe_halt, | ||
309 | .halt = native_halt, | ||
310 | .wbinvd = native_wbinvd, | 369 | .wbinvd = native_wbinvd, |
311 | .read_msr = native_read_msr_safe, | 370 | .read_msr = native_read_msr_safe, |
312 | .write_msr = native_write_msr_safe, | 371 | .write_msr = native_write_msr_safe, |
313 | .read_tsc = native_read_tsc, | 372 | .read_tsc = native_read_tsc, |
314 | .read_pmc = native_read_pmc, | 373 | .read_pmc = native_read_pmc, |
315 | .sched_clock = native_sched_clock, | ||
316 | .get_cpu_khz = native_calculate_cpu_khz, | ||
317 | .load_tr_desc = native_load_tr_desc, | 374 | .load_tr_desc = native_load_tr_desc, |
318 | .set_ldt = native_set_ldt, | 375 | .set_ldt = native_set_ldt, |
319 | .load_gdt = native_load_gdt, | 376 | .load_gdt = native_load_gdt, |
@@ -327,9 +384,19 @@ struct paravirt_ops paravirt_ops = { | |||
327 | .write_idt_entry = write_dt_entry, | 384 | .write_idt_entry = write_dt_entry, |
328 | .load_esp0 = native_load_esp0, | 385 | .load_esp0 = native_load_esp0, |
329 | 386 | ||
387 | .irq_enable_sysexit = native_irq_enable_sysexit, | ||
388 | .iret = native_iret, | ||
389 | |||
330 | .set_iopl_mask = native_set_iopl_mask, | 390 | .set_iopl_mask = native_set_iopl_mask, |
331 | .io_delay = native_io_delay, | 391 | .io_delay = native_io_delay, |
332 | 392 | ||
393 | .lazy_mode = { | ||
394 | .enter = paravirt_nop, | ||
395 | .leave = paravirt_nop, | ||
396 | }, | ||
397 | }; | ||
398 | |||
399 | struct pv_apic_ops pv_apic_ops = { | ||
333 | #ifdef CONFIG_X86_LOCAL_APIC | 400 | #ifdef CONFIG_X86_LOCAL_APIC |
334 | .apic_write = native_apic_write, | 401 | .apic_write = native_apic_write, |
335 | .apic_write_atomic = native_apic_write_atomic, | 402 | .apic_write_atomic = native_apic_write_atomic, |
@@ -338,11 +405,17 @@ struct paravirt_ops paravirt_ops = { | |||
338 | .setup_secondary_clock = setup_secondary_APIC_clock, | 405 | .setup_secondary_clock = setup_secondary_APIC_clock, |
339 | .startup_ipi_hook = paravirt_nop, | 406 | .startup_ipi_hook = paravirt_nop, |
340 | #endif | 407 | #endif |
341 | .set_lazy_mode = paravirt_nop, | 408 | }; |
342 | 409 | ||
410 | struct pv_mmu_ops pv_mmu_ops = { | ||
343 | .pagetable_setup_start = native_pagetable_setup_start, | 411 | .pagetable_setup_start = native_pagetable_setup_start, |
344 | .pagetable_setup_done = native_pagetable_setup_done, | 412 | .pagetable_setup_done = native_pagetable_setup_done, |
345 | 413 | ||
414 | .read_cr2 = native_read_cr2, | ||
415 | .write_cr2 = native_write_cr2, | ||
416 | .read_cr3 = native_read_cr3, | ||
417 | .write_cr3 = native_write_cr3, | ||
418 | |||
346 | .flush_tlb_user = native_flush_tlb, | 419 | .flush_tlb_user = native_flush_tlb, |
347 | .flush_tlb_kernel = native_flush_tlb_global, | 420 | .flush_tlb_kernel = native_flush_tlb_global, |
348 | .flush_tlb_single = native_flush_tlb_single, | 421 | .flush_tlb_single = native_flush_tlb_single, |
@@ -381,12 +454,19 @@ struct paravirt_ops paravirt_ops = { | |||
381 | .make_pte = native_make_pte, | 454 | .make_pte = native_make_pte, |
382 | .make_pgd = native_make_pgd, | 455 | .make_pgd = native_make_pgd, |
383 | 456 | ||
384 | .irq_enable_sysexit = native_irq_enable_sysexit, | ||
385 | .iret = native_iret, | ||
386 | |||
387 | .dup_mmap = paravirt_nop, | 457 | .dup_mmap = paravirt_nop, |
388 | .exit_mmap = paravirt_nop, | 458 | .exit_mmap = paravirt_nop, |
389 | .activate_mm = paravirt_nop, | 459 | .activate_mm = paravirt_nop, |
460 | |||
461 | .lazy_mode = { | ||
462 | .enter = paravirt_nop, | ||
463 | .leave = paravirt_nop, | ||
464 | }, | ||
390 | }; | 465 | }; |
391 | 466 | ||
392 | EXPORT_SYMBOL(paravirt_ops); | 467 | EXPORT_SYMBOL_GPL(pv_time_ops); |
468 | EXPORT_SYMBOL_GPL(pv_cpu_ops); | ||
469 | EXPORT_SYMBOL_GPL(pv_mmu_ops); | ||
470 | EXPORT_SYMBOL_GPL(pv_apic_ops); | ||
471 | EXPORT_SYMBOL_GPL(pv_info); | ||
472 | EXPORT_SYMBOL (pv_irq_ops); | ||