aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/paravirt_32.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/paravirt_32.c')
-rw-r--r--arch/x86/kernel/paravirt_32.c224
1 files changed, 152 insertions, 72 deletions
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c
index 739cfb207dd7..6a80d67c2121 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt_32.c
@@ -42,32 +42,33 @@ void _paravirt_nop(void)
42static void __init default_banner(void) 42static void __init default_banner(void)
43{ 43{
44 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 44 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
45 paravirt_ops.name); 45 pv_info.name);
46} 46}
47 47
48char *memory_setup(void) 48char *memory_setup(void)
49{ 49{
50 return paravirt_ops.memory_setup(); 50 return pv_init_ops.memory_setup();
51} 51}
52 52
53/* Simple instruction patching code. */ 53/* Simple instruction patching code. */
54#define DEF_NATIVE(name, code) \ 54#define DEF_NATIVE(ops, name, code) \
55 extern const char start_##name[], end_##name[]; \ 55 extern const char start_##ops##_##name[], end_##ops##_##name[]; \
56 asm("start_" #name ": " code "; end_" #name ":") 56 asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
57 57
58DEF_NATIVE(irq_disable, "cli"); 58DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
59DEF_NATIVE(irq_enable, "sti"); 59DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
60DEF_NATIVE(restore_fl, "push %eax; popf"); 60DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
61DEF_NATIVE(save_fl, "pushf; pop %eax"); 61DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
62DEF_NATIVE(iret, "iret"); 62DEF_NATIVE(pv_cpu_ops, iret, "iret");
63DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); 63DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
64DEF_NATIVE(read_cr2, "mov %cr2, %eax"); 64DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
65DEF_NATIVE(write_cr3, "mov %eax, %cr3"); 65DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
66DEF_NATIVE(read_cr3, "mov %cr3, %eax"); 66DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
67DEF_NATIVE(clts, "clts"); 67DEF_NATIVE(pv_cpu_ops, clts, "clts");
68DEF_NATIVE(read_tsc, "rdtsc"); 68DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
69 69
70DEF_NATIVE(ud2a, "ud2a"); 70/* Undefined instruction for dealing with missing ops pointers. */
71static const unsigned char ud2a[] = { 0x0f, 0x0b };
71 72
72static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 73static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
73 unsigned long addr, unsigned len) 74 unsigned long addr, unsigned len)
@@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
76 unsigned ret; 77 unsigned ret;
77 78
78 switch(type) { 79 switch(type) {
79#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site 80#define SITE(ops, x) \
80 SITE(irq_disable); 81 case PARAVIRT_PATCH(ops.x): \
81 SITE(irq_enable); 82 start = start_##ops##_##x; \
82 SITE(restore_fl); 83 end = end_##ops##_##x; \
83 SITE(save_fl); 84 goto patch_site
84 SITE(iret); 85
85 SITE(irq_enable_sysexit); 86 SITE(pv_irq_ops, irq_disable);
86 SITE(read_cr2); 87 SITE(pv_irq_ops, irq_enable);
87 SITE(read_cr3); 88 SITE(pv_irq_ops, restore_fl);
88 SITE(write_cr3); 89 SITE(pv_irq_ops, save_fl);
89 SITE(clts); 90 SITE(pv_cpu_ops, iret);
90 SITE(read_tsc); 91 SITE(pv_cpu_ops, irq_enable_sysexit);
92 SITE(pv_mmu_ops, read_cr2);
93 SITE(pv_mmu_ops, read_cr3);
94 SITE(pv_mmu_ops, write_cr3);
95 SITE(pv_cpu_ops, clts);
96 SITE(pv_cpu_ops, read_tsc);
91#undef SITE 97#undef SITE
92 98
93 patch_site: 99 patch_site:
94 ret = paravirt_patch_insns(ibuf, len, start, end); 100 ret = paravirt_patch_insns(ibuf, len, start, end);
95 break; 101 break;
96 102
97 case PARAVIRT_PATCH(make_pgd):
98 case PARAVIRT_PATCH(make_pte):
99 case PARAVIRT_PATCH(pgd_val):
100 case PARAVIRT_PATCH(pte_val):
101#ifdef CONFIG_X86_PAE
102 case PARAVIRT_PATCH(make_pmd):
103 case PARAVIRT_PATCH(pmd_val):
104#endif
105 /* These functions end up returning exactly what
106 they're passed, in the same registers. */
107 ret = paravirt_patch_nop();
108 break;
109
110 default: 103 default:
111 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); 104 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
112 break; 105 break;
@@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf,
150 return 5; 143 return 5;
151} 144}
152 145
153unsigned paravirt_patch_jmp(const void *target, void *insnbuf, 146unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
154 unsigned long addr, unsigned len) 147 unsigned long addr, unsigned len)
155{ 148{
156 struct branch *b = insnbuf; 149 struct branch *b = insnbuf;
@@ -165,22 +158,37 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
165 return 5; 158 return 5;
166} 159}
167 160
161/* Neat trick to map patch type back to the call within the
162 * corresponding structure. */
163static void *get_call_destination(u8 type)
164{
165 struct paravirt_patch_template tmpl = {
166 .pv_init_ops = pv_init_ops,
167 .pv_time_ops = pv_time_ops,
168 .pv_cpu_ops = pv_cpu_ops,
169 .pv_irq_ops = pv_irq_ops,
170 .pv_apic_ops = pv_apic_ops,
171 .pv_mmu_ops = pv_mmu_ops,
172 };
173 return *((void **)&tmpl + type);
174}
175
168unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, 176unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
169 unsigned long addr, unsigned len) 177 unsigned long addr, unsigned len)
170{ 178{
171 void *opfunc = *((void **)&paravirt_ops + type); 179 void *opfunc = get_call_destination(type);
172 unsigned ret; 180 unsigned ret;
173 181
174 if (opfunc == NULL) 182 if (opfunc == NULL)
175 /* If there's no function, patch it with a ud2a (BUG) */ 183 /* If there's no function, patch it with a ud2a (BUG) */
176 ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); 184 ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
177 else if (opfunc == paravirt_nop) 185 else if (opfunc == paravirt_nop)
178 /* If the operation is a nop, then nop the callsite */ 186 /* If the operation is a nop, then nop the callsite */
179 ret = paravirt_patch_nop(); 187 ret = paravirt_patch_nop();
180 else if (type == PARAVIRT_PATCH(iret) || 188 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
181 type == PARAVIRT_PATCH(irq_enable_sysexit)) 189 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
182 /* If operation requires a jmp, then jmp */ 190 /* If operation requires a jmp, then jmp */
183 ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); 191 ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
184 else 192 else
185 /* Otherwise call the function; assume target could 193 /* Otherwise call the function; assume target could
186 clobber any caller-save reg */ 194 clobber any caller-save reg */
@@ -205,7 +213,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
205 213
206void init_IRQ(void) 214void init_IRQ(void)
207{ 215{
208 paravirt_ops.init_IRQ(); 216 pv_irq_ops.init_IRQ();
209} 217}
210 218
211static void native_flush_tlb(void) 219static void native_flush_tlb(void)
@@ -233,7 +241,7 @@ extern void native_irq_enable_sysexit(void);
233 241
234static int __init print_banner(void) 242static int __init print_banner(void)
235{ 243{
236 paravirt_ops.banner(); 244 pv_init_ops.banner();
237 return 0; 245 return 0;
238} 246}
239core_initcall(print_banner); 247core_initcall(print_banner);
@@ -273,47 +281,96 @@ int paravirt_disable_iospace(void)
273 return ret; 281 return ret;
274} 282}
275 283
276struct paravirt_ops paravirt_ops = { 284static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
285
286static inline void enter_lazy(enum paravirt_lazy_mode mode)
287{
288 BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
289 BUG_ON(preemptible());
290
291 x86_write_percpu(paravirt_lazy_mode, mode);
292}
293
294void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
295{
296 BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode);
297 BUG_ON(preemptible());
298
299 x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
300}
301
302void paravirt_enter_lazy_mmu(void)
303{
304 enter_lazy(PARAVIRT_LAZY_MMU);
305}
306
307void paravirt_leave_lazy_mmu(void)
308{
309 paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
310}
311
312void paravirt_enter_lazy_cpu(void)
313{
314 enter_lazy(PARAVIRT_LAZY_CPU);
315}
316
317void paravirt_leave_lazy_cpu(void)
318{
319 paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
320}
321
322enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
323{
324 return x86_read_percpu(paravirt_lazy_mode);
325}
326
327struct pv_info pv_info = {
277 .name = "bare hardware", 328 .name = "bare hardware",
278 .paravirt_enabled = 0, 329 .paravirt_enabled = 0,
279 .kernel_rpl = 0, 330 .kernel_rpl = 0,
280 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ 331 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
332};
281 333
282 .patch = native_patch, 334struct pv_init_ops pv_init_ops = {
335 .patch = native_patch,
283 .banner = default_banner, 336 .banner = default_banner,
284 .arch_setup = paravirt_nop, 337 .arch_setup = paravirt_nop,
285 .memory_setup = machine_specific_memory_setup, 338 .memory_setup = machine_specific_memory_setup,
339};
340
341struct pv_time_ops pv_time_ops = {
342 .time_init = hpet_time_init,
286 .get_wallclock = native_get_wallclock, 343 .get_wallclock = native_get_wallclock,
287 .set_wallclock = native_set_wallclock, 344 .set_wallclock = native_set_wallclock,
288 .time_init = hpet_time_init, 345 .sched_clock = native_sched_clock,
346 .get_cpu_khz = native_calculate_cpu_khz,
347};
348
349struct pv_irq_ops pv_irq_ops = {
289 .init_IRQ = native_init_IRQ, 350 .init_IRQ = native_init_IRQ,
351 .save_fl = native_save_fl,
352 .restore_fl = native_restore_fl,
353 .irq_disable = native_irq_disable,
354 .irq_enable = native_irq_enable,
355 .safe_halt = native_safe_halt,
356 .halt = native_halt,
357};
290 358
359struct pv_cpu_ops pv_cpu_ops = {
291 .cpuid = native_cpuid, 360 .cpuid = native_cpuid,
292 .get_debugreg = native_get_debugreg, 361 .get_debugreg = native_get_debugreg,
293 .set_debugreg = native_set_debugreg, 362 .set_debugreg = native_set_debugreg,
294 .clts = native_clts, 363 .clts = native_clts,
295 .read_cr0 = native_read_cr0, 364 .read_cr0 = native_read_cr0,
296 .write_cr0 = native_write_cr0, 365 .write_cr0 = native_write_cr0,
297 .read_cr2 = native_read_cr2,
298 .write_cr2 = native_write_cr2,
299 .read_cr3 = native_read_cr3,
300 .write_cr3 = native_write_cr3,
301 .read_cr4 = native_read_cr4, 366 .read_cr4 = native_read_cr4,
302 .read_cr4_safe = native_read_cr4_safe, 367 .read_cr4_safe = native_read_cr4_safe,
303 .write_cr4 = native_write_cr4, 368 .write_cr4 = native_write_cr4,
304 .save_fl = native_save_fl,
305 .restore_fl = native_restore_fl,
306 .irq_disable = native_irq_disable,
307 .irq_enable = native_irq_enable,
308 .safe_halt = native_safe_halt,
309 .halt = native_halt,
310 .wbinvd = native_wbinvd, 369 .wbinvd = native_wbinvd,
311 .read_msr = native_read_msr_safe, 370 .read_msr = native_read_msr_safe,
312 .write_msr = native_write_msr_safe, 371 .write_msr = native_write_msr_safe,
313 .read_tsc = native_read_tsc, 372 .read_tsc = native_read_tsc,
314 .read_pmc = native_read_pmc, 373 .read_pmc = native_read_pmc,
315 .sched_clock = native_sched_clock,
316 .get_cpu_khz = native_calculate_cpu_khz,
317 .load_tr_desc = native_load_tr_desc, 374 .load_tr_desc = native_load_tr_desc,
318 .set_ldt = native_set_ldt, 375 .set_ldt = native_set_ldt,
319 .load_gdt = native_load_gdt, 376 .load_gdt = native_load_gdt,
@@ -327,9 +384,19 @@ struct paravirt_ops paravirt_ops = {
327 .write_idt_entry = write_dt_entry, 384 .write_idt_entry = write_dt_entry,
328 .load_esp0 = native_load_esp0, 385 .load_esp0 = native_load_esp0,
329 386
387 .irq_enable_sysexit = native_irq_enable_sysexit,
388 .iret = native_iret,
389
330 .set_iopl_mask = native_set_iopl_mask, 390 .set_iopl_mask = native_set_iopl_mask,
331 .io_delay = native_io_delay, 391 .io_delay = native_io_delay,
332 392
393 .lazy_mode = {
394 .enter = paravirt_nop,
395 .leave = paravirt_nop,
396 },
397};
398
399struct pv_apic_ops pv_apic_ops = {
333#ifdef CONFIG_X86_LOCAL_APIC 400#ifdef CONFIG_X86_LOCAL_APIC
334 .apic_write = native_apic_write, 401 .apic_write = native_apic_write,
335 .apic_write_atomic = native_apic_write_atomic, 402 .apic_write_atomic = native_apic_write_atomic,
@@ -338,11 +405,17 @@ struct paravirt_ops paravirt_ops = {
338 .setup_secondary_clock = setup_secondary_APIC_clock, 405 .setup_secondary_clock = setup_secondary_APIC_clock,
339 .startup_ipi_hook = paravirt_nop, 406 .startup_ipi_hook = paravirt_nop,
340#endif 407#endif
341 .set_lazy_mode = paravirt_nop, 408};
342 409
410struct pv_mmu_ops pv_mmu_ops = {
343 .pagetable_setup_start = native_pagetable_setup_start, 411 .pagetable_setup_start = native_pagetable_setup_start,
344 .pagetable_setup_done = native_pagetable_setup_done, 412 .pagetable_setup_done = native_pagetable_setup_done,
345 413
414 .read_cr2 = native_read_cr2,
415 .write_cr2 = native_write_cr2,
416 .read_cr3 = native_read_cr3,
417 .write_cr3 = native_write_cr3,
418
346 .flush_tlb_user = native_flush_tlb, 419 .flush_tlb_user = native_flush_tlb,
347 .flush_tlb_kernel = native_flush_tlb_global, 420 .flush_tlb_kernel = native_flush_tlb_global,
348 .flush_tlb_single = native_flush_tlb_single, 421 .flush_tlb_single = native_flush_tlb_single,
@@ -381,12 +454,19 @@ struct paravirt_ops paravirt_ops = {
381 .make_pte = native_make_pte, 454 .make_pte = native_make_pte,
382 .make_pgd = native_make_pgd, 455 .make_pgd = native_make_pgd,
383 456
384 .irq_enable_sysexit = native_irq_enable_sysexit,
385 .iret = native_iret,
386
387 .dup_mmap = paravirt_nop, 457 .dup_mmap = paravirt_nop,
388 .exit_mmap = paravirt_nop, 458 .exit_mmap = paravirt_nop,
389 .activate_mm = paravirt_nop, 459 .activate_mm = paravirt_nop,
460
461 .lazy_mode = {
462 .enter = paravirt_nop,
463 .leave = paravirt_nop,
464 },
390}; 465};
391 466
392EXPORT_SYMBOL(paravirt_ops); 467EXPORT_SYMBOL_GPL(pv_time_ops);
468EXPORT_SYMBOL_GPL(pv_cpu_ops);
469EXPORT_SYMBOL_GPL(pv_mmu_ops);
470EXPORT_SYMBOL_GPL(pv_apic_ops);
471EXPORT_SYMBOL_GPL(pv_info);
472EXPORT_SYMBOL (pv_irq_ops);