author      Glauber de Oliveira Costa <gcosta@redhat.com>    2008-01-30 07:32:04 -0500
committer   Ingo Molnar <mingo@elte.hu>                      2008-01-30 07:32:04 -0500
commit      b1df07bd6674a84fbd9248759dc3fa3ff5c78e5b (patch)
tree        e9cb1ffbff14cfbb94b869b05e96a7dbc77ed0cf /arch/x86/kernel/paravirt.c
parent      c6334593c61c71ab2e666c015eef13995736f49a (diff)
x86: change paravirt_32.c name
This patch renames paravirt_32.c to paravirt.c. The goal
is to have paravirt support on x86_64, so we do it in a common file.
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel/paravirt.c')
-rw-r--r--   arch/x86/kernel/paravirt.c   475
1 file changed, 475 insertions, 0 deletions
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
new file mode 100644
index 000000000000..1a170877f46c
--- /dev/null
+++ b/arch/x86/kernel/paravirt.c
@@ -0,0 +1,475 @@
/* Paravirtualization interfaces
   Copyright (C) 2006 Rusty Russell IBM Corporation

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

   2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
*/

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/highmem.h>

#include <asm/bug.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/delay.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>

/* nop stub */
void _paravirt_nop(void)
{
}
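
/*
 * The paravirt_nop used as the default for many of the ops below resolves
 * to this empty stub; paravirt_patch_default() recognizes it and nops out
 * the whole call site at patch time.
 */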

static void __init default_banner(void)
{
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
}

char *memory_setup(void)
{
        return pv_init_ops.memory_setup();
}

/* Simple instruction patching code. */
#define DEF_NATIVE(ops, name, code)                                     \
        extern const char start_##ops##_##name[], end_##ops##_##name[]; \
        asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")

DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
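
/*
 * Each DEF_NATIVE above places the raw native instruction bytes between a
 * start_<ops>_<name>/end_<ops>_<name> label pair.  native_patch() below
 * copies those bytes straight over the matching call site, so on bare
 * hardware these operations cost no indirect call at all.
 */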

/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };

static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                             unsigned long addr, unsigned len)
{
        const unsigned char *start, *end;
        unsigned ret;

        switch(type) {
#define SITE(ops, x)                                            \
        case PARAVIRT_PATCH(ops.x):                             \
                start = start_##ops##_##x;                      \
                end = end_##ops##_##x;                          \
                goto patch_site

        SITE(pv_irq_ops, irq_disable);
        SITE(pv_irq_ops, irq_enable);
        SITE(pv_irq_ops, restore_fl);
        SITE(pv_irq_ops, save_fl);
        SITE(pv_cpu_ops, iret);
        SITE(pv_cpu_ops, irq_enable_syscall_ret);
        SITE(pv_mmu_ops, read_cr2);
        SITE(pv_mmu_ops, read_cr3);
        SITE(pv_mmu_ops, write_cr3);
        SITE(pv_cpu_ops, clts);
        SITE(pv_cpu_ops, read_tsc);
#undef SITE

        patch_site:
                ret = paravirt_patch_insns(ibuf, len, start, end);
                break;

        default:
                ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
                break;
        }

        return ret;
}

unsigned paravirt_patch_nop(void)
{
        return 0;
}

unsigned paravirt_patch_ignore(unsigned len)
{
        return len;
}

struct branch {
        unsigned char opcode;
        u32 delta;
} __attribute__((packed));

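/*
 * Both patchers below emit the 5-byte rel32 form of the instruction: a
 * one-byte opcode (0xe8 for call, 0xe9 for jmp) followed by a 32-bit
 * displacement measured from the end of the instruction, which is why the
 * delta is computed against addr + 5.
 */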
unsigned paravirt_patch_call(void *insnbuf,
                             const void *target, u16 tgt_clobbers,
                             unsigned long addr, u16 site_clobbers,
                             unsigned len)
{
        struct branch *b = insnbuf;
        unsigned long delta = (unsigned long)target - (addr+5);

        if (tgt_clobbers & ~site_clobbers)
                return len;     /* target would clobber too much for this site */
        if (len < 5)
                return len;     /* call too long for patch site */

        b->opcode = 0xe8; /* call */
        b->delta = delta;
        BUILD_BUG_ON(sizeof(*b) != 5);

        return 5;
}

unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
                            unsigned long addr, unsigned len)
{
        struct branch *b = insnbuf;
        unsigned long delta = (unsigned long)target - (addr+5);

        if (len < 5)
                return len;     /* jmp too long for patch site */

        b->opcode = 0xe9;       /* jmp */
        b->delta = delta;

        return 5;
}

/* Neat trick to map patch type back to the call within the
 * corresponding structure. */
static void *get_call_destination(u8 type)
{
        struct paravirt_patch_template tmpl = {
                .pv_init_ops = pv_init_ops,
                .pv_time_ops = pv_time_ops,
                .pv_cpu_ops = pv_cpu_ops,
                .pv_irq_ops = pv_irq_ops,
                .pv_apic_ops = pv_apic_ops,
                .pv_mmu_ops = pv_mmu_ops,
        };
        return *((void **)&tmpl + type);
}
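
/*
 * This works because struct paravirt_patch_template is just the pv_*_ops
 * structures laid out back to back, and the patch type is the pointer-sized
 * offset of an op within that template (see PARAVIRT_PATCH), so it can be
 * used directly to index an array of function pointers.
 */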

unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
                                unsigned long addr, unsigned len)
{
        void *opfunc = get_call_destination(type);
        unsigned ret;

        if (opfunc == NULL)
                /* If there's no function, patch it with a ud2a (BUG) */
                ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
        else if (opfunc == paravirt_nop)
                /* If the operation is a nop, then nop the callsite */
                ret = paravirt_patch_nop();
        else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
                 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
                /* If operation requires a jmp, then jmp */
                ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
        else
                /* Otherwise call the function; assume target could
                   clobber any caller-save reg */
                ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY,
                                          addr, clobbers, len);

        return ret;
}

unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
                              const char *start, const char *end)
{
        unsigned insn_len = end - start;

        if (insn_len > len || start == NULL)
                insn_len = len;
        else
                memcpy(insnbuf, start, insn_len);

        return insn_len;
}
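
/*
 * If the native replacement is larger than the patch site (or there is no
 * replacement at all), nothing is copied and the full site length is
 * returned, so the call site is left untouched.
 */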

void init_IRQ(void)
{
        pv_irq_ops.init_IRQ();
}

static void native_flush_tlb(void)
{
        __native_flush_tlb();
}

/*
 * Global pages have to be flushed a bit differently. Not a real
 * performance problem because this does not happen often.
 */
static void native_flush_tlb_global(void)
{
        __native_flush_tlb_global();
}

static void native_flush_tlb_single(unsigned long addr)
{
        __native_flush_tlb_single(addr);
}

/* These are in entry.S */
extern void native_iret(void);
extern void native_irq_enable_syscall_ret(void);

static int __init print_banner(void)
{
        pv_init_ops.banner();
        return 0;
}
core_initcall(print_banner);

static struct resource reserve_ioports = {
        .start = 0,
        .end = IO_SPACE_LIMIT,
        .name = "paravirt-ioport",
        .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};

static struct resource reserve_iomem = {
        .start = 0,
        .end = -1,
        .name = "paravirt-iomem",
        .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};

/*
 * Reserve the whole legacy IO space to prevent any legacy drivers
 * from wasting time probing for their hardware.  This is a fairly
 * brute-force approach to disabling all non-virtual drivers.
 *
 * Note that this must be called very early to have any effect.
 */
int paravirt_disable_iospace(void)
{
        int ret;

        ret = request_resource(&ioport_resource, &reserve_ioports);
        if (ret == 0) {
                ret = request_resource(&iomem_resource, &reserve_iomem);
                if (ret)
                        release_resource(&reserve_ioports);
        }

        return ret;
}
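
/*
 * Note the unwind above: if claiming the MMIO range fails after the port
 * range succeeded, the port reservation is released again so nothing is
 * left half-reserved.
 */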

static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;

static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
        BUG_ON(preemptible());

        x86_write_percpu(paravirt_lazy_mode, mode);
}

void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode);
        BUG_ON(preemptible());

        x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}

void paravirt_enter_lazy_mmu(void)
{
        enter_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_leave_lazy_mmu(void)
{
        paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_enter_lazy_cpu(void)
{
        enter_lazy(PARAVIRT_LAZY_CPU);
}

void paravirt_leave_lazy_cpu(void)
{
        paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
}

enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
        return x86_read_percpu(paravirt_lazy_mode);
}
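
/*
 * The lazy-mode helpers above let a hypervisor back end batch a run of MMU
 * or context-switch updates and flush them with fewer hypercalls.  On bare
 * hardware the lazy_mode.enter/leave hooks in pv_cpu_ops and pv_mmu_ops
 * below stay paravirt_nop, so lazy mode is never actually entered.
 */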

struct pv_info pv_info = {
        .name = "bare hardware",
        .paravirt_enabled = 0,
        .kernel_rpl = 0,
        .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
};

struct pv_init_ops pv_init_ops = {
        .patch = native_patch,
        .banner = default_banner,
        .arch_setup = paravirt_nop,
        .memory_setup = machine_specific_memory_setup,
};

struct pv_time_ops pv_time_ops = {
        .time_init = hpet_time_init,
        .get_wallclock = native_get_wallclock,
        .set_wallclock = native_set_wallclock,
        .sched_clock = native_sched_clock,
        .get_cpu_khz = native_calculate_cpu_khz,
};

struct pv_irq_ops pv_irq_ops = {
        .init_IRQ = native_init_IRQ,
        .save_fl = native_save_fl,
        .restore_fl = native_restore_fl,
        .irq_disable = native_irq_disable,
        .irq_enable = native_irq_enable,
        .safe_halt = native_safe_halt,
        .halt = native_halt,
};

struct pv_cpu_ops pv_cpu_ops = {
        .cpuid = native_cpuid,
        .get_debugreg = native_get_debugreg,
        .set_debugreg = native_set_debugreg,
        .clts = native_clts,
        .read_cr0 = native_read_cr0,
        .write_cr0 = native_write_cr0,
        .read_cr4 = native_read_cr4,
        .read_cr4_safe = native_read_cr4_safe,
        .write_cr4 = native_write_cr4,
        .wbinvd = native_wbinvd,
        .read_msr = native_read_msr_safe,
        .write_msr = native_write_msr_safe,
        .read_tsc = native_read_tsc,
        .read_pmc = native_read_pmc,
        .load_tr_desc = native_load_tr_desc,
        .set_ldt = native_set_ldt,
        .load_gdt = native_load_gdt,
        .load_idt = native_load_idt,
        .store_gdt = native_store_gdt,
        .store_idt = native_store_idt,
        .store_tr = native_store_tr,
        .load_tls = native_load_tls,
        .write_ldt_entry = native_write_ldt_entry,
        .write_gdt_entry = native_write_gdt_entry,
        .write_idt_entry = native_write_idt_entry,
        .load_sp0 = native_load_sp0,

        .irq_enable_syscall_ret = native_irq_enable_syscall_ret,
        .iret = native_iret,

        .set_iopl_mask = native_set_iopl_mask,
        .io_delay = native_io_delay,

        .lazy_mode = {
                .enter = paravirt_nop,
                .leave = paravirt_nop,
        },
};

struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
        .apic_write = native_apic_write,
        .apic_write_atomic = native_apic_write_atomic,
        .apic_read = native_apic_read,
        .setup_boot_clock = setup_boot_APIC_clock,
        .setup_secondary_clock = setup_secondary_APIC_clock,
        .startup_ipi_hook = paravirt_nop,
#endif
};

struct pv_mmu_ops pv_mmu_ops = {
        .pagetable_setup_start = native_pagetable_setup_start,
        .pagetable_setup_done = native_pagetable_setup_done,

        .read_cr2 = native_read_cr2,
        .write_cr2 = native_write_cr2,
        .read_cr3 = native_read_cr3,
        .write_cr3 = native_write_cr3,

        .flush_tlb_user = native_flush_tlb,
        .flush_tlb_kernel = native_flush_tlb_global,
        .flush_tlb_single = native_flush_tlb_single,
        .flush_tlb_others = native_flush_tlb_others,

        .alloc_pt = paravirt_nop,
        .alloc_pd = paravirt_nop,
        .alloc_pd_clone = paravirt_nop,
        .release_pt = paravirt_nop,
        .release_pd = paravirt_nop,

        .set_pte = native_set_pte,
        .set_pte_at = native_set_pte_at,
        .set_pmd = native_set_pmd,
        .pte_update = paravirt_nop,
        .pte_update_defer = paravirt_nop,

#ifdef CONFIG_HIGHPTE
        .kmap_atomic_pte = kmap_atomic,
#endif

#ifdef CONFIG_X86_PAE
        .set_pte_atomic = native_set_pte_atomic,
        .set_pte_present = native_set_pte_present,
        .set_pud = native_set_pud,
        .pte_clear = native_pte_clear,
        .pmd_clear = native_pmd_clear,

        .pmd_val = native_pmd_val,
        .make_pmd = native_make_pmd,
#endif

        .pte_val = native_pte_val,
        .pgd_val = native_pgd_val,

        .make_pte = native_make_pte,
        .make_pgd = native_make_pgd,

        .dup_mmap = paravirt_nop,
        .exit_mmap = paravirt_nop,
        .activate_mm = paravirt_nop,

        .lazy_mode = {
                .enter = paravirt_nop,
                .leave = paravirt_nop,
        },
};

EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL    (pv_cpu_ops);
EXPORT_SYMBOL    (pv_mmu_ops);
EXPORT_SYMBOL_GPL(pv_apic_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL    (pv_irq_ops);