diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-09-01 06:13:30 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-09-01 06:13:30 -0400 |
commit | c931aaf0e1b11862077f6884b2cec22833080e23 (patch) | |
tree | a54b0ff24dd4b09fe8b3bff62b7c5ce82e197387 /arch/x86/include/asm/paravirt.h | |
parent | ff55df53dfdd338906c8ba9d1f4a759b86b869d5 (diff) | |
parent | ac5672f82c39ff2f8dce81bf3e68b1dfc41f366f (diff) |
Merge branch 'x86/paravirt' into x86/cpu
Conflicts:
arch/x86/include/asm/paravirt.h
Manual merge:
arch/x86/include/asm/paravirt_types.h
Merge reason: x86/paravirt conflicts non-trivially with x86/cpu,
resolve it.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/include/asm/paravirt.h')
-rw-r--r-- | arch/x86/include/asm/paravirt.h | 712 |
1 files changed, 1 insertions, 711 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 11574934a994..40d6586af25b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -7,690 +7,11 @@ | |||
7 | #include <asm/pgtable_types.h> | 7 | #include <asm/pgtable_types.h> |
8 | #include <asm/asm.h> | 8 | #include <asm/asm.h> |
9 | 9 | ||
10 | /* Bitmask of what can be clobbered: usually at least eax. */ | 10 | #include <asm/paravirt_types.h> |
11 | #define CLBR_NONE 0 | ||
12 | #define CLBR_EAX (1 << 0) | ||
13 | #define CLBR_ECX (1 << 1) | ||
14 | #define CLBR_EDX (1 << 2) | ||
15 | #define CLBR_EDI (1 << 3) | ||
16 | |||
17 | #ifdef CONFIG_X86_32 | ||
18 | /* CLBR_ANY should match all regs platform has. For i386, that's just it */ | ||
19 | #define CLBR_ANY ((1 << 4) - 1) | ||
20 | |||
21 | #define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) | ||
22 | #define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) | ||
23 | #define CLBR_SCRATCH (0) | ||
24 | #else | ||
25 | #define CLBR_RAX CLBR_EAX | ||
26 | #define CLBR_RCX CLBR_ECX | ||
27 | #define CLBR_RDX CLBR_EDX | ||
28 | #define CLBR_RDI CLBR_EDI | ||
29 | #define CLBR_RSI (1 << 4) | ||
30 | #define CLBR_R8 (1 << 5) | ||
31 | #define CLBR_R9 (1 << 6) | ||
32 | #define CLBR_R10 (1 << 7) | ||
33 | #define CLBR_R11 (1 << 8) | ||
34 | |||
35 | #define CLBR_ANY ((1 << 9) - 1) | ||
36 | |||
37 | #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ | ||
38 | CLBR_RCX | CLBR_R8 | CLBR_R9) | ||
39 | #define CLBR_RET_REG (CLBR_RAX) | ||
40 | #define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) | ||
41 | |||
42 | #include <asm/desc_defs.h> | ||
43 | #endif /* X86_64 */ | ||
44 | |||
45 | #define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) | ||
46 | 11 | ||
47 | #ifndef __ASSEMBLY__ | 12 | #ifndef __ASSEMBLY__ |
48 | #include <linux/types.h> | 13 | #include <linux/types.h> |
49 | #include <linux/cpumask.h> | 14 | #include <linux/cpumask.h> |
50 | #include <asm/kmap_types.h> | ||
51 | #include <asm/desc_defs.h> | ||
52 | |||
53 | struct page; | ||
54 | struct thread_struct; | ||
55 | struct desc_ptr; | ||
56 | struct tss_struct; | ||
57 | struct mm_struct; | ||
58 | struct desc_struct; | ||
59 | struct task_struct; | ||
60 | |||
61 | /* | ||
62 | * Wrapper type for pointers to code which uses the non-standard | ||
63 | * calling convention. See PV_CALL_SAVE_REGS_THUNK below. | ||
64 | */ | ||
65 | struct paravirt_callee_save { | ||
66 | void *func; | ||
67 | }; | ||
68 | |||
69 | /* general info */ | ||
70 | struct pv_info { | ||
71 | unsigned int kernel_rpl; | ||
72 | int shared_kernel_pmd; | ||
73 | int paravirt_enabled; | ||
74 | const char *name; | ||
75 | }; | ||
76 | |||
77 | struct pv_init_ops { | ||
78 | /* | ||
79 | * Patch may replace one of the defined code sequences with | ||
80 | * arbitrary code, subject to the same register constraints. | ||
81 | * This generally means the code is not free to clobber any | ||
82 | * registers other than EAX. The patch function should return | ||
83 | * the number of bytes of code generated, as we nop pad the | ||
84 | * rest in generic code. | ||
85 | */ | ||
86 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, | ||
87 | unsigned long addr, unsigned len); | ||
88 | |||
89 | /* Basic arch-specific setup */ | ||
90 | void (*arch_setup)(void); | ||
91 | char *(*memory_setup)(void); | ||
92 | void (*post_allocator_init)(void); | ||
93 | |||
94 | /* Print a banner to identify the environment */ | ||
95 | void (*banner)(void); | ||
96 | }; | ||
97 | |||
98 | |||
99 | struct pv_lazy_ops { | ||
100 | /* Set deferred update mode, used for batching operations. */ | ||
101 | void (*enter)(void); | ||
102 | void (*leave)(void); | ||
103 | }; | ||
104 | |||
105 | struct pv_time_ops { | ||
106 | void (*time_init)(void); | ||
107 | |||
108 | /* Get and set time of day */ | ||
109 | unsigned long (*get_wallclock)(void); | ||
110 | int (*set_wallclock)(unsigned long); | ||
111 | |||
112 | unsigned long long (*sched_clock)(void); | ||
113 | unsigned long (*get_tsc_khz)(void); | ||
114 | }; | ||
115 | |||
116 | struct pv_cpu_ops { | ||
117 | /* hooks for various privileged instructions */ | ||
118 | unsigned long (*get_debugreg)(int regno); | ||
119 | void (*set_debugreg)(int regno, unsigned long value); | ||
120 | |||
121 | void (*clts)(void); | ||
122 | |||
123 | unsigned long (*read_cr0)(void); | ||
124 | void (*write_cr0)(unsigned long); | ||
125 | |||
126 | unsigned long (*read_cr4_safe)(void); | ||
127 | unsigned long (*read_cr4)(void); | ||
128 | void (*write_cr4)(unsigned long); | ||
129 | |||
130 | #ifdef CONFIG_X86_64 | ||
131 | unsigned long (*read_cr8)(void); | ||
132 | void (*write_cr8)(unsigned long); | ||
133 | #endif | ||
134 | |||
135 | /* Segment descriptor handling */ | ||
136 | void (*load_tr_desc)(void); | ||
137 | void (*load_gdt)(const struct desc_ptr *); | ||
138 | void (*load_idt)(const struct desc_ptr *); | ||
139 | void (*store_gdt)(struct desc_ptr *); | ||
140 | void (*store_idt)(struct desc_ptr *); | ||
141 | void (*set_ldt)(const void *desc, unsigned entries); | ||
142 | unsigned long (*store_tr)(void); | ||
143 | void (*load_tls)(struct thread_struct *t, unsigned int cpu); | ||
144 | #ifdef CONFIG_X86_64 | ||
145 | void (*load_gs_index)(unsigned int idx); | ||
146 | #endif | ||
147 | void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, | ||
148 | const void *desc); | ||
149 | void (*write_gdt_entry)(struct desc_struct *, | ||
150 | int entrynum, const void *desc, int size); | ||
151 | void (*write_idt_entry)(gate_desc *, | ||
152 | int entrynum, const gate_desc *gate); | ||
153 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | ||
154 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | ||
155 | |||
156 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | ||
157 | |||
158 | void (*set_iopl_mask)(unsigned mask); | ||
159 | |||
160 | void (*wbinvd)(void); | ||
161 | void (*io_delay)(void); | ||
162 | |||
163 | /* cpuid emulation, mostly so that caps bits can be disabled */ | ||
164 | void (*cpuid)(unsigned int *eax, unsigned int *ebx, | ||
165 | unsigned int *ecx, unsigned int *edx); | ||
166 | |||
167 | /* MSR, PMC and TSC operations. | ||
168 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | ||
169 | u64 (*read_msr)(unsigned int msr, int *err); | ||
170 | int (*rdmsr_regs)(u32 *regs); | ||
171 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | ||
172 | int (*wrmsr_regs)(u32 *regs); | ||
173 | |||
174 | u64 (*read_tsc)(void); | ||
175 | u64 (*read_pmc)(int counter); | ||
176 | unsigned long long (*read_tscp)(unsigned int *aux); | ||
177 | |||
178 | /* | ||
179 | * Atomically enable interrupts and return to userspace. This | ||
180 | * is only ever used to return to 32-bit processes; in a | ||
181 | * 64-bit kernel, it's used for 32-on-64 compat processes, but | ||
182 | * never native 64-bit processes. (Jump, not call.) | ||
183 | */ | ||
184 | void (*irq_enable_sysexit)(void); | ||
185 | |||
186 | /* | ||
187 | * Switch to usermode gs and return to 64-bit usermode using | ||
188 | * sysret. Only used in 64-bit kernels to return to 64-bit | ||
189 | * processes. Usermode register state, including %rsp, must | ||
190 | * already be restored. | ||
191 | */ | ||
192 | void (*usergs_sysret64)(void); | ||
193 | |||
194 | /* | ||
195 | * Switch to usermode gs and return to 32-bit usermode using | ||
196 | * sysret. Used to return to 32-on-64 compat processes. | ||
197 | * Other usermode register state, including %esp, must already | ||
198 | * be restored. | ||
199 | */ | ||
200 | void (*usergs_sysret32)(void); | ||
201 | |||
202 | /* Normal iret. Jump to this with the standard iret stack | ||
203 | frame set up. */ | ||
204 | void (*iret)(void); | ||
205 | |||
206 | void (*swapgs)(void); | ||
207 | |||
208 | void (*start_context_switch)(struct task_struct *prev); | ||
209 | void (*end_context_switch)(struct task_struct *next); | ||
210 | }; | ||
211 | |||
212 | struct pv_irq_ops { | ||
213 | void (*init_IRQ)(void); | ||
214 | |||
215 | /* | ||
216 | * Get/set interrupt state. save_fl and restore_fl are only | ||
217 | * expected to use X86_EFLAGS_IF; all other bits | ||
218 | * returned from save_fl are undefined, and may be ignored by | ||
219 | * restore_fl. | ||
220 | * | ||
221 | * NOTE: Callers of these functions expect the callee to preserve | ||
222 | * more registers than the standard C calling convention. | ||
223 | */ | ||
224 | struct paravirt_callee_save save_fl; | ||
225 | struct paravirt_callee_save restore_fl; | ||
226 | struct paravirt_callee_save irq_disable; | ||
227 | struct paravirt_callee_save irq_enable; | ||
228 | |||
229 | void (*safe_halt)(void); | ||
230 | void (*halt)(void); | ||
231 | |||
232 | #ifdef CONFIG_X86_64 | ||
233 | void (*adjust_exception_frame)(void); | ||
234 | #endif | ||
235 | }; | ||
236 | |||
237 | struct pv_apic_ops { | ||
238 | #ifdef CONFIG_X86_LOCAL_APIC | ||
239 | void (*setup_boot_clock)(void); | ||
240 | void (*setup_secondary_clock)(void); | ||
241 | |||
242 | void (*startup_ipi_hook)(int phys_apicid, | ||
243 | unsigned long start_eip, | ||
244 | unsigned long start_esp); | ||
245 | #endif | ||
246 | }; | ||
247 | |||
248 | struct pv_mmu_ops { | ||
249 | /* | ||
250 | * Called before/after init_mm pagetable setup. setup_start | ||
251 | * may reset %cr3, and may pre-install parts of the pagetable; | ||
252 | * pagetable setup is expected to preserve any existing | ||
253 | * mapping. | ||
254 | */ | ||
255 | void (*pagetable_setup_start)(pgd_t *pgd_base); | ||
256 | void (*pagetable_setup_done)(pgd_t *pgd_base); | ||
257 | |||
258 | unsigned long (*read_cr2)(void); | ||
259 | void (*write_cr2)(unsigned long); | ||
260 | |||
261 | unsigned long (*read_cr3)(void); | ||
262 | void (*write_cr3)(unsigned long); | ||
263 | |||
264 | /* | ||
265 | * Hooks for intercepting the creation/use/destruction of an | ||
266 | * mm_struct. | ||
267 | */ | ||
268 | void (*activate_mm)(struct mm_struct *prev, | ||
269 | struct mm_struct *next); | ||
270 | void (*dup_mmap)(struct mm_struct *oldmm, | ||
271 | struct mm_struct *mm); | ||
272 | void (*exit_mmap)(struct mm_struct *mm); | ||
273 | |||
274 | |||
275 | /* TLB operations */ | ||
276 | void (*flush_tlb_user)(void); | ||
277 | void (*flush_tlb_kernel)(void); | ||
278 | void (*flush_tlb_single)(unsigned long addr); | ||
279 | void (*flush_tlb_others)(const struct cpumask *cpus, | ||
280 | struct mm_struct *mm, | ||
281 | unsigned long va); | ||
282 | |||
283 | /* Hooks for allocating and freeing a pagetable top-level */ | ||
284 | int (*pgd_alloc)(struct mm_struct *mm); | ||
285 | void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); | ||
286 | |||
287 | /* | ||
288 | * Hooks for allocating/releasing pagetable pages when they're | ||
289 | * attached to a pagetable | ||
290 | */ | ||
291 | void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); | ||
292 | void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); | ||
293 | void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); | ||
294 | void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); | ||
295 | void (*release_pte)(unsigned long pfn); | ||
296 | void (*release_pmd)(unsigned long pfn); | ||
297 | void (*release_pud)(unsigned long pfn); | ||
298 | |||
299 | /* Pagetable manipulation functions */ | ||
300 | void (*set_pte)(pte_t *ptep, pte_t pteval); | ||
301 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, | ||
302 | pte_t *ptep, pte_t pteval); | ||
303 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | ||
304 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, | ||
305 | pte_t *ptep); | ||
306 | void (*pte_update_defer)(struct mm_struct *mm, | ||
307 | unsigned long addr, pte_t *ptep); | ||
308 | |||
309 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, | ||
310 | pte_t *ptep); | ||
311 | void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, | ||
312 | pte_t *ptep, pte_t pte); | ||
313 | |||
314 | struct paravirt_callee_save pte_val; | ||
315 | struct paravirt_callee_save make_pte; | ||
316 | |||
317 | struct paravirt_callee_save pgd_val; | ||
318 | struct paravirt_callee_save make_pgd; | ||
319 | |||
320 | #if PAGETABLE_LEVELS >= 3 | ||
321 | #ifdef CONFIG_X86_PAE | ||
322 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | ||
323 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | ||
324 | pte_t *ptep); | ||
325 | void (*pmd_clear)(pmd_t *pmdp); | ||
326 | |||
327 | #endif /* CONFIG_X86_PAE */ | ||
328 | |||
329 | void (*set_pud)(pud_t *pudp, pud_t pudval); | ||
330 | |||
331 | struct paravirt_callee_save pmd_val; | ||
332 | struct paravirt_callee_save make_pmd; | ||
333 | |||
334 | #if PAGETABLE_LEVELS == 4 | ||
335 | struct paravirt_callee_save pud_val; | ||
336 | struct paravirt_callee_save make_pud; | ||
337 | |||
338 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | ||
339 | #endif /* PAGETABLE_LEVELS == 4 */ | ||
340 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
341 | |||
342 | #ifdef CONFIG_HIGHPTE | ||
343 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); | ||
344 | #endif | ||
345 | |||
346 | struct pv_lazy_ops lazy_mode; | ||
347 | |||
348 | /* dom0 ops */ | ||
349 | |||
350 | /* Sometimes the physical address is a pfn, and sometimes it's | ||
351 | an mfn. We can tell which is which from the index. */ | ||
352 | void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, | ||
353 | phys_addr_t phys, pgprot_t flags); | ||
354 | }; | ||
355 | |||
356 | struct raw_spinlock; | ||
357 | struct pv_lock_ops { | ||
358 | int (*spin_is_locked)(struct raw_spinlock *lock); | ||
359 | int (*spin_is_contended)(struct raw_spinlock *lock); | ||
360 | void (*spin_lock)(struct raw_spinlock *lock); | ||
361 | void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); | ||
362 | int (*spin_trylock)(struct raw_spinlock *lock); | ||
363 | void (*spin_unlock)(struct raw_spinlock *lock); | ||
364 | }; | ||
365 | |||
366 | /* This contains all the paravirt structures: we get a convenient | ||
367 | * number for each function using the offset which we use to indicate | ||
368 | * what to patch. */ | ||
369 | struct paravirt_patch_template { | ||
370 | struct pv_init_ops pv_init_ops; | ||
371 | struct pv_time_ops pv_time_ops; | ||
372 | struct pv_cpu_ops pv_cpu_ops; | ||
373 | struct pv_irq_ops pv_irq_ops; | ||
374 | struct pv_apic_ops pv_apic_ops; | ||
375 | struct pv_mmu_ops pv_mmu_ops; | ||
376 | struct pv_lock_ops pv_lock_ops; | ||
377 | }; | ||
378 | |||
379 | extern struct pv_info pv_info; | ||
380 | extern struct pv_init_ops pv_init_ops; | ||
381 | extern struct pv_time_ops pv_time_ops; | ||
382 | extern struct pv_cpu_ops pv_cpu_ops; | ||
383 | extern struct pv_irq_ops pv_irq_ops; | ||
384 | extern struct pv_apic_ops pv_apic_ops; | ||
385 | extern struct pv_mmu_ops pv_mmu_ops; | ||
386 | extern struct pv_lock_ops pv_lock_ops; | ||
387 | |||
388 | #define PARAVIRT_PATCH(x) \ | ||
389 | (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) | ||
390 | |||
391 | #define paravirt_type(op) \ | ||
392 | [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ | ||
393 | [paravirt_opptr] "i" (&(op)) | ||
394 | #define paravirt_clobber(clobber) \ | ||
395 | [paravirt_clobber] "i" (clobber) | ||
396 | |||
397 | /* | ||
398 | * Generate some code, and mark it as patchable by the | ||
399 | * apply_paravirt() alternate instruction patcher. | ||
400 | */ | ||
401 | #define _paravirt_alt(insn_string, type, clobber) \ | ||
402 | "771:\n\t" insn_string "\n" "772:\n" \ | ||
403 | ".pushsection .parainstructions,\"a\"\n" \ | ||
404 | _ASM_ALIGN "\n" \ | ||
405 | _ASM_PTR " 771b\n" \ | ||
406 | " .byte " type "\n" \ | ||
407 | " .byte 772b-771b\n" \ | ||
408 | " .short " clobber "\n" \ | ||
409 | ".popsection\n" | ||
410 | |||
411 | /* Generate patchable code, with the default asm parameters. */ | ||
412 | #define paravirt_alt(insn_string) \ | ||
413 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") | ||
414 | |||
415 | /* Simple instruction patching code. */ | ||
416 | #define DEF_NATIVE(ops, name, code) \ | ||
417 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | ||
418 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | ||
419 | |||
420 | unsigned paravirt_patch_nop(void); | ||
421 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); | ||
422 | unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); | ||
423 | unsigned paravirt_patch_ignore(unsigned len); | ||
424 | unsigned paravirt_patch_call(void *insnbuf, | ||
425 | const void *target, u16 tgt_clobbers, | ||
426 | unsigned long addr, u16 site_clobbers, | ||
427 | unsigned len); | ||
428 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, | ||
429 | unsigned long addr, unsigned len); | ||
430 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | ||
431 | unsigned long addr, unsigned len); | ||
432 | |||
433 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | ||
434 | const char *start, const char *end); | ||
435 | |||
436 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
437 | unsigned long addr, unsigned len); | ||
438 | |||
439 | int paravirt_disable_iospace(void); | ||
440 | |||
441 | /* | ||
442 | * This generates an indirect call based on the operation type number. | ||
443 | * The type number, computed in PARAVIRT_PATCH, is derived from the | ||
444 | * offset into the paravirt_patch_template structure, and can therefore be | ||
445 | * freely converted back into a structure offset. | ||
446 | */ | ||
447 | #define PARAVIRT_CALL "call *%c[paravirt_opptr];" | ||
448 | |||
449 | /* | ||
450 | * These macros are intended to wrap calls through one of the paravirt | ||
451 | * ops structs, so that they can be later identified and patched at | ||
452 | * runtime. | ||
453 | * | ||
454 | * Normally, a call to a pv_op function is a simple indirect call: | ||
455 | * (pv_op_struct.operations)(args...). | ||
456 | * | ||
457 | * Unfortunately, this is a relatively slow operation for modern CPUs, | ||
458 | * because it cannot necessarily determine what the destination | ||
459 | * address is. In this case, the address is a runtime constant, so at | ||
460 | * the very least we can patch the call to be a simple direct call, or | ||
461 | * ideally, patch an inline implementation into the callsite. (Direct | ||
462 | * calls are essentially free, because the call and return addresses | ||
463 | * are completely predictable.) | ||
464 | * | ||
465 | * For i386, these macros rely on the standard gcc "regparm(3)" calling | ||
466 | * convention, in which the first three arguments are placed in %eax, | ||
467 | * %edx, %ecx (in that order), and the remaining arguments are placed | ||
468 | * on the stack. All caller-save registers (eax,edx,ecx) are expected | ||
469 | * to be modified (either clobbered or used for return values). | ||
470 | * X86_64, on the other hand, already specifies a register-based calling | ||
471 | * convention, returning in %rax, with parameters going in %rdi, %rsi, | ||
472 | * %rdx, and %rcx. Note that for this reason, x86_64 does not need any | ||
473 | * special handling for dealing with 4 arguments, unlike i386. | ||
474 | * However, x86_64 also has to clobber all caller-saved registers, of | ||
475 | * which, unfortunately, there are quite a few (r8 - r11). | ||
476 | * | ||
477 | * The call instruction itself is marked by placing its start address | ||
478 | * and size into the .parainstructions section, so that | ||
479 | * apply_paravirt() in arch/i386/kernel/alternative.c can do the | ||
480 | * appropriate patching under the control of the backend pv_init_ops | ||
481 | * implementation. | ||
482 | * | ||
483 | * Unfortunately there's no way to get gcc to generate the args setup | ||
484 | * for the call, and then allow the call itself to be generated by an | ||
485 | * inline asm. Because of this, we must do the complete arg setup and | ||
486 | * return value handling from within these macros. This is fairly | ||
487 | * cumbersome. | ||
488 | * | ||
489 | * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. | ||
490 | * It could be extended to more arguments, but there would be little | ||
491 | * to be gained from that. For each number of arguments, there are | ||
492 | * the two VCALL and CALL variants for void and non-void functions. | ||
493 | * | ||
494 | * When there is a return value, the invoker of the macro must specify | ||
495 | * the return type. The macro then uses sizeof() on that type to | ||
496 | * determine whether it's a 32 or 64 bit value, and places the return | ||
497 | * in the right register(s) (just %eax for 32-bit, and %edx:%eax for | ||
498 | * 64-bit). For x86_64 machines, it just returns at %rax regardless of | ||
499 | * the return value size. | ||
500 | * | ||
501 | * 64-bit arguments are passed as a pair of adjacent 32-bit arguments | ||
502 | * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments | ||
503 | * in low,high order | ||
504 | * | ||
505 | * Small structures are passed and returned in registers. The macro | ||
506 | * calling convention can't directly deal with this, so the wrapper | ||
507 | * functions must do this. | ||
508 | * | ||
509 | * These PVOP_* macros are only defined within this header. This | ||
510 | * means that all uses must be wrapped in inline functions. This also | ||
511 | * makes sure the incoming and outgoing types are always correct. | ||
512 | */ | ||
513 | #ifdef CONFIG_X86_32 | ||
514 | #define PVOP_VCALL_ARGS \ | ||
515 | unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx | ||
516 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS | ||
517 | |||
518 | #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) | ||
519 | #define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) | ||
520 | #define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) | ||
521 | |||
522 | #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ | ||
523 | "=c" (__ecx) | ||
524 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS | ||
525 | |||
526 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) | ||
527 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
528 | |||
529 | #define EXTRA_CLOBBERS | ||
530 | #define VEXTRA_CLOBBERS | ||
531 | #else /* CONFIG_X86_64 */ | ||
532 | #define PVOP_VCALL_ARGS \ | ||
533 | unsigned long __edi = __edi, __esi = __esi, \ | ||
534 | __edx = __edx, __ecx = __ecx | ||
535 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax | ||
536 | |||
537 | #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) | ||
538 | #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) | ||
539 | #define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) | ||
540 | #define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) | ||
541 | |||
542 | #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ | ||
543 | "=S" (__esi), "=d" (__edx), \ | ||
544 | "=c" (__ecx) | ||
545 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) | ||
546 | |||
547 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) | ||
548 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | ||
549 | |||
550 | #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" | ||
551 | #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" | ||
552 | #endif /* CONFIG_X86_32 */ | ||
553 | |||
554 | #ifdef CONFIG_PARAVIRT_DEBUG | ||
555 | #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) | ||
556 | #else | ||
557 | #define PVOP_TEST_NULL(op) ((void)op) | ||
558 | #endif | ||
559 | |||
560 | #define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ | ||
561 | pre, post, ...) \ | ||
562 | ({ \ | ||
563 | rettype __ret; \ | ||
564 | PVOP_CALL_ARGS; \ | ||
565 | PVOP_TEST_NULL(op); \ | ||
566 | /* This is 32-bit specific, but is okay in 64-bit */ \ | ||
567 | /* since this condition will never hold */ \ | ||
568 | if (sizeof(rettype) > sizeof(unsigned long)) { \ | ||
569 | asm volatile(pre \ | ||
570 | paravirt_alt(PARAVIRT_CALL) \ | ||
571 | post \ | ||
572 | : call_clbr \ | ||
573 | : paravirt_type(op), \ | ||
574 | paravirt_clobber(clbr), \ | ||
575 | ##__VA_ARGS__ \ | ||
576 | : "memory", "cc" extra_clbr); \ | ||
577 | __ret = (rettype)((((u64)__edx) << 32) | __eax); \ | ||
578 | } else { \ | ||
579 | asm volatile(pre \ | ||
580 | paravirt_alt(PARAVIRT_CALL) \ | ||
581 | post \ | ||
582 | : call_clbr \ | ||
583 | : paravirt_type(op), \ | ||
584 | paravirt_clobber(clbr), \ | ||
585 | ##__VA_ARGS__ \ | ||
586 | : "memory", "cc" extra_clbr); \ | ||
587 | __ret = (rettype)__eax; \ | ||
588 | } \ | ||
589 | __ret; \ | ||
590 | }) | ||
591 | |||
592 | #define __PVOP_CALL(rettype, op, pre, post, ...) \ | ||
593 | ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ | ||
594 | EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) | ||
595 | |||
596 | #define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ | ||
597 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
598 | PVOP_CALLEE_CLOBBERS, , \ | ||
599 | pre, post, ##__VA_ARGS__) | ||
600 | |||
601 | |||
602 | #define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ | ||
603 | ({ \ | ||
604 | PVOP_VCALL_ARGS; \ | ||
605 | PVOP_TEST_NULL(op); \ | ||
606 | asm volatile(pre \ | ||
607 | paravirt_alt(PARAVIRT_CALL) \ | ||
608 | post \ | ||
609 | : call_clbr \ | ||
610 | : paravirt_type(op), \ | ||
611 | paravirt_clobber(clbr), \ | ||
612 | ##__VA_ARGS__ \ | ||
613 | : "memory", "cc" extra_clbr); \ | ||
614 | }) | ||
615 | |||
616 | #define __PVOP_VCALL(op, pre, post, ...) \ | ||
617 | ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ | ||
618 | VEXTRA_CLOBBERS, \ | ||
619 | pre, post, ##__VA_ARGS__) | ||
620 | |||
621 | #define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ | ||
622 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | ||
623 | PVOP_VCALLEE_CLOBBERS, , \ | ||
624 | pre, post, ##__VA_ARGS__) | ||
625 | |||
626 | |||
627 | |||
628 | #define PVOP_CALL0(rettype, op) \ | ||
629 | __PVOP_CALL(rettype, op, "", "") | ||
630 | #define PVOP_VCALL0(op) \ | ||
631 | __PVOP_VCALL(op, "", "") | ||
632 | |||
633 | #define PVOP_CALLEE0(rettype, op) \ | ||
634 | __PVOP_CALLEESAVE(rettype, op, "", "") | ||
635 | #define PVOP_VCALLEE0(op) \ | ||
636 | __PVOP_VCALLEESAVE(op, "", "") | ||
637 | |||
638 | |||
639 | #define PVOP_CALL1(rettype, op, arg1) \ | ||
640 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
641 | #define PVOP_VCALL1(op, arg1) \ | ||
642 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
643 | |||
644 | #define PVOP_CALLEE1(rettype, op, arg1) \ | ||
645 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) | ||
646 | #define PVOP_VCALLEE1(op, arg1) \ | ||
647 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) | ||
648 | |||
649 | |||
650 | #define PVOP_CALL2(rettype, op, arg1, arg2) \ | ||
651 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
652 | PVOP_CALL_ARG2(arg2)) | ||
653 | #define PVOP_VCALL2(op, arg1, arg2) \ | ||
654 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
655 | PVOP_CALL_ARG2(arg2)) | ||
656 | |||
657 | #define PVOP_CALLEE2(rettype, op, arg1, arg2) \ | ||
658 | __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
659 | PVOP_CALL_ARG2(arg2)) | ||
660 | #define PVOP_VCALLEE2(op, arg1, arg2) \ | ||
661 | __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
662 | PVOP_CALL_ARG2(arg2)) | ||
663 | |||
664 | |||
665 | #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ | ||
666 | __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
667 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
668 | #define PVOP_VCALL3(op, arg1, arg2, arg3) \ | ||
669 | __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ | ||
670 | PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) | ||
671 | |||
672 | /* This is the only difference in x86_64. We can make it much simpler */ | ||
673 | #ifdef CONFIG_X86_32 | ||
674 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
675 | __PVOP_CALL(rettype, op, \ | ||
676 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
677 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
678 | PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) | ||
679 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
680 | __PVOP_VCALL(op, \ | ||
681 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | ||
682 | "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ | ||
683 | "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) | ||
684 | #else | ||
685 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | ||
686 | __PVOP_CALL(rettype, op, "", "", \ | ||
687 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
688 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
689 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | ||
690 | __PVOP_VCALL(op, "", "", \ | ||
691 | PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ | ||
692 | PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) | ||
693 | #endif | ||
694 | 15 | ||
695 | static inline int paravirt_enabled(void) | 16 | static inline int paravirt_enabled(void) |
696 | { | 17 | { |
@@ -1423,20 +744,6 @@ static inline void pmd_clear(pmd_t *pmdp) | |||
1423 | } | 744 | } |
1424 | #endif /* CONFIG_X86_PAE */ | 745 | #endif /* CONFIG_X86_PAE */ |
1425 | 746 | ||
1426 | /* Lazy mode for batching updates / context switch */ | ||
1427 | enum paravirt_lazy_mode { | ||
1428 | PARAVIRT_LAZY_NONE, | ||
1429 | PARAVIRT_LAZY_MMU, | ||
1430 | PARAVIRT_LAZY_CPU, | ||
1431 | }; | ||
1432 | |||
1433 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); | ||
1434 | void paravirt_start_context_switch(struct task_struct *prev); | ||
1435 | void paravirt_end_context_switch(struct task_struct *next); | ||
1436 | |||
1437 | void paravirt_enter_lazy_mmu(void); | ||
1438 | void paravirt_leave_lazy_mmu(void); | ||
1439 | |||
1440 | #define __HAVE_ARCH_START_CONTEXT_SWITCH | 747 | #define __HAVE_ARCH_START_CONTEXT_SWITCH |
1441 | static inline void arch_start_context_switch(struct task_struct *prev) | 748 | static inline void arch_start_context_switch(struct task_struct *prev) |
1442 | { | 749 | { |
@@ -1467,12 +774,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | |||
1467 | pv_mmu_ops.set_fixmap(idx, phys, flags); | 774 | pv_mmu_ops.set_fixmap(idx, phys, flags); |
1468 | } | 775 | } |
1469 | 776 | ||
1470 | void _paravirt_nop(void); | ||
1471 | u32 _paravirt_ident_32(u32); | ||
1472 | u64 _paravirt_ident_64(u64); | ||
1473 | |||
1474 | #define paravirt_nop ((void *)_paravirt_nop) | ||
1475 | |||
1476 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 777 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) |
1477 | 778 | ||
1478 | static inline int __raw_spin_is_locked(struct raw_spinlock *lock) | 779 | static inline int __raw_spin_is_locked(struct raw_spinlock *lock) |
@@ -1509,17 +810,6 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) | |||
1509 | 810 | ||
1510 | #endif | 811 | #endif |
1511 | 812 | ||
1512 | /* These all sit in the .parainstructions section to tell us what to patch. */ | ||
1513 | struct paravirt_patch_site { | ||
1514 | u8 *instr; /* original instructions */ | ||
1515 | u8 instrtype; /* type of this instruction */ | ||
1516 | u8 len; /* length of original instruction */ | ||
1517 | u16 clobbers; /* what registers you may clobber */ | ||
1518 | }; | ||
1519 | |||
1520 | extern struct paravirt_patch_site __parainstructions[], | ||
1521 | __parainstructions_end[]; | ||
1522 | |||
1523 | #ifdef CONFIG_X86_32 | 813 | #ifdef CONFIG_X86_32 |
1524 | #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" | 814 | #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" |
1525 | #define PV_RESTORE_REGS "popl %edx; popl %ecx;" | 815 | #define PV_RESTORE_REGS "popl %edx; popl %ecx;" |