diff options
Diffstat (limited to 'arch/x86/include/asm/paravirt.h')
-rw-r--r-- | arch/x86/include/asm/paravirt.h | 1650 |
1 files changed, 1650 insertions, 0 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h new file mode 100644 index 000000000000..ba3e2ff6aedc --- /dev/null +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -0,0 +1,1650 @@ | |||
1 | #ifndef _ASM_X86_PARAVIRT_H | ||
2 | #define _ASM_X86_PARAVIRT_H | ||
3 | /* Various instructions on x86 need to be replaced for | ||
4 | * para-virtualization: those hooks are defined here. */ | ||
5 | |||
6 | #ifdef CONFIG_PARAVIRT | ||
7 | #include <asm/page.h> | ||
8 | #include <asm/asm.h> | ||
9 | |||
/*
 * Clobber bitmask: one bit per register that may be clobbered.
 * Usually at least eax is included.
 */
#define CLBR_NONE	0
#define CLBR_EAX	(1 << 0)
#define CLBR_ECX	(1 << 1)
#define CLBR_EDX	(1 << 2)

#ifdef CONFIG_X86_64
/* Registers that only exist on x86_64. */
#define CLBR_RSI	(1 << 3)
#define CLBR_RDI	(1 << 4)
#define CLBR_R8		(1 << 5)
#define CLBR_R9		(1 << 6)
#define CLBR_R10	(1 << 7)
#define CLBR_R11	(1 << 8)
/* All nine bits defined above. */
#define CLBR_ANY	((1 << 9) - 1)
#include <asm/desc_defs.h>
#else
/* CLBR_ANY must cover every register the platform has; on i386 that is the three above. */
#define CLBR_ANY	((1 << 3) - 1)
#endif /* X86_64 */
29 | |||
30 | #ifndef __ASSEMBLY__ | ||
31 | #include <linux/types.h> | ||
32 | #include <linux/cpumask.h> | ||
33 | #include <asm/kmap_types.h> | ||
34 | #include <asm/desc_defs.h> | ||
35 | |||
36 | struct page; | ||
37 | struct thread_struct; | ||
38 | struct desc_ptr; | ||
39 | struct tss_struct; | ||
40 | struct mm_struct; | ||
41 | struct desc_struct; | ||
42 | |||
/* General information about the paravirt environment. */
struct pv_info {
	unsigned int kernel_rpl;	/* read through get_kernel_rpl() */
	int shared_kernel_pmd;
	int paravirt_enabled;		/* returned by paravirt_enabled() */
	const char *name;
};
50 | |||
51 | struct pv_init_ops { | ||
52 | /* | ||
53 | * Patch may replace one of the defined code sequences with | ||
54 | * arbitrary code, subject to the same register constraints. | ||
55 | * This generally means the code is not free to clobber any | ||
56 | * registers other than EAX. The patch function should return | ||
57 | * the number of bytes of code generated, as we nop pad the | ||
58 | * rest in generic code. | ||
59 | */ | ||
60 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, | ||
61 | unsigned long addr, unsigned len); | ||
62 | |||
63 | /* Basic arch-specific setup */ | ||
64 | void (*arch_setup)(void); | ||
65 | char *(*memory_setup)(void); | ||
66 | void (*post_allocator_init)(void); | ||
67 | |||
68 | /* Print a banner to identify the environment */ | ||
69 | void (*banner)(void); | ||
70 | }; | ||
71 | |||
72 | |||
/* Enter/leave hooks for deferred update mode, which is used to batch operations. */
struct pv_lazy_ops {
	void (*enter)(void);
	void (*leave)(void);
};
78 | |||
/* Time-keeping hooks. */
struct pv_time_ops {
	void (*time_init)(void);

	/* Get and set time of day */
	unsigned long (*get_wallclock)(void);
	int (*set_wallclock)(unsigned long nowtime);

	unsigned long long (*sched_clock)(void);
	unsigned long (*get_tsc_khz)(void);
};
89 | |||
90 | struct pv_cpu_ops { | ||
91 | /* hooks for various privileged instructions */ | ||
92 | unsigned long (*get_debugreg)(int regno); | ||
93 | void (*set_debugreg)(int regno, unsigned long value); | ||
94 | |||
95 | void (*clts)(void); | ||
96 | |||
97 | unsigned long (*read_cr0)(void); | ||
98 | void (*write_cr0)(unsigned long); | ||
99 | |||
100 | unsigned long (*read_cr4_safe)(void); | ||
101 | unsigned long (*read_cr4)(void); | ||
102 | void (*write_cr4)(unsigned long); | ||
103 | |||
104 | #ifdef CONFIG_X86_64 | ||
105 | unsigned long (*read_cr8)(void); | ||
106 | void (*write_cr8)(unsigned long); | ||
107 | #endif | ||
108 | |||
109 | /* Segment descriptor handling */ | ||
110 | void (*load_tr_desc)(void); | ||
111 | void (*load_gdt)(const struct desc_ptr *); | ||
112 | void (*load_idt)(const struct desc_ptr *); | ||
113 | void (*store_gdt)(struct desc_ptr *); | ||
114 | void (*store_idt)(struct desc_ptr *); | ||
115 | void (*set_ldt)(const void *desc, unsigned entries); | ||
116 | unsigned long (*store_tr)(void); | ||
117 | void (*load_tls)(struct thread_struct *t, unsigned int cpu); | ||
118 | #ifdef CONFIG_X86_64 | ||
119 | void (*load_gs_index)(unsigned int idx); | ||
120 | #endif | ||
121 | void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, | ||
122 | const void *desc); | ||
123 | void (*write_gdt_entry)(struct desc_struct *, | ||
124 | int entrynum, const void *desc, int size); | ||
125 | void (*write_idt_entry)(gate_desc *, | ||
126 | int entrynum, const gate_desc *gate); | ||
127 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | ||
128 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | ||
129 | |||
130 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | ||
131 | |||
132 | void (*set_iopl_mask)(unsigned mask); | ||
133 | |||
134 | void (*wbinvd)(void); | ||
135 | void (*io_delay)(void); | ||
136 | |||
137 | /* cpuid emulation, mostly so that caps bits can be disabled */ | ||
138 | void (*cpuid)(unsigned int *eax, unsigned int *ebx, | ||
139 | unsigned int *ecx, unsigned int *edx); | ||
140 | |||
141 | /* MSR, PMC and TSR operations. | ||
142 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | ||
143 | u64 (*read_msr_amd)(unsigned int msr, int *err); | ||
144 | u64 (*read_msr)(unsigned int msr, int *err); | ||
145 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | ||
146 | |||
147 | u64 (*read_tsc)(void); | ||
148 | u64 (*read_pmc)(int counter); | ||
149 | unsigned long long (*read_tscp)(unsigned int *aux); | ||
150 | |||
151 | /* | ||
152 | * Atomically enable interrupts and return to userspace. This | ||
153 | * is only ever used to return to 32-bit processes; in a | ||
154 | * 64-bit kernel, it's used for 32-on-64 compat processes, but | ||
155 | * never native 64-bit processes. (Jump, not call.) | ||
156 | */ | ||
157 | void (*irq_enable_sysexit)(void); | ||
158 | |||
159 | /* | ||
160 | * Switch to usermode gs and return to 64-bit usermode using | ||
161 | * sysret. Only used in 64-bit kernels to return to 64-bit | ||
162 | * processes. Usermode register state, including %rsp, must | ||
163 | * already be restored. | ||
164 | */ | ||
165 | void (*usergs_sysret64)(void); | ||
166 | |||
167 | /* | ||
168 | * Switch to usermode gs and return to 32-bit usermode using | ||
169 | * sysret. Used to return to 32-on-64 compat processes. | ||
170 | * Other usermode register state, including %esp, must already | ||
171 | * be restored. | ||
172 | */ | ||
173 | void (*usergs_sysret32)(void); | ||
174 | |||
175 | /* Normal iret. Jump to this with the standard iret stack | ||
176 | frame set up. */ | ||
177 | void (*iret)(void); | ||
178 | |||
179 | void (*swapgs)(void); | ||
180 | |||
181 | struct pv_lazy_ops lazy_mode; | ||
182 | }; | ||
183 | |||
/* Interrupt-state and halt hooks. */
struct pv_irq_ops {
	void (*init_IRQ)(void);

	/*
	 * Interrupt state accessors.  save_fl and restore_fl are only
	 * expected to deal with X86_EFLAGS_IF: every other bit that
	 * save_fl returns is undefined, and restore_fl may ignore it.
	 */
	unsigned long (*save_fl)(void);
	void (*restore_fl)(unsigned long);
	void (*irq_disable)(void);
	void (*irq_enable)(void);
	void (*safe_halt)(void);
	void (*halt)(void);

#ifdef CONFIG_X86_64
	void (*adjust_exception_frame)(void);
#endif
};
204 | |||
/* APIC hooks; the struct has no members unless CONFIG_X86_LOCAL_APIC is set. */
struct pv_apic_ops {
#ifdef CONFIG_X86_LOCAL_APIC
	void (*setup_boot_clock)(void);
	void (*setup_secondary_clock)(void);

	void (*startup_ipi_hook)(int phys_apicid,
				 unsigned long start_eip,
				 unsigned long start_esp);
#endif
};
215 | |||
216 | struct pv_mmu_ops { | ||
217 | /* | ||
218 | * Called before/after init_mm pagetable setup. setup_start | ||
219 | * may reset %cr3, and may pre-install parts of the pagetable; | ||
220 | * pagetable setup is expected to preserve any existing | ||
221 | * mapping. | ||
222 | */ | ||
223 | void (*pagetable_setup_start)(pgd_t *pgd_base); | ||
224 | void (*pagetable_setup_done)(pgd_t *pgd_base); | ||
225 | |||
226 | unsigned long (*read_cr2)(void); | ||
227 | void (*write_cr2)(unsigned long); | ||
228 | |||
229 | unsigned long (*read_cr3)(void); | ||
230 | void (*write_cr3)(unsigned long); | ||
231 | |||
232 | /* | ||
233 | * Hooks for intercepting the creation/use/destruction of an | ||
234 | * mm_struct. | ||
235 | */ | ||
236 | void (*activate_mm)(struct mm_struct *prev, | ||
237 | struct mm_struct *next); | ||
238 | void (*dup_mmap)(struct mm_struct *oldmm, | ||
239 | struct mm_struct *mm); | ||
240 | void (*exit_mmap)(struct mm_struct *mm); | ||
241 | |||
242 | |||
243 | /* TLB operations */ | ||
244 | void (*flush_tlb_user)(void); | ||
245 | void (*flush_tlb_kernel)(void); | ||
246 | void (*flush_tlb_single)(unsigned long addr); | ||
247 | void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, | ||
248 | unsigned long va); | ||
249 | |||
250 | /* Hooks for allocating and freeing a pagetable top-level */ | ||
251 | int (*pgd_alloc)(struct mm_struct *mm); | ||
252 | void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); | ||
253 | |||
254 | /* | ||
255 | * Hooks for allocating/releasing pagetable pages when they're | ||
256 | * attached to a pagetable | ||
257 | */ | ||
258 | void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); | ||
259 | void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); | ||
260 | void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); | ||
261 | void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); | ||
262 | void (*release_pte)(unsigned long pfn); | ||
263 | void (*release_pmd)(unsigned long pfn); | ||
264 | void (*release_pud)(unsigned long pfn); | ||
265 | |||
266 | /* Pagetable manipulation functions */ | ||
267 | void (*set_pte)(pte_t *ptep, pte_t pteval); | ||
268 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, | ||
269 | pte_t *ptep, pte_t pteval); | ||
270 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | ||
271 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, | ||
272 | pte_t *ptep); | ||
273 | void (*pte_update_defer)(struct mm_struct *mm, | ||
274 | unsigned long addr, pte_t *ptep); | ||
275 | |||
276 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, | ||
277 | pte_t *ptep); | ||
278 | void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, | ||
279 | pte_t *ptep, pte_t pte); | ||
280 | |||
281 | pteval_t (*pte_val)(pte_t); | ||
282 | pteval_t (*pte_flags)(pte_t); | ||
283 | pte_t (*make_pte)(pteval_t pte); | ||
284 | |||
285 | pgdval_t (*pgd_val)(pgd_t); | ||
286 | pgd_t (*make_pgd)(pgdval_t pgd); | ||
287 | |||
288 | #if PAGETABLE_LEVELS >= 3 | ||
289 | #ifdef CONFIG_X86_PAE | ||
290 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | ||
291 | void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, | ||
292 | pte_t *ptep, pte_t pte); | ||
293 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | ||
294 | pte_t *ptep); | ||
295 | void (*pmd_clear)(pmd_t *pmdp); | ||
296 | |||
297 | #endif /* CONFIG_X86_PAE */ | ||
298 | |||
299 | void (*set_pud)(pud_t *pudp, pud_t pudval); | ||
300 | |||
301 | pmdval_t (*pmd_val)(pmd_t); | ||
302 | pmd_t (*make_pmd)(pmdval_t pmd); | ||
303 | |||
304 | #if PAGETABLE_LEVELS == 4 | ||
305 | pudval_t (*pud_val)(pud_t); | ||
306 | pud_t (*make_pud)(pudval_t pud); | ||
307 | |||
308 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | ||
309 | #endif /* PAGETABLE_LEVELS == 4 */ | ||
310 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
311 | |||
312 | #ifdef CONFIG_HIGHPTE | ||
313 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); | ||
314 | #endif | ||
315 | |||
316 | struct pv_lazy_ops lazy_mode; | ||
317 | |||
318 | /* dom0 ops */ | ||
319 | |||
320 | /* Sometimes the physical address is a pfn, and sometimes its | ||
321 | an mfn. We can tell which is which from the index. */ | ||
322 | void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, | ||
323 | unsigned long phys, pgprot_t flags); | ||
324 | }; | ||
325 | |||
struct raw_spinlock;

/* Spinlock hooks; each one operates on the raw_spinlock it is handed. */
struct pv_lock_ops {
	int  (*spin_is_locked)(struct raw_spinlock *lock);
	int  (*spin_is_contended)(struct raw_spinlock *lock);
	void (*spin_lock)(struct raw_spinlock *lock);
	void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
	int  (*spin_trylock)(struct raw_spinlock *lock);
	void (*spin_unlock)(struct raw_spinlock *lock);
};
335 | |||
336 | /* This contains all the paravirt structures: we get a convenient | ||
337 | * number for each function using the offset which we use to indicate | ||
338 | * what to patch. */ | ||
339 | struct paravirt_patch_template { | ||
340 | struct pv_init_ops pv_init_ops; | ||
341 | struct pv_time_ops pv_time_ops; | ||
342 | struct pv_cpu_ops pv_cpu_ops; | ||
343 | struct pv_irq_ops pv_irq_ops; | ||
344 | struct pv_apic_ops pv_apic_ops; | ||
345 | struct pv_mmu_ops pv_mmu_ops; | ||
346 | struct pv_lock_ops pv_lock_ops; | ||
347 | }; | ||
348 | |||
349 | extern struct pv_info pv_info; | ||
350 | extern struct pv_init_ops pv_init_ops; | ||
351 | extern struct pv_time_ops pv_time_ops; | ||
352 | extern struct pv_cpu_ops pv_cpu_ops; | ||
353 | extern struct pv_irq_ops pv_irq_ops; | ||
354 | extern struct pv_apic_ops pv_apic_ops; | ||
355 | extern struct pv_mmu_ops pv_mmu_ops; | ||
356 | extern struct pv_lock_ops pv_lock_ops; | ||
357 | |||
/*
 * Type number of pv-op member @x: its offset within struct
 * paravirt_patch_template, counted in pointer-sized slots.
 */
#define PARAVIRT_PATCH(x)					\
	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))

/*
 * asm input operands identifying the op being called: its type number
 * as an immediate (paravirt_typenum) and the op pointer itself as a
 * memory operand (paravirt_opptr).
 */
#define paravirt_type(op)				\
	[paravirt_typenum] "i" (PARAVIRT_PATCH(op)),	\
	[paravirt_opptr] "m" (op)
/* asm input operand carrying the clobber mask as an immediate. */
#define paravirt_clobber(clobber)		\
	[paravirt_clobber] "i" (clobber)
366 | |||
/*
 * Generate some code, and mark it as patchable by the
 * apply_paravirt() alternate instruction patcher.
 *
 * @insn_string is emitted between local labels 771 and 772.  A record
 * is then added to the .parainstructions section holding the address
 * of the site (771b), @type as a byte, the site length (772b-771b) as
 * a byte, and @clobber as a short.
 */
#define _paravirt_alt(insn_string, type, clobber)	\
	"771:\n\t" insn_string "\n" "772:\n"		\
	".pushsection .parainstructions,\"a\"\n"	\
	_ASM_ALIGN "\n"					\
	_ASM_PTR " 771b\n"				\
	" .byte " type "\n"				\
	" .byte 772b-771b\n"				\
	" .short " clobber "\n"				\
	".popsection\n"

/*
 * Generate patchable code, with the default asm parameters: the type
 * and clobber values come from the paravirt_typenum and
 * paravirt_clobber asm operands.
 */
#define paravirt_alt(insn_string)					\
	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
384 | |||
/*
 * Simple instruction patching code.
 *
 * Declares start_<ops>_<name>[] and end_<ops>_<name>[] and emits @code
 * between two asm labels of the same names, so the bytes of @code can
 * be located through those symbols.
 */
#define DEF_NATIVE(ops, name, code)					\
	extern const char start_##ops##_##name[], end_##ops##_##name[];	\
	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
389 | |||
390 | unsigned paravirt_patch_nop(void); | ||
391 | unsigned paravirt_patch_ignore(unsigned len); | ||
392 | unsigned paravirt_patch_call(void *insnbuf, | ||
393 | const void *target, u16 tgt_clobbers, | ||
394 | unsigned long addr, u16 site_clobbers, | ||
395 | unsigned len); | ||
396 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, | ||
397 | unsigned long addr, unsigned len); | ||
398 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | ||
399 | unsigned long addr, unsigned len); | ||
400 | |||
401 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | ||
402 | const char *start, const char *end); | ||
403 | |||
404 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
405 | unsigned long addr, unsigned len); | ||
406 | |||
407 | int paravirt_disable_iospace(void); | ||
408 | |||
/*
 * This generates an indirect call based on the operation type number.
 * The type number, computed in PARAVIRT_PATCH, is derived from the
 * offset into the paravirt_patch_template structure, and can therefore be
 * freely converted back into a structure offset.
 *
 * The call goes through the paravirt_opptr asm operand supplied by
 * paravirt_type().
 */
#define PARAVIRT_CALL	"call *%[paravirt_opptr];"
416 | |||
417 | /* | ||
418 | * These macros are intended to wrap calls through one of the paravirt | ||
419 | * ops structs, so that they can be later identified and patched at | ||
420 | * runtime. | ||
421 | * | ||
422 | * Normally, a call to a pv_op function is a simple indirect call: | ||
423 | * (pv_op_struct.operations)(args...). | ||
424 | * | ||
 * Unfortunately, this is a relatively slow operation for modern CPUs,
 * because it cannot necessarily determine what the destination
 * address is.  In this case, the address is a runtime constant, so at
 * the very least we can patch the call to be a simple direct call, or
 * ideally, patch an inline implementation into the callsite.  (Direct
 * calls are essentially free, because the call and return addresses
 * are completely predictable.)
432 | * | ||
 * For i386, these macros rely on the standard gcc "regparm(3)" calling
 * convention, in which the first three arguments are placed in %eax,
 * %edx, %ecx (in that order), and the remaining arguments are placed
 * on the stack.  All caller-save registers (eax,edx,ecx) are expected
 * to be modified (either clobbered or used for return values).
 * x86_64, on the other hand, already specifies a register-based calling
 * convention, returning in %rax, with parameters going in %rdi, %rsi,
 * %rdx, and %rcx.  Note that for this reason, x86_64 does not need any
 * special handling for dealing with 4 arguments, unlike i386.
 * However, x86_64 also has to clobber all caller-saved registers, of
 * which there are unfortunately quite a few (r8 - r11).
444 | * | ||
 * The call instruction itself is marked by placing its start address
 * and size into the .parainstructions section, so that
 * apply_paravirt() in arch/x86/kernel/alternative.c can do the
 * appropriate patching under the control of the backend pv_init_ops
 * implementation.
450 | * | ||
451 | * Unfortunately there's no way to get gcc to generate the args setup | ||
452 | * for the call, and then allow the call itself to be generated by an | ||
453 | * inline asm. Because of this, we must do the complete arg setup and | ||
454 | * return value handling from within these macros. This is fairly | ||
455 | * cumbersome. | ||
456 | * | ||
457 | * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. | ||
458 | * It could be extended to more arguments, but there would be little | ||
459 | * to be gained from that. For each number of arguments, there are | ||
460 | * the two VCALL and CALL variants for void and non-void functions. | ||
461 | * | ||
 * When there is a return value, the invoker of the macro must specify
 * the return type.  The macro then uses sizeof() on that type to
 * determine whether it's a 32 or 64 bit value, and places the return
 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
 * 64-bit).  For x86_64 machines, it just returns in %rax regardless of
 * the return value size.
468 | * | ||
 * On i386, 64-bit arguments are passed as a pair of adjacent 32-bit
 * arguments in low,high order.
472 | * | ||
473 | * Small structures are passed and returned in registers. The macro | ||
474 | * calling convention can't directly deal with this, so the wrapper | ||
475 | * functions must do this. | ||
476 | * | ||
477 | * These PVOP_* macros are only defined within this header. This | ||
478 | * means that all uses must be wrapped in inline functions. This also | ||
479 | * makes sure the incoming and outgoing types are always correct. | ||
480 | */ | ||
/*
 * Per-architecture pieces of the PVOP call asm:
 *   PVOP_[V]CALL_ARGS     - local variables bound to the registers
 *   PVOP_[V]CALL_CLOBBERS - the asm output operands for those variables
 *   [V]EXTRA_CLOBBERS     - additional entries for the asm clobber list
 * The order of the output operands matters: the PVOP_[V]CALLn macros
 * refer to them by position ("0", "1", ...) to pass arguments.
 */
#ifdef CONFIG_X86_32
/* i386: outputs are, in order, eax, edx, ecx. */
#define PVOP_VCALL_ARGS		unsigned long __eax, __edx, __ecx
#define PVOP_CALL_ARGS		PVOP_VCALL_ARGS
#define PVOP_VCALL_CLOBBERS	"=a" (__eax), "=d" (__edx),	\
				"=c" (__ecx)
#define PVOP_CALL_CLOBBERS	PVOP_VCALL_CLOBBERS
#define EXTRA_CLOBBERS
#define VEXTRA_CLOBBERS
#else
/* x86_64: outputs are, in order, rdi, rsi, rdx, rcx (plus rax for CALL). */
#define PVOP_VCALL_ARGS		unsigned long __edi, __esi, __edx, __ecx
#define PVOP_CALL_ARGS		PVOP_VCALL_ARGS, __eax
#define PVOP_VCALL_CLOBBERS	"=D" (__edi),			\
				"=S" (__esi), "=d" (__edx),	\
				"=c" (__ecx)

#define PVOP_CALL_CLOBBERS	PVOP_VCALL_CLOBBERS, "=a" (__eax)

/* The void variant has no rax output, so rax is listed as a clobber. */
#define EXTRA_CLOBBERS	 , "r8", "r9", "r10", "r11"
#define VEXTRA_CLOBBERS	 , "rax", "r8", "r9", "r10", "r11"
#endif
501 | |||
/*
 * Sanity check on a pv-op pointer before it is called.  With
 * CONFIG_PARAVIRT_DEBUG this is BUG_ON(op == NULL); otherwise @op is
 * only evaluated and discarded.
 *
 * @op is parenthesized so that any expression is accepted; without the
 * parentheses the (void) cast would bind to just the first operand of
 * e.g. a conditional expression.
 */
#ifdef CONFIG_PARAVIRT_DEBUG
#define PVOP_TEST_NULL(op)	BUG_ON((op) == NULL)
#else
#define PVOP_TEST_NULL(op)	((void)(op))
#endif
507 | |||
/*
 * Core of the value-returning PVOP_CALLn macros.
 *
 *   rettype  - type of the value the macro yields
 *   op       - pv-op function pointer to call
 *   pre/post - asm text emitted right before/after the patchable call
 *   ...      - extra asm input operands that carry the arguments
 *
 * Both branches emit the same asm and differ only in how the result is
 * built: from __edx:__eax when rettype is wider than unsigned long,
 * from __eax alone otherwise.
 */
#define __PVOP_CALL(rettype, op, pre, post, ...)			\
	({								\
		rettype __ret;						\
		PVOP_CALL_ARGS;						\
		PVOP_TEST_NULL(op);					\
		/* This is 32-bit specific, but is okay in 64-bit */	\
		/* since this condition will never hold */		\
		if (sizeof(rettype) > sizeof(unsigned long)) {		\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : PVOP_CALL_CLOBBERS		\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc" EXTRA_CLOBBERS);	\
			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
		} else {						\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : PVOP_CALL_CLOBBERS		\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc" EXTRA_CLOBBERS);	\
			__ret = (rettype)__eax;				\
		}							\
		__ret;							\
	})
/*
 * Core of the void PVOP_VCALLn macros: same call sequence as
 * __PVOP_CALL but no result is extracted, and the V variants of the
 * argument, output and clobber lists are used.
 *
 *   op       - pv-op function pointer to call
 *   pre/post - asm text emitted right before/after the patchable call
 *   ...      - extra asm input operands that carry the arguments
 */
#define __PVOP_VCALL(op, pre, post, ...)				\
	({								\
		PVOP_VCALL_ARGS;					\
		PVOP_TEST_NULL(op);					\
		asm volatile(pre					\
			     paravirt_alt(PARAVIRT_CALL)		\
			     post					\
			     : PVOP_VCALL_CLOBBERS			\
			     : paravirt_type(op),			\
			       paravirt_clobber(CLBR_ANY),		\
			       ##__VA_ARGS__				\
			     : "memory", "cc" VEXTRA_CLOBBERS);		\
	})
551 | |||
/*
 * Call wrappers for 0 to 3 arguments.  PVOP_CALLn yields a value of
 * type @rettype; PVOP_VCALLn is for void ops.
 *
 * Each argument is cast to unsigned long and tied, through the matching
 * constraints "0", "1", "2", to the first, second and third output
 * operand of PVOP_[V]CALL_CLOBBERS, i.e. to that operand's register.
 */
#define PVOP_CALL0(rettype, op)						\
	__PVOP_CALL(rettype, op, "", "")
#define PVOP_VCALL0(op)							\
	__PVOP_VCALL(op, "", "")

#define PVOP_CALL1(rettype, op, arg1)					\
	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
#define PVOP_VCALL1(op, arg1)						\
	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))

#define PVOP_CALL2(rettype, op, arg1, arg2)				\
	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
	"1" ((unsigned long)(arg2)))
#define PVOP_VCALL2(op, arg1, arg2)					\
	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
	"1" ((unsigned long)(arg2)))

#define PVOP_CALL3(rettype, op, arg1, arg2, arg3)			\
	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
#define PVOP_VCALL3(op, arg1, arg2, arg3)				\
	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
575 | |||
/*
 * This is the only difference in x86_64. We can make it much simpler.
 *
 * On i386 the fourth argument is pushed on the stack before the call
 * ("pre") and the stack pointer is moved back with lea afterwards
 * ("post").  On x86_64 it is simply tied to the fourth output operand.
 */
#ifdef CONFIG_X86_32
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
	__PVOP_CALL(rettype, op,					\
		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
	__PVOP_VCALL(op,						\
		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
#else
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),		\
	"3"((unsigned long)(arg4)))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),		\
	"3"((unsigned long)(arg4)))
#endif
598 | |||
/* Return the paravirt_enabled field of the global pv_info. */
static inline int paravirt_enabled(void)
{
	return pv_info.paravirt_enabled;
}
603 | |||
/*
 * Call the pv_cpu_ops.load_sp0 hook with @tss and @thread through a
 * patchable paravirt call site.
 */
static inline void load_sp0(struct tss_struct *tss,
			     struct thread_struct *thread)
{
	PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
}
609 | |||
/*
 * Plain (non-patched) call of the pv_init_ops.arch_setup hook.
 * Note that the expansion already ends in a semicolon.
 */
#define ARCH_SETUP			pv_init_ops.arch_setup();
/* Return the value produced by the pv_time_ops.get_wallclock hook. */
static inline unsigned long get_wallclock(void)
{
	return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
}
615 | |||
/*
 * Pass @nowtime to the pv_time_ops.set_wallclock hook and return the
 * hook's int result.
 */
static inline int set_wallclock(unsigned long nowtime)
{
	return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
}
620 | |||
/*
 * Return the pv_time_ops.time_init function pointer.  The hook is not
 * called here; that is left to the caller.
 */
static inline void (*choose_time_init(void))(void)
{
	return pv_time_ops.time_init;
}
625 | |||
/*
 * The paravirtualized CPUID instruction.
 *
 * The four pointers are handed to the pv_cpu_ops.cpuid hook unchanged,
 * in the order eax, ebx, ecx, edx.
 */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
			   unsigned int *ecx, unsigned int *edx)
{
	PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
}
632 | |||
/*
 * These special macros can be used to get or set a debugging register
 */

/* Return what the pv_cpu_ops.get_debugreg hook yields for register @reg. */
static inline unsigned long paravirt_get_debugreg(int reg)
{
	return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
}
/* Store the value of debug register @reg into the lvalue @var. */
#define get_debugreg(var, reg)	((var) = paravirt_get_debugreg(reg))
/*
 * Write @val to debug register @reg via the pv_cpu_ops.set_debugreg
 * hook.  Mind the argument order: this wrapper takes (val, reg) while
 * the hook is called with (reg, val), matching its (regno, value)
 * signature.
 */
static inline void set_debugreg(unsigned long val, int reg)
{
	PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}
645 | |||
/* Call the pv_cpu_ops.clts hook through a patchable paravirt call site. */
static inline void clts(void)
{
	PVOP_VCALL0(pv_cpu_ops.clts);
}
650 | |||
/* Return the result of the pv_cpu_ops.read_cr0 hook. */
static inline unsigned long read_cr0(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
}

/* Pass @x to the pv_cpu_ops.write_cr0 hook. */
static inline void write_cr0(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
}
660 | |||
/* Return the result of the read_cr2 hook (this one lives in pv_mmu_ops). */
static inline unsigned long read_cr2(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
}

/* Pass @x to the pv_mmu_ops.write_cr2 hook. */
static inline void write_cr2(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}
670 | |||
/* Return the result of the read_cr3 hook (this one lives in pv_mmu_ops). */
static inline unsigned long read_cr3(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}

/* Pass @x to the pv_mmu_ops.write_cr3 hook. */
static inline void write_cr3(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}
680 | |||
/* Return the result of the pv_cpu_ops.read_cr4 hook. */
static inline unsigned long read_cr4(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
/* Same, but through the separate pv_cpu_ops.read_cr4_safe hook. */
static inline unsigned long read_cr4_safe(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}

/* Pass @x to the pv_cpu_ops.write_cr4 hook. */
static inline void write_cr4(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}
694 | |||
#ifdef CONFIG_X86_64
/* The cr8 hooks only exist in pv_cpu_ops on CONFIG_X86_64 builds. */

/* Return the result of the pv_cpu_ops.read_cr8 hook. */
static inline unsigned long read_cr8(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
}

/* Pass @x to the pv_cpu_ops.write_cr8 hook. */
static inline void write_cr8(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
}
#endif
706 | |||
/* Call the pv_irq_ops.safe_halt hook through a patchable call site. */
static inline void raw_safe_halt(void)
{
	PVOP_VCALL0(pv_irq_ops.safe_halt);
}
711 | |||
712 | static inline void halt(void) | ||
713 | { | ||
714 | PVOP_VCALL0(pv_irq_ops.safe_halt); | ||
715 | } | ||
716 | |||
/* Call the pv_cpu_ops.wbinvd hook through a patchable call site. */
static inline void wbinvd(void)
{
	PVOP_VCALL0(pv_cpu_ops.wbinvd);
}
721 | |||
/* Read the kernel_rpl field of the global pv_info. */
#define get_kernel_rpl()  (pv_info.kernel_rpl)
723 | |||
/*
 * Read @msr through the pv_cpu_ops.read_msr hook.  @err is handed to
 * the hook, which per the pv_cpu_ops comment sets it to 0 or -EFAULT.
 */
static inline u64 paravirt_read_msr(unsigned msr, int *err)
{
	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
}
/* Same as paravirt_read_msr(), but through the read_msr_amd hook. */
static inline u64 paravirt_read_msr_amd(unsigned msr, int *err)
{
	return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err);
}
/*
 * Write the @high:@low pair to @msr through the pv_cpu_ops.write_msr
 * hook and return the hook's result (0 or -EFAULT per the pv_cpu_ops
 * comment).
 */
static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{
	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
}
736 | |||
/* These should all do BUG_ON(_err), but our headers are too tangled. */

/*
 * rdmsr(msr, val1, val2): read @msr and store its low 32 bits in @val1
 * and its high 32 bits in @val2.  The error status is ignored.
 */
#define rdmsr(msr, val1, val2)			\
do {						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	(val1) = (u32)_l;			\
	(val2) = _l >> 32;			\
} while (0)
745 | |||
/*
 * wrmsr(msr, val1, val2): write low half @val1 and high half @val2 to
 * @msr.  The result of paravirt_write_msr() is discarded; use
 * wrmsr_safe() to get it.
 */
#define wrmsr(msr, val1, val2)			\
do {						\
	paravirt_write_msr(msr, val1, val2);	\
} while (0)
750 | |||
/*
 * rdmsrl(msr, val): read @msr and store the whole 64-bit value in @val.
 * The error status is ignored.
 */
#define rdmsrl(msr, val)			\
do {						\
	int _err;				\
	(val) = paravirt_read_msr(msr, &_err);	\
} while (0)
756 | |||
/*
 * wrmsrl(msr, val): write the 64-bit value @val to @msr by splitting it
 * into low and high 32-bit halves for wrmsr().
 *
 * @val is captured in a local first, so an argument with side effects
 * is evaluated exactly once.  Like wrmsr(), this is a statement, not an
 * expression.
 */
#define wrmsrl(msr, val)						\
do {									\
	u64 __wrmsrl_val = (u64)(val);					\
	wrmsr(msr, (u32)__wrmsrl_val, __wrmsrl_val >> 32);		\
} while (0)
/* Like wrmsr(), but an expression yielding paravirt_write_msr()'s result. */
#define wrmsr_safe(msr, a, b)	paravirt_write_msr(msr, a, b)
759 | |||
/*
 * rdmsr with exception handling: read @msr, store the low 32 bits
 * through pointer @a and the high 32 bits through pointer @b, and
 * evaluate to the error status reported by paravirt_read_msr().
 */
#define rdmsr_safe(msr, a, b)			\
({						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	(*(a)) = (u32)_l;			\
	(*(b)) = _l >> 32;			\
	_err;					\
})
769 | |||
/*
 * Read @msr into *@p and return the error status that
 * paravirt_read_msr() reported.
 */
static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
{
	int err;

	*p = paravirt_read_msr(msr, &err);
	return err;
}
/* Same as rdmsrl_safe(), but reads through paravirt_read_msr_amd(). */
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
	int err;

	*p = paravirt_read_msr_amd(msr, &err);
	return err;
}
784 | |||
/* Return the 64-bit value produced by the pv_cpu_ops.read_tsc hook. */
static inline u64 paravirt_read_tsc(void)
{
	return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
}
789 | |||
/* rdtscl(low): store the TSC value, converted to int, in @low. */
#define rdtscl(low)				\
do {						\
	u64 _l = paravirt_read_tsc();		\
	(low) = (int)_l;			\
} while (0)
795 | |||
/* rdtscll(val): store the full 64-bit TSC value in @val. */
#define rdtscll(val)	((val) = paravirt_read_tsc())
797 | |||
/* Return the value produced by the pv_time_ops.sched_clock hook. */
static inline unsigned long long paravirt_sched_clock(void)
{
	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}
/* Plain (non-patched) call of the pv_time_ops.get_tsc_khz hook. */
#define calibrate_tsc() (pv_time_ops.get_tsc_khz())
803 | |||
804 | static inline unsigned long long paravirt_read_pmc(int counter) | ||
805 | { | ||
806 | return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); | ||
807 | } | ||
808 | |||
/*
 * Read a performance counter via paravirt_read_pmc() and split the
 * result: low gets the low 32 bits, high the upper 32 bits.
 */
#define rdpmc(counter, low, high)			\
do {							\
	u64 _pmc = paravirt_read_pmc(counter);		\
	(low) = (u32)_pmc;				\
	(high) = _pmc >> 32;				\
} while (0)
815 | |||
816 | static inline unsigned long long paravirt_rdtscp(unsigned int *aux) | ||
817 | { | ||
818 | return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); | ||
819 | } | ||
820 | |||
/*
 * Read the TSC and the auxiliary value via paravirt_rdtscp().
 * low/high receive the two 32-bit halves of the counter, aux the
 * auxiliary word.
 *
 * The temporaries use the types paravirt_rdtscp() actually works with:
 * an unsigned int for the aux word (it takes an unsigned int *) and a
 * u64 for the counter.  With the former "unsigned long" temporary the
 * counter was truncated on 32-bit and "__val >> 32" shifted by the full
 * width of the type.
 */
#define rdtscp(low, high, aux)				\
do {							\
	unsigned int __aux;				\
	u64 __val = paravirt_rdtscp(&__aux);		\
	(low) = (u32)__val;				\
	(high) = (u32)(__val >> 32);			\
	(aux) = __aux;					\
} while (0)
829 | |||
/*
 * Read the TSC as one 64-bit value into val and the auxiliary word
 * into aux, via paravirt_rdtscp().
 *
 * The temporary is an unsigned int because paravirt_rdtscp() takes an
 * unsigned int *.  The former "unsigned long" temporary had only its
 * first four bytes written on 64-bit, so the value copied into aux
 * contained uninitialised upper bits.
 */
#define rdtscpll(val, aux)				\
do {							\
	unsigned int __aux;				\
	(val) = paravirt_rdtscp(&__aux);		\
	(aux) = __aux;					\
} while (0)
836 | |||
837 | static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) | ||
838 | { | ||
839 | PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); | ||
840 | } | ||
841 | |||
842 | static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) | ||
843 | { | ||
844 | PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries); | ||
845 | } | ||
846 | |||
847 | static inline void load_TR_desc(void) | ||
848 | { | ||
849 | PVOP_VCALL0(pv_cpu_ops.load_tr_desc); | ||
850 | } | ||
851 | static inline void load_gdt(const struct desc_ptr *dtr) | ||
852 | { | ||
853 | PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr); | ||
854 | } | ||
855 | static inline void load_idt(const struct desc_ptr *dtr) | ||
856 | { | ||
857 | PVOP_VCALL1(pv_cpu_ops.load_idt, dtr); | ||
858 | } | ||
859 | static inline void set_ldt(const void *addr, unsigned entries) | ||
860 | { | ||
861 | PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); | ||
862 | } | ||
863 | static inline void store_gdt(struct desc_ptr *dtr) | ||
864 | { | ||
865 | PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); | ||
866 | } | ||
867 | static inline void store_idt(struct desc_ptr *dtr) | ||
868 | { | ||
869 | PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); | ||
870 | } | ||
871 | static inline unsigned long paravirt_store_tr(void) | ||
872 | { | ||
873 | return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); | ||
874 | } | ||
875 | #define store_tr(tr) ((tr) = paravirt_store_tr()) | ||
876 | static inline void load_TLS(struct thread_struct *t, unsigned cpu) | ||
877 | { | ||
878 | PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); | ||
879 | } | ||
880 | |||
#ifdef CONFIG_X86_64
/* Forward gs to the pv_cpu_ops.load_gs_index hook (64-bit builds only). */
static inline void load_gs_index(unsigned int gs)
{
	PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
}
#endif /* CONFIG_X86_64 */
887 | |||
888 | static inline void write_ldt_entry(struct desc_struct *dt, int entry, | ||
889 | const void *desc) | ||
890 | { | ||
891 | PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc); | ||
892 | } | ||
893 | |||
894 | static inline void write_gdt_entry(struct desc_struct *dt, int entry, | ||
895 | void *desc, int type) | ||
896 | { | ||
897 | PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type); | ||
898 | } | ||
899 | |||
900 | static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) | ||
901 | { | ||
902 | PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); | ||
903 | } | ||
904 | static inline void set_iopl_mask(unsigned mask) | ||
905 | { | ||
906 | PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); | ||
907 | } | ||
908 | |||
909 | /* The paravirtualized I/O functions */ | ||
910 | static inline void slow_down_io(void) | ||
911 | { | ||
912 | pv_cpu_ops.io_delay(); | ||
913 | #ifdef REALLY_SLOW_IO | ||
914 | pv_cpu_ops.io_delay(); | ||
915 | pv_cpu_ops.io_delay(); | ||
916 | pv_cpu_ops.io_delay(); | ||
917 | #endif | ||
918 | } | ||
919 | |||
#ifdef CONFIG_X86_LOCAL_APIC
/* Invoke the pv_apic_ops.setup_boot_clock hook. */
static inline void setup_boot_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
}

/* Invoke the pv_apic_ops.setup_secondary_clock hook. */
static inline void setup_secondary_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
}
#endif /* CONFIG_X86_LOCAL_APIC */
931 | |||
932 | static inline void paravirt_post_allocator_init(void) | ||
933 | { | ||
934 | if (pv_init_ops.post_allocator_init) | ||
935 | (*pv_init_ops.post_allocator_init)(); | ||
936 | } | ||
937 | |||
938 | static inline void paravirt_pagetable_setup_start(pgd_t *base) | ||
939 | { | ||
940 | (*pv_mmu_ops.pagetable_setup_start)(base); | ||
941 | } | ||
942 | |||
943 | static inline void paravirt_pagetable_setup_done(pgd_t *base) | ||
944 | { | ||
945 | (*pv_mmu_ops.pagetable_setup_done)(base); | ||
946 | } | ||
947 | |||
#ifdef CONFIG_SMP
/*
 * Forward (phys_apicid, start_eip, start_esp) to the
 * pv_apic_ops.startup_ipi_hook hook.
 */
static inline void startup_ipi_hook(int phys_apicid,
				    unsigned long start_eip,
				    unsigned long start_esp)
{
	PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
		    phys_apicid, start_eip, start_esp);
}
#endif /* CONFIG_SMP */
956 | |||
957 | static inline void paravirt_activate_mm(struct mm_struct *prev, | ||
958 | struct mm_struct *next) | ||
959 | { | ||
960 | PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); | ||
961 | } | ||
962 | |||
963 | static inline void arch_dup_mmap(struct mm_struct *oldmm, | ||
964 | struct mm_struct *mm) | ||
965 | { | ||
966 | PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); | ||
967 | } | ||
968 | |||
969 | static inline void arch_exit_mmap(struct mm_struct *mm) | ||
970 | { | ||
971 | PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); | ||
972 | } | ||
973 | |||
974 | static inline void __flush_tlb(void) | ||
975 | { | ||
976 | PVOP_VCALL0(pv_mmu_ops.flush_tlb_user); | ||
977 | } | ||
978 | static inline void __flush_tlb_global(void) | ||
979 | { | ||
980 | PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); | ||
981 | } | ||
982 | static inline void __flush_tlb_single(unsigned long addr) | ||
983 | { | ||
984 | PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); | ||
985 | } | ||
986 | |||
987 | static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | ||
988 | unsigned long va) | ||
989 | { | ||
990 | PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); | ||
991 | } | ||
992 | |||
993 | static inline int paravirt_pgd_alloc(struct mm_struct *mm) | ||
994 | { | ||
995 | return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm); | ||
996 | } | ||
997 | |||
998 | static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
999 | { | ||
1000 | PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); | ||
1001 | } | ||
1002 | |||
1003 | static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) | ||
1004 | { | ||
1005 | PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); | ||
1006 | } | ||
1007 | static inline void paravirt_release_pte(unsigned long pfn) | ||
1008 | { | ||
1009 | PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); | ||
1010 | } | ||
1011 | |||
1012 | static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) | ||
1013 | { | ||
1014 | PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); | ||
1015 | } | ||
1016 | |||
1017 | static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, | ||
1018 | unsigned long start, unsigned long count) | ||
1019 | { | ||
1020 | PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count); | ||
1021 | } | ||
1022 | static inline void paravirt_release_pmd(unsigned long pfn) | ||
1023 | { | ||
1024 | PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); | ||
1025 | } | ||
1026 | |||
1027 | static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) | ||
1028 | { | ||
1029 | PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); | ||
1030 | } | ||
1031 | static inline void paravirt_release_pud(unsigned long pfn) | ||
1032 | { | ||
1033 | PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); | ||
1034 | } | ||
1035 | |||
#ifdef CONFIG_HIGHPTE
/*
 * Call the pv_mmu_ops.kmap_atomic_pte hook with (page, type).  The
 * hook result is fetched as an unsigned long and returned as a pointer.
 */
static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
{
	unsigned long addr = PVOP_CALL2(unsigned long,
					pv_mmu_ops.kmap_atomic_pte,
					page, type);

	return (void *)addr;
}
#endif /* CONFIG_HIGHPTE */
1044 | |||
1045 | static inline void pte_update(struct mm_struct *mm, unsigned long addr, | ||
1046 | pte_t *ptep) | ||
1047 | { | ||
1048 | PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); | ||
1049 | } | ||
1050 | |||
1051 | static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, | ||
1052 | pte_t *ptep) | ||
1053 | { | ||
1054 | PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); | ||
1055 | } | ||
1056 | |||
1057 | static inline pte_t __pte(pteval_t val) | ||
1058 | { | ||
1059 | pteval_t ret; | ||
1060 | |||
1061 | if (sizeof(pteval_t) > sizeof(long)) | ||
1062 | ret = PVOP_CALL2(pteval_t, | ||
1063 | pv_mmu_ops.make_pte, | ||
1064 | val, (u64)val >> 32); | ||
1065 | else | ||
1066 | ret = PVOP_CALL1(pteval_t, | ||
1067 | pv_mmu_ops.make_pte, | ||
1068 | val); | ||
1069 | |||
1070 | return (pte_t) { .pte = ret }; | ||
1071 | } | ||
1072 | |||
1073 | static inline pteval_t pte_val(pte_t pte) | ||
1074 | { | ||
1075 | pteval_t ret; | ||
1076 | |||
1077 | if (sizeof(pteval_t) > sizeof(long)) | ||
1078 | ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, | ||
1079 | pte.pte, (u64)pte.pte >> 32); | ||
1080 | else | ||
1081 | ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, | ||
1082 | pte.pte); | ||
1083 | |||
1084 | return ret; | ||
1085 | } | ||
1086 | |||
1087 | static inline pteval_t pte_flags(pte_t pte) | ||
1088 | { | ||
1089 | pteval_t ret; | ||
1090 | |||
1091 | if (sizeof(pteval_t) > sizeof(long)) | ||
1092 | ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, | ||
1093 | pte.pte, (u64)pte.pte >> 32); | ||
1094 | else | ||
1095 | ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, | ||
1096 | pte.pte); | ||
1097 | |||
1098 | #ifdef CONFIG_PARAVIRT_DEBUG | ||
1099 | BUG_ON(ret & PTE_PFN_MASK); | ||
1100 | #endif | ||
1101 | return ret; | ||
1102 | } | ||
1103 | |||
1104 | static inline pgd_t __pgd(pgdval_t val) | ||
1105 | { | ||
1106 | pgdval_t ret; | ||
1107 | |||
1108 | if (sizeof(pgdval_t) > sizeof(long)) | ||
1109 | ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, | ||
1110 | val, (u64)val >> 32); | ||
1111 | else | ||
1112 | ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, | ||
1113 | val); | ||
1114 | |||
1115 | return (pgd_t) { ret }; | ||
1116 | } | ||
1117 | |||
1118 | static inline pgdval_t pgd_val(pgd_t pgd) | ||
1119 | { | ||
1120 | pgdval_t ret; | ||
1121 | |||
1122 | if (sizeof(pgdval_t) > sizeof(long)) | ||
1123 | ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, | ||
1124 | pgd.pgd, (u64)pgd.pgd >> 32); | ||
1125 | else | ||
1126 | ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, | ||
1127 | pgd.pgd); | ||
1128 | |||
1129 | return ret; | ||
1130 | } | ||
1131 | |||
1132 | #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION | ||
1133 | static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, | ||
1134 | pte_t *ptep) | ||
1135 | { | ||
1136 | pteval_t ret; | ||
1137 | |||
1138 | ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start, | ||
1139 | mm, addr, ptep); | ||
1140 | |||
1141 | return (pte_t) { .pte = ret }; | ||
1142 | } | ||
1143 | |||
1144 | static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | ||
1145 | pte_t *ptep, pte_t pte) | ||
1146 | { | ||
1147 | if (sizeof(pteval_t) > sizeof(long)) | ||
1148 | /* 5 arg words */ | ||
1149 | pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte); | ||
1150 | else | ||
1151 | PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit, | ||
1152 | mm, addr, ptep, pte.pte); | ||
1153 | } | ||
1154 | |||
1155 | static inline void set_pte(pte_t *ptep, pte_t pte) | ||
1156 | { | ||
1157 | if (sizeof(pteval_t) > sizeof(long)) | ||
1158 | PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, | ||
1159 | pte.pte, (u64)pte.pte >> 32); | ||
1160 | else | ||
1161 | PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, | ||
1162 | pte.pte); | ||
1163 | } | ||
1164 | |||
1165 | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
1166 | pte_t *ptep, pte_t pte) | ||
1167 | { | ||
1168 | if (sizeof(pteval_t) > sizeof(long)) | ||
1169 | /* 5 arg words */ | ||
1170 | pv_mmu_ops.set_pte_at(mm, addr, ptep, pte); | ||
1171 | else | ||
1172 | PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); | ||
1173 | } | ||
1174 | |||
1175 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
1176 | { | ||
1177 | pmdval_t val = native_pmd_val(pmd); | ||
1178 | |||
1179 | if (sizeof(pmdval_t) > sizeof(long)) | ||
1180 | PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32); | ||
1181 | else | ||
1182 | PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); | ||
1183 | } | ||
1184 | |||
1185 | #if PAGETABLE_LEVELS >= 3 | ||
1186 | static inline pmd_t __pmd(pmdval_t val) | ||
1187 | { | ||
1188 | pmdval_t ret; | ||
1189 | |||
1190 | if (sizeof(pmdval_t) > sizeof(long)) | ||
1191 | ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, | ||
1192 | val, (u64)val >> 32); | ||
1193 | else | ||
1194 | ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, | ||
1195 | val); | ||
1196 | |||
1197 | return (pmd_t) { ret }; | ||
1198 | } | ||
1199 | |||
1200 | static inline pmdval_t pmd_val(pmd_t pmd) | ||
1201 | { | ||
1202 | pmdval_t ret; | ||
1203 | |||
1204 | if (sizeof(pmdval_t) > sizeof(long)) | ||
1205 | ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, | ||
1206 | pmd.pmd, (u64)pmd.pmd >> 32); | ||
1207 | else | ||
1208 | ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, | ||
1209 | pmd.pmd); | ||
1210 | |||
1211 | return ret; | ||
1212 | } | ||
1213 | |||
1214 | static inline void set_pud(pud_t *pudp, pud_t pud) | ||
1215 | { | ||
1216 | pudval_t val = native_pud_val(pud); | ||
1217 | |||
1218 | if (sizeof(pudval_t) > sizeof(long)) | ||
1219 | PVOP_VCALL3(pv_mmu_ops.set_pud, pudp, | ||
1220 | val, (u64)val >> 32); | ||
1221 | else | ||
1222 | PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, | ||
1223 | val); | ||
1224 | } | ||
1225 | #if PAGETABLE_LEVELS == 4 | ||
1226 | static inline pud_t __pud(pudval_t val) | ||
1227 | { | ||
1228 | pudval_t ret; | ||
1229 | |||
1230 | if (sizeof(pudval_t) > sizeof(long)) | ||
1231 | ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, | ||
1232 | val, (u64)val >> 32); | ||
1233 | else | ||
1234 | ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, | ||
1235 | val); | ||
1236 | |||
1237 | return (pud_t) { ret }; | ||
1238 | } | ||
1239 | |||
1240 | static inline pudval_t pud_val(pud_t pud) | ||
1241 | { | ||
1242 | pudval_t ret; | ||
1243 | |||
1244 | if (sizeof(pudval_t) > sizeof(long)) | ||
1245 | ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, | ||
1246 | pud.pud, (u64)pud.pud >> 32); | ||
1247 | else | ||
1248 | ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, | ||
1249 | pud.pud); | ||
1250 | |||
1251 | return ret; | ||
1252 | } | ||
1253 | |||
1254 | static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) | ||
1255 | { | ||
1256 | pgdval_t val = native_pgd_val(pgd); | ||
1257 | |||
1258 | if (sizeof(pgdval_t) > sizeof(long)) | ||
1259 | PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp, | ||
1260 | val, (u64)val >> 32); | ||
1261 | else | ||
1262 | PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, | ||
1263 | val); | ||
1264 | } | ||
1265 | |||
1266 | static inline void pgd_clear(pgd_t *pgdp) | ||
1267 | { | ||
1268 | set_pgd(pgdp, __pgd(0)); | ||
1269 | } | ||
1270 | |||
1271 | static inline void pud_clear(pud_t *pudp) | ||
1272 | { | ||
1273 | set_pud(pudp, __pud(0)); | ||
1274 | } | ||
1275 | |||
1276 | #endif /* PAGETABLE_LEVELS == 4 */ | ||
1277 | |||
1278 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
1279 | |||
1280 | #ifdef CONFIG_X86_PAE | ||
1281 | /* Special-case pte-setting operations for PAE, which can't update a | ||
1282 | 64-bit pte atomically */ | ||
1283 | static inline void set_pte_atomic(pte_t *ptep, pte_t pte) | ||
1284 | { | ||
1285 | PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep, | ||
1286 | pte.pte, pte.pte >> 32); | ||
1287 | } | ||
1288 | |||
1289 | static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, | ||
1290 | pte_t *ptep, pte_t pte) | ||
1291 | { | ||
1292 | /* 5 arg words */ | ||
1293 | pv_mmu_ops.set_pte_present(mm, addr, ptep, pte); | ||
1294 | } | ||
1295 | |||
1296 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, | ||
1297 | pte_t *ptep) | ||
1298 | { | ||
1299 | PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep); | ||
1300 | } | ||
1301 | |||
1302 | static inline void pmd_clear(pmd_t *pmdp) | ||
1303 | { | ||
1304 | PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp); | ||
1305 | } | ||
1306 | #else /* !CONFIG_X86_PAE */ | ||
1307 | static inline void set_pte_atomic(pte_t *ptep, pte_t pte) | ||
1308 | { | ||
1309 | set_pte(ptep, pte); | ||
1310 | } | ||
1311 | |||
1312 | static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, | ||
1313 | pte_t *ptep, pte_t pte) | ||
1314 | { | ||
1315 | set_pte(ptep, pte); | ||
1316 | } | ||
1317 | |||
1318 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, | ||
1319 | pte_t *ptep) | ||
1320 | { | ||
1321 | set_pte_at(mm, addr, ptep, __pte(0)); | ||
1322 | } | ||
1323 | |||
1324 | static inline void pmd_clear(pmd_t *pmdp) | ||
1325 | { | ||
1326 | set_pmd(pmdp, __pmd(0)); | ||
1327 | } | ||
1328 | #endif /* CONFIG_X86_PAE */ | ||
1329 | |||
/* Lazy mode for batching updates / context switch */
enum paravirt_lazy_mode {
	PARAVIRT_LAZY_NONE,	/* == 0 */
	PARAVIRT_LAZY_MMU,
	PARAVIRT_LAZY_CPU,
};

/* Out-of-line helpers for lazy mode; defined outside this header. */
enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
void paravirt_enter_lazy_cpu(void);
void paravirt_leave_lazy_cpu(void);
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
1343 | |||
1344 | #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE | ||
1345 | static inline void arch_enter_lazy_cpu_mode(void) | ||
1346 | { | ||
1347 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); | ||
1348 | } | ||
1349 | |||
1350 | static inline void arch_leave_lazy_cpu_mode(void) | ||
1351 | { | ||
1352 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); | ||
1353 | } | ||
1354 | |||
1355 | static inline void arch_flush_lazy_cpu_mode(void) | ||
1356 | { | ||
1357 | if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { | ||
1358 | arch_leave_lazy_cpu_mode(); | ||
1359 | arch_enter_lazy_cpu_mode(); | ||
1360 | } | ||
1361 | } | ||
1362 | |||
1363 | |||
1364 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE | ||
1365 | static inline void arch_enter_lazy_mmu_mode(void) | ||
1366 | { | ||
1367 | PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter); | ||
1368 | } | ||
1369 | |||
1370 | static inline void arch_leave_lazy_mmu_mode(void) | ||
1371 | { | ||
1372 | PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); | ||
1373 | } | ||
1374 | |||
1375 | static inline void arch_flush_lazy_mmu_mode(void) | ||
1376 | { | ||
1377 | if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { | ||
1378 | arch_leave_lazy_mmu_mode(); | ||
1379 | arch_enter_lazy_mmu_mode(); | ||
1380 | } | ||
1381 | } | ||
1382 | |||
1383 | static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | ||
1384 | unsigned long phys, pgprot_t flags) | ||
1385 | { | ||
1386 | pv_mmu_ops.set_fixmap(idx, phys, flags); | ||
1387 | } | ||
1388 | |||
/* _paravirt_nop is defined elsewhere; paravirt_nop is its address as void *. */
void _paravirt_nop(void);
#define paravirt_nop	((void *)_paravirt_nop)

/* Defined outside this header. */
void paravirt_use_bytelocks(void);
1393 | |||
#ifdef CONFIG_SMP

/*
 * Raw spinlock operations (SMP only).  Each wrapper forwards its
 * arguments unchanged to the matching pv_lock_ops hook.
 */

/* int result of pv_lock_ops.spin_is_locked(lock) */
static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
}

/* int result of pv_lock_ops.spin_is_contended(lock) */
static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
}

/* pv_lock_ops.spin_lock(lock) */
static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
}

/* pv_lock_ops.spin_lock_flags(lock, flags) */
static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock,
						  unsigned long flags)
{
	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
}

/* int result of pv_lock_ops.spin_trylock(lock) */
static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
}

/* pv_lock_ops.spin_unlock(lock) */
static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
}

#endif /* CONFIG_SMP */
1428 | |||
1429 | /* These all sit in the .parainstructions section to tell us what to patch. */ | ||
1430 | struct paravirt_patch_site { | ||
1431 | u8 *instr; /* original instructions */ | ||
1432 | u8 instrtype; /* type of this instruction */ | ||
1433 | u8 len; /* length of original instruction */ | ||
1434 | u16 clobbers; /* what registers you may clobber */ | ||
1435 | }; | ||
1436 | |||
1437 | extern struct paravirt_patch_site __parainstructions[], | ||
1438 | __parainstructions_end[]; | ||
1439 | |||
#ifdef CONFIG_X86_32
/* 32-bit: ecx and edx are pushed/popped around the call. */
#define PV_SAVE_REGS		"pushl %%ecx; pushl %%edx;"
#define PV_RESTORE_REGS		"popl %%edx; popl %%ecx"
#define PV_FLAGS_ARG		"0"
#define PV_EXTRA_CLOBBERS
#define PV_VEXTRA_CLOBBERS
#else
/*
 * We save some registers, but not all of them: that would be too much.
 * Only rdi is pushed/popped around the call; the other caller-saved
 * registers are declared as clobbers instead (EXTRA_CLOBBERS plus rcx,
 * rdx and rsi, and additionally rdi in the PV_VEXTRA_CLOBBERS list).
 */
#define PV_SAVE_REGS		"pushq %%rdi;"
#define PV_RESTORE_REGS		"popq %%rdi;"
#define PV_EXTRA_CLOBBERS	EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
#define PV_VEXTRA_CLOBBERS	EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
#define PV_FLAGS_ARG		"D"
#endif
1455 | |||
1456 | static inline unsigned long __raw_local_save_flags(void) | ||
1457 | { | ||
1458 | unsigned long f; | ||
1459 | |||
1460 | asm volatile(paravirt_alt(PV_SAVE_REGS | ||
1461 | PARAVIRT_CALL | ||
1462 | PV_RESTORE_REGS) | ||
1463 | : "=a"(f) | ||
1464 | : paravirt_type(pv_irq_ops.save_fl), | ||
1465 | paravirt_clobber(CLBR_EAX) | ||
1466 | : "memory", "cc" PV_VEXTRA_CLOBBERS); | ||
1467 | return f; | ||
1468 | } | ||
1469 | |||
1470 | static inline void raw_local_irq_restore(unsigned long f) | ||
1471 | { | ||
1472 | asm volatile(paravirt_alt(PV_SAVE_REGS | ||
1473 | PARAVIRT_CALL | ||
1474 | PV_RESTORE_REGS) | ||
1475 | : "=a"(f) | ||
1476 | : PV_FLAGS_ARG(f), | ||
1477 | paravirt_type(pv_irq_ops.restore_fl), | ||
1478 | paravirt_clobber(CLBR_EAX) | ||
1479 | : "memory", "cc" PV_EXTRA_CLOBBERS); | ||
1480 | } | ||
1481 | |||
1482 | static inline void raw_local_irq_disable(void) | ||
1483 | { | ||
1484 | asm volatile(paravirt_alt(PV_SAVE_REGS | ||
1485 | PARAVIRT_CALL | ||
1486 | PV_RESTORE_REGS) | ||
1487 | : | ||
1488 | : paravirt_type(pv_irq_ops.irq_disable), | ||
1489 | paravirt_clobber(CLBR_EAX) | ||
1490 | : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); | ||
1491 | } | ||
1492 | |||
1493 | static inline void raw_local_irq_enable(void) | ||
1494 | { | ||
1495 | asm volatile(paravirt_alt(PV_SAVE_REGS | ||
1496 | PARAVIRT_CALL | ||
1497 | PV_RESTORE_REGS) | ||
1498 | : | ||
1499 | : paravirt_type(pv_irq_ops.irq_enable), | ||
1500 | paravirt_clobber(CLBR_EAX) | ||
1501 | : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); | ||
1502 | } | ||
1503 | |||
/*
 * Fetch the current flags with __raw_local_save_flags(), then call
 * raw_local_irq_disable().  Returns the flags fetched beforehand.
 */
static inline unsigned long __raw_local_irq_save(void)
{
	unsigned long saved = __raw_local_save_flags();

	raw_local_irq_disable();
	return saved;
}
1512 | |||
1513 | |||
/* Make sure as little as possible of this mess escapes. */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
#undef __PVOP_VCALL
/* value-returning call wrappers */
#undef PVOP_CALL0
#undef PVOP_CALL1
#undef PVOP_CALL2
#undef PVOP_CALL3
#undef PVOP_CALL4
/* void call wrappers */
#undef PVOP_VCALL0
#undef PVOP_VCALL1
#undef PVOP_VCALL2
#undef PVOP_VCALL3
#undef PVOP_VCALL4
1528 | |||
1529 | #else /* __ASSEMBLY__ */ | ||
1530 | |||
/*
 * Emit "ops" between local labels 771 and 772, then add one record to
 * .parainstructions: the address of 771 (emitted with "word", aligned
 * to "algn"), a type byte, the byte length 772b-771b and a 16-bit
 * clobber mask.
 */
#define _PVSITE(ptype, clobbers, ops, word, algn)	\
771:;							\
	ops;						\
772:;							\
	.pushsection .parainstructions,"a";		\
	 .align	algn;					\
	 word 771b;					\
	 .byte ptype;					\
	 .byte 772b-771b;				\
	 .short clobbers;				\
	.popsection
1542 | |||
1543 | |||
#ifdef CONFIG_X86_64
/* 64-bit: push rax, rcx, rdx, rsi, rdi and r8-r11 ... */
#define PV_SAVE_REGS				\
	push %rax; push %rcx; push %rdx;	\
	push %rsi; push %rdi;			\
	push %r8; push %r9;			\
	push %r10; push %r11
/* ... and pop them again in the reverse order. */
#define PV_RESTORE_REGS				\
	pop %r11; pop %r10;			\
	pop %r9; pop %r8;			\
	pop %rdi; pop %rsi;			\
	pop %rdx; pop %rcx; pop %rax
/* Patch type: offset into the patch template in units of 8 bytes. */
#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
/* Patch-site records hold a .quad address, 8-byte aligned. */
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
/* Indirect operand: rip-relative. */
#define PARA_INDIRECT(addr)	*addr(%rip)
#else
/* 32-bit: push eax, edi, ecx, edx and pop them in reverse order. */
#define PV_SAVE_REGS				\
	pushl %eax; pushl %edi;			\
	pushl %ecx; pushl %edx
#define PV_RESTORE_REGS				\
	popl %edx; popl %ecx;			\
	popl %edi; popl %eax
/* Patch type: offset into the patch template in units of 4 bytes. */
#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
/* Patch-site records hold a .long address, 4-byte aligned. */
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
/* Indirect operand: %cs-prefixed absolute. */
#define PARA_INDIRECT(addr)	*%cs:addr
#endif
1575 | |||
/* Patch site: indirect jmp through the pv_cpu_ops iret slot; no clobbers. */
#define INTERRUPT_RETURN						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret),			\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
1579 | |||
/*
 * Patch site: save registers, call indirectly through the pv_irq_ops
 * irq_disable slot, restore registers.  "clobbers" is recorded in the
 * patch-site entry.
 *
 * The definition ends on its last line; the stray trailing backslash
 * that used to continue it onto the following line is gone, so text
 * placed directly after the macro can no longer be absorbed into it.
 */
#define DISABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
		  PV_SAVE_REGS;						\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
		  PV_RESTORE_REGS;)
1585 | |||
/*
 * Patch site: save registers, call indirectly through the pv_irq_ops
 * irq_enable slot, restore registers.  "clobbers" is recorded in the
 * patch-site entry.
 */
#define ENABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable),		\
		  clobbers,						\
		  PV_SAVE_REGS;						\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
		  PV_RESTORE_REGS;)
1591 | |||
/* Patch site: indirect jmp through the pv_cpu_ops usergs_sysret32 slot. */
#define USERGS_SYSRET32							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
1596 | |||
1597 | #ifdef CONFIG_X86_32 | ||
/*
 * Indirect call through the pv_cpu_ops read_cr0 slot with ecx and edx
 * preserved around it.  Not wrapped in a patch site.
 */
#define GET_CR0_INTO_EAX					\
	push %ecx;						\
	push %edx;						\
	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);		\
	pop %edx;						\
	pop %ecx

/* Patch site: indirect jmp through the pv_cpu_ops irq_enable_sysexit slot. */
#define ENABLE_INTERRUPTS_SYSEXIT					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1607 | |||
1608 | |||
1609 | #else /* !CONFIG_X86_32 */ | ||
1610 | |||
/*
 * If swapgs is used while the userspace stack is still current,
 * there's no way to call a pvop.  The PV replacement *must* be
 * inlined, or the swapgs instruction must be trapped and emulated.
 * Hence this variant emits a bare swapgs inside the patch site.
 */
#define SWAPGS_UNSAFE_STACK						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs),		\
		  CLBR_NONE,						\
		  swapgs)

/*
 * Regular variant: save registers, call indirectly through the
 * pv_cpu_ops swapgs slot, restore registers.
 */
#define SWAPGS								\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs),		\
		  CLBR_NONE,						\
		  PV_SAVE_REGS;						\
		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);		\
		  PV_RESTORE_REGS					\
		 )
1626 | |||
/*
 * Indirect call through the pv_mmu_ops read_cr2 slot; the result is
 * moved from rax to rcx and rax is then zeroed.  Not wrapped in a
 * patch site.
 */
#define GET_CR2_INTO_RCX					\
	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);		\
	movq %rax, %rcx;					\
	xorq %rax, %rax;

/* Patch site: indirect call through the pv_irq_ops adjust_exception_frame slot. */
#define PARAVIRT_ADJUST_EXCEPTION_FRAME					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
		  CLBR_NONE,						\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))

/* Patch site: indirect jmp through the pv_cpu_ops usergs_sysret64 slot. */
#define USERGS_SYSRET64							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))

/* Patch site: indirect jmp through the pv_cpu_ops irq_enable_sysexit slot. */
#define ENABLE_INTERRUPTS_SYSEXIT32					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1646 | #endif /* CONFIG_X86_32 */ | ||
1647 | |||
1648 | #endif /* __ASSEMBLY__ */ | ||
1649 | #endif /* CONFIG_PARAVIRT */ | ||
1650 | #endif /* _ASM_X86_PARAVIRT_H */ | ||