author		Jeremy Fitzhardinge <jeremy@goop.org>	2007-05-02 13:27:14 -0400
committer	Andi Kleen <andi@basil.nowhere.org>	2007-05-02 13:27:14 -0400
commit		294688c028e80fd467cdd22da79f62c5f311eaf5 (patch)
tree		3a2dcacf3f4f9997ae0032fb3c8342a2a7e5753d /include
parent		f8822f42019eceed19cc6c0f985a489e17796ed8 (diff)
[PATCH] i386: PARAVIRT: Document asm-i386/paravirt.h
Clean things up, and broadly document:
- the paravirt_ops functions themselves
- the patching mechanism
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'include')
-rw-r--r--	include/asm-i386/paravirt.h	131
1 file changed, 121 insertions(+), 10 deletions(-)
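As context for the patch below (this sketch is not part of the commit): paravirt_ops is a table of function pointers that a hypervisor backend can override, and the wrapper macros this patch documents record each call site so it can later be patched into a direct call or inline code. The following is a minimal, self-contained userspace C illustration of the function-pointer-table half of that idea; all names here (demo_paravirt_ops, native_irq_disable, demo_ops, and so on) are invented for the example and are not kernel identifiers.

#include <stdio.h>

/* A cut-down stand-in for struct paravirt_ops: just three operations. */
struct demo_paravirt_ops {
        void (*irq_disable)(void);
        void (*irq_enable)(void);
        unsigned long (*save_fl)(void);
};

/* "Native" implementations; a hypervisor backend would install its own. */
static unsigned long demo_flags = 1;
static void native_irq_disable(void)      { demo_flags = 0; }
static void native_irq_enable(void)       { demo_flags = 1; }
static unsigned long native_save_fl(void) { return demo_flags; }

static struct demo_paravirt_ops demo_ops = {
        .irq_disable = native_irq_disable,
        .irq_enable  = native_irq_enable,
        .save_fl     = native_save_fl,
};

/*
 * Callers reach an operation only through a small wrapper.  In the real
 * header the wrapper is inline asm that also records the call site in
 * .parainstructions so apply_paravirt() can patch it at boot.
 */
static inline void demo_irq_disable(void)
{
        demo_ops.irq_disable();
}

int main(void)
{
        demo_irq_disable();
        printf("IF flag after irq_disable: %lu\n", demo_ops.save_fl());
        return 0;
}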
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 837457b42dbe..8bfaf10d9961 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -21,6 +21,14 @@ struct Xgt_desc_struct;
 struct tss_struct;
 struct mm_struct;
 struct desc_struct;
+
+/* Lazy mode for batching updates / context switch */
+enum paravirt_lazy_mode {
+        PARAVIRT_LAZY_NONE = 0,
+        PARAVIRT_LAZY_MMU = 1,
+        PARAVIRT_LAZY_CPU = 2,
+};
+
 struct paravirt_ops
 {
         unsigned int kernel_rpl;
@@ -37,22 +45,33 @@ struct paravirt_ops
          */
         unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len);
 
+        /* Basic arch-specific setup */
         void (*arch_setup)(void);
         char *(*memory_setup)(void);
         void (*init_IRQ)(void);
+        void (*time_init)(void);
 
+        /*
+         * Called before/after init_mm pagetable setup. setup_start
+         * may reset %cr3, and may pre-install parts of the pagetable;
+         * pagetable setup is expected to preserve any existing
+         * mapping.
+         */
         void (*pagetable_setup_start)(pgd_t *pgd_base);
         void (*pagetable_setup_done)(pgd_t *pgd_base);
 
+        /* Print a banner to identify the environment */
         void (*banner)(void);
 
+        /* Get and set time of day */
         unsigned long (*get_wallclock)(void);
         int (*set_wallclock)(unsigned long);
-        void (*time_init)(void);
 
+        /* cpuid emulation, mostly so that caps bits can be disabled */
         void (*cpuid)(unsigned int *eax, unsigned int *ebx,
                       unsigned int *ecx, unsigned int *edx);
 
+        /* hooks for various privileged instructions */
         unsigned long (*get_debugreg)(int regno);
         void (*set_debugreg)(int regno, unsigned long value);
 
@@ -71,15 +90,23 @@ struct paravirt_ops
         unsigned long (*read_cr4)(void);
         void (*write_cr4)(unsigned long);
 
+        /*
+         * Get/set interrupt state.  save_fl and restore_fl are only
+         * expected to use X86_EFLAGS_IF; all other bits
+         * returned from save_fl are undefined, and may be ignored by
+         * restore_fl.
+         */
         unsigned long (*save_fl)(void);
         void (*restore_fl)(unsigned long);
         void (*irq_disable)(void);
         void (*irq_enable)(void);
         void (*safe_halt)(void);
         void (*halt)(void);
+
         void (*wbinvd)(void);
 
-        /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
+        /* MSR, PMC and TSC operations.
+           err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
         u64 (*read_msr)(unsigned int msr, int *err);
         int (*write_msr)(unsigned int msr, u64 val);
 
@@ -88,6 +115,7 @@ struct paravirt_ops
         u64 (*get_scheduled_cycles)(void);
         unsigned long (*get_cpu_khz)(void);
 
+        /* Segment descriptor handling */
         void (*load_tr_desc)(void);
         void (*load_gdt)(const struct Xgt_desc_struct *);
         void (*load_idt)(const struct Xgt_desc_struct *);
@@ -105,9 +133,12 @@ struct paravirt_ops
         void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);
 
         void (*set_iopl_mask)(unsigned mask);
-
         void (*io_delay)(void);
 
+        /*
+         * Hooks for intercepting the creation/use/destruction of an
+         * mm_struct.
+         */
         void (*activate_mm)(struct mm_struct *prev,
                             struct mm_struct *next);
         void (*dup_mmap)(struct mm_struct *oldmm,
@@ -115,30 +146,43 @@ struct paravirt_ops
         void (*exit_mmap)(struct mm_struct *mm);
 
 #ifdef CONFIG_X86_LOCAL_APIC
+        /*
+         * Direct APIC operations, principally for VMI.  Ideally
+         * these shouldn't be in this interface.
+         */
         void (*apic_write)(unsigned long reg, unsigned long v);
         void (*apic_write_atomic)(unsigned long reg, unsigned long v);
         unsigned long (*apic_read)(unsigned long reg);
         void (*setup_boot_clock)(void);
         void (*setup_secondary_clock)(void);
+
+        void (*startup_ipi_hook)(int phys_apicid,
+                                 unsigned long start_eip,
+                                 unsigned long start_esp);
 #endif
 
+        /* TLB operations */
         void (*flush_tlb_user)(void);
         void (*flush_tlb_kernel)(void);
         void (*flush_tlb_single)(unsigned long addr);
 
         void (*map_pt_hook)(int type, pte_t *va, u32 pfn);
 
+        /* Hooks for allocating/releasing pagetable pages */
         void (*alloc_pt)(u32 pfn);
         void (*alloc_pd)(u32 pfn);
         void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
         void (*release_pt)(u32 pfn);
         void (*release_pd)(u32 pfn);
 
+        /* Pagetable manipulation functions */
         void (*set_pte)(pte_t *ptep, pte_t pteval);
-        void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval);
+        void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
+                           pte_t *ptep, pte_t pteval);
         void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
         void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-        void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+        void (*pte_update_defer)(struct mm_struct *mm,
+                                 unsigned long addr, pte_t *ptep);
 
         pte_t (*ptep_get_and_clear)(pte_t *ptep);
 
@@ -164,13 +208,12 @@ struct paravirt_ops
         pgd_t (*make_pgd)(unsigned long pgd);
 #endif
 
-        void (*set_lazy_mode)(int mode);
+        /* Set deferred update mode, used for batching operations. */
+        void (*set_lazy_mode)(enum paravirt_lazy_mode mode);
 
         /* These two are jmp to, not actually called. */
         void (*irq_enable_sysexit)(void);
         void (*iret)(void);
-
-        void (*startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp);
 };
 
 /* Mark a paravirt probe function. */
@@ -188,8 +231,10 @@ extern struct paravirt_ops paravirt_ops;
 #define paravirt_clobber(clobber)		\
         [paravirt_clobber] "i" (clobber)
 
-#define PARAVIRT_CALL	"call *(paravirt_ops+%c[paravirt_typenum]*4);"
-
+/*
+ * Generate some code, and mark it as patchable by the
+ * apply_paravirt() alternate instruction patcher.
+ */
 #define _paravirt_alt(insn_string, type, clobber)	\
         "771:\n\t" insn_string "\n" "772:\n"		\
         ".pushsection .parainstructions,\"a\"\n"	\
@@ -199,9 +244,74 @@ extern struct paravirt_ops paravirt_ops;
         "  .short " clobber "\n"		\
         ".popsection\n"
 
+/* Generate patchable code, with the default asm parameters. */
 #define paravirt_alt(insn_string)					\
         _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
 
+/*
+ * This generates an indirect call based on the operation type number.
+ * The type number, computed in PARAVIRT_PATCH, is derived from the
+ * offset into the paravirt_ops structure, and can therefore be freely
+ * converted back into a structure offset.
+ */
+#define PARAVIRT_CALL	"call *(paravirt_ops+%c[paravirt_typenum]*4);"
+
+/*
+ * These macros are intended to wrap calls into a paravirt_ops
+ * operation, so that they can be later identified and patched at
+ * runtime.
+ *
+ * Normally, a call to a pv_op function is a simple indirect call:
+ * (paravirt_ops.operations)(args...).
+ *
+ * Unfortunately, this is a relatively slow operation for modern CPUs,
+ * because it cannot necessarily determine what the destination
+ * address is.  In this case, the address is a runtime constant, so at
+ * the very least we can patch the call to be a simple direct call, or
+ * ideally, patch an inline implementation into the callsite.  (Direct
+ * calls are essentially free, because the call and return addresses
+ * are completely predictable.)
+ *
+ * These macros rely on the standard gcc "regparm(3)" calling
+ * convention, in which the first three arguments are placed in %eax,
+ * %edx, %ecx (in that order), and the remaining arguments are placed
+ * on the stack.  All caller-save registers (eax,edx,ecx) are expected
+ * to be modified (either clobbered or used for return values).
+ *
+ * The call instruction itself is marked by placing its start address
+ * and size into the .parainstructions section, so that
+ * apply_paravirt() in arch/i386/kernel/alternative.c can do the
+ * appropriate patching under the control of the backend paravirt_ops
+ * implementation.
+ *
+ * Unfortunately there's no way to get gcc to generate the args setup
+ * for the call, and then allow the call itself to be generated by an
+ * inline asm.  Because of this, we must do the complete arg setup and
+ * return value handling from within these macros.  This is fairly
+ * cumbersome.
+ *
+ * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
+ * It could be extended to more arguments, but there would be little
+ * to be gained from that.  For each number of arguments, there are
+ * the two VCALL and CALL variants for void and non-void functions.
+ *
+ * When there is a return value, the invoker of the macro must specify
+ * the return type.  The macro then uses sizeof() on that type to
+ * determine whether it's a 32 or 64 bit value, and places the return
+ * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
+ * 64-bit).
+ *
+ * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
+ * in low,high order.
+ *
+ * Small structures are passed and returned in registers.  The macro
+ * calling convention can't directly deal with this, so the wrapper
+ * functions must do this.
+ *
+ * These PVOP_* macros are only defined within this header.  This
+ * means that all uses must be wrapped in inline functions.  This also
+ * makes sure the incoming and outgoing types are always correct.
+ */
 #define PVOP_CALL0(__rettype, __op)					\
         ({								\
                 __rettype __ret;					\
@@ -1026,6 +1136,7 @@ static inline unsigned long __raw_local_irq_save(void)
                       [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)),	\
                       paravirt_clobber(CLBR_EAX)
 
+/* Make sure as little as possible of this mess escapes. */
 #undef PARAVIRT_CALL
 #undef PVOP_VCALL0
 #undef PVOP_CALL0
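
A further hedged sketch of the PVOP_* idea the long comment in this patch describes: each pv_op is reached only through a typed inline wrapper, and the wrapper is the single place where arguments and the return value are marshalled. This userspace analogue deliberately omits the inline asm, the regparm(3) register placement and the .parainstructions bookkeeping; struct demo_ops, DEMO_CALL0 and DEMO_VCALL1 are invented names, not the kernel macros.

#include <stdio.h>

/* A tiny ops table standing in for two of the CR2 hooks. */
struct demo_ops {
        unsigned long (*read_cr2)(void);
        void (*write_cr2)(unsigned long);
};

static unsigned long fake_cr2;
static unsigned long impl_read_cr2(void)     { return fake_cr2; }
static void impl_write_cr2(unsigned long v)  { fake_cr2 = v; }

static struct demo_ops ops = {
        .read_cr2  = impl_read_cr2,
        .write_cr2 = impl_write_cr2,
};

/*
 * Analogues of PVOP_CALL0 / PVOP_VCALL1: pick the operation by name and
 * call it, handing back a value of the caller-specified type.  The real
 * macros additionally emit a patchable call instruction.
 */
#define DEMO_CALL0(rettype, op)   ({ (rettype)ops.op(); })
#define DEMO_VCALL1(op, arg1)     do { ops.op(arg1); } while (0)

/* All uses are wrapped in inline functions, mirroring the header's rule. */
static inline unsigned long read_cr2(void)
{
        return DEMO_CALL0(unsigned long, read_cr2);
}

static inline void write_cr2(unsigned long v)
{
        DEMO_VCALL1(write_cr2, v);
}

int main(void)
{
        write_cr2(0x1234);
        printf("cr2 = %#lx\n", read_cr2());
        return 0;
}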