aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@goop.org>2007-05-02 13:27:14 -0400
committerAndi Kleen <andi@basil.nowhere.org>2007-05-02 13:27:14 -0400
commit294688c028e80fd467cdd22da79f62c5f311eaf5 (patch)
tree3a2dcacf3f4f9997ae0032fb3c8342a2a7e5753d /include
parentf8822f42019eceed19cc6c0f985a489e17796ed8 (diff)
[PATCH] i386: PARAVIRT: Document asm-i386/paravirt.h
Clean things up, and broadly document: - the paravirt_ops functions themselves - the patching mechanism Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Signed-off-by: Andi Kleen <ak@suse.de> Cc: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'include')
-rw-r--r--include/asm-i386/paravirt.h131
1 files changed, 121 insertions, 10 deletions
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 837457b42dbe..8bfaf10d9961 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -21,6 +21,14 @@ struct Xgt_desc_struct;
21struct tss_struct; 21struct tss_struct;
22struct mm_struct; 22struct mm_struct;
23struct desc_struct; 23struct desc_struct;
24
25/* Lazy mode for batching updates / context switch */
26enum paravirt_lazy_mode {
27 PARAVIRT_LAZY_NONE = 0,
28 PARAVIRT_LAZY_MMU = 1,
29 PARAVIRT_LAZY_CPU = 2,
30};
31
24struct paravirt_ops 32struct paravirt_ops
25{ 33{
26 unsigned int kernel_rpl; 34 unsigned int kernel_rpl;
@@ -37,22 +45,33 @@ struct paravirt_ops
37 */ 45 */
38 unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); 46 unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len);
39 47
48 /* Basic arch-specific setup */
40 void (*arch_setup)(void); 49 void (*arch_setup)(void);
41 char *(*memory_setup)(void); 50 char *(*memory_setup)(void);
42 void (*init_IRQ)(void); 51 void (*init_IRQ)(void);
52 void (*time_init)(void);
43 53
54 /*
55 * Called before/after init_mm pagetable setup. setup_start
56 * may reset %cr3, and may pre-install parts of the pagetable;
57 * pagetable setup is expected to preserve any existing
58 * mapping.
59 */
44 void (*pagetable_setup_start)(pgd_t *pgd_base); 60 void (*pagetable_setup_start)(pgd_t *pgd_base);
45 void (*pagetable_setup_done)(pgd_t *pgd_base); 61 void (*pagetable_setup_done)(pgd_t *pgd_base);
46 62
63 /* Print a banner to identify the environment */
47 void (*banner)(void); 64 void (*banner)(void);
48 65
66 /* Get and set time of day */
49 unsigned long (*get_wallclock)(void); 67 unsigned long (*get_wallclock)(void);
50 int (*set_wallclock)(unsigned long); 68 int (*set_wallclock)(unsigned long);
51 void (*time_init)(void);
52 69
70 /* cpuid emulation, mostly so that caps bits can be disabled */
53 void (*cpuid)(unsigned int *eax, unsigned int *ebx, 71 void (*cpuid)(unsigned int *eax, unsigned int *ebx,
54 unsigned int *ecx, unsigned int *edx); 72 unsigned int *ecx, unsigned int *edx);
55 73
74 /* hooks for various privileged instructions */
56 unsigned long (*get_debugreg)(int regno); 75 unsigned long (*get_debugreg)(int regno);
57 void (*set_debugreg)(int regno, unsigned long value); 76 void (*set_debugreg)(int regno, unsigned long value);
58 77
@@ -71,15 +90,23 @@ struct paravirt_ops
71 unsigned long (*read_cr4)(void); 90 unsigned long (*read_cr4)(void);
72 void (*write_cr4)(unsigned long); 91 void (*write_cr4)(unsigned long);
73 92
93 /*
94 * Get/set interrupt state. save_fl and restore_fl are only
95 * expected to use X86_EFLAGS_IF; all other bits
96 * returned from save_fl are undefined, and may be ignored by
97 * restore_fl.
98 */
74 unsigned long (*save_fl)(void); 99 unsigned long (*save_fl)(void);
75 void (*restore_fl)(unsigned long); 100 void (*restore_fl)(unsigned long);
76 void (*irq_disable)(void); 101 void (*irq_disable)(void);
77 void (*irq_enable)(void); 102 void (*irq_enable)(void);
78 void (*safe_halt)(void); 103 void (*safe_halt)(void);
79 void (*halt)(void); 104 void (*halt)(void);
105
80 void (*wbinvd)(void); 106 void (*wbinvd)(void);
81 107
82 /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ 108 /* MSR, PMC and TSC operations.
109 err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
83 u64 (*read_msr)(unsigned int msr, int *err); 110 u64 (*read_msr)(unsigned int msr, int *err);
84 int (*write_msr)(unsigned int msr, u64 val); 111 int (*write_msr)(unsigned int msr, u64 val);
85 112
@@ -88,6 +115,7 @@ struct paravirt_ops
88 u64 (*get_scheduled_cycles)(void); 115 u64 (*get_scheduled_cycles)(void);
89 unsigned long (*get_cpu_khz)(void); 116 unsigned long (*get_cpu_khz)(void);
90 117
118 /* Segment descriptor handling */
91 void (*load_tr_desc)(void); 119 void (*load_tr_desc)(void);
92 void (*load_gdt)(const struct Xgt_desc_struct *); 120 void (*load_gdt)(const struct Xgt_desc_struct *);
93 void (*load_idt)(const struct Xgt_desc_struct *); 121 void (*load_idt)(const struct Xgt_desc_struct *);
@@ -105,9 +133,12 @@ struct paravirt_ops
105 void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); 133 void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);
106 134
107 void (*set_iopl_mask)(unsigned mask); 135 void (*set_iopl_mask)(unsigned mask);
108
109 void (*io_delay)(void); 136 void (*io_delay)(void);
110 137
138 /*
139 * Hooks for intercepting the creation/use/destruction of an
140 * mm_struct.
141 */
111 void (*activate_mm)(struct mm_struct *prev, 142 void (*activate_mm)(struct mm_struct *prev,
112 struct mm_struct *next); 143 struct mm_struct *next);
113 void (*dup_mmap)(struct mm_struct *oldmm, 144 void (*dup_mmap)(struct mm_struct *oldmm,
@@ -115,30 +146,43 @@ struct paravirt_ops
115 void (*exit_mmap)(struct mm_struct *mm); 146 void (*exit_mmap)(struct mm_struct *mm);
116 147
117#ifdef CONFIG_X86_LOCAL_APIC 148#ifdef CONFIG_X86_LOCAL_APIC
149 /*
150 * Direct APIC operations, principally for VMI. Ideally
151 * these shouldn't be in this interface.
152 */
118 void (*apic_write)(unsigned long reg, unsigned long v); 153 void (*apic_write)(unsigned long reg, unsigned long v);
119 void (*apic_write_atomic)(unsigned long reg, unsigned long v); 154 void (*apic_write_atomic)(unsigned long reg, unsigned long v);
120 unsigned long (*apic_read)(unsigned long reg); 155 unsigned long (*apic_read)(unsigned long reg);
121 void (*setup_boot_clock)(void); 156 void (*setup_boot_clock)(void);
122 void (*setup_secondary_clock)(void); 157 void (*setup_secondary_clock)(void);
158
159 void (*startup_ipi_hook)(int phys_apicid,
160 unsigned long start_eip,
161 unsigned long start_esp);
123#endif 162#endif
124 163
164 /* TLB operations */
125 void (*flush_tlb_user)(void); 165 void (*flush_tlb_user)(void);
126 void (*flush_tlb_kernel)(void); 166 void (*flush_tlb_kernel)(void);
127 void (*flush_tlb_single)(unsigned long addr); 167 void (*flush_tlb_single)(unsigned long addr);
128 168
129 void (*map_pt_hook)(int type, pte_t *va, u32 pfn); 169 void (*map_pt_hook)(int type, pte_t *va, u32 pfn);
130 170
171 /* Hooks for allocating/releasing pagetable pages */
131 void (*alloc_pt)(u32 pfn); 172 void (*alloc_pt)(u32 pfn);
132 void (*alloc_pd)(u32 pfn); 173 void (*alloc_pd)(u32 pfn);
133 void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); 174 void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
134 void (*release_pt)(u32 pfn); 175 void (*release_pt)(u32 pfn);
135 void (*release_pd)(u32 pfn); 176 void (*release_pd)(u32 pfn);
136 177
178 /* Pagetable manipulation functions */
137 void (*set_pte)(pte_t *ptep, pte_t pteval); 179 void (*set_pte)(pte_t *ptep, pte_t pteval);
138 void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); 180 void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
181 pte_t *ptep, pte_t pteval);
139 void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); 182 void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
140 void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 183 void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
141 void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 184 void (*pte_update_defer)(struct mm_struct *mm,
185 unsigned long addr, pte_t *ptep);
142 186
143 pte_t (*ptep_get_and_clear)(pte_t *ptep); 187 pte_t (*ptep_get_and_clear)(pte_t *ptep);
144 188
@@ -164,13 +208,12 @@ struct paravirt_ops
164 pgd_t (*make_pgd)(unsigned long pgd); 208 pgd_t (*make_pgd)(unsigned long pgd);
165#endif 209#endif
166 210
167 void (*set_lazy_mode)(int mode); 211 /* Set deferred update mode, used for batching operations. */
212 void (*set_lazy_mode)(enum paravirt_lazy_mode mode);
168 213
169 /* These two are jmp to, not actually called. */ 214 /* These two are jmp to, not actually called. */
170 void (*irq_enable_sysexit)(void); 215 void (*irq_enable_sysexit)(void);
171 void (*iret)(void); 216 void (*iret)(void);
172
173 void (*startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp);
174}; 217};
175 218
176/* Mark a paravirt probe function. */ 219/* Mark a paravirt probe function. */
@@ -188,8 +231,10 @@ extern struct paravirt_ops paravirt_ops;
188#define paravirt_clobber(clobber) \ 231#define paravirt_clobber(clobber) \
189 [paravirt_clobber] "i" (clobber) 232 [paravirt_clobber] "i" (clobber)
190 233
191#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" 234/*
192 235 * Generate some code, and mark it as patchable by the
236 * apply_paravirt() alternate instruction patcher.
237 */
193#define _paravirt_alt(insn_string, type, clobber) \ 238#define _paravirt_alt(insn_string, type, clobber) \
194 "771:\n\t" insn_string "\n" "772:\n" \ 239 "771:\n\t" insn_string "\n" "772:\n" \
195 ".pushsection .parainstructions,\"a\"\n" \ 240 ".pushsection .parainstructions,\"a\"\n" \
@@ -199,9 +244,74 @@ extern struct paravirt_ops paravirt_ops;
199 " .short " clobber "\n" \ 244 " .short " clobber "\n" \
200 ".popsection\n" 245 ".popsection\n"
201 246
247/* Generate patchable code, with the default asm parameters. */
202#define paravirt_alt(insn_string) \ 248#define paravirt_alt(insn_string) \
203 _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") 249 _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
204 250
251/*
252 * This generates an indirect call based on the operation type number.
253 * The type number, computed in PARAVIRT_PATCH, is derived from the
254 * offset into the paravirt_ops structure, and can therefore be freely
255 * converted back into a structure offset.
256 */
257#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);"
258
259/*
260 * These macros are intended to wrap calls into a paravirt_ops
261 * operation, so that they can be later identified and patched at
262 * runtime.
263 *
264 * Normally, a call to a pv_op function is a simple indirect call:
265 * (paravirt_ops.operations)(args...).
266 *
267 * Unfortunately, this is a relatively slow operation for modern CPUs,
268 * because it cannot necessarily determine what the destination
269 * address is. In this case, the address is a runtime constant, so at
270 * the very least we can patch the call to be a simple direct call, or
271 * ideally, patch an inline implementation into the callsite. (Direct
272 * calls are essentially free, because the call and return addresses
273 * are completely predictable.)
274 *
275 * These macros rely on the standard gcc "regparm(3)" calling
276 * convention, in which the first three arguments are placed in %eax,
277 * %edx, %ecx (in that order), and the remaining arguments are placed
278 * on the stack. All caller-save registers (eax,edx,ecx) are expected
279 * to be modified (either clobbered or used for return values).
280 *
281 * The call instruction itself is marked by placing its start address
282 * and size into the .parainstructions section, so that
283 * apply_paravirt() in arch/i386/kernel/alternative.c can do the
284 * appropriate patching under the control of the backend paravirt_ops
285 * implementation.
286 *
287 * Unfortunately there's no way to get gcc to generate the args setup
288 * for the call, and then allow the call itself to be generated by an
289 * inline asm. Because of this, we must do the complete arg setup and
290 * return value handling from within these macros. This is fairly
291 * cumbersome.
292 *
293 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
294 * It could be extended to more arguments, but there would be little
295 * to be gained from that. For each number of arguments, there are
296 * the two VCALL and CALL variants for void and non-void functions.
297 *
298 * When there is a return value, the invoker of the macro must specify
299 * the return type. The macro then uses sizeof() on that type to
300 * determine whether it's a 32 or 64 bit value, and places the return
301 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
302 * 64-bit).
303 *
304 * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
305 * in low,high order.
306 *
307 * Small structures are passed and returned in registers. The macro
308 * calling convention can't directly deal with this, so the wrapper
309 * functions must do this.
310 *
311 * These PVOP_* macros are only defined within this header. This
312 * means that all uses must be wrapped in inline functions. This also
313 * makes sure the incoming and outgoing types are always correct.
314 */
205#define PVOP_CALL0(__rettype, __op) \ 315#define PVOP_CALL0(__rettype, __op) \
206 ({ \ 316 ({ \
207 __rettype __ret; \ 317 __rettype __ret; \
@@ -1026,6 +1136,7 @@ static inline unsigned long __raw_local_irq_save(void)
1026 [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \ 1136 [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \
1027 paravirt_clobber(CLBR_EAX) 1137 paravirt_clobber(CLBR_EAX)
1028 1138
1139/* Make sure as little as possible of this mess escapes. */
1029#undef PARAVIRT_CALL 1140#undef PARAVIRT_CALL
1030#undef PVOP_VCALL0 1141#undef PVOP_VCALL0
1031#undef PVOP_CALL0 1142#undef PVOP_CALL0