diff options
Diffstat (limited to 'arch/x86/kernel/vmi_32.c')
-rw-r--r-- | arch/x86/kernel/vmi_32.c | 126 |
1 file changed, 76 insertions(+), 50 deletions(-)
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index f02bad68abaa..12affe1f9bce 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -62,7 +62,10 @@ static struct { | |||
62 | void (*cpuid)(void /* non-c */); | 62 | void (*cpuid)(void /* non-c */); |
63 | void (*_set_ldt)(u32 selector); | 63 | void (*_set_ldt)(u32 selector); |
64 | void (*set_tr)(u32 selector); | 64 | void (*set_tr)(u32 selector); |
65 | void (*set_kernel_stack)(u32 selector, u32 esp0); | 65 | void (*write_idt_entry)(struct desc_struct *, int, u32, u32); |
66 | void (*write_gdt_entry)(struct desc_struct *, int, u32, u32); | ||
67 | void (*write_ldt_entry)(struct desc_struct *, int, u32, u32); | ||
68 | void (*set_kernel_stack)(u32 selector, u32 sp0); | ||
66 | void (*allocate_page)(u32, u32, u32, u32, u32); | 69 | void (*allocate_page)(u32, u32, u32, u32, u32); |
67 | void (*release_page)(u32, u32); | 70 | void (*release_page)(u32, u32); |
68 | void (*set_pte)(pte_t, pte_t *, unsigned); | 71 | void (*set_pte)(pte_t, pte_t *, unsigned); |
@@ -88,13 +91,13 @@ struct vmi_timer_ops vmi_timer_ops; | |||
88 | #define IRQ_PATCH_DISABLE 5 | 91 | #define IRQ_PATCH_DISABLE 5 |
89 | 92 | ||
90 | static inline void patch_offset(void *insnbuf, | 93 | static inline void patch_offset(void *insnbuf, |
91 | unsigned long eip, unsigned long dest) | 94 | unsigned long ip, unsigned long dest) |
92 | { | 95 | { |
93 | *(unsigned long *)(insnbuf+1) = dest-eip-5; | 96 | *(unsigned long *)(insnbuf+1) = dest-ip-5; |
94 | } | 97 | } |
95 | 98 | ||
96 | static unsigned patch_internal(int call, unsigned len, void *insnbuf, | 99 | static unsigned patch_internal(int call, unsigned len, void *insnbuf, |
97 | unsigned long eip) | 100 | unsigned long ip) |
98 | { | 101 | { |
99 | u64 reloc; | 102 | u64 reloc; |
100 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; | 103 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; |
@@ -103,13 +106,13 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf, | |||
103 | case VMI_RELOCATION_CALL_REL: | 106 | case VMI_RELOCATION_CALL_REL: |
104 | BUG_ON(len < 5); | 107 | BUG_ON(len < 5); |
105 | *(char *)insnbuf = MNEM_CALL; | 108 | *(char *)insnbuf = MNEM_CALL; |
106 | patch_offset(insnbuf, eip, (unsigned long)rel->eip); | 109 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); |
107 | return 5; | 110 | return 5; |
108 | 111 | ||
109 | case VMI_RELOCATION_JUMP_REL: | 112 | case VMI_RELOCATION_JUMP_REL: |
110 | BUG_ON(len < 5); | 113 | BUG_ON(len < 5); |
111 | *(char *)insnbuf = MNEM_JMP; | 114 | *(char *)insnbuf = MNEM_JMP; |
112 | patch_offset(insnbuf, eip, (unsigned long)rel->eip); | 115 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); |
113 | return 5; | 116 | return 5; |
114 | 117 | ||
115 | case VMI_RELOCATION_NOP: | 118 | case VMI_RELOCATION_NOP: |
@@ -131,25 +134,25 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf, | |||
131 | * sequence. The callee does nop padding for us. | 134 | * sequence. The callee does nop padding for us. |
132 | */ | 135 | */ |
133 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, | 136 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, |
134 | unsigned long eip, unsigned len) | 137 | unsigned long ip, unsigned len) |
135 | { | 138 | { |
136 | switch (type) { | 139 | switch (type) { |
137 | case PARAVIRT_PATCH(pv_irq_ops.irq_disable): | 140 | case PARAVIRT_PATCH(pv_irq_ops.irq_disable): |
138 | return patch_internal(VMI_CALL_DisableInterrupts, len, | 141 | return patch_internal(VMI_CALL_DisableInterrupts, len, |
139 | insns, eip); | 142 | insns, ip); |
140 | case PARAVIRT_PATCH(pv_irq_ops.irq_enable): | 143 | case PARAVIRT_PATCH(pv_irq_ops.irq_enable): |
141 | return patch_internal(VMI_CALL_EnableInterrupts, len, | 144 | return patch_internal(VMI_CALL_EnableInterrupts, len, |
142 | insns, eip); | 145 | insns, ip); |
143 | case PARAVIRT_PATCH(pv_irq_ops.restore_fl): | 146 | case PARAVIRT_PATCH(pv_irq_ops.restore_fl): |
144 | return patch_internal(VMI_CALL_SetInterruptMask, len, | 147 | return patch_internal(VMI_CALL_SetInterruptMask, len, |
145 | insns, eip); | 148 | insns, ip); |
146 | case PARAVIRT_PATCH(pv_irq_ops.save_fl): | 149 | case PARAVIRT_PATCH(pv_irq_ops.save_fl): |
147 | return patch_internal(VMI_CALL_GetInterruptMask, len, | 150 | return patch_internal(VMI_CALL_GetInterruptMask, len, |
148 | insns, eip); | 151 | insns, ip); |
149 | case PARAVIRT_PATCH(pv_cpu_ops.iret): | 152 | case PARAVIRT_PATCH(pv_cpu_ops.iret): |
150 | return patch_internal(VMI_CALL_IRET, len, insns, eip); | 153 | return patch_internal(VMI_CALL_IRET, len, insns, ip); |
151 | case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit): | 154 | case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret): |
152 | return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); | 155 | return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip); |
153 | default: | 156 | default: |
154 | break; | 157 | break; |
155 | } | 158 | } |
@@ -157,36 +160,36 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, | |||
157 | } | 160 | } |
158 | 161 | ||
159 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ | 162 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ |
160 | static void vmi_cpuid(unsigned int *eax, unsigned int *ebx, | 163 | static void vmi_cpuid(unsigned int *ax, unsigned int *bx, |
161 | unsigned int *ecx, unsigned int *edx) | 164 | unsigned int *cx, unsigned int *dx) |
162 | { | 165 | { |
163 | int override = 0; | 166 | int override = 0; |
164 | if (*eax == 1) | 167 | if (*ax == 1) |
165 | override = 1; | 168 | override = 1; |
166 | asm volatile ("call *%6" | 169 | asm volatile ("call *%6" |
167 | : "=a" (*eax), | 170 | : "=a" (*ax), |
168 | "=b" (*ebx), | 171 | "=b" (*bx), |
169 | "=c" (*ecx), | 172 | "=c" (*cx), |
170 | "=d" (*edx) | 173 | "=d" (*dx) |
171 | : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid)); | 174 | : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid)); |
172 | if (override) { | 175 | if (override) { |
173 | if (disable_pse) | 176 | if (disable_pse) |
174 | *edx &= ~X86_FEATURE_PSE; | 177 | *dx &= ~X86_FEATURE_PSE; |
175 | if (disable_pge) | 178 | if (disable_pge) |
176 | *edx &= ~X86_FEATURE_PGE; | 179 | *dx &= ~X86_FEATURE_PGE; |
177 | if (disable_sep) | 180 | if (disable_sep) |
178 | *edx &= ~X86_FEATURE_SEP; | 181 | *dx &= ~X86_FEATURE_SEP; |
179 | if (disable_tsc) | 182 | if (disable_tsc) |
180 | *edx &= ~X86_FEATURE_TSC; | 183 | *dx &= ~X86_FEATURE_TSC; |
181 | if (disable_mtrr) | 184 | if (disable_mtrr) |
182 | *edx &= ~X86_FEATURE_MTRR; | 185 | *dx &= ~X86_FEATURE_MTRR; |
183 | } | 186 | } |
184 | } | 187 | } |
185 | 188 | ||
186 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) | 189 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) |
187 | { | 190 | { |
188 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) | 191 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) |
189 | write_gdt_entry(gdt, nr, new->a, new->b); | 192 | write_gdt_entry(gdt, nr, new, 0); |
190 | } | 193 | } |
191 | 194 | ||
192 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | 195 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) |
@@ -200,12 +203,12 @@ static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | |||
200 | static void vmi_set_ldt(const void *addr, unsigned entries) | 203 | static void vmi_set_ldt(const void *addr, unsigned entries) |
201 | { | 204 | { |
202 | unsigned cpu = smp_processor_id(); | 205 | unsigned cpu = smp_processor_id(); |
203 | u32 low, high; | 206 | struct desc_struct desc; |
204 | 207 | ||
205 | pack_descriptor(&low, &high, (unsigned long)addr, | 208 | pack_descriptor(&desc, (unsigned long)addr, |
206 | entries * sizeof(struct desc_struct) - 1, | 209 | entries * sizeof(struct desc_struct) - 1, |
207 | DESCTYPE_LDT, 0); | 210 | DESC_LDT, 0); |
208 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high); | 211 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT); |
209 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); | 212 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); |
210 | } | 213 | } |
211 | 214 | ||
@@ -214,17 +217,37 @@ static void vmi_set_tr(void) | |||
214 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); | 217 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); |
215 | } | 218 | } |
216 | 219 | ||
217 | static void vmi_load_esp0(struct tss_struct *tss, | 220 | static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) |
221 | { | ||
222 | u32 *idt_entry = (u32 *)g; | ||
223 | vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]); | ||
224 | } | ||
225 | |||
226 | static void vmi_write_gdt_entry(struct desc_struct *dt, int entry, | ||
227 | const void *desc, int type) | ||
228 | { | ||
229 | u32 *gdt_entry = (u32 *)desc; | ||
230 | vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]); | ||
231 | } | ||
232 | |||
233 | static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, | ||
234 | const void *desc) | ||
235 | { | ||
236 | u32 *ldt_entry = (u32 *)desc; | ||
237 | vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); | ||
238 | } | ||
239 | |||
240 | static void vmi_load_sp0(struct tss_struct *tss, | ||
218 | struct thread_struct *thread) | 241 | struct thread_struct *thread) |
219 | { | 242 | { |
220 | tss->x86_tss.esp0 = thread->esp0; | 243 | tss->x86_tss.sp0 = thread->sp0; |
221 | 244 | ||
222 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | 245 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ |
223 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | 246 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { |
224 | tss->x86_tss.ss1 = thread->sysenter_cs; | 247 | tss->x86_tss.ss1 = thread->sysenter_cs; |
225 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | 248 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); |
226 | } | 249 | } |
227 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0); | 250 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0); |
228 | } | 251 | } |
229 | 252 | ||
230 | static void vmi_flush_tlb_user(void) | 253 | static void vmi_flush_tlb_user(void) |
@@ -375,7 +398,7 @@ static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn) | |||
375 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 398 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
376 | } | 399 | } |
377 | 400 | ||
378 | static void vmi_allocate_pd(u32 pfn) | 401 | static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn) |
379 | { | 402 | { |
380 | /* | 403 | /* |
381 | * This call comes in very early, before mem_map is setup. | 404 | * This call comes in very early, before mem_map is setup. |
@@ -452,7 +475,7 @@ static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep | |||
452 | static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 475 | static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
453 | { | 476 | { |
454 | #ifdef CONFIG_X86_PAE | 477 | #ifdef CONFIG_X86_PAE |
455 | const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 }; | 478 | const pte_t pte = { .pte = pmdval.pmd }; |
456 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); | 479 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); |
457 | #else | 480 | #else |
458 | const pte_t pte = { pmdval.pud.pgd.pgd }; | 481 | const pte_t pte = { pmdval.pud.pgd.pgd }; |
@@ -485,21 +508,21 @@ static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t | |||
485 | static void vmi_set_pud(pud_t *pudp, pud_t pudval) | 508 | static void vmi_set_pud(pud_t *pudp, pud_t pudval) |
486 | { | 509 | { |
487 | /* Um, eww */ | 510 | /* Um, eww */ |
488 | const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 }; | 511 | const pte_t pte = { .pte = pudval.pgd.pgd }; |
489 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); | 512 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); |
490 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | 513 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); |
491 | } | 514 | } |
492 | 515 | ||
493 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 516 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
494 | { | 517 | { |
495 | const pte_t pte = { 0 }; | 518 | const pte_t pte = { .pte = 0 }; |
496 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | 519 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); |
497 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 520 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
498 | } | 521 | } |
499 | 522 | ||
500 | static void vmi_pmd_clear(pmd_t *pmd) | 523 | static void vmi_pmd_clear(pmd_t *pmd) |
501 | { | 524 | { |
502 | const pte_t pte = { 0 }; | 525 | const pte_t pte = { .pte = 0 }; |
503 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | 526 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); |
504 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | 527 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); |
505 | } | 528 | } |
@@ -790,10 +813,13 @@ static inline int __init activate_vmi(void) | |||
790 | para_fill(pv_cpu_ops.store_idt, GetIDT); | 813 | para_fill(pv_cpu_ops.store_idt, GetIDT); |
791 | para_fill(pv_cpu_ops.store_tr, GetTR); | 814 | para_fill(pv_cpu_ops.store_tr, GetTR); |
792 | pv_cpu_ops.load_tls = vmi_load_tls; | 815 | pv_cpu_ops.load_tls = vmi_load_tls; |
793 | para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry); | 816 | para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry, |
794 | para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry); | 817 | write_ldt_entry, WriteLDTEntry); |
795 | para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry); | 818 | para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry, |
796 | para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); | 819 | write_gdt_entry, WriteGDTEntry); |
820 | para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry, | ||
821 | write_idt_entry, WriteIDTEntry); | ||
822 | para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack); | ||
797 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); | 823 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); |
798 | para_fill(pv_cpu_ops.io_delay, IODelay); | 824 | para_fill(pv_cpu_ops.io_delay, IODelay); |
799 | 825 | ||
@@ -870,7 +896,7 @@ static inline int __init activate_vmi(void) | |||
870 | * the backend. They are performance critical anyway, so requiring | 896 | * the backend. They are performance critical anyway, so requiring |
871 | * a patch is not a big problem. | 897 | * a patch is not a big problem. |
872 | */ | 898 | */ |
873 | pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0; | 899 | pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0; |
874 | pv_cpu_ops.iret = (void *)0xbadbab0; | 900 | pv_cpu_ops.iret = (void *)0xbadbab0; |
875 | 901 | ||
876 | #ifdef CONFIG_SMP | 902 | #ifdef CONFIG_SMP |
@@ -963,19 +989,19 @@ static int __init parse_vmi(char *arg) | |||
963 | return -EINVAL; | 989 | return -EINVAL; |
964 | 990 | ||
965 | if (!strcmp(arg, "disable_pge")) { | 991 | if (!strcmp(arg, "disable_pge")) { |
966 | clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); | 992 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); |
967 | disable_pge = 1; | 993 | disable_pge = 1; |
968 | } else if (!strcmp(arg, "disable_pse")) { | 994 | } else if (!strcmp(arg, "disable_pse")) { |
969 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | 995 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE); |
970 | disable_pse = 1; | 996 | disable_pse = 1; |
971 | } else if (!strcmp(arg, "disable_sep")) { | 997 | } else if (!strcmp(arg, "disable_sep")) { |
972 | clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); | 998 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); |
973 | disable_sep = 1; | 999 | disable_sep = 1; |
974 | } else if (!strcmp(arg, "disable_tsc")) { | 1000 | } else if (!strcmp(arg, "disable_tsc")) { |
975 | clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | 1001 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC); |
976 | disable_tsc = 1; | 1002 | disable_tsc = 1; |
977 | } else if (!strcmp(arg, "disable_mtrr")) { | 1003 | } else if (!strcmp(arg, "disable_mtrr")) { |
978 | clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability); | 1004 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR); |
979 | disable_mtrr = 1; | 1005 | disable_mtrr = 1; |
980 | } else if (!strcmp(arg, "disable_timer")) { | 1006 | } else if (!strcmp(arg, "disable_timer")) { |
981 | disable_vmi_timer = 1; | 1007 | disable_vmi_timer = 1; |