Diffstat (limited to 'arch/x86/kernel/vmi_32.c')
-rw-r--r--	arch/x86/kernel/vmi_32.c	126
1 files changed, 76 insertions, 50 deletions
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index f02bad68abaa..12affe1f9bce 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -62,7 +62,10 @@ static struct {
 	void (*cpuid)(void /* non-c */);
 	void (*_set_ldt)(u32 selector);
 	void (*set_tr)(u32 selector);
-	void (*set_kernel_stack)(u32 selector, u32 esp0);
+	void (*write_idt_entry)(struct desc_struct *, int, u32, u32);
+	void (*write_gdt_entry)(struct desc_struct *, int, u32, u32);
+	void (*write_ldt_entry)(struct desc_struct *, int, u32, u32);
+	void (*set_kernel_stack)(u32 selector, u32 sp0);
 	void (*allocate_page)(u32, u32, u32, u32, u32);
 	void (*release_page)(u32, u32);
 	void (*set_pte)(pte_t, pte_t *, unsigned);
@@ -88,13 +91,13 @@ struct vmi_timer_ops vmi_timer_ops;
 #define IRQ_PATCH_DISABLE 5
 
 static inline void patch_offset(void *insnbuf,
-				unsigned long eip, unsigned long dest)
+				unsigned long ip, unsigned long dest)
 {
-	*(unsigned long *)(insnbuf+1) = dest-eip-5;
+	*(unsigned long *)(insnbuf+1) = dest-ip-5;
 }
 
 static unsigned patch_internal(int call, unsigned len, void *insnbuf,
-			       unsigned long eip)
+			       unsigned long ip)
 {
 	u64 reloc;
 	struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
@@ -103,13 +106,13 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf,
 	case VMI_RELOCATION_CALL_REL:
 		BUG_ON(len < 5);
 		*(char *)insnbuf = MNEM_CALL;
-		patch_offset(insnbuf, eip, (unsigned long)rel->eip);
+		patch_offset(insnbuf, ip, (unsigned long)rel->eip);
 		return 5;
 
 	case VMI_RELOCATION_JUMP_REL:
 		BUG_ON(len < 5);
 		*(char *)insnbuf = MNEM_JMP;
-		patch_offset(insnbuf, eip, (unsigned long)rel->eip);
+		patch_offset(insnbuf, ip, (unsigned long)rel->eip);
 		return 5;
 
 	case VMI_RELOCATION_NOP:
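The eip-to-ip rename above does not change the patching arithmetic: a 5-byte CALL/JMP rel32 encodes its target relative to the end of the instruction, hence dest - ip - 5. A minimal user-space sketch of the same computation (the 0xe8/0xe9 opcode values are the real x86 ones; the buffer and addresses are made up for illustration):

/* Sketch of the 5-byte CALL/JMP rel32 patching done by patch_offset(). */
#include <stdio.h>
#include <string.h>

#define MNEM_CALL 0xe8	/* CALL rel32 */
#define MNEM_JMP  0xe9	/* JMP rel32 */

static void patch_offset(void *insnbuf, unsigned long ip, unsigned long dest)
{
	/* rel32 is measured from the next instruction, i.e. ip + 5 */
	unsigned int rel = (unsigned int)(dest - ip - 5);
	memcpy((char *)insnbuf + 1, &rel, 4);
}

int main(void)
{
	unsigned char buf[5] = { MNEM_CALL };
	patch_offset(buf, 0xc0100000UL, 0xc0200000UL);
	printf("%02x %02x %02x %02x %02x\n",
	       buf[0], buf[1], buf[2], buf[3], buf[4]);
	return 0;	/* prints: e8 fb ff 0f 00 */
}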
@@ -131,25 +134,25 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf,
  * sequence. The callee does nop padding for us.
  */
 static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
-			  unsigned long eip, unsigned len)
+			  unsigned long ip, unsigned len)
 {
 	switch (type) {
 	case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
 		return patch_internal(VMI_CALL_DisableInterrupts, len,
-				      insns, eip);
+				      insns, ip);
 	case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
 		return patch_internal(VMI_CALL_EnableInterrupts, len,
-				      insns, eip);
+				      insns, ip);
 	case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
 		return patch_internal(VMI_CALL_SetInterruptMask, len,
-				      insns, eip);
+				      insns, ip);
 	case PARAVIRT_PATCH(pv_irq_ops.save_fl):
 		return patch_internal(VMI_CALL_GetInterruptMask, len,
-				      insns, eip);
+				      insns, ip);
 	case PARAVIRT_PATCH(pv_cpu_ops.iret):
-		return patch_internal(VMI_CALL_IRET, len, insns, eip);
-	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
-		return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
+		return patch_internal(VMI_CALL_IRET, len, insns, ip);
+	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
+		return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
 	default:
 		break;
 	}
@@ -157,36 +160,36 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
 }
 
 /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */
-static void vmi_cpuid(unsigned int *eax, unsigned int *ebx,
-		      unsigned int *ecx, unsigned int *edx)
+static void vmi_cpuid(unsigned int *ax, unsigned int *bx,
+		      unsigned int *cx, unsigned int *dx)
 {
 	int override = 0;
-	if (*eax == 1)
+	if (*ax == 1)
 		override = 1;
 	asm volatile ("call *%6"
-		      : "=a" (*eax),
-			"=b" (*ebx),
-			"=c" (*ecx),
-			"=d" (*edx)
-		      : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid));
+		      : "=a" (*ax),
+			"=b" (*bx),
+			"=c" (*cx),
+			"=d" (*dx)
+		      : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid));
 	if (override) {
 		if (disable_pse)
-			*edx &= ~X86_FEATURE_PSE;
+			*dx &= ~X86_FEATURE_PSE;
 		if (disable_pge)
-			*edx &= ~X86_FEATURE_PGE;
+			*dx &= ~X86_FEATURE_PGE;
 		if (disable_sep)
-			*edx &= ~X86_FEATURE_SEP;
+			*dx &= ~X86_FEATURE_SEP;
 		if (disable_tsc)
-			*edx &= ~X86_FEATURE_TSC;
+			*dx &= ~X86_FEATURE_TSC;
 		if (disable_mtrr)
-			*edx &= ~X86_FEATURE_MTRR;
+			*dx &= ~X86_FEATURE_MTRR;
 	}
 }
 
 static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new)
 {
 	if (gdt[nr].a != new->a || gdt[nr].b != new->b)
-		write_gdt_entry(gdt, nr, new->a, new->b);
+		write_gdt_entry(gdt, nr, new, 0);
 }
 
 static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
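The width-neutral register names (ax, bx, cx, dx) feed the same constraints into the inline asm as before. For reference, a user-space sketch of the native CPUID convention the VMI call has to mimic: the leaf goes in EAX, the subleaf in ECX, and all four registers come back as outputs (on 32-bit PIC builds the "=b" clobber needs extra care; that detail is omitted here):

/* User-space sketch of the CPUID convention vmi_cpuid() mirrors. */
#include <stdio.h>

static void cpuid(unsigned int *ax, unsigned int *bx,
		  unsigned int *cx, unsigned int *dx)
{
	asm volatile("cpuid"
		     : "=a" (*ax), "=b" (*bx), "=c" (*cx), "=d" (*dx)
		     : "0" (*ax), "2" (*cx));
}

int main(void)
{
	unsigned int ax = 1, bx = 0, cx = 0, dx = 0;	/* leaf 1: feature flags */
	cpuid(&ax, &bx, &cx, &dx);
	printf("leaf 1: edx=%08x ecx=%08x\n", dx, cx);
	return 0;
}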
@@ -200,12 +203,12 @@ static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
 static void vmi_set_ldt(const void *addr, unsigned entries)
 {
 	unsigned cpu = smp_processor_id();
-	u32 low, high;
+	struct desc_struct desc;
 
-	pack_descriptor(&low, &high, (unsigned long)addr,
+	pack_descriptor(&desc, (unsigned long)addr,
 			entries * sizeof(struct desc_struct) - 1,
-			DESCTYPE_LDT, 0);
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high);
+			DESC_LDT, 0);
+	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT);
 	vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0);
 }
 
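The separate low/high u32 pair becomes a single struct desc_struct, filled in by the updated pack_descriptor(). A rough user-space model of the packing, following the IA-32 segment-descriptor layout (the struct and helper below are simplified stand-ins, not the kernel's exact definitions):

/* Simplified model of packing base/limit/type into the two 32-bit
 * words of an x86 segment descriptor, as pack_descriptor() does. */
#include <stdint.h>
#include <stdio.h>

struct desc_struct { uint32_t a, b; };

static void pack_descriptor(struct desc_struct *d, unsigned long base,
			    unsigned long limit, unsigned char type,
			    unsigned char flags)
{
	d->a = ((base & 0xffff) << 16) | (limit & 0xffff);
	d->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
	       (limit & 0xf0000) | ((type & 0xf) << 8) |
	       ((flags & 0xf) << 20) | (1u << 15);	/* present bit */
}

int main(void)
{
	struct desc_struct d;
	/* an LDT descriptor: system type 0x2, 8 entries of 8 bytes */
	pack_descriptor(&d, 0xc1000000UL, 8 * 8 - 1, 0x2, 0);
	printf("%08x%08x\n", d.b, d.a);	/* c1008200 0000003f */
	return 0;
}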
@@ -214,17 +217,37 @@ static void vmi_set_tr(void)
 	vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct));
 }
 
-static void vmi_load_esp0(struct tss_struct *tss,
+static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
+{
+	u32 *idt_entry = (u32 *)g;
+	vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
+}
+
+static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
+				const void *desc, int type)
+{
+	u32 *gdt_entry = (u32 *)desc;
+	vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
+}
+
+static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
+				const void *desc)
+{
+	u32 *ldt_entry = (u32 *)desc;
+	vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
+}
+
+static void vmi_load_sp0(struct tss_struct *tss,
 			struct thread_struct *thread)
 {
-	tss->x86_tss.esp0 = thread->esp0;
+	tss->x86_tss.sp0 = thread->sp0;
 
 	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
 	if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
 		tss->x86_tss.ss1 = thread->sysenter_cs;
 		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 	}
-	vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0);
+	vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0);
 }
 
 static void vmi_flush_tlb_user(void)
@@ -375,7 +398,7 @@ static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
 }
 
-static void vmi_allocate_pd(u32 pfn)
+static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
 {
 	/*
 	 * This call comes in very early, before mem_map is setup.
@@ -452,7 +475,7 @@ static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep
 static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 #ifdef CONFIG_X86_PAE
-	const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 };
+	const pte_t pte = { .pte = pmdval.pmd };
 	vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
 #else
 	const pte_t pte = { pmdval.pud.pgd.pgd };
@@ -485,21 +508,21 @@ static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t
 static void vmi_set_pud(pud_t *pudp, pud_t pudval)
 {
 	/* Um, eww */
-	const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 };
+	const pte_t pte = { .pte = pudval.pgd.pgd };
 	vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
 	vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
 }
 
 static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	const pte_t pte = { 0 };
+	const pte_t pte = { .pte = 0 };
 	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
 	vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
 }
 
 static void vmi_pmd_clear(pmd_t *pmd)
 {
-	const pte_t pte = { 0 };
+	const pte_t pte = { .pte = 0 };
 	vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
 	vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
 }
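The positional { low, high } initializers become { .pte = value } because the unified page-table types make the PAE pte_t a single 64-bit field rather than two 32-bit halves. A simplified illustration (the typedefs below are stand-ins for the kernel's):

/* Why { x, x >> 32 } becomes { .pte = x } under PAE. */
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t pte; } pte_t;	/* unified PAE layout */
typedef struct { uint64_t pmd; } pmd_t;

int main(void)
{
	pmd_t pmdval = { 0x12345067ULL | (1ULL << 63) };	/* NX bit + flags */
	pte_t pte = { .pte = pmdval.pmd };	/* one field, one initializer */
	printf("low=%08x high=%08x\n",
	       (uint32_t)pte.pte, (uint32_t)(pte.pte >> 32));
	return 0;
}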
@@ -790,10 +813,13 @@ static inline int __init activate_vmi(void)
 	para_fill(pv_cpu_ops.store_idt, GetIDT);
 	para_fill(pv_cpu_ops.store_tr, GetTR);
 	pv_cpu_ops.load_tls = vmi_load_tls;
-	para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
-	para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
-	para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry);
-	para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
+	para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry,
+		  write_ldt_entry, WriteLDTEntry);
+	para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry,
+		  write_gdt_entry, WriteGDTEntry);
+	para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry,
+		  write_idt_entry, WriteIDTEntry);
+	para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack);
 	para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
 	para_fill(pv_cpu_ops.io_delay, IODelay);
 
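Where para_fill points a pv_ops slot directly at the VMI entry point, para_wrap installs a C wrapper as the slot and caches the raw entry point in the named vmi_ops member for the wrapper to call through, which is what the new vmi_write_*_entry adapters rely on. An illustrative user-space sketch of that pattern (not the kernel macro; all names here are stand-ins):

/* Sketch of the para_wrap pattern: pv_ops gets a C wrapper, and the
 * backend's raw two-u32 entry point is cached in a side table. */
#include <stdio.h>

typedef struct { unsigned int a, b; } gate_desc;

static struct {	/* cached backend entry points, like vmi_ops */
	void (*write_idt_entry)(gate_desc *, int, unsigned int, unsigned int);
} vmi_ops;

static struct {	/* kernel-facing ops table, like pv_cpu_ops */
	void (*write_idt_entry)(gate_desc *, int, const gate_desc *);
} pv_cpu_ops;

/* wrapper: adapt the descriptor-pointer API to the two-u32 backend */
static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
	const unsigned int *w = (const unsigned int *)g;
	vmi_ops.write_idt_entry(dt, entry, w[0], w[1]);
}

static void backend_write_idt(gate_desc *dt, int entry,
			      unsigned int lo, unsigned int hi)
{
	dt[entry].a = lo;
	dt[entry].b = hi;
}

int main(void)
{
	gate_desc idt[4] = { { 0, 0 } };
	gate_desc g = { 0x12345678, 0x9abcdef0 };

	/* roughly what para_wrap(pv_cpu_ops.write_idt_entry,
	 * vmi_write_idt_entry, write_idt_entry, WriteIDTEntry) arranges */
	vmi_ops.write_idt_entry = backend_write_idt;
	pv_cpu_ops.write_idt_entry = vmi_write_idt_entry;

	pv_cpu_ops.write_idt_entry(idt, 2, &g);
	printf("idt[2] = %08x:%08x\n", idt[2].b, idt[2].a);
	return 0;
}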
@@ -870,7 +896,7 @@ static inline int __init activate_vmi(void)
 	 * the backend. They are performance critical anyway, so requiring
 	 * a patch is not a big problem.
 	 */
-	pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
+	pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
 	pv_cpu_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
@@ -963,19 +989,19 @@ static int __init parse_vmi(char *arg)
 		return -EINVAL;
 
 	if (!strcmp(arg, "disable_pge")) {
-		clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
 		disable_pge = 1;
 	} else if (!strcmp(arg, "disable_pse")) {
-		clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE);
 		disable_pse = 1;
 	} else if (!strcmp(arg, "disable_sep")) {
-		clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
 		disable_sep = 1;
 	} else if (!strcmp(arg, "disable_tsc")) {
-		clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC);
 		disable_tsc = 1;
 	} else if (!strcmp(arg, "disable_mtrr")) {
-		clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR);
 		disable_mtrr = 1;
 	} else if (!strcmp(arg, "disable_timer")) {
 		disable_vmi_timer = 1;
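clear_cpu_cap() is a typed accessor over the x86_capability bitmap, replacing the open-coded clear_bit() with its pointer cast. Roughly, it clears one feature bit in the per-CPU capability words; a plain user-space model follows (the kernel helper uses atomic bit ops, and the feature-bit numbering shown is the real one for PGE):

/* Rough model of the clear_bit -> clear_cpu_cap change. */
#include <stdio.h>

#define NCAPINTS 8
#define X86_FEATURE_PGE (0 * 32 + 13)	/* CPUID leaf 1, EDX bit 13 */

struct cpuinfo_x86 { unsigned int x86_capability[NCAPINTS]; };

static void clear_cpu_cap(struct cpuinfo_x86 *c, int bit)
{
	c->x86_capability[bit / 32] &= ~(1u << (bit % 32));
}

int main(void)
{
	struct cpuinfo_x86 boot_cpu_data = { { 0xffffffffu } };
	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
	printf("word0 = %08x\n", boot_cpu_data.x86_capability[0]);	/* ffffdfff */
	return 0;
}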