aboutsummaryrefslogtreecommitdiffstats
path: root/include/asm-x86/paravirt.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/asm-x86/paravirt.h')
-rw-r--r--include/asm-x86/paravirt.h197
1 files changed, 174 insertions, 23 deletions
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 0f13b945e240..ef5e8ec6a6ab 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -84,7 +84,7 @@ struct pv_time_ops {
84 int (*set_wallclock)(unsigned long); 84 int (*set_wallclock)(unsigned long);
85 85
86 unsigned long long (*sched_clock)(void); 86 unsigned long long (*sched_clock)(void);
87 unsigned long (*get_cpu_khz)(void); 87 unsigned long (*get_tsc_khz)(void);
88}; 88};
89 89
90struct pv_cpu_ops { 90struct pv_cpu_ops {
@@ -115,6 +115,9 @@ struct pv_cpu_ops {
115 void (*set_ldt)(const void *desc, unsigned entries); 115 void (*set_ldt)(const void *desc, unsigned entries);
116 unsigned long (*store_tr)(void); 116 unsigned long (*store_tr)(void);
117 void (*load_tls)(struct thread_struct *t, unsigned int cpu); 117 void (*load_tls)(struct thread_struct *t, unsigned int cpu);
118#ifdef CONFIG_X86_64
119 void (*load_gs_index)(unsigned int idx);
120#endif
118 void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, 121 void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
119 const void *desc); 122 const void *desc);
120 void (*write_gdt_entry)(struct desc_struct *, 123 void (*write_gdt_entry)(struct desc_struct *,
@@ -141,8 +144,32 @@ struct pv_cpu_ops {
141 u64 (*read_pmc)(int counter); 144 u64 (*read_pmc)(int counter);
142 unsigned long long (*read_tscp)(unsigned int *aux); 145 unsigned long long (*read_tscp)(unsigned int *aux);
143 146
144 /* These two are jmp to, not actually called. */ 147 /*
145 void (*irq_enable_syscall_ret)(void); 148 * Atomically enable interrupts and return to userspace. This
149 * is only ever used to return to 32-bit processes; in a
150 * 64-bit kernel, it's used for 32-on-64 compat processes, but
151 * never native 64-bit processes. (Jump, not call.)
152 */
153 void (*irq_enable_sysexit)(void);
154
155 /*
156 * Switch to usermode gs and return to 64-bit usermode using
157 * sysret. Only used in 64-bit kernels to return to 64-bit
158 * processes. Usermode register state, including %rsp, must
159 * already be restored.
160 */
161 void (*usergs_sysret64)(void);
162
163 /*
164 * Switch to usermode gs and return to 32-bit usermode using
165 * sysret. Used to return to 32-on-64 compat processes.
166 * Other usermode register state, including %esp, must already
167 * be restored.
168 */
169 void (*usergs_sysret32)(void);
170
171 /* Normal iret. Jump to this with the standard iret stack
172 frame set up. */
146 void (*iret)(void); 173 void (*iret)(void);
147 174
148 void (*swapgs)(void); 175 void (*swapgs)(void);
@@ -165,6 +192,10 @@ struct pv_irq_ops {
165 void (*irq_enable)(void); 192 void (*irq_enable)(void);
166 void (*safe_halt)(void); 193 void (*safe_halt)(void);
167 void (*halt)(void); 194 void (*halt)(void);
195
196#ifdef CONFIG_X86_64
197 void (*adjust_exception_frame)(void);
198#endif
168}; 199};
169 200
170struct pv_apic_ops { 201struct pv_apic_ops {
@@ -219,7 +250,14 @@ struct pv_mmu_ops {
219 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, 250 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
220 unsigned long va); 251 unsigned long va);
221 252
222 /* Hooks for allocating/releasing pagetable pages */ 253 /* Hooks for allocating and freeing a pagetable top-level */
254 int (*pgd_alloc)(struct mm_struct *mm);
255 void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
256
257 /*
258 * Hooks for allocating/releasing pagetable pages when they're
259 * attached to a pagetable
260 */
223 void (*alloc_pte)(struct mm_struct *mm, u32 pfn); 261 void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
224 void (*alloc_pmd)(struct mm_struct *mm, u32 pfn); 262 void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
225 void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); 263 void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
@@ -238,7 +276,13 @@ struct pv_mmu_ops {
238 void (*pte_update_defer)(struct mm_struct *mm, 276 void (*pte_update_defer)(struct mm_struct *mm,
239 unsigned long addr, pte_t *ptep); 277 unsigned long addr, pte_t *ptep);
240 278
279 pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
280 pte_t *ptep);
281 void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
282 pte_t *ptep, pte_t pte);
283
241 pteval_t (*pte_val)(pte_t); 284 pteval_t (*pte_val)(pte_t);
285 pteval_t (*pte_flags)(pte_t);
242 pte_t (*make_pte)(pteval_t pte); 286 pte_t (*make_pte)(pteval_t pte);
243 287
244 pgdval_t (*pgd_val)(pgd_t); 288 pgdval_t (*pgd_val)(pgd_t);
@@ -273,6 +317,13 @@ struct pv_mmu_ops {
273#endif 317#endif
274 318
275 struct pv_lazy_ops lazy_mode; 319 struct pv_lazy_ops lazy_mode;
320
321 /* dom0 ops */
322
323 /* Sometimes the physical address is a pfn, and sometimes its
324 an mfn. We can tell which is which from the index. */
325 void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
326 unsigned long phys, pgprot_t flags);
276}; 327};
277 328
278/* This contains all the paravirt structures: we get a convenient 329/* This contains all the paravirt structures: we get a convenient
@@ -439,10 +490,17 @@ int paravirt_disable_iospace(void);
439#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" 490#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
440#endif 491#endif
441 492
493#ifdef CONFIG_PARAVIRT_DEBUG
494#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
495#else
496#define PVOP_TEST_NULL(op) ((void)op)
497#endif
498
442#define __PVOP_CALL(rettype, op, pre, post, ...) \ 499#define __PVOP_CALL(rettype, op, pre, post, ...) \
443 ({ \ 500 ({ \
444 rettype __ret; \ 501 rettype __ret; \
445 PVOP_CALL_ARGS; \ 502 PVOP_CALL_ARGS; \
503 PVOP_TEST_NULL(op); \
446 /* This is 32-bit specific, but is okay in 64-bit */ \ 504 /* This is 32-bit specific, but is okay in 64-bit */ \
447 /* since this condition will never hold */ \ 505 /* since this condition will never hold */ \
448 if (sizeof(rettype) > sizeof(unsigned long)) { \ 506 if (sizeof(rettype) > sizeof(unsigned long)) { \
@@ -471,6 +529,7 @@ int paravirt_disable_iospace(void);
471#define __PVOP_VCALL(op, pre, post, ...) \ 529#define __PVOP_VCALL(op, pre, post, ...) \
472 ({ \ 530 ({ \
473 PVOP_VCALL_ARGS; \ 531 PVOP_VCALL_ARGS; \
532 PVOP_TEST_NULL(op); \
474 asm volatile(pre \ 533 asm volatile(pre \
475 paravirt_alt(PARAVIRT_CALL) \ 534 paravirt_alt(PARAVIRT_CALL) \
476 post \ 535 post \
@@ -720,7 +779,7 @@ static inline unsigned long long paravirt_sched_clock(void)
720{ 779{
721 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); 780 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
722} 781}
723#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) 782#define calibrate_tsc() (pv_time_ops.get_tsc_khz())
724 783
725static inline unsigned long long paravirt_read_pmc(int counter) 784static inline unsigned long long paravirt_read_pmc(int counter)
726{ 785{
@@ -789,6 +848,13 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu)
789 PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); 848 PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
790} 849}
791 850
851#ifdef CONFIG_X86_64
852static inline void load_gs_index(unsigned int gs)
853{
854 PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
855}
856#endif
857
792static inline void write_ldt_entry(struct desc_struct *dt, int entry, 858static inline void write_ldt_entry(struct desc_struct *dt, int entry,
793 const void *desc) 859 const void *desc)
794{ 860{
@@ -912,6 +978,16 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
912 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); 978 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
913} 979}
914 980
981static inline int paravirt_pgd_alloc(struct mm_struct *mm)
982{
983 return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
984}
985
986static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
987{
988 PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
989}
990
915static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn) 991static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
916{ 992{
917 PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); 993 PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
@@ -996,6 +1072,20 @@ static inline pteval_t pte_val(pte_t pte)
996 return ret; 1072 return ret;
997} 1073}
998 1074
1075static inline pteval_t pte_flags(pte_t pte)
1076{
1077 pteval_t ret;
1078
1079 if (sizeof(pteval_t) > sizeof(long))
1080 ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
1081 pte.pte, (u64)pte.pte >> 32);
1082 else
1083 ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
1084 pte.pte);
1085
1086 return ret;
1087}
1088
999static inline pgd_t __pgd(pgdval_t val) 1089static inline pgd_t __pgd(pgdval_t val)
1000{ 1090{
1001 pgdval_t ret; 1091 pgdval_t ret;
@@ -1024,6 +1114,29 @@ static inline pgdval_t pgd_val(pgd_t pgd)
1024 return ret; 1114 return ret;
1025} 1115}
1026 1116
1117#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1118static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
1119 pte_t *ptep)
1120{
1121 pteval_t ret;
1122
1123 ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
1124 mm, addr, ptep);
1125
1126 return (pte_t) { .pte = ret };
1127}
1128
1129static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
1130 pte_t *ptep, pte_t pte)
1131{
1132 if (sizeof(pteval_t) > sizeof(long))
1133 /* 5 arg words */
1134 pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
1135 else
1136 PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
1137 mm, addr, ptep, pte.pte);
1138}
1139
1027static inline void set_pte(pte_t *ptep, pte_t pte) 1140static inline void set_pte(pte_t *ptep, pte_t pte)
1028{ 1141{
1029 if (sizeof(pteval_t) > sizeof(long)) 1142 if (sizeof(pteval_t) > sizeof(long))
@@ -1252,6 +1365,12 @@ static inline void arch_flush_lazy_mmu_mode(void)
1252 } 1365 }
1253} 1366}
1254 1367
1368static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
1369 unsigned long phys, pgprot_t flags)
1370{
1371 pv_mmu_ops.set_fixmap(idx, phys, flags);
1372}
1373
1255void _paravirt_nop(void); 1374void _paravirt_nop(void);
1256#define paravirt_nop ((void *)_paravirt_nop) 1375#define paravirt_nop ((void *)_paravirt_nop)
1257 1376
@@ -1374,54 +1493,86 @@ static inline unsigned long __raw_local_irq_save(void)
1374#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax 1493#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
1375#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) 1494#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
1376#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) 1495#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
1496#define PARA_INDIRECT(addr) *addr(%rip)
1377#else 1497#else
1378#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx 1498#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx
1379#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax 1499#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
1380#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) 1500#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
1381#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) 1501#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
1502#define PARA_INDIRECT(addr) *%cs:addr
1382#endif 1503#endif
1383 1504
1384#define INTERRUPT_RETURN \ 1505#define INTERRUPT_RETURN \
1385 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ 1506 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
1386 jmp *%cs:pv_cpu_ops+PV_CPU_iret) 1507 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
1387 1508
1388#define DISABLE_INTERRUPTS(clobbers) \ 1509#define DISABLE_INTERRUPTS(clobbers) \
1389 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ 1510 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
1390 PV_SAVE_REGS; \ 1511 PV_SAVE_REGS; \
1391 call *%cs:pv_irq_ops+PV_IRQ_irq_disable; \ 1512 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
1392 PV_RESTORE_REGS;) \ 1513 PV_RESTORE_REGS;) \
1393 1514
1394#define ENABLE_INTERRUPTS(clobbers) \ 1515#define ENABLE_INTERRUPTS(clobbers) \
1395 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ 1516 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
1396 PV_SAVE_REGS; \ 1517 PV_SAVE_REGS; \
1397 call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \ 1518 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
1398 PV_RESTORE_REGS;) 1519 PV_RESTORE_REGS;)
1399 1520
1400#define ENABLE_INTERRUPTS_SYSCALL_RET \ 1521#define USERGS_SYSRET32 \
1401 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\ 1522 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
1402 CLBR_NONE, \ 1523 CLBR_NONE, \
1403 jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret) 1524 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
1404
1405 1525
1406#ifdef CONFIG_X86_32 1526#ifdef CONFIG_X86_32
1407#define GET_CR0_INTO_EAX \ 1527#define GET_CR0_INTO_EAX \
1408 push %ecx; push %edx; \ 1528 push %ecx; push %edx; \
1409 call *pv_cpu_ops+PV_CPU_read_cr0; \ 1529 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
1410 pop %edx; pop %ecx 1530 pop %edx; pop %ecx
1411#else 1531
1532#define ENABLE_INTERRUPTS_SYSEXIT \
1533 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
1534 CLBR_NONE, \
1535 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1536
1537
1538#else /* !CONFIG_X86_32 */
1539
1540/*
1541 * If swapgs is used while the userspace stack is still current,
1542 * there's no way to call a pvop. The PV replacement *must* be
1543 * inlined, or the swapgs instruction must be trapped and emulated.
1544 */
1545#define SWAPGS_UNSAFE_STACK \
1546 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
1547 swapgs)
1548
1412#define SWAPGS \ 1549#define SWAPGS \
1413 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ 1550 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
1414 PV_SAVE_REGS; \ 1551 PV_SAVE_REGS; \
1415 call *pv_cpu_ops+PV_CPU_swapgs; \ 1552 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
1416 PV_RESTORE_REGS \ 1553 PV_RESTORE_REGS \
1417 ) 1554 )
1418 1555
1419#define GET_CR2_INTO_RCX \ 1556#define GET_CR2_INTO_RCX \
1420 call *pv_mmu_ops+PV_MMU_read_cr2; \ 1557 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \
1421 movq %rax, %rcx; \ 1558 movq %rax, %rcx; \
1422 xorq %rax, %rax; 1559 xorq %rax, %rax;
1423 1560
1424#endif 1561#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
1562 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
1563 CLBR_NONE, \
1564 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
1565
1566#define USERGS_SYSRET64 \
1567 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
1568 CLBR_NONE, \
1569 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
1570
1571#define ENABLE_INTERRUPTS_SYSEXIT32 \
1572 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
1573 CLBR_NONE, \
1574 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1575#endif /* CONFIG_X86_32 */
1425 1576
1426#endif /* __ASSEMBLY__ */ 1577#endif /* __ASSEMBLY__ */
1427#endif /* CONFIG_PARAVIRT */ 1578#endif /* CONFIG_PARAVIRT */