path: root/arch/x86/xen/enlighten.c
Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r--  arch/x86/xen/enlighten.c  252
 1 file changed, 83 insertions(+), 169 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a27d562a9744..0013a729b41d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -30,7 +30,6 @@
 #include <xen/interface/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
-#include <xen/interface/sched.h>
 #include <xen/features.h>
 #include <xen/page.h>
 #include <xen/hvc-console.h>
@@ -58,6 +57,9 @@ EXPORT_SYMBOL_GPL(hypercall_page);
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
+enum xen_domain_type xen_domain_type = XEN_NATIVE;
+EXPORT_SYMBOL_GPL(xen_domain_type);
+
 /*
  * Identity map, in addition to plain kernel map.  This needs to be
  * large enough to allocate page table pages to allocate the rest.
@@ -111,7 +113,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  *
  * 0: not available, 1: available
  */
-static int have_vcpu_info_placement = 1;
+static int have_vcpu_info_placement =
+#ifdef CONFIG_X86_32
+	1
+#else
+	0
+#endif
+	;
+
 
 static void xen_vcpu_setup(int cpu)
 {
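
The initializer above is what makes vcpu_info placement default to on for
32-bit and off for 64-bit; after preprocessing it collapses to a plain "= 1"
or "= 0".  A minimal compilable sketch of that expansion (CONFIG_X86_32 is the
real kernel config symbol; the main() harness is illustrative, not kernel
code):

    #include <stdio.h>

    #ifdef CONFIG_X86_32
    static int have_vcpu_info_placement = 1;    /* 32-bit: placement on */
    #else
    static int have_vcpu_info_placement = 0;    /* 64-bit: off by default */
    #endif

    int main(void)
    {
        printf("vcpu_info placement: %d\n", have_vcpu_info_placement);
        return 0;
    }
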
@@ -227,103 +236,68 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-static unsigned long xen_save_fl(void)
+static void xen_leave_lazy(void)
 {
-	struct vcpu_info *vcpu;
-	unsigned long flags;
-
-	vcpu = x86_read_percpu(xen_vcpu);
-
-	/* flag has opposite sense of mask */
-	flags = !vcpu->evtchn_upcall_mask;
-
-	/* convert to IF type flag
-	   -0 -> 0x00000000
-	   -1 -> 0xffffffff
-	*/
-	return (-flags) & X86_EFLAGS_IF;
+	paravirt_leave_lazy(paravirt_get_lazy_mode());
+	xen_mc_flush();
 }
 
-static void xen_restore_fl(unsigned long flags)
+static unsigned long xen_store_tr(void)
 {
-	struct vcpu_info *vcpu;
-
-	/* convert from IF type flag */
-	flags = !(flags & X86_EFLAGS_IF);
-
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
-	preempt_disable();
-	vcpu = x86_read_percpu(xen_vcpu);
-	vcpu->evtchn_upcall_mask = flags;
-	preempt_enable_no_resched();
-
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
-
-	if (flags == 0) {
-		preempt_check_resched();
-		barrier(); /* unmask then check (avoid races) */
-		if (unlikely(vcpu->evtchn_upcall_pending))
-			force_evtchn_callback();
-	}
+	return 0;
 }
 
-static void xen_irq_disable(void)
+/*
+ * Set the page permissions for a particular virtual address.  If the
+ * address is a vmalloc mapping (or other non-linear mapping), then
+ * find the linear mapping of the page and also set its protections to
+ * match.
+ */
+static void set_aliased_prot(void *v, pgprot_t prot)
 {
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
-	preempt_disable();
-	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
-	preempt_enable_no_resched();
-}
+	int level;
+	pte_t *ptep;
+	pte_t pte;
+	unsigned long pfn;
+	struct page *page;
 
-static void xen_irq_enable(void)
-{
-	struct vcpu_info *vcpu;
+	ptep = lookup_address((unsigned long)v, &level);
+	BUG_ON(ptep == NULL);
 
-	/* We don't need to worry about being preempted here, since
-	   either a) interrupts are disabled, so no preemption, or b)
-	   the caller is confused and is trying to re-enable interrupts
-	   on an indeterminate processor. */
+	pfn = pte_pfn(*ptep);
+	page = pfn_to_page(pfn);
 
-	vcpu = x86_read_percpu(xen_vcpu);
-	vcpu->evtchn_upcall_mask = 0;
+	pte = pfn_pte(pfn, prot);
 
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
+	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
+		BUG();
 
-	barrier(); /* unmask then check (avoid races) */
-	if (unlikely(vcpu->evtchn_upcall_pending))
-		force_evtchn_callback();
-}
+	if (!PageHighMem(page)) {
+		void *av = __va(PFN_PHYS(pfn));
 
-static void xen_safe_halt(void)
-{
-	/* Blocking includes an implicit local_irq_enable(). */
-	if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
-		BUG();
+		if (av != v)
+			if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
+				BUG();
+	} else
+		kmap_flush_unused();
 }
 
-static void xen_halt(void)
+static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
 {
-	if (irqs_disabled())
-		HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
-	else
-		xen_safe_halt();
-}
+	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
+	int i;
 
-static void xen_leave_lazy(void)
-{
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
-	xen_mc_flush();
+	for(i = 0; i < entries; i += entries_per_page)
+		set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
 }
 
-static unsigned long xen_store_tr(void)
+static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
 {
-	return 0;
+	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
+	int i;
+
+	for(i = 0; i < entries; i += entries_per_page)
+		set_aliased_prot(ldt + i, PAGE_KERNEL);
 }
 
 static void xen_set_ldt(const void *addr, unsigned entries)
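
Note how xen_alloc_ldt()/xen_free_ldt() step through the LDT one backing page
at a time: with 4 KiB pages and 8-byte descriptors, PAGE_SIZE / LDT_ENTRY_SIZE
is 512 entries per page, so each page is handed to set_aliased_prot() exactly
once, which in turn fixes up both the vmalloc alias and the linear-map alias
so no writable mapping of the descriptor pages remains.  A runnable sketch of
the stride arithmetic (the printf stands in for set_aliased_prot(); the
constants are the real x86 values, the LDT size is hypothetical):

    #include <stdio.h>

    #define PAGE_SIZE      4096u
    #define LDT_ENTRY_SIZE 8u    /* size of one segment descriptor */

    int main(void)
    {
        const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; /* 512 */
        unsigned entries = 1024;    /* hypothetical LDT size */
        unsigned i;

        for (i = 0; i < entries; i += entries_per_page)
            printf("page covering entries %u..%u\n",
                   i, i + entries_per_page - 1);
        return 0;
    }
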
@@ -426,8 +400,7 @@ static void xen_load_gs_index(unsigned int idx)
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
 {
-	unsigned long lp = (unsigned long)&dt[entrynum];
-	xmaddr_t mach_lp = virt_to_machine(lp);
+	xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
 	u64 entry = *(u64 *)ptr;
 
 	preempt_disable();
@@ -560,7 +533,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 }
 
 static void xen_load_sp0(struct tss_struct *tss,
-			  struct thread_struct *thread)
+			 struct thread_struct *thread)
 {
 	struct multicall_space mcs = xen_mc_entry(0);
 	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
@@ -835,6 +808,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 		ret = -EFAULT;
 		break;
 #endif
+
+	case MSR_STAR:
+	case MSR_CSTAR:
+	case MSR_LSTAR:
+	case MSR_SYSCALL_MASK:
+	case MSR_IA32_SYSENTER_CS:
+	case MSR_IA32_SYSENTER_ESP:
+	case MSR_IA32_SYSENTER_EIP:
+		/* Fast syscall setup is all done in hypercalls, so
+		   these are all ignored.  Stub them out here to stop
+		   Xen console noise. */
+		break;
+
 	default:
 		ret = native_write_msr_safe(msr, low, high);
 	}
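
In a PV guest the fast-syscall entry points are registered through hypercalls
rather than wrmsr, so there is nothing useful for these MSR writes to do;
swallowing them just keeps Xen from logging warnings.  A small userspace model
of the dispatch shape (the two MSR constants are the real architectural
numbers; the handler bodies are stand-ins):

    #include <stdio.h>

    #define MSR_IA32_SYSENTER_CS 0x00000174u
    #define MSR_LSTAR            0xc0000082u

    static int native_write_msr_safe(unsigned msr, unsigned lo, unsigned hi)
    {
        printf("native wrmsr 0x%x <- %08x:%08x\n", msr, hi, lo);
        return 0;
    }

    static int xen_write_msr_safe(unsigned msr, unsigned lo, unsigned hi)
    {
        switch (msr) {
        case MSR_LSTAR:
        case MSR_IA32_SYSENTER_CS:
            /* syscall setup happens via hypercall; ignore quietly */
            return 0;
        default:
            return native_write_msr_safe(msr, lo, hi);
        }
    }

    int main(void)
    {
        xen_write_msr_safe(MSR_LSTAR, 0, 0);    /* silently ignored */
        xen_write_msr_safe(0x1bu, 0, 0);        /* falls through to native */
        return 0;
    }
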
@@ -878,8 +864,8 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
 		SetPagePinned(page);
 
 		if (!PageHighMem(page)) {
-			make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-			if (level == PT_PTE)
+			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
+			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
 				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
 		} else
 			/* make sure there are no stray mappings of
@@ -947,7 +933,7 @@ static void xen_release_ptpage(unsigned long pfn, unsigned level)
 
 	if (PagePinned(page)) {
 		if (!PageHighMem(page)) {
-			if (level == PT_PTE)
+			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
 				pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
 			make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 		}
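
Both hunks make the L1 pin/unpin conditional on USE_SPLIT_PTLOCKS, which in
kernels of this vintage is roughly a config threshold test,
(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS).  A sketch of how the guard evaluates
(both config values below are illustrative, not taken from any .config):

    #include <stdio.h>

    #define NR_CPUS                  8    /* illustrative */
    #define CONFIG_SPLIT_PTLOCK_CPUS 4    /* illustrative */
    #define USE_SPLIT_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)

    enum pt_level { PT_PGD, PT_PUD, PT_PMD, PT_PTE };

    int main(void)
    {
        enum pt_level level = PT_PTE;

        if (level == PT_PTE && USE_SPLIT_PTLOCKS)
            printf("pin/unpin as an L1 table\n");
        else
            printf("skip the pin\n");
        return 0;
    }
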
@@ -994,6 +980,7 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
 }
 #endif
 
+#ifdef CONFIG_X86_32
 static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 {
 	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
@@ -1012,6 +999,7 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 
 	xen_set_pte(ptep, pte);
 }
+#endif
 
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
@@ -1078,7 +1066,6 @@ void xen_setup_vcpu_info_placement(void)
 
 	/* xen_vcpu_setup managed to place the vcpu_info within the
 	   percpu area for all cpus, so make use of it */
-#ifdef CONFIG_X86_32
 	if (have_vcpu_info_placement) {
 		printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
@@ -1088,7 +1075,6 @@ void xen_setup_vcpu_info_placement(void)
 		pv_irq_ops.irq_enable = xen_irq_enable_direct;
 		pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 	}
-#endif
 }
 
 static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
@@ -1109,12 +1095,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 		goto patch_site
 
 	switch (type) {
-#ifdef CONFIG_X86_32
 		SITE(pv_irq_ops, irq_enable);
 		SITE(pv_irq_ops, irq_disable);
 		SITE(pv_irq_ops, save_fl);
 		SITE(pv_irq_ops, restore_fl);
-#endif /* CONFIG_X86_32 */
 #undef SITE
 
 	patch_site:
@@ -1252,6 +1236,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.load_gs_index = xen_load_gs_index,
 #endif
 
+	.alloc_ldt = xen_alloc_ldt,
+	.free_ldt = xen_free_ldt,
+
 	.store_gdt = native_store_gdt,
 	.store_idt = native_store_idt,
 	.store_tr = xen_store_tr,
@@ -1273,36 +1260,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	},
 };
 
-static void __init __xen_init_IRQ(void)
-{
-#ifdef CONFIG_X86_64
-	int i;
-
-	/* Create identity vector->irq map */
-	for(i = 0; i < NR_VECTORS; i++) {
-		int cpu;
-
-		for_each_possible_cpu(cpu)
-			per_cpu(vector_irq, cpu)[i] = i;
-	}
-#endif /* CONFIG_X86_64 */
-
-	xen_init_IRQ();
-}
-
-static const struct pv_irq_ops xen_irq_ops __initdata = {
-	.init_IRQ = __xen_init_IRQ,
-	.save_fl = xen_save_fl,
-	.restore_fl = xen_restore_fl,
-	.irq_disable = xen_irq_disable,
-	.irq_enable = xen_irq_enable,
-	.safe_halt = xen_safe_halt,
-	.halt = xen_halt,
-#ifdef CONFIG_X86_64
-	.adjust_exception_frame = xen_adjust_exception_frame,
-#endif
-};
-
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.setup_boot_clock = paravirt_nop,
@@ -1443,7 +1400,7 @@ static void __init xen_reserve_top(void)
 	if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
 		top = pp.virt_start;
 
-	reserve_top_address(-top + 2 * PAGE_SIZE);
+	reserve_top_address(-top);
 #endif /* CONFIG_X86_32 */
 }
 
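
On 32-bit, reserve_top_address() takes the size of the region to carve off the
top of the virtual address space, so passing -top (with 32-bit wraparound)
reserves exactly [virt_start, 4 GiB); the old "+ 2 * PAGE_SIZE" reserved two
extra pages of slack.  A worked example of the wraparound arithmetic, with a
hypothetical virt_start:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t top = 0xf5800000u;    /* hypothetical pp.virt_start */
        uint32_t reserve = -top;       /* wraps to 0x0a800000 */

        printf("reserve 0x%08x bytes (%u MiB)\n",
               (unsigned)reserve, (unsigned)(reserve >> 20));
        return 0;
    }
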
@@ -1477,48 +1434,11 @@ static void *m2v(phys_addr_t maddr)
 	return __ka(m2p(maddr));
 }
 
-#ifdef CONFIG_X86_64
-static void walk(pgd_t *pgd, unsigned long addr)
-{
-	unsigned l4idx = pgd_index(addr);
-	unsigned l3idx = pud_index(addr);
-	unsigned l2idx = pmd_index(addr);
-	unsigned l1idx = pte_index(addr);
-	pgd_t l4;
-	pud_t l3;
-	pmd_t l2;
-	pte_t l1;
-
-	xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
-		       pgd, addr, l4idx, l3idx, l2idx, l1idx);
-
-	l4 = pgd[l4idx];
-	xen_raw_printk("  l4: %016lx\n", l4.pgd);
-	xen_raw_printk("      %016lx\n", pgd_val(l4));
-
-	l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
-	xen_raw_printk("  l3: %016lx\n", l3.pud);
-	xen_raw_printk("      %016lx\n", pud_val(l3));
-
-	l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
-	xen_raw_printk("  l2: %016lx\n", l2.pmd);
-	xen_raw_printk("      %016lx\n", pmd_val(l2));
-
-	l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
-	xen_raw_printk("  l1: %016lx\n", l1.pte);
-	xen_raw_printk("      %016lx\n", pte_val(l1));
-}
-#endif
-
 static void set_page_prot(void *addr, pgprot_t prot)
 {
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);
 
-	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
-		       addr, pfn, get_phys_to_machine(pfn),
-		       pgprot_val(prot), pte.pte);
-
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
 }
@@ -1694,6 +1614,8 @@ asmlinkage void __init xen_start_kernel(void)
 	if (!xen_start_info)
 		return;
 
+	xen_domain_type = XEN_PV_DOMAIN;
+
 	BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
 
 	xen_setup_features();
@@ -1703,10 +1625,11 @@ asmlinkage void __init xen_start_kernel(void)
 	pv_init_ops = xen_init_ops;
 	pv_time_ops = xen_time_ops;
 	pv_cpu_ops = xen_cpu_ops;
-	pv_irq_ops = xen_irq_ops;
 	pv_apic_ops = xen_apic_ops;
 	pv_mmu_ops = xen_mmu_ops;
 
+	xen_init_irq_ops();
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * set up the basic apic ops.
@@ -1737,7 +1660,7 @@ asmlinkage void __init xen_start_kernel(void)
 
 	/* Prevent unwanted bits from being set in PTEs. */
 	__supported_pte_mask &= ~_PAGE_GLOBAL;
-	if (!is_initial_xendomain())
+	if (!xen_initial_domain())
 		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
 	/* Don't do the full vcpu_info placement stuff until we have a
@@ -1772,7 +1695,7 @@ asmlinkage void __init xen_start_kernel(void)
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
-	if (!xen_initial_domain()) {
+	if (!xen_initial_domain()) {
 		add_preferred_console("xenboot", 0, NULL);
 		add_preferred_console("tty", 0, NULL);
 		add_preferred_console("hvc", 0, NULL);
@@ -1780,15 +1703,6 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_raw_console_write("about to get started...\n");
 
-#if 0
-	xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
-		       &boot_params, __pa_symbol(&boot_params),
-		       __va(__pa_symbol(&boot_params)));
-
-	walk(pgd, &boot_params);
-	walk(pgd, __va(__pa(&boot_params)));
-#endif
-
 	/* Start the world */
 #ifdef CONFIG_X86_32
 	i386_start_kernel();
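
The thread running through this patch: xen_domain_type, set to XEN_PV_DOMAIN
at the very top of xen_start_kernel(), is what the new
xen_pv_domain()/xen_initial_domain() predicates test, replacing
is_initial_xendomain().  A simplified model of that plumbing (the enum and
predicates follow the Xen headers of this era, condensed; the real
xen_initial_domain() additionally checks xen_start_info->flags &
SIF_INITDOMAIN):

    #include <stdio.h>

    enum xen_domain_type { XEN_NATIVE, XEN_PV_DOMAIN, XEN_HVM_DOMAIN };

    static enum xen_domain_type xen_domain_type = XEN_NATIVE;

    static int xen_domain(void)    { return xen_domain_type != XEN_NATIVE; }
    static int xen_pv_domain(void) { return xen_domain_type == XEN_PV_DOMAIN; }

    int main(void)
    {
        /* xen_start_kernel() does this before any other Xen-specific setup */
        xen_domain_type = XEN_PV_DOMAIN;

        printf("xen_domain=%d xen_pv_domain=%d\n",
               xen_domain(), xen_pv_domain());
        return 0;
    }
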