aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2011-05-12 03:36:18 -0400
committerIngo Molnar <mingo@elte.hu>2011-05-12 03:36:18 -0400
commit9cb5baba5e3acba0994ad899ee908799104c9965 (patch)
treed5ff16000256a0bf56279926e6114b4603ede2b4 /arch/x86/xen
parent7142d17e8f935fa842e9f6eece2281b6d41625d6 (diff)
parent693d92a1bbc9e42681c42ed190bd42b636ca876f (diff)
Merge commit 'v2.6.39-rc7' into sched/core
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/mmu.c138
-rw-r--r--arch/x86/xen/setup.c2
2 files changed, 134 insertions, 6 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index a991b57f91fe..55c965b38c27 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1463,6 +1463,119 @@ static int xen_pgd_alloc(struct mm_struct *mm)
1463 return ret; 1463 return ret;
1464} 1464}
1465 1465
1466#ifdef CONFIG_X86_64
1467static __initdata u64 __last_pgt_set_rw = 0;
1468static __initdata u64 __pgt_buf_start = 0;
1469static __initdata u64 __pgt_buf_end = 0;
1470static __initdata u64 __pgt_buf_top = 0;
1471/*
1472 * As a consequence of the commit:
1473 *
1474 * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
1475 * Author: Yinghai Lu <yinghai@kernel.org>
1476 * Date: Fri Dec 17 16:58:28 2010 -0800
1477 *
1478 * x86-64, mm: Put early page table high
1479 *
1480 * at some point init_memory_mapping is going to reach the pagetable pages
1481 * area and map those pages too (mapping them as normal memory that falls
1482 * in the range of addresses passed to init_memory_mapping as argument).
1483 * Some of those pages are already pagetable pages (they are in the range
1484 * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
1485 * everything is fine.
1486 * Some of these pages are not pagetable pages yet (they fall in the range
1487 * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
1488 * are going to be mapped RW. When these pages become pagetable pages and
1489 * are hooked into the pagetable, xen will find that the guest has already
1490 * a RW mapping of them somewhere and fail the operation.
1491 * The reason Xen requires pagetables to be RO is that the hypervisor needs
1492 * to verify that the pagetables are valid before using them. The validation
1493 * operations are called "pinning".
1494 *
1495 * In order to fix the issue we mark all the pages in the entire range
1496 * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
1497 * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
1498 * init_memory_mapping. Hence the kernel is going to crash as soon as one
1499 * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
1500 * ranges are RO).
1501 *
1502 * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
1503 * the init_memory_mapping has completed (in a perfect world we would
1504 * call this function from init_memory_mapping, but lets ignore that).
1505 *
1506 * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
1507 * end,top] have all changed to new values (b/c init_memory_mapping
1508 * is called and setting up another new page-table). Hence, the first time
1509 * we enter this function, we save away the pgt_buf_start value and update
1510 * the pgt_buf_[end,top].
1511 *
1512 * When we detect that the "old" pgt_buf_start through pgt_buf_end
1513 * PFNs have been reserved (so memblock_x86_reserve_range has been called),
1514 * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
1515 *
1516 * And then we update those "old" pgt_buf_[end|top] with the new ones
1517 * so that we can redo this on the next pagetable.
1518 */
1519static __init void mark_rw_past_pgt(void) {
1520
1521 if (pgt_buf_end > pgt_buf_start) {
1522 u64 addr, size;
1523
1524 /* Save it away. */
1525 if (!__pgt_buf_start) {
1526 __pgt_buf_start = pgt_buf_start;
1527 __pgt_buf_end = pgt_buf_end;
1528 __pgt_buf_top = pgt_buf_top;
1529 return;
1530 }
1531 /* If we get the range that starts at __pgt_buf_end that means
1532 * the range is reserved, and that in 'init_memory_mapping'
1533 * the 'memblock_x86_reserve_range' has been called with the
1534 * outdated __pgt_buf_start, __pgt_buf_end (the "new"
1535 * pgt_buf_[start|end|top] refer now to a new pagetable.
1536 * Note: we are called _after_ the pgt_buf_[..] have been
1537 * updated.*/
1538
1539 addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
1540 &size, PAGE_SIZE);
1541
1542 /* Still not reserved, meaning 'memblock_x86_reserve_range'
1543 * hasn't been called yet. Update the _end and _top.*/
1544 if (addr == PFN_PHYS(__pgt_buf_start)) {
1545 __pgt_buf_end = pgt_buf_end;
1546 __pgt_buf_top = pgt_buf_top;
1547 return;
1548 }
1549
1550 /* OK, the area is reserved, meaning it is time for us to
1551 * set RW for the old end->top PFNs. */
1552
1553 /* ..unless we had already done this. */
1554 if (__pgt_buf_end == __last_pgt_set_rw)
1555 return;
1556
1557 addr = PFN_PHYS(__pgt_buf_end);
1558
1559 /* set as RW the rest */
1560 printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
1561 PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
1562
1563 while (addr < PFN_PHYS(__pgt_buf_top)) {
1564 make_lowmem_page_readwrite(__va(addr));
1565 addr += PAGE_SIZE;
1566 }
1567 /* And update everything so that we are ready for the next
1568 * pagetable (the one created for regions past 4GB) */
1569 __last_pgt_set_rw = __pgt_buf_end;
1570 __pgt_buf_start = pgt_buf_start;
1571 __pgt_buf_end = pgt_buf_end;
1572 __pgt_buf_top = pgt_buf_top;
1573 }
1574 return;
1575}
1576#else
1577static __init void mark_rw_past_pgt(void) { }
1578#endif
1466static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) 1579static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1467{ 1580{
1468#ifdef CONFIG_X86_64 1581#ifdef CONFIG_X86_64
@@ -1473,30 +1586,43 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1473#endif 1586#endif
1474} 1587}
1475 1588
1589#ifdef CONFIG_X86_32
1476static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) 1590static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1477{ 1591{
1478 unsigned long pfn = pte_pfn(pte);
1479
1480#ifdef CONFIG_X86_32
1481 /* If there's an existing pte, then don't allow _PAGE_RW to be set */ 1592 /* If there's an existing pte, then don't allow _PAGE_RW to be set */
1482 if (pte_val_ma(*ptep) & _PAGE_PRESENT) 1593 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
1483 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & 1594 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
1484 pte_val_ma(pte)); 1595 pte_val_ma(pte));
1485#endif 1596
1597 return pte;
1598}
1599#else /* CONFIG_X86_64 */
1600static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1601{
1602 unsigned long pfn = pte_pfn(pte);
1486 1603
1487 /* 1604 /*
1605 * A bit of optimization. We do not need to call the workaround
1606 * when xen_set_pte_init is called with a PTE with 0 as PFN.
1607 * That is b/c the pagetable at that point are just being populated
1608 * with empty values and we can save some cycles by not calling
1609 * the 'memblock' code.*/
1610 if (pfn)
1611 mark_rw_past_pgt();
1612 /*
1488 * If the new pfn is within the range of the newly allocated 1613 * If the new pfn is within the range of the newly allocated
1489 * kernel pagetable, and it isn't being mapped into an 1614 * kernel pagetable, and it isn't being mapped into an
1490 * early_ioremap fixmap slot as a freshly allocated page, make sure 1615 * early_ioremap fixmap slot as a freshly allocated page, make sure
1491 * it is RO. 1616 * it is RO.
1492 */ 1617 */
1493 if (((!is_early_ioremap_ptep(ptep) && 1618 if (((!is_early_ioremap_ptep(ptep) &&
1494 pfn >= pgt_buf_start && pfn < pgt_buf_end)) || 1619 pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
1495 (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) 1620 (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
1496 pte = pte_wrprotect(pte); 1621 pte = pte_wrprotect(pte);
1497 1622
1498 return pte; 1623 return pte;
1499} 1624}
1625#endif /* CONFIG_X86_64 */
1500 1626
1501/* Init-time set_pte while constructing initial pagetables, which 1627/* Init-time set_pte while constructing initial pagetables, which
1502 doesn't allow RO pagetable pages to be remapped RW */ 1628 doesn't allow RO pagetable pages to be remapped RW */
@@ -1992,6 +2118,8 @@ __init void xen_ident_map_ISA(void)
1992 2118
1993static __init void xen_post_allocator_init(void) 2119static __init void xen_post_allocator_init(void)
1994{ 2120{
2121 mark_rw_past_pgt();
2122
1995#ifdef CONFIG_XEN_DEBUG 2123#ifdef CONFIG_XEN_DEBUG
1996 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); 2124 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
1997#endif 2125#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index fa0269a99377..90bac0aac3a5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -227,7 +227,7 @@ char * __init xen_memory_setup(void)
227 227
228 memcpy(map_raw, map, sizeof(map)); 228 memcpy(map_raw, map, sizeof(map));
229 e820.nr_map = 0; 229 e820.nr_map = 0;
230 xen_extra_mem_start = mem_end; 230 xen_extra_mem_start = max((1ULL << 32), mem_end);
231 for (i = 0; i < memmap.nr_entries; i++) { 231 for (i = 0; i < memmap.nr_entries; i++) {
232 unsigned long long end; 232 unsigned long long end;
233 233