author     Linus Torvalds <torvalds@linux-foundation.org>  2014-10-11 20:29:01 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-10-11 20:29:01 -0400
commit     81ae31d78239318610d7c2acb3e2610d622a5aa4 (patch)
tree       1e31b300f1574fceaff065a9bd92460b7c466f7c /arch
parent     ef4a48c513211d842c55e84f7a1c31884b91dcf7 (diff)
parent     95afae481414cbdb0567bf82d5e5077c3ac9da20 (diff)
Merge tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull Xen updates from David Vrabel:
 "Features and fixes:

   - Add pvscsi frontend and backend drivers.
   - Remove _PAGE_IOMAP PTE flag, freeing it for alternate uses.
   - Try and keep memory contiguous during PV memory setup (reduces
     SWIOTLB usage).
   - Allow front/back drivers to use threaded irqs.
   - Support large initrds in PV guests.
   - Fix PVH guests in preparation for Xen 4.5"

* tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (22 commits)
  xen: remove DEFINE_XENBUS_DRIVER() macro
  xen/xenbus: Remove BUG_ON() when error string trucated
  xen/xenbus: Correct the comments for xenbus_grant_ring()
  x86/xen: Set EFER.NX and EFER.SCE in PVH guests
  xen: eliminate scalability issues from initrd handling
  xen: sync some headers with xen tree
  xen: make pvscsi frontend dependant on xenbus frontend
  arm{,64}/xen: Remove "EXPERIMENTAL" in the description of the Xen options
  xen-scsifront: don't deadlock if the ring becomes full
  x86: remove the Xen-specific _PAGE_IOMAP PTE flag
  x86/xen: do not use _PAGE_IOMAP PTE flag for I/O mappings
  x86: skip check for spurious faults for non-present faults
  xen/efi: Directly include needed headers
  xen-scsiback: clean up a type issue in scsiback_make_tpg()
  xen-scsifront: use GFP_ATOMIC under spin_lock
  MAINTAINERS: Add xen pvscsi maintainer
  xen-scsiback: Add Xen PV SCSI backend driver
  xen-scsifront: Add Xen PV SCSI frontend driver
  xen: Add Xen pvSCSI protocol description
  xen/events: support threaded irqs for interdomain event channels
  ...
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/Kconfig                        2
-rw-r--r--  arch/arm64/Kconfig                      2
-rw-r--r--  arch/x86/include/asm/pgtable_types.h   11
-rw-r--r--  arch/x86/mm/fault.c                    22
-rw-r--r--  arch/x86/mm/init_32.c                   2
-rw-r--r--  arch/x86/mm/init_64.c                   2
-rw-r--r--  arch/x86/pci/i386.c                     2
-rw-r--r--  arch/x86/xen/efi.c                      2
-rw-r--r--  arch/x86/xen/enlighten.c               19
-rw-r--r--  arch/x86/xen/mmu.c                     48
-rw-r--r--  arch/x86/xen/p2m.c                     23
-rw-r--r--  arch/x86/xen/p2m.h                     15
-rw-r--r--  arch/x86/xen/setup.c                  370
-rw-r--r--  arch/x86/xen/smp.c                     29
-rw-r--r--  arch/x86/xen/smp.h                      8
-rw-r--r--  arch/x86/xen/xen-head.S                36
16 files changed, 426 insertions, 167 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 18f392f8b744..89c4b5ccc68d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1779,7 +1779,7 @@ config XEN_DOM0
 	depends on XEN
 
 config XEN
-	bool "Xen guest support on ARM (EXPERIMENTAL)"
+	bool "Xen guest support on ARM"
 	depends on ARM && AEABI && OF
 	depends on CPU_V7 && !CPU_V6
 	depends on !GENERIC_ATOMIC64
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c49ca4c738bb..ac9afde76dea 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -349,7 +349,7 @@ config XEN_DOM0
 	depends on XEN
 
 config XEN
-	bool "Xen guest support on ARM64 (EXPERIMENTAL)"
+	bool "Xen guest support on ARM64"
 	depends on ARM64 && OF
 	select SWIOTLB_XEN
 	help
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0f9724c9c510..07789647bf33 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,7 +23,6 @@
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_SOFTW1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_SOFTW1
 #define _PAGE_BIT_SPLITTING	_PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
-#define _PAGE_BIT_IOMAP		_PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
 #define _PAGE_BIT_HIDDEN	_PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
 #define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */
 #define _PAGE_BIT_NX		63 /* No execute: only valid after cpuid check */
@@ -52,7 +51,7 @@
 #define _PAGE_PSE	(_AT(pteval_t, 1) << _PAGE_BIT_PSE)
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #define _PAGE_SOFTW1	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
-#define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
+#define _PAGE_SOFTW2	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
@@ -168,10 +167,10 @@
 #define __PAGE_KERNEL_LARGE_NOCACHE	(__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
 
-#define __PAGE_KERNEL_IO		(__PAGE_KERNEL | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_NOCACHE	(__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_UC_MINUS	(__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_WC		(__PAGE_KERNEL_WC | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO		(__PAGE_KERNEL)
+#define __PAGE_KERNEL_IO_NOCACHE	(__PAGE_KERNEL_NOCACHE)
+#define __PAGE_KERNEL_IO_UC_MINUS	(__PAGE_KERNEL_UC_MINUS)
+#define __PAGE_KERNEL_IO_WC		(__PAGE_KERNEL_WC)
 
 #define PAGE_KERNEL			__pgprot(__PAGE_KERNEL)
 #define PAGE_KERNEL_RO			__pgprot(__PAGE_KERNEL_RO)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a24194681513..83bb03bfa259 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -933,8 +933,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
  * cross-processor TLB flush, even if no stale TLB entries exist
  * on other processors.
  *
+ * Spurious faults may only occur if the TLB contains an entry with
+ * fewer permission than the page table entry.  Non-present (P = 0)
+ * and reserved bit (R = 1) faults are never spurious.
+ *
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
+ *
+ * Returns non-zero if a spurious fault was handled, zero otherwise.
+ *
+ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
+ * (Optional Invalidation).
  */
 static noinline int
 spurious_fault(unsigned long error_code, unsigned long address)
@@ -945,8 +954,17 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	pte_t *pte;
 	int ret;
 
-	/* Reserved-bit violation or user access to kernel space? */
-	if (error_code & (PF_USER | PF_RSVD))
+	/*
+	 * Only writes to RO or instruction fetches from NX may cause
+	 * spurious faults.
+	 *
+	 * These could be from user or supervisor accesses but the TLB
+	 * is only lazily flushed after a kernel mapping protection
+	 * change, so user accesses are not expected to cause spurious
+	 * faults.
+	 */
+	if (error_code != (PF_WRITE | PF_PROT)
+	    && error_code != (PF_INSTR | PF_PROT))
 		return 0;
 
 	pgd = init_mm.pgd + pgd_index(address);
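The rewritten check in spurious_fault() boils down to a two-case predicate on the page-fault error code. The standalone sketch below illustrates it; the PF_* bit values are assumptions mirroring the error-code bits defined in arch/x86/mm/fault.c of this era, not something taken from this diff.

/* Minimal sketch of the new spurious-fault filter (illustrative only). */
#include <stdbool.h>
#include <stdio.h>

#define PF_PROT   (1 << 0)	/* fault on a present page */
#define PF_WRITE  (1 << 1)	/* write access */
#define PF_USER   (1 << 2)	/* user-mode access */
#define PF_RSVD   (1 << 3)	/* reserved bit set in a page table entry */
#define PF_INSTR  (1 << 4)	/* instruction fetch */

/* Only write-to-RO and fetch-from-NX faults on present pages can be spurious. */
static bool may_be_spurious(unsigned long error_code)
{
	return error_code == (PF_WRITE | PF_PROT) ||
	       error_code == (PF_INSTR | PF_PROT);
}

int main(void)
{
	printf("%d\n", may_be_spurious(PF_WRITE | PF_PROT));	/* 1 */
	printf("%d\n", may_be_spurious(PF_WRITE));		/* 0: page not present */
	printf("%d\n", may_be_spurious(PF_RSVD | PF_PROT));	/* 0: reserved-bit fault */
	return 0;
}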
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7d05565ba781..c8140e12816a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -537,7 +537,7 @@ static void __init pagetable_init(void)
 	permanent_kmaps_init(pgd_base);
 }
 
-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
+pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 /* user-defined highmem size */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5621c47d7a1a..5d984769cbd8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on);
  * around without checking the pgd every time.
  */
 
-pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
+pteval_t __supported_pte_mask __read_mostly = ~0;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 int force_personality32;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 2ae525e0d8ba..37c1435889ce 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -442,8 +442,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 	 */
 	prot |= _PAGE_CACHE_UC_MINUS;
 
-	prot |= _PAGE_IOMAP; /* creating a mapping for IO */
-
 	vma->vm_page_prot = __pgprot(prot);
 
 	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a02e09e18f57..be14cc3e48d5 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -15,12 +15,14 @@
  * with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/bitops.h>
 #include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/string.h>
 
 #include <xen/xen-ops.h>
 
+#include <asm/page.h>
 #include <asm/setup.h>
 
 void __init xen_efi_init(void)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0cb11fb5008..acb0effd8077 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu)
 	pv_cpu_ops.load_gdt = xen_load_gdt;
 }
 
+#ifdef CONFIG_XEN_PVH
 /*
  * A PV guest starts with default flags that are not set for PVH, set them
  * here asap.
@@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void)
 		return;
 
 	xen_have_vector_callback = 1;
+
+	xen_pvh_early_cpu_init(0, false);
 	xen_pvh_set_cr_flags(0);
 
 #ifdef CONFIG_X86_32
 	BUG(); /* PVH: Implement proper support. */
 #endif
 }
+#endif	/* CONFIG_XEN_PVH */
 
 /* First C function to be called on Xen boot */
 asmlinkage __visible void __init xen_start_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
+	unsigned long initrd_start = 0;
 	int rc;
 
 	if (!xen_start_info)
@@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_domain_type = XEN_PV_DOMAIN;
 
 	xen_setup_features();
+#ifdef CONFIG_XEN_PVH
 	xen_pvh_early_guest_init();
+#endif
 	xen_setup_machphys_mapping();
 
 	/* Install Xen paravirt ops */
@@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 #endif
 	__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
-	__supported_pte_mask |= _PAGE_IOMAP;
-
 	/*
 	 * Prevent page tables from being allocated in highmem, even
 	 * if CONFIG_HIGHPTE is enabled.
@@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
 #endif
 
+	if (xen_start_info->mod_start) {
+	    if (xen_start_info->flags & SIF_MOD_START_PFN)
+		initrd_start = PFN_PHYS(xen_start_info->mod_start);
+	    else
+		initrd_start = __pa(xen_start_info->mod_start);
+	}
+
 	/* Poke various useful things into boot_params */
 	boot_params.hdr.type_of_loader = (9 << 4) | 0;
-	boot_params.hdr.ramdisk_image = xen_start_info->mod_start
-		? __pa(xen_start_info->mod_start) : 0;
+	boot_params.hdr.ramdisk_image = initrd_start;
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
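The initrd handling above distinguishes a mod_start passed as a frame number (the new XEN_ELFNOTE_MOD_START_PFN behaviour) from one passed as a virtual address. A self-contained sketch of that selection logic follows; the SIF_MOD_START_PFN value and the fake __pa() offset are illustrative assumptions, not taken from this patch.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT        12
#define SIF_MOD_START_PFN (1 << 3)	/* assumed flag bit, for illustration */

/* Stand-in for the kernel's __pa(): a pretend fixed virtual-to-physical offset. */
static uint64_t fake_pa(uint64_t va)
{
	return va - 0xffffffff80000000ULL;
}

static uint64_t initrd_phys(uint64_t mod_start, uint32_t flags)
{
	if (!mod_start)
		return 0;
	if (flags & SIF_MOD_START_PFN)
		return mod_start << PAGE_SHIFT;	/* mod_start is a PFN */
	return fake_pa(mod_start);		/* mod_start is a virtual address */
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)initrd_phys(0x1000, SIF_MOD_START_PFN));
	printf("%#llx\n", (unsigned long long)initrd_phys(0xffffffff81000000ULL, 0));
	return 0;
}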
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 16fb0099b7f2..f62af7647ec9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
 			mfn = 0;
 			flags = 0;
-		} else {
-			/*
-			 * Paramount to do this test _after_ the
-			 * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
-			 * IDENTITY_FRAME_BIT resolves to true.
-			 */
-			mfn &= ~FOREIGN_FRAME_BIT;
-			if (mfn & IDENTITY_FRAME_BIT) {
-				mfn &= ~IDENTITY_FRAME_BIT;
-				flags |= _PAGE_IOMAP;
-			}
-		}
+		} else
+			mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
 		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
 	}
 
 	return val;
 }
 
-static pteval_t iomap_pte(pteval_t val)
-{
-	if (val & _PAGE_PRESENT) {
-		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
-		pteval_t flags = val & PTE_FLAGS_MASK;
-
-		/* We assume the pte frame number is a MFN, so
-		   just use it as-is. */
-		val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
-	}
-
-	return val;
-}
-
 __visible pteval_t xen_pte_val(pte_t pte)
 {
 	pteval_t pteval = pte.pte;
@@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte)
 		pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
 	}
 #endif
-	if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
-		return pteval;
-
 	return pte_mfn_to_pfn(pteval);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -481,7 +454,6 @@ void xen_set_pat(u64 pat)
 
 __visible pte_t xen_make_pte(pteval_t pte)
 {
-	phys_addr_t addr = (pte & PTE_PFN_MASK);
 #if 0
 	/* If Linux is trying to set a WC pte, then map to the Xen WC.
 	 * If _PAGE_PAT is set, then it probably means it is really
@@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte)
 		pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
 	}
 #endif
-	/*
-	 * Unprivileged domains are allowed to do IOMAPpings for
-	 * PCI passthrough, but not map ISA space.  The ISA
-	 * mappings are just dummy local mappings to keep other
-	 * parts of the kernel happy.
-	 */
-	if (unlikely(pte & _PAGE_IOMAP) &&
-	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
-		pte = iomap_pte(pte);
-	} else {
-		pte &= ~_PAGE_IOMAP;
-		pte = pte_pfn_to_mfn(pte);
-	}
+	pte = pte_pfn_to_mfn(pte);
 
 	return native_make_pte(pte);
 }
@@ -2091,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 	default:
 		/* By default, set_fixmap is used for hardware mappings */
-		pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+		pte = mfn_pte(phys, prot);
 		break;
 	}
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 3172692381ae..9f5983b01ed9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -173,6 +173,7 @@
 #include <xen/balloon.h>
 #include <xen/grant_table.h>
 
+#include "p2m.h"
 #include "multicalls.h"
 #include "xen-ops.h"
 
@@ -180,12 +181,6 @@ static void __init m2p_override_init(void);
 
 unsigned long xen_max_p2m_pfn __read_mostly;
 
-#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
-
-#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
 /* Placeholders for holes in the address space */
 static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
 static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
@@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
 RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
 RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
 
-/* We might hit two boundary violations at the start and end, at max each
- * boundary violation will require three middle nodes. */
-RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
-
-/* When we populate back during bootup, the amount of pages can vary. The
- * max we have is seen is 395979, but that does not mean it can't be more.
- * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
- * it can re-use Xen provided mfn_list array, so we only need to allocate at
- * most three P2M top nodes. */
-RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
+/* For each I/O range remapped we may lose up to two leaf pages for the boundary
+ * violations and three mid pages to cover up to 3GB. With
+ * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
+ * remapped region.
+ */
+RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
diff --git a/arch/x86/xen/p2m.h b/arch/x86/xen/p2m.h
new file mode 100644
index 000000000000..ad8aee24ab72
--- /dev/null
+++ b/arch/x86/xen/p2m.h
@@ -0,0 +1,15 @@
+#ifndef _XEN_P2M_H
+#define _XEN_P2M_H
+
+#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
+#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
+
+#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
+
+#define MAX_REMAP_RANGES	10
+
+extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
+						    unsigned long pfn_e);
+
+#endif	/* _XEN_P2M_H */
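For orientation, the constants in this new header work out as follows on x86-64 (4 KiB pages, 8-byte longs and pointers); the short sketch below just evaluates them and is not part of the patch.

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long p2m_per_page     = PAGE_SIZE / sizeof(unsigned long);	/* 512 */
	unsigned long p2m_mid_per_page = PAGE_SIZE / sizeof(unsigned long *);	/* 512 */
	unsigned long p2m_top_per_page = PAGE_SIZE / sizeof(unsigned long **);	/* 512 */
	unsigned long max_p2m_pfn = p2m_top_per_page * p2m_mid_per_page * p2m_per_page;

	/* 512^3 = 134217728 pfns, i.e. a 512 GiB p2m address space */
	printf("MAX_P2M_PFN = %lu\n", max_p2m_pfn);
	return 0;
}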
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2e555163c2fe..af7216128d93 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -29,6 +29,7 @@
 #include <xen/features.h>
 #include "xen-ops.h"
 #include "vdso.h"
+#include "p2m.h"
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 /* Number of pages released from the initial allocation. */
 unsigned long xen_released_pages;
 
+/* Buffer used to remap identity mapped pages */
+unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
+
 /*
  * The maximum amount of extra memory compared to the base size.  The
  * main scaling factor is the size of struct page.  At extreme ratios
@@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start,
 	return len;
 }
 
-static unsigned long __init xen_release_chunk(unsigned long start,
-					      unsigned long end)
-{
-	return xen_do_chunk(start, end, true);
-}
-
-static unsigned long __init xen_populate_chunk(
+/*
+ * Finds the next RAM pfn available in the E820 map after min_pfn.
+ * This function updates min_pfn with the pfn found and returns
+ * the size of that range or zero if not found.
+ */
+static unsigned long __init xen_find_pfn_range(
 	const struct e820entry *list, size_t map_size,
-	unsigned long max_pfn, unsigned long *last_pfn,
-	unsigned long credits_left)
+	unsigned long *min_pfn)
 {
 	const struct e820entry *entry;
 	unsigned int i;
 	unsigned long done = 0;
-	unsigned long dest_pfn;
 
 	for (i = 0, entry = list; i < map_size; i++, entry++) {
 		unsigned long s_pfn;
 		unsigned long e_pfn;
-		unsigned long pfns;
-		long capacity;
-
-		if (credits_left <= 0)
-			break;
 
 		if (entry->type != E820_RAM)
 			continue;
 
 		e_pfn = PFN_DOWN(entry->addr + entry->size);
 
-		/* We only care about E820 after the xen_start_info->nr_pages */
-		if (e_pfn <= max_pfn)
+		/* We only care about E820 after this */
+		if (e_pfn < *min_pfn)
 			continue;
 
 		s_pfn = PFN_UP(entry->addr);
-		/* If the E820 falls within the nr_pages, we want to start
-		 * at the nr_pages PFN.
-		 * If that would mean going past the E820 entry, skip it
+
+		/* If min_pfn falls within the E820 entry, we want to start
+		 * at the min_pfn PFN.
 		 */
-		if (s_pfn <= max_pfn) {
-			capacity = e_pfn - max_pfn;
-			dest_pfn = max_pfn;
+		if (s_pfn <= *min_pfn) {
+			done = e_pfn - *min_pfn;
 		} else {
-			capacity = e_pfn - s_pfn;
-			dest_pfn = s_pfn;
+			done = e_pfn - s_pfn;
+			*min_pfn = s_pfn;
 		}
+		break;
+	}
 
-		if (credits_left < capacity)
-			capacity = credits_left;
+	return done;
+}
 
-		pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
-		done += pfns;
-		*last_pfn = (dest_pfn + pfns);
-		if (pfns < capacity)
-			break;
-		credits_left -= pfns;
+/*
+ * This releases a chunk of memory and then does the identity map. It's used as
+ * as a fallback if the remapping fails.
+ */
+static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
+	unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
+	unsigned long *released)
+{
+	WARN_ON(start_pfn > end_pfn);
+
+	/* Need to release pages first */
+	*released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
+	*identity += set_phys_range_identity(start_pfn, end_pfn);
+}
+
+/*
+ * Helper function to update both the p2m and m2p tables.
+ */
+static unsigned long __init xen_update_mem_tables(unsigned long pfn,
+						  unsigned long mfn)
+{
+	struct mmu_update update = {
+		.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+		.val = pfn
+	};
+
+	/* Update p2m */
+	if (!early_set_phys_to_machine(pfn, mfn)) {
+		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
+		     pfn, mfn);
+		return false;
 	}
-	return done;
+
+	/* Update m2p */
+	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
+		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
+		     mfn, pfn);
+		return false;
+	}
+
+	return true;
 }
 
-static void __init xen_set_identity_and_release_chunk(
-	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
-	unsigned long *released, unsigned long *identity)
+/*
+ * This function updates the p2m and m2p tables with an identity map from
+ * start_pfn to start_pfn+size and remaps the underlying RAM of the original
+ * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
+ * to not exhaust the reserved brk space. Doing it in properly aligned blocks
+ * ensures we only allocate the minimum required leaf pages in the p2m table. It
+ * copies the existing mfns from the p2m table under the 1:1 map, overwrites
+ * them with the identity map and then updates the p2m and m2p tables with the
+ * remapped memory.
+ */
+static unsigned long __init xen_do_set_identity_and_remap_chunk(
+	unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
 {
-	unsigned long pfn;
+	unsigned long ident_pfn_iter, remap_pfn_iter;
+	unsigned long ident_start_pfn_align, remap_start_pfn_align;
+	unsigned long ident_end_pfn_align, remap_end_pfn_align;
+	unsigned long ident_boundary_pfn, remap_boundary_pfn;
+	unsigned long ident_cnt = 0;
+	unsigned long remap_cnt = 0;
+	unsigned long left = size;
+	unsigned long mod;
+	int i;
+
+	WARN_ON(size == 0);
+
+	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
 
 	/*
-	 * If the PFNs are currently mapped, clear the mappings
-	 * (except for the ISA region which must be 1:1 mapped) to
-	 * release the refcounts (in Xen) on the original frames.
+	 * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
+	 * blocks. We need to keep track of both the existing pfn mapping and
+	 * the new pfn remapping.
 	 */
-	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
-		pte_t pte = __pte_ma(0);
+	mod = start_pfn % P2M_PER_PAGE;
+	ident_start_pfn_align =
+		mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
+	mod = remap_pfn % P2M_PER_PAGE;
+	remap_start_pfn_align =
+		mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
+	mod = (start_pfn + size) % P2M_PER_PAGE;
+	ident_end_pfn_align = start_pfn + size - mod;
+	mod = (remap_pfn + size) % P2M_PER_PAGE;
+	remap_end_pfn_align = remap_pfn + size - mod;
+
+	/* Iterate over each p2m leaf node in each range */
+	for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
+	     ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
+	     ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
+		/* Check we aren't past the end */
+		BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
+		BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
+
+		/* Save p2m mappings */
+		for (i = 0; i < P2M_PER_PAGE; i++)
+			xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
+
+		/* Set identity map which will free a p2m leaf */
+		ident_cnt += set_phys_range_identity(ident_pfn_iter,
+			ident_pfn_iter + P2M_PER_PAGE);
+
+#ifdef DEBUG
+		/* Helps verify a p2m leaf has been freed */
+		for (i = 0; i < P2M_PER_PAGE; i++) {
+			unsigned int pfn = ident_pfn_iter + i;
+			BUG_ON(pfn_to_mfn(pfn) != pfn);
+		}
+#endif
+		/* Now remap memory */
+		for (i = 0; i < P2M_PER_PAGE; i++) {
+			unsigned long mfn = xen_remap_buf[i];
+
+			/* This will use the p2m leaf freed above */
+			if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
+				WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+					remap_pfn_iter + i, mfn);
+				return 0;
+			}
+
+			remap_cnt++;
+		}
 
-		if (pfn < PFN_UP(ISA_END_ADDRESS))
-			pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+		left -= P2M_PER_PAGE;
+	}
 
-		(void)HYPERVISOR_update_va_mapping(
-			(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+	/* Max boundary space possible */
+	BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
+
+	/* Now handle the boundary conditions */
+	ident_boundary_pfn = start_pfn;
+	remap_boundary_pfn = remap_pfn;
+	for (i = 0; i < left; i++) {
+		unsigned long mfn;
+
+		/* These two checks move from the start to end boundaries */
+		if (ident_boundary_pfn == ident_start_pfn_align)
+			ident_boundary_pfn = ident_pfn_iter;
+		if (remap_boundary_pfn == remap_start_pfn_align)
+			remap_boundary_pfn = remap_pfn_iter;
+
+		/* Check we aren't past the end */
+		BUG_ON(ident_boundary_pfn >= start_pfn + size);
+		BUG_ON(remap_boundary_pfn >= remap_pfn + size);
+
+		mfn = pfn_to_mfn(ident_boundary_pfn);
+
+		if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
+			WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+				remap_pfn_iter + i, mfn);
+			return 0;
+		}
+		remap_cnt++;
+
+		ident_boundary_pfn++;
+		remap_boundary_pfn++;
 	}
 
-	if (start_pfn < nr_pages)
-		*released += xen_release_chunk(
-			start_pfn, min(end_pfn, nr_pages));
+	/* Finish up the identity map */
+	if (ident_start_pfn_align >= ident_end_pfn_align) {
+		/*
+		 * In this case we have an identity range which does not span an
+		 * aligned block so everything needs to be identity mapped here.
+		 * If we didn't check this we might remap too many pages since
+		 * the align boundaries are not meaningful in this case.
+		 */
+		ident_cnt += set_phys_range_identity(start_pfn,
+			start_pfn + size);
+	} else {
+		/* Remapped above so check each end of the chunk */
+		if (start_pfn < ident_start_pfn_align)
+			ident_cnt += set_phys_range_identity(start_pfn,
+				ident_start_pfn_align);
+		if (start_pfn + size > ident_pfn_iter)
+			ident_cnt += set_phys_range_identity(ident_pfn_iter,
+				start_pfn + size);
+	}
 
-	*identity += set_phys_range_identity(start_pfn, end_pfn);
+	BUG_ON(ident_cnt != size);
+	BUG_ON(remap_cnt != size);
+
+	return size;
 }
 
-static unsigned long __init xen_set_identity_and_release(
-	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
+/*
+ * This function takes a contiguous pfn range that needs to be identity mapped
+ * and:
+ *
+ *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
+ *  2) Calls the do_ function to actually do the mapping/remapping work.
+ *
+ * The goal is to not allocate additional memory but to remap the existing
+ * pages. In the case of an error the underlying memory is simply released back
+ * to Xen and not remapped.
+ */
+static unsigned long __init xen_set_identity_and_remap_chunk(
+	const struct e820entry *list, size_t map_size, unsigned long start_pfn,
+	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
+	unsigned long *identity, unsigned long *remapped,
+	unsigned long *released)
+{
+	unsigned long pfn;
+	unsigned long i = 0;
+	unsigned long n = end_pfn - start_pfn;
+
+	while (i < n) {
+		unsigned long cur_pfn = start_pfn + i;
+		unsigned long left = n - i;
+		unsigned long size = left;
+		unsigned long remap_range_size;
+
+		/* Do not remap pages beyond the current allocation */
+		if (cur_pfn >= nr_pages) {
+			/* Identity map remaining pages */
+			*identity += set_phys_range_identity(cur_pfn,
+				cur_pfn + size);
+			break;
+		}
+		if (cur_pfn + size > nr_pages)
+			size = nr_pages - cur_pfn;
+
+		remap_range_size = xen_find_pfn_range(list, map_size,
+						      &remap_pfn);
+		if (!remap_range_size) {
+			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
+			xen_set_identity_and_release_chunk(cur_pfn,
+				cur_pfn + left, nr_pages, identity, released);
+			break;
+		}
+		/* Adjust size to fit in current e820 RAM region */
+		if (size > remap_range_size)
+			size = remap_range_size;
+
+		if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
+			WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
+				cur_pfn, size, remap_pfn);
+			xen_set_identity_and_release_chunk(cur_pfn,
+				cur_pfn + left, nr_pages, identity, released);
+			break;
+		}
+
+		/* Update variables to reflect new mappings. */
+		i += size;
+		remap_pfn += size;
+		*identity += size;
+		*remapped += size;
+	}
+
+	/*
+	 * If the PFNs are currently mapped, the VA mapping also needs
+	 * to be updated to be 1:1.
+	 */
+	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+		(void)HYPERVISOR_update_va_mapping(
+			(unsigned long)__va(pfn << PAGE_SHIFT),
+			mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+
+	return remap_pfn;
+}
+
+static unsigned long __init xen_set_identity_and_remap(
+	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
+	unsigned long *released)
 {
 	phys_addr_t start = 0;
-	unsigned long released = 0;
 	unsigned long identity = 0;
+	unsigned long remapped = 0;
+	unsigned long last_pfn = nr_pages;
 	const struct e820entry *entry;
+	unsigned long num_released = 0;
 	int i;
 
 	/*
 	 * Combine non-RAM regions and gaps until a RAM region (or the
 	 * end of the map) is reached, then set the 1:1 map and
-	 * release the pages (if available) in those non-RAM regions.
+	 * remap the memory in those non-RAM regions.
 	 *
 	 * The combined non-RAM regions are rounded to a whole number
 	 * of pages so any partial pages are accessible via the 1:1
@@ -269,22 +491,24 @@ static unsigned long __init xen_set_identity_and_release(
 			end_pfn = PFN_UP(entry->addr);
 
 			if (start_pfn < end_pfn)
-				xen_set_identity_and_release_chunk(
-					start_pfn, end_pfn, nr_pages,
-					&released, &identity);
-
+				last_pfn = xen_set_identity_and_remap_chunk(
+						list, map_size, start_pfn,
+						end_pfn, nr_pages, last_pfn,
+						&identity, &remapped,
+						&num_released);
 			start = end;
 		}
 	}
 
-	if (released)
-		printk(KERN_INFO "Released %lu pages of unused memory\n", released);
-	if (identity)
-		printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+	*released = num_released;
 
-	return released;
-}
+	pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
+	pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
+		last_pfn);
+	pr_info("Released %ld page(s)\n", num_released);
 
+	return last_pfn;
+}
 static unsigned long __init xen_get_max_pages(void)
 {
 	unsigned long max_pages = MAX_DOMAIN_PAGES;
@@ -347,7 +571,6 @@ char * __init xen_memory_setup(void)
 	unsigned long max_pages;
 	unsigned long last_pfn = 0;
 	unsigned long extra_pages = 0;
-	unsigned long populated;
 	int i;
 	int op;
 
@@ -392,20 +615,11 @@ char * __init xen_memory_setup(void)
 		extra_pages += max_pages - max_pfn;
 
 	/*
-	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-	 * type PFNs.  Any RAM pages that would be made inaccesible by
-	 * this are first released.
+	 * Set identity map on non-RAM pages and remap the underlying RAM.
 	 */
-	xen_released_pages = xen_set_identity_and_release(
-		map, memmap.nr_entries, max_pfn);
-
-	/*
-	 * Populate back the non-RAM pages and E820 gaps that had been
-	 * released. */
-	populated = xen_populate_chunk(map, memmap.nr_entries,
-			max_pfn, &last_pfn, xen_released_pages);
+	last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
+					      &xen_released_pages);
 
-	xen_released_pages -= populated;
 	extra_pages += xen_released_pages;
 
 	if (last_pfn > max_pfn) {
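The remap path above works in P2M_PER_PAGE-sized blocks: it rounds the start of each range up and the end down to a block boundary, then handles the remaining boundary pfns one by one (at most (P2M_PER_PAGE - 1) * 2 of them). Below is a small sketch of that alignment arithmetic, with made-up pfn values; it is illustrative only.

#include <stdio.h>

#define P2M_PER_PAGE 512UL	/* PAGE_SIZE / sizeof(unsigned long) on x86-64 */

int main(void)
{
	unsigned long start_pfn = 0x9f, size = 0x1000;	/* hypothetical chunk */
	unsigned long mod, start_align, end_align;

	mod = start_pfn % P2M_PER_PAGE;
	start_align = mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
	mod = (start_pfn + size) % P2M_PER_PAGE;
	end_align = start_pfn + size - mod;

	printf("aligned block range: [%#lx, %#lx)\n", start_align, end_align);
	printf("boundary pfns handled singly: %lu\n",
	       (start_align - start_pfn) + (start_pfn + size - end_align));
	return 0;
}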
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7005974c3ff3..c670d7518cf4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -37,6 +37,7 @@
 #include <xen/hvc-console.h>
 #include "xen-ops.h"
 #include "mmu.h"
+#include "smp.h"
 
 cpumask_var_t xen_cpu_initialized_map;
 
@@ -99,10 +100,14 @@ static void cpu_bringup(void)
 	wmb();			/* make sure everything is out */
 }
 
-/* Note: cpu parameter is only relevant for PVH */
-static void cpu_bringup_and_idle(int cpu)
+/*
+ * Note: cpu parameter is only relevant for PVH. The reason for passing it
+ * is we can't do smp_processor_id until the percpu segments are loaded, for
+ * which we need the cpu number! So we pass it in rdi as first parameter.
+ */
+asmlinkage __visible void cpu_bringup_and_idle(int cpu)
 {
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_XEN_PVH
 	if (xen_feature(XENFEAT_auto_translated_physmap) &&
 	    xen_feature(XENFEAT_supervisor_mode_kernel))
 		xen_pvh_secondary_vcpu_init(cpu);
@@ -374,11 +379,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
 	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
 #endif
-	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-
 	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 		ctxt->flags = VGCF_IN_KERNEL;
 		ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 		ctxt->user_regs.ds = __USER_DS;
@@ -413,15 +417,18 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 			(unsigned long)xen_failsafe_callback;
 		ctxt->user_regs.cs = __KERNEL_CS;
 		per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
-#ifdef CONFIG_X86_32
 	}
-#else
-	} else
-		/* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
-		 * %rdi having the cpu number - which means are passing in
-		 * as the first parameter the cpu. Subtle!
+#ifdef CONFIG_XEN_PVH
+	else {
+		/*
+		 * The vcpu comes on kernel page tables which have the NX pte
+		 * bit set. This means before DS/SS is touched, NX in
+		 * EFER must be set. Hence the following assembly glue code.
 		 */
+		ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
 		ctxt->user_regs.rdi = cpu;
+		ctxt->user_regs.rsi = true;  /* entry == true */
+	}
 #endif
 	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index c7c2d89efd76..963d62a35c82 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector);
 extern void xen_send_IPI_all(int vector);
 extern void xen_send_IPI_self(int vector);
 
+#ifdef CONFIG_XEN_PVH
+extern void xen_pvh_early_cpu_init(int cpu, bool entry);
+#else
+static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
+{
+}
+#endif
+
 #endif
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 485b69585540..674b222544b7 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -47,6 +47,41 @@ ENTRY(startup_xen)
 
 	__FINIT
 
+#ifdef CONFIG_XEN_PVH
+/*
+ * xen_pvh_early_cpu_init() - early PVH VCPU initialization
+ * @cpu:   this cpu number (%rdi)
+ * @entry: true if this is a secondary vcpu coming up on this entry
+ *         point, false if this is the boot CPU being initialized for
+ *         the first time (%rsi)
+ *
+ * Note: This is called as a function on the boot CPU, and is the entry point
+ * on the secondary CPU.
+ */
+ENTRY(xen_pvh_early_cpu_init)
+	mov     %rsi, %r11
+
+	/* Gather features to see if NX implemented. */
+	mov     $0x80000001, %eax
+	cpuid
+	mov     %edx, %esi
+
+	mov     $MSR_EFER, %ecx
+	rdmsr
+	bts     $_EFER_SCE, %eax
+
+	bt      $20, %esi
+	jnc     1f      	/* No NX, skip setting it */
+	bts     $_EFER_NX, %eax
+1:	wrmsr
+#ifdef CONFIG_SMP
+	cmp     $0, %r11b
+	jne     cpu_bringup_and_idle
+#endif
+	ret
+
+#endif /* CONFIG_XEN_PVH */
+
 .pushsection .text
 	.balign PAGE_SIZE
52ENTRY(hypercall_page) 87ENTRY(hypercall_page)
@@ -124,6 +159,7 @@ NEXT_HYPERCALL(arch_6)
 	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
 		.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
 	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
+	ELFNOTE(Xen, XEN_ELFNOTE_MOD_START_PFN,  .long 1)
 	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   _ASM_PTR __HYPERVISOR_VIRT_START)
 	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   _ASM_PTR 0)
 
129 165