author		Linus Torvalds <torvalds@linux-foundation.org>	2014-10-11 20:29:01 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-11 20:29:01 -0400
commit		81ae31d78239318610d7c2acb3e2610d622a5aa4 (patch)
tree		1e31b300f1574fceaff065a9bd92460b7c466f7c /arch
parent		ef4a48c513211d842c55e84f7a1c31884b91dcf7 (diff)
parent		95afae481414cbdb0567bf82d5e5077c3ac9da20 (diff)
Merge tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull Xen updates from David Vrabel:
"Features and fixes:
- Add pvscsi frontend and backend drivers.
- Remove _PAGE_IOMAP PTE flag, freeing it for alternate uses.
- Try and keep memory contiguous during PV memory setup (reduces
SWIOTLB usage).
- Allow front/back drivers to use threaded irqs.
- Support large initrds in PV guests.
- Fix PVH guests in preparation for Xen 4.5"
* tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (22 commits)
xen: remove DEFINE_XENBUS_DRIVER() macro
xen/xenbus: Remove BUG_ON() when error string truncated
xen/xenbus: Correct the comments for xenbus_grant_ring()
x86/xen: Set EFER.NX and EFER.SCE in PVH guests
xen: eliminate scalability issues from initrd handling
xen: sync some headers with xen tree
xen: make pvscsi frontend dependent on xenbus frontend
arm{,64}/xen: Remove "EXPERIMENTAL" in the description of the Xen options
xen-scsifront: don't deadlock if the ring becomes full
x86: remove the Xen-specific _PAGE_IOMAP PTE flag
x86/xen: do not use _PAGE_IOMAP PTE flag for I/O mappings
x86: skip check for spurious faults for non-present faults
xen/efi: Directly include needed headers
xen-scsiback: clean up a type issue in scsiback_make_tpg()
xen-scsifront: use GFP_ATOMIC under spin_lock
MAINTAINERS: Add xen pvscsi maintainer
xen-scsiback: Add Xen PV SCSI backend driver
xen-scsifront: Add Xen PV SCSI frontend driver
xen: Add Xen pvSCSI protocol description
xen/events: support threaded irqs for interdomain event channels
...
Diffstat (limited to 'arch')
-rw-r--r--	arch/arm/Kconfig			2
-rw-r--r--	arch/arm64/Kconfig			2
-rw-r--r--	arch/x86/include/asm/pgtable_types.h	11
-rw-r--r--	arch/x86/mm/fault.c			22
-rw-r--r--	arch/x86/mm/init_32.c			2
-rw-r--r--	arch/x86/mm/init_64.c			2
-rw-r--r--	arch/x86/pci/i386.c			2
-rw-r--r--	arch/x86/xen/efi.c			2
-rw-r--r--	arch/x86/xen/enlighten.c		19
-rw-r--r--	arch/x86/xen/mmu.c			48
-rw-r--r--	arch/x86/xen/p2m.c			23
-rw-r--r--	arch/x86/xen/p2m.h			15
-rw-r--r--	arch/x86/xen/setup.c			370
-rw-r--r--	arch/x86/xen/smp.c			29
-rw-r--r--	arch/x86/xen/smp.h			8
-rw-r--r--	arch/x86/xen/xen-head.S			36
16 files changed, 426 insertions, 167 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 18f392f8b744..89c4b5ccc68d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1779,7 +1779,7 @@ config XEN_DOM0
 	depends on XEN
 
 config XEN
-	bool "Xen guest support on ARM (EXPERIMENTAL)"
+	bool "Xen guest support on ARM"
 	depends on ARM && AEABI && OF
 	depends on CPU_V7 && !CPU_V6
 	depends on !GENERIC_ATOMIC64
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c49ca4c738bb..ac9afde76dea 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -349,7 +349,7 @@ config XEN_DOM0
 	depends on XEN
 
 config XEN
-	bool "Xen guest support on ARM64 (EXPERIMENTAL)"
+	bool "Xen guest support on ARM64"
 	depends on ARM64 && OF
 	select SWIOTLB_XEN
 	help
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0f9724c9c510..07789647bf33 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,7 +23,6 @@
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_SOFTW1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_SOFTW1
 #define _PAGE_BIT_SPLITTING	_PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
-#define _PAGE_BIT_IOMAP		_PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
 #define _PAGE_BIT_HIDDEN	_PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
 #define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */
 #define _PAGE_BIT_NX		63	/* No execute: only valid after cpuid check */
@@ -52,7 +51,7 @@
 #define _PAGE_PSE	(_AT(pteval_t, 1) << _PAGE_BIT_PSE)
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #define _PAGE_SOFTW1	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
-#define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
+#define _PAGE_SOFTW2	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE	(_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
@@ -168,10 +167,10 @@
 #define __PAGE_KERNEL_LARGE_NOCACHE	(__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
 
-#define __PAGE_KERNEL_IO		(__PAGE_KERNEL | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_NOCACHE	(__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_UC_MINUS	(__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_WC		(__PAGE_KERNEL_WC | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO		(__PAGE_KERNEL)
+#define __PAGE_KERNEL_IO_NOCACHE	(__PAGE_KERNEL_NOCACHE)
+#define __PAGE_KERNEL_IO_UC_MINUS	(__PAGE_KERNEL_UC_MINUS)
+#define __PAGE_KERNEL_IO_WC		(__PAGE_KERNEL_WC)
 
 #define PAGE_KERNEL			__pgprot(__PAGE_KERNEL)
 #define PAGE_KERNEL_RO			__pgprot(__PAGE_KERNEL_RO)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a24194681513..83bb03bfa259 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -933,8 +933,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
  * cross-processor TLB flush, even if no stale TLB entries exist
  * on other processors.
  *
+ * Spurious faults may only occur if the TLB contains an entry with
+ * fewer permission than the page table entry.  Non-present (P = 0)
+ * and reserved bit (R = 1) faults are never spurious.
+ *
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
+ *
+ * Returns non-zero if a spurious fault was handled, zero otherwise.
+ *
+ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
+ * (Optional Invalidation).
  */
 static noinline int
 spurious_fault(unsigned long error_code, unsigned long address)
@@ -945,8 +954,17 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	pte_t *pte;
 	int ret;
 
-	/* Reserved-bit violation or user access to kernel space? */
-	if (error_code & (PF_USER | PF_RSVD))
+	/*
+	 * Only writes to RO or instruction fetches from NX may cause
+	 * spurious faults.
+	 *
+	 * These could be from user or supervisor accesses but the TLB
+	 * is only lazily flushed after a kernel mapping protection
+	 * change, so user accesses are not expected to cause spurious
+	 * faults.
+	 */
+	if (error_code != (PF_WRITE | PF_PROT)
+	    && error_code != (PF_INSTR | PF_PROT))
 		return 0;
 
 	pgd = init_mm.pgd + pgd_index(address);
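As a standalone illustration of the narrowed filter above: only two error-code patterns can indicate a spurious fault — a write to a read-only page or an instruction fetch from an NX page, both on a present mapping. The sketch below is illustrative, not the kernel function itself; the PF_* bit values mirror the enum in arch/x86/mm/fault.c of this era.

```c
/* Sketch of the narrowed spurious-fault filter (illustrative). */
#define PF_PROT		(1 << 0)	/* fault was on a present page */
#define PF_WRITE	(1 << 1)
#define PF_USER		(1 << 2)
#define PF_RSVD		(1 << 3)
#define PF_INSTR	(1 << 4)

int fault_may_be_spurious(unsigned long error_code)
{
	/* Non-present (P = 0) and reserved-bit (R = 1) faults are never
	 * spurious; only lazily-flushed permission *upgrades* on present
	 * kernel pages can be. */
	return error_code == (PF_WRITE | PF_PROT) ||
	       error_code == (PF_INSTR | PF_PROT);
}
```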
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7d05565ba781..c8140e12816a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -537,7 +537,7 @@ static void __init pagetable_init(void)
 	permanent_kmaps_init(pgd_base);
 }
 
-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
+pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 /* user-defined highmem size */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5621c47d7a1a..5d984769cbd8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on);
  * around without checking the pgd every time.
  */
 
-pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
+pteval_t __supported_pte_mask __read_mostly = ~0;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 int force_personality32;
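The role of this mask: flag bits the CPU (or hypervisor) cannot support are filtered out of every PTE the kernel builds, so with _PAGE_IOMAP gone the 64-bit mask can simply start as ~0 and only lose bits (e.g. _PAGE_NX) after feature checks. A minimal sketch of the filtering idea — the helper name here is a stand-in, not the kernel's:

```c
/* Sketch: unsupported flag bits are stripped from every PTE the kernel
 * constructs. filter_pte_flags() is a hypothetical stand-in for the
 * kernel's pgprot-massaging paths. */
typedef unsigned long pteval_t;

pteval_t supported_pte_mask = ~0UL;	/* bits cleared after CPUID checks */

pteval_t filter_pte_flags(pteval_t val)
{
	return val & supported_pte_mask;
}
```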
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 2ae525e0d8ba..37c1435889ce 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -442,8 +442,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 	 */
 	prot |= _PAGE_CACHE_UC_MINUS;
 
-	prot |= _PAGE_IOMAP;	/* creating a mapping for IO */
-
 	vma->vm_page_prot = __pgprot(prot);
 
 	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a02e09e18f57..be14cc3e48d5 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -15,12 +15,14 @@
  * with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/bitops.h>
 #include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/string.h>
 
 #include <xen/xen-ops.h>
 
+#include <asm/page.h>
 #include <asm/setup.h>
 
 void __init xen_efi_init(void)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0cb11fb5008..acb0effd8077 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu)
 	pv_cpu_ops.load_gdt = xen_load_gdt;
 }
 
+#ifdef CONFIG_XEN_PVH
 /*
  * A PV guest starts with default flags that are not set for PVH, set them
  * here asap.
@@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void)
 		return;
 
 	xen_have_vector_callback = 1;
+
+	xen_pvh_early_cpu_init(0, false);
 	xen_pvh_set_cr_flags(0);
 
 #ifdef CONFIG_X86_32
 	BUG(); /* PVH: Implement proper support. */
 #endif
 }
+#endif    /* CONFIG_XEN_PVH */
 
 /* First C function to be called on Xen boot */
 asmlinkage __visible void __init xen_start_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
+	unsigned long initrd_start = 0;
 	int rc;
 
 	if (!xen_start_info)
@@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_domain_type = XEN_PV_DOMAIN;
 
 	xen_setup_features();
+#ifdef CONFIG_XEN_PVH
 	xen_pvh_early_guest_init();
+#endif
 	xen_setup_machphys_mapping();
 
 	/* Install Xen paravirt ops */
@@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 #endif
 	__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
-	__supported_pte_mask |= _PAGE_IOMAP;
-
 	/*
 	 * Prevent page tables from being allocated in highmem, even
 	 * if CONFIG_HIGHPTE is enabled.
@@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
 #endif
 
+	if (xen_start_info->mod_start) {
+		if (xen_start_info->flags & SIF_MOD_START_PFN)
+			initrd_start = PFN_PHYS(xen_start_info->mod_start);
+		else
+			initrd_start = __pa(xen_start_info->mod_start);
+	}
+
 	/* Poke various useful things into boot_params */
 	boot_params.hdr.type_of_loader = (9 << 4) | 0;
-	boot_params.hdr.ramdisk_image = xen_start_info->mod_start
-		? __pa(xen_start_info->mod_start) : 0;
+	boot_params.hdr.ramdisk_image = initrd_start;
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 16fb0099b7f2..f62af7647ec9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
 			mfn = 0;
 			flags = 0;
-		} else {
-			/*
-			 * Paramount to do this test _after_ the
-			 * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
-			 * IDENTITY_FRAME_BIT resolves to true.
-			 */
-			mfn &= ~FOREIGN_FRAME_BIT;
-			if (mfn & IDENTITY_FRAME_BIT) {
-				mfn &= ~IDENTITY_FRAME_BIT;
-				flags |= _PAGE_IOMAP;
-			}
-		}
+		} else
+			mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
 		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
 	}
 
 	return val;
 }
 
-static pteval_t iomap_pte(pteval_t val)
-{
-	if (val & _PAGE_PRESENT) {
-		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
-		pteval_t flags = val & PTE_FLAGS_MASK;
-
-		/* We assume the pte frame number is a MFN, so
-		   just use it as-is. */
-		val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
-	}
-
-	return val;
-}
-
 __visible pteval_t xen_pte_val(pte_t pte)
 {
 	pteval_t pteval = pte.pte;
@@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte)
 		pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
 	}
 #endif
-	if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
-		return pteval;
-
 	return pte_mfn_to_pfn(pteval);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -481,7 +454,6 @@ void xen_set_pat(u64 pat)
 
 __visible pte_t xen_make_pte(pteval_t pte)
 {
-	phys_addr_t addr = (pte & PTE_PFN_MASK);
 #if 0
 	/* If Linux is trying to set a WC pte, then map to the Xen WC.
 	 * If _PAGE_PAT is set, then it probably means it is really
@@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte)
 			pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
 	}
 #endif
-	/*
-	 * Unprivileged domains are allowed to do IOMAPpings for
-	 * PCI passthrough, but not map ISA space.  The ISA
-	 * mappings are just dummy local mappings to keep other
-	 * parts of the kernel happy.
-	 */
-	if (unlikely(pte & _PAGE_IOMAP) &&
-	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
-		pte = iomap_pte(pte);
-	} else {
-		pte &= ~_PAGE_IOMAP;
-		pte = pte_pfn_to_mfn(pte);
-	}
+	pte = pte_pfn_to_mfn(pte);
 
 	return native_make_pte(pte);
 }
@@ -2091,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 	default:
 		/* By default, set_fixmap is used for hardware mappings */
-		pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+		pte = mfn_pte(phys, prot);
 		break;
 	}
 
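The simplification above works because the p2m stores its annotations as high marker bits on each mfn entry; after this series both markers are stripped unconditionally instead of translating the identity marker into the now-removed _PAGE_IOMAP PTE flag. A hedged sketch of just that masking step — the bit positions shown are illustrative (the Xen headers define them relative to BITS_PER_LONG):

```c
/* Sketch of the new pte_pfn_to_mfn() marker handling. Bit positions
 * are illustrative stand-ins for the definitions in the Xen headers. */
#define FOREIGN_FRAME_BIT	(1UL << 63)	/* frame owned by another domain */
#define IDENTITY_FRAME_BIT	(1UL << 62)	/* pfn == mfn identity entry */

unsigned long strip_p2m_markers(unsigned long mfn)
{
	/* Both markers are plain annotations on the stored mfn, so the
	 * real frame number is recovered by masking them off. */
	return mfn & ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
}
```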
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 3172692381ae..9f5983b01ed9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -173,6 +173,7 @@
 #include <xen/balloon.h>
 #include <xen/grant_table.h>
 
+#include "p2m.h"
 #include "multicalls.h"
 #include "xen-ops.h"
 
@@ -180,12 +181,6 @@ static void __init m2p_override_init(void);
 
 unsigned long xen_max_p2m_pfn __read_mostly;
 
-#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
-
-#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
 /* Placeholders for holes in the address space */
 static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
 static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
@@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
 RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
 RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
 
-/* We might hit two boundary violations at the start and end, at max each
- * boundary violation will require three middle nodes. */
-RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
-
-/* When we populate back during bootup, the amount of pages can vary. The
- * max we have is seen is 395979, but that does not mean it can't be more.
- * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
- * it can re-use Xen provided mfn_list array, so we only need to allocate at
- * most three P2M top nodes. */
-RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
+/* For each I/O range remapped we may lose up to two leaf pages for the boundary
+ * violations and three mid pages to cover up to 3GB. With
+ * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
+ * remapped region.
+ */
+RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
diff --git a/arch/x86/xen/p2m.h b/arch/x86/xen/p2m.h
new file mode 100644
index 000000000000..ad8aee24ab72
--- /dev/null
+++ b/arch/x86/xen/p2m.h
@@ -0,0 +1,15 @@
+#ifndef _XEN_P2M_H
+#define _XEN_P2M_H
+
+#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
+#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
+
+#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
+
+#define MAX_REMAP_RANGES	10
+
+extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
+						    unsigned long pfn_e);
+
+#endif /* _XEN_P2M_H */
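As a worked example of these macros on x86-64 (4 KiB pages, 8-byte longs and pointers), each p2m level holds 512 entries, so the three-level tree covers 512^3 pfns. This small userspace program reproduces the arithmetic:

```c
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))	/* 512 */
#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))	/* 512 */
#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))	/* 512 */
#define MAX_P2M_PFN	(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)

int main(void)
{
	/* 512 * 512 * 512 = 134217728 pfns = 512 GiB of guest space */
	printf("MAX_P2M_PFN = %lu (%lu GiB)\n",
	       MAX_P2M_PFN, MAX_P2M_PFN * PAGE_SIZE >> 30);
	return 0;
}
```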
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2e555163c2fe..af7216128d93 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -29,6 +29,7 @@
 #include <xen/features.h>
 #include "xen-ops.h"
 #include "vdso.h"
+#include "p2m.h"
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 /* Number of pages released from the initial allocation. */
 unsigned long xen_released_pages;
 
+/* Buffer used to remap identity mapped pages */
+unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
+
 /*
  * The maximum amount of extra memory compared to the base size.  The
  * main scaling factor is the size of struct page.  At extreme ratios
@@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start,
 	return len;
 }
 
-static unsigned long __init xen_release_chunk(unsigned long start,
-					      unsigned long end)
-{
-	return xen_do_chunk(start, end, true);
-}
-
-static unsigned long __init xen_populate_chunk(
+/*
+ * Finds the next RAM pfn available in the E820 map after min_pfn.
+ * This function updates min_pfn with the pfn found and returns
+ * the size of that range or zero if not found.
+ */
+static unsigned long __init xen_find_pfn_range(
 	const struct e820entry *list, size_t map_size,
-	unsigned long max_pfn, unsigned long *last_pfn,
-	unsigned long credits_left)
+	unsigned long *min_pfn)
 {
 	const struct e820entry *entry;
 	unsigned int i;
 	unsigned long done = 0;
-	unsigned long dest_pfn;
 
 	for (i = 0, entry = list; i < map_size; i++, entry++) {
 		unsigned long s_pfn;
 		unsigned long e_pfn;
-		unsigned long pfns;
-		long capacity;
-
-		if (credits_left <= 0)
-			break;
 
 		if (entry->type != E820_RAM)
 			continue;
 
 		e_pfn = PFN_DOWN(entry->addr + entry->size);
 
-		/* We only care about E820 after the xen_start_info->nr_pages */
-		if (e_pfn <= max_pfn)
+		/* We only care about E820 after this */
+		if (e_pfn < *min_pfn)
 			continue;
 
 		s_pfn = PFN_UP(entry->addr);
-		/* If the E820 falls within the nr_pages, we want to start
-		 * at the nr_pages PFN.
-		 * If that would mean going past the E820 entry, skip it
+
+		/* If min_pfn falls within the E820 entry, we want to start
+		 * at the min_pfn PFN.
 		 */
-		if (s_pfn <= max_pfn) {
-			capacity = e_pfn - max_pfn;
-			dest_pfn = max_pfn;
+		if (s_pfn <= *min_pfn) {
+			done = e_pfn - *min_pfn;
 		} else {
-			capacity = e_pfn - s_pfn;
-			dest_pfn = s_pfn;
+			done = e_pfn - s_pfn;
+			*min_pfn = s_pfn;
 		}
+		break;
+	}
 
-		if (credits_left < capacity)
-			capacity = credits_left;
+	return done;
+}
 
-		pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
-		done += pfns;
-		*last_pfn = (dest_pfn + pfns);
-		if (pfns < capacity)
-			break;
-		credits_left -= pfns;
+/*
+ * This releases a chunk of memory and then does the identity map. It's used as
+ * as a fallback if the remapping fails.
+ */
+static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
+	unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
+	unsigned long *released)
+{
+	WARN_ON(start_pfn > end_pfn);
+
+	/* Need to release pages first */
+	*released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
+	*identity += set_phys_range_identity(start_pfn, end_pfn);
+}
+
+/*
+ * Helper function to update both the p2m and m2p tables.
+ */
+static unsigned long __init xen_update_mem_tables(unsigned long pfn,
+						  unsigned long mfn)
+{
+	struct mmu_update update = {
+		.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+		.val = pfn
+	};
+
+	/* Update p2m */
+	if (!early_set_phys_to_machine(pfn, mfn)) {
+		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
+		     pfn, mfn);
+		return false;
 	}
-	return done;
+
+	/* Update m2p */
+	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
+		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
+		     mfn, pfn);
+		return false;
+	}
+
+	return true;
 }
 
-static void __init xen_set_identity_and_release_chunk(
-	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
-	unsigned long *released, unsigned long *identity)
+/*
+ * This function updates the p2m and m2p tables with an identity map from
+ * start_pfn to start_pfn+size and remaps the underlying RAM of the original
+ * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
+ * to not exhaust the reserved brk space. Doing it in properly aligned blocks
+ * ensures we only allocate the minimum required leaf pages in the p2m table. It
+ * copies the existing mfns from the p2m table under the 1:1 map, overwrites
+ * them with the identity map and then updates the p2m and m2p tables with the
+ * remapped memory.
+ */
+static unsigned long __init xen_do_set_identity_and_remap_chunk(
+	unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
 {
-	unsigned long pfn;
+	unsigned long ident_pfn_iter, remap_pfn_iter;
+	unsigned long ident_start_pfn_align, remap_start_pfn_align;
+	unsigned long ident_end_pfn_align, remap_end_pfn_align;
+	unsigned long ident_boundary_pfn, remap_boundary_pfn;
+	unsigned long ident_cnt = 0;
+	unsigned long remap_cnt = 0;
+	unsigned long left = size;
+	unsigned long mod;
+	int i;
+
+	WARN_ON(size == 0);
+
+	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
 
 	/*
-	 * If the PFNs are currently mapped, clear the mappings
-	 * (except for the ISA region which must be 1:1 mapped) to
-	 * release the refcounts (in Xen) on the original frames.
+	 * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
+	 * blocks. We need to keep track of both the existing pfn mapping and
+	 * the new pfn remapping.
 	 */
-	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
-		pte_t pte = __pte_ma(0);
+	mod = start_pfn % P2M_PER_PAGE;
+	ident_start_pfn_align =
+		mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
+	mod = remap_pfn % P2M_PER_PAGE;
+	remap_start_pfn_align =
+		mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
+	mod = (start_pfn + size) % P2M_PER_PAGE;
+	ident_end_pfn_align = start_pfn + size - mod;
+	mod = (remap_pfn + size) % P2M_PER_PAGE;
+	remap_end_pfn_align = remap_pfn + size - mod;
+
+	/* Iterate over each p2m leaf node in each range */
+	for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
+	     ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
+	     ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
+		/* Check we aren't past the end */
+		BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
+		BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
+
+		/* Save p2m mappings */
+		for (i = 0; i < P2M_PER_PAGE; i++)
+			xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
+
+		/* Set identity map which will free a p2m leaf */
+		ident_cnt += set_phys_range_identity(ident_pfn_iter,
+			ident_pfn_iter + P2M_PER_PAGE);
+
+#ifdef DEBUG
+		/* Helps verify a p2m leaf has been freed */
+		for (i = 0; i < P2M_PER_PAGE; i++) {
+			unsigned int pfn = ident_pfn_iter + i;
+			BUG_ON(pfn_to_mfn(pfn) != pfn);
+		}
+#endif
+		/* Now remap memory */
+		for (i = 0; i < P2M_PER_PAGE; i++) {
+			unsigned long mfn = xen_remap_buf[i];
+
+			/* This will use the p2m leaf freed above */
+			if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
+				WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+					remap_pfn_iter + i, mfn);
+				return 0;
+			}
+
+			remap_cnt++;
+		}
 
-		if (pfn < PFN_UP(ISA_END_ADDRESS))
-			pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+		left -= P2M_PER_PAGE;
+	}
 
-		(void)HYPERVISOR_update_va_mapping(
-			(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+	/* Max boundary space possible */
+	BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
+
+	/* Now handle the boundary conditions */
+	ident_boundary_pfn = start_pfn;
+	remap_boundary_pfn = remap_pfn;
+	for (i = 0; i < left; i++) {
+		unsigned long mfn;
+
+		/* These two checks move from the start to end boundaries */
+		if (ident_boundary_pfn == ident_start_pfn_align)
+			ident_boundary_pfn = ident_pfn_iter;
+		if (remap_boundary_pfn == remap_start_pfn_align)
+			remap_boundary_pfn = remap_pfn_iter;
+
+		/* Check we aren't past the end */
+		BUG_ON(ident_boundary_pfn >= start_pfn + size);
+		BUG_ON(remap_boundary_pfn >= remap_pfn + size);
+
+		mfn = pfn_to_mfn(ident_boundary_pfn);
+
+		if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
+			WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+				remap_pfn_iter + i, mfn);
+			return 0;
+		}
+		remap_cnt++;
+
+		ident_boundary_pfn++;
+		remap_boundary_pfn++;
 	}
 
-	if (start_pfn < nr_pages)
-		*released += xen_release_chunk(
-			start_pfn, min(end_pfn, nr_pages));
+	/* Finish up the identity map */
+	if (ident_start_pfn_align >= ident_end_pfn_align) {
+		/*
+		 * In this case we have an identity range which does not span an
+		 * aligned block so everything needs to be identity mapped here.
+		 * If we didn't check this we might remap too many pages since
+		 * the align boundaries are not meaningful in this case.
+		 */
+		ident_cnt += set_phys_range_identity(start_pfn,
+			start_pfn + size);
+	} else {
+		/* Remapped above so check each end of the chunk */
+		if (start_pfn < ident_start_pfn_align)
+			ident_cnt += set_phys_range_identity(start_pfn,
+				ident_start_pfn_align);
+		if (start_pfn + size > ident_pfn_iter)
+			ident_cnt += set_phys_range_identity(ident_pfn_iter,
+				start_pfn + size);
+	}
 
-	*identity += set_phys_range_identity(start_pfn, end_pfn);
+	BUG_ON(ident_cnt != size);
+	BUG_ON(remap_cnt != size);
+
+	return size;
 }
 
-static unsigned long __init xen_set_identity_and_release(
-	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
+/*
+ * This function takes a contiguous pfn range that needs to be identity mapped
+ * and:
+ *
+ *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
+ *  2) Calls the do_ function to actually do the mapping/remapping work.
+ *
+ * The goal is to not allocate additional memory but to remap the existing
+ * pages. In the case of an error the underlying memory is simply released back
+ * to Xen and not remapped.
+ */
+static unsigned long __init xen_set_identity_and_remap_chunk(
+	const struct e820entry *list, size_t map_size, unsigned long start_pfn,
+	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
+	unsigned long *identity, unsigned long *remapped,
+	unsigned long *released)
+{
+	unsigned long pfn;
+	unsigned long i = 0;
+	unsigned long n = end_pfn - start_pfn;
+
+	while (i < n) {
+		unsigned long cur_pfn = start_pfn + i;
+		unsigned long left = n - i;
+		unsigned long size = left;
+		unsigned long remap_range_size;
+
+		/* Do not remap pages beyond the current allocation */
+		if (cur_pfn >= nr_pages) {
+			/* Identity map remaining pages */
+			*identity += set_phys_range_identity(cur_pfn,
+				cur_pfn + size);
+			break;
+		}
+		if (cur_pfn + size > nr_pages)
+			size = nr_pages - cur_pfn;
+
+		remap_range_size = xen_find_pfn_range(list, map_size,
+						      &remap_pfn);
+		if (!remap_range_size) {
+			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
+			xen_set_identity_and_release_chunk(cur_pfn,
+				cur_pfn + left, nr_pages, identity, released);
+			break;
+		}
+		/* Adjust size to fit in current e820 RAM region */
+		if (size > remap_range_size)
+			size = remap_range_size;
+
+		if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
+			WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
+				cur_pfn, size, remap_pfn);
+			xen_set_identity_and_release_chunk(cur_pfn,
+				cur_pfn + left, nr_pages, identity, released);
+			break;
+		}
+
+		/* Update variables to reflect new mappings. */
+		i += size;
+		remap_pfn += size;
+		*identity += size;
+		*remapped += size;
+	}
+
+	/*
+	 * If the PFNs are currently mapped, the VA mapping also needs
+	 * to be updated to be 1:1.
+	 */
+	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+		(void)HYPERVISOR_update_va_mapping(
+			(unsigned long)__va(pfn << PAGE_SHIFT),
+			mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+
+	return remap_pfn;
+}
+
+static unsigned long __init xen_set_identity_and_remap(
+	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
+	unsigned long *released)
 {
 	phys_addr_t start = 0;
-	unsigned long released = 0;
 	unsigned long identity = 0;
+	unsigned long remapped = 0;
+	unsigned long last_pfn = nr_pages;
 	const struct e820entry *entry;
+	unsigned long num_released = 0;
 	int i;
 
 	/*
 	 * Combine non-RAM regions and gaps until a RAM region (or the
 	 * end of the map) is reached, then set the 1:1 map and
-	 * release the pages (if available) in those non-RAM regions.
+	 * remap the memory in those non-RAM regions.
 	 *
 	 * The combined non-RAM regions are rounded to a whole number
 	 * of pages so any partial pages are accessible via the 1:1
@@ -269,22 +491,24 @@ static unsigned long __init xen_set_identity_and_release(
 			end_pfn = PFN_UP(entry->addr);
 
 			if (start_pfn < end_pfn)
-				xen_set_identity_and_release_chunk(
-					start_pfn, end_pfn, nr_pages,
-					&released, &identity);
-
+				last_pfn = xen_set_identity_and_remap_chunk(
+						list, map_size, start_pfn,
+						end_pfn, nr_pages, last_pfn,
+						&identity, &remapped,
+						&num_released);
 			start = end;
 		}
 	}
 
-	if (released)
-		printk(KERN_INFO "Released %lu pages of unused memory\n", released);
-	if (identity)
-		printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+	*released = num_released;
 
-	return released;
-}
+	pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
+	pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
+		last_pfn);
+	pr_info("Released %ld page(s)\n", num_released);
 
+	return last_pfn;
+}
 static unsigned long __init xen_get_max_pages(void)
 {
 	unsigned long max_pages = MAX_DOMAIN_PAGES;
@@ -347,7 +571,6 @@ char * __init xen_memory_setup(void)
 	unsigned long max_pages;
 	unsigned long last_pfn = 0;
 	unsigned long extra_pages = 0;
-	unsigned long populated;
 	int i;
 	int op;
 
@@ -392,20 +615,11 @@ char * __init xen_memory_setup(void)
 	extra_pages += max_pages - max_pfn;
 
 	/*
-	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-	 * type PFNs.  Any RAM pages that would be made inaccesible by
-	 * this are first released.
+	 * Set identity map on non-RAM pages and remap the underlying RAM.
 	 */
-	xen_released_pages = xen_set_identity_and_release(
-		map, memmap.nr_entries, max_pfn);
-
-	/*
-	 * Populate back the non-RAM pages and E820 gaps that had been
-	 * released. */
-	populated = xen_populate_chunk(map, memmap.nr_entries,
-			max_pfn, &last_pfn, xen_released_pages);
+	last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
+					      &xen_released_pages);
 
-	xen_released_pages -= populated;
 	extra_pages += xen_released_pages;
 
 	if (last_pfn > max_pfn) {
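The block-alignment arithmetic in xen_do_set_identity_and_remap_chunk() above is the core trick: each range is rounded inward to whole P2M_PER_PAGE blocks so only full p2m leaves are rewritten, and the ragged ends are handled one pfn at a time. A small standalone sketch of just that rounding, with a worked example (helper names are mine, not the kernel's):

```c
#include <stdio.h>

#define P2M_PER_PAGE	512UL	/* pfns covered by one p2m leaf page */

/* Round a range start up to the next whole-leaf boundary. */
unsigned long round_start_up(unsigned long pfn)
{
	unsigned long mod = pfn % P2M_PER_PAGE;
	return mod ? pfn - mod + P2M_PER_PAGE : pfn;
}

/* Round a range end down to the previous whole-leaf boundary. */
unsigned long round_end_down(unsigned long pfn, unsigned long size)
{
	return pfn + size - ((pfn + size) % P2M_PER_PAGE);
}

int main(void)
{
	unsigned long start_pfn = 1000, size = 2000;

	/* Aligned body is [1024, 2560), rewritten a leaf at a time; pfns
	 * 1000-1023 and 2560-2999 are boundary work, bounded by
	 * 2 * (P2M_PER_PAGE - 1) as the patch's BUG_ON asserts. */
	printf("body: [%lu, %lu)\n",
	       round_start_up(start_pfn),
	       round_end_down(start_pfn, size));
	return 0;
}
```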
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7005974c3ff3..c670d7518cf4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -37,6 +37,7 @@
 #include <xen/hvc-console.h>
 #include "xen-ops.h"
 #include "mmu.h"
+#include "smp.h"
 
 cpumask_var_t xen_cpu_initialized_map;
 
@@ -99,10 +100,14 @@ static void cpu_bringup(void)
 	wmb();			/* make sure everything is out */
 }
 
-/* Note: cpu parameter is only relevant for PVH */
-static void cpu_bringup_and_idle(int cpu)
+/*
+ * Note: cpu parameter is only relevant for PVH. The reason for passing it
+ * is we can't do smp_processor_id until the percpu segments are loaded, for
+ * which we need the cpu number! So we pass it in rdi as first parameter.
+ */
+asmlinkage __visible void cpu_bringup_and_idle(int cpu)
 {
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_XEN_PVH
 	if (xen_feature(XENFEAT_auto_translated_physmap) &&
 	    xen_feature(XENFEAT_supervisor_mode_kernel))
 		xen_pvh_secondary_vcpu_init(cpu);
@@ -374,11 +379,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
 	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
 #endif
-	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-
 	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 		ctxt->flags = VGCF_IN_KERNEL;
 		ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 		ctxt->user_regs.ds = __USER_DS;
@@ -413,15 +417,18 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 			(unsigned long)xen_failsafe_callback;
 		ctxt->user_regs.cs = __KERNEL_CS;
 		per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
-#ifdef CONFIG_X86_32
 	}
-#else
-	} else
-		/* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
-		 * %rdi having the cpu number - which means are passing in
-		 * as the first parameter the cpu. Subtle!
+#ifdef CONFIG_XEN_PVH
+	else {
+		/*
+		 * The vcpu comes on kernel page tables which have the NX pte
+		 * bit set. This means before DS/SS is touched, NX in
+		 * EFER must be set. Hence the following assembly glue code.
 		 */
+		ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
 		ctxt->user_regs.rdi = cpu;
+		ctxt->user_regs.rsi = true; /* entry == true */
+	}
 #endif
 	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index c7c2d89efd76..963d62a35c82 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector);
 extern void xen_send_IPI_all(int vector);
 extern void xen_send_IPI_self(int vector);
 
+#ifdef CONFIG_XEN_PVH
+extern void xen_pvh_early_cpu_init(int cpu, bool entry);
+#else
+static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
+{
+}
+#endif
+
 #endif
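This header uses the common Kconfig-stub idiom: a real declaration when CONFIG_XEN_PVH is enabled and an empty static inline otherwise, so callers need no #ifdef of their own and the compiler discards the call entirely in !PVH builds. The same pattern in generic form — CONFIG_FEATURE_X and feature_x_init() are placeholders, not kernel symbols:

```c
/* Generic form of the stub idiom above (placeholder names). */
#ifdef CONFIG_FEATURE_X
extern void feature_x_init(int cpu);
#else
static inline void feature_x_init(int cpu) { }	/* compiles away */
#endif
```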
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 485b69585540..674b222544b7 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -47,6 +47,41 @@ ENTRY(startup_xen)
 
 	__FINIT
 
+#ifdef CONFIG_XEN_PVH
+/*
+ * xen_pvh_early_cpu_init() - early PVH VCPU initialization
+ * @cpu:   this cpu number (%rdi)
+ * @entry: true if this is a secondary vcpu coming up on this entry
+ *         point, false if this is the boot CPU being initialized for
+ *         the first time (%rsi)
+ *
+ * Note: This is called as a function on the boot CPU, and is the entry point
+ *	 on the secondary CPU.
+ */
+ENTRY(xen_pvh_early_cpu_init)
+	mov     %rsi, %r11
+
+	/* Gather features to see if NX implemented. */
+	mov     $0x80000001, %eax
+	cpuid
+	mov     %edx, %esi
+
+	mov     $MSR_EFER, %ecx
+	rdmsr
+	bts     $_EFER_SCE, %eax
+
+	bt      $20, %esi
+	jnc     1f      	/* No NX, skip setting it */
+	bts     $_EFER_NX, %eax
+1:	wrmsr
+#ifdef CONFIG_SMP
+	cmp     $0, %r11b
+	jne     cpu_bringup_and_idle
+#endif
+	ret
+
+#endif /* CONFIG_XEN_PVH */
+
 .pushsection .text
 	.balign PAGE_SIZE
 ENTRY(hypercall_page)
@@ -124,6 +159,7 @@ NEXT_HYPERCALL(arch_6)
 	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
 		.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
 	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
+	ELFNOTE(Xen, XEN_ELFNOTE_MOD_START_PFN, .long 1)
 	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   _ASM_PTR __HYPERVISOR_VIRT_START)
 	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   _ASM_PTR 0)
 
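For readers not fluent in assembly, here is a C-level rendering of what the new xen_pvh_early_cpu_init stub does; the real version must be assembly because it runs before the kernel's C environment (stack, percpu segments, NX-safe mappings) exists. The cpuid/rdmsr/wrmsr helpers below are assumed primitives, not real kernel APIs:

```c
#include <stdbool.h>
#include <stdint.h>

/* Assumed primitives, normally provided by arch code. */
extern void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b,
		  uint32_t *c, uint32_t *d);
extern uint64_t rdmsr(uint32_t msr);
extern void wrmsr(uint32_t msr, uint64_t val);
extern void cpu_bringup_and_idle(int cpu);

#define MSR_EFER	0xc0000080
#define _EFER_SCE	0	/* syscall enable */
#define _EFER_NX	11	/* no-execute enable */

void xen_pvh_early_cpu_init_c(int cpu, bool entry)
{
	uint32_t eax, ebx, ecx, edx;
	uint64_t efer;

	/* CPUID leaf 0x80000001: EDX bit 20 reports NX support. */
	cpuid(0x80000001, &eax, &ebx, &ecx, &edx);

	efer = rdmsr(MSR_EFER);
	efer |= 1ULL << _EFER_SCE;		/* always enable SYSCALL */
	if (edx & (1U << 20))
		efer |= 1ULL << _EFER_NX;	/* enable NX if implemented */
	wrmsr(MSR_EFER, efer);

	if (entry)				/* secondary vcpu entry point */
		cpu_bringup_and_idle(cpu);
	/* else: boot CPU, plain function call, just return */
}
```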