diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-16 16:23:03 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-16 16:23:03 -0500 |
| commit | eb64c3c6cdb8fa8a4d324eb71a9033b62e150918 (patch) | |
| tree | 035267874e94585ea25a5ad50f433c8470a5ec27 | |
| parent | 61de8e53640ceeda564a65170a46c1edc2b37e11 (diff) | |
| parent | f1d04b23b2015b4c3c0a8419677179b133afea08 (diff) | |
Merge tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull additional xen update from David Vrabel:
"Xen: additional features for 3.19-rc0
- Linear p2m for x86 PV guests which simplifies the p2m code,
improves performance and will allow for > 512 GB PV guests in the
future.
A last-minute, configuration-specific issue was discovered with this
change which is why it was not included in my previous pull request.
This has now been fixed and tested"
* tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen: switch to post-init routines in xen mmu.c earlier
Revert "swiotlb-xen: pass dev_addr to swiotlb_tbl_unmap_single"
xen: annotate xen_set_identity_and_remap_chunk() with __init
xen: introduce helper functions to do safe read and write accesses
xen: Speed up set_phys_to_machine() by using read-only mappings
xen: switch to linear virtual mapped sparse p2m list
xen: Hide get_phys_to_machine() to be able to tune common path
x86: Introduce function to get pmd entry pointer
xen: Delay invalidating extra memory
xen: Delay m2p_override initialization
xen: Delay remapping memory of pv-domain
xen: use common page allocation function in p2m.c
xen: Make functions static
xen: fix some style issues in p2m.c
| -rw-r--r-- | arch/x86/include/asm/pgtable_types.h | 1 | ||||
| -rw-r--r-- | arch/x86/include/asm/xen/page.h | 64 | ||||
| -rw-r--r-- | arch/x86/mm/pageattr.c | 20 | ||||
| -rw-r--r-- | arch/x86/xen/mmu.c | 40 | ||||
| -rw-r--r-- | arch/x86/xen/p2m.c | 1172 | ||||
| -rw-r--r-- | arch/x86/xen/setup.c | 441 | ||||
| -rw-r--r-- | arch/x86/xen/xen-ops.h | 6 |
7 files changed, 779 insertions, 965 deletions
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index af447f95e3be..25bcd4a89517 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
| @@ -452,6 +452,7 @@ static inline void update_page_count(int level, unsigned long pages) { } | |||
| 452 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); | 452 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); |
| 453 | extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, | 453 | extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, |
| 454 | unsigned int *level); | 454 | unsigned int *level); |
| 455 | extern pmd_t *lookup_pmd_address(unsigned long address); | ||
| 455 | extern phys_addr_t slow_virt_to_phys(void *__address); | 456 | extern phys_addr_t slow_virt_to_phys(void *__address); |
| 456 | extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, | 457 | extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, |
| 457 | unsigned numpages, unsigned long page_flags); | 458 | unsigned numpages, unsigned long page_flags); |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index f58ef6c0613b..5eea09915a15 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
| @@ -41,10 +41,12 @@ typedef struct xpaddr { | |||
| 41 | 41 | ||
| 42 | extern unsigned long *machine_to_phys_mapping; | 42 | extern unsigned long *machine_to_phys_mapping; |
| 43 | extern unsigned long machine_to_phys_nr; | 43 | extern unsigned long machine_to_phys_nr; |
| 44 | extern unsigned long *xen_p2m_addr; | ||
| 45 | extern unsigned long xen_p2m_size; | ||
| 46 | extern unsigned long xen_max_p2m_pfn; | ||
| 44 | 47 | ||
| 45 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 48 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
| 46 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 49 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
| 47 | extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn); | ||
| 48 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 50 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
| 49 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, | 51 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, |
| 50 | unsigned long pfn_e); | 52 | unsigned long pfn_e); |
| @@ -52,17 +54,52 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, | |||
| 52 | extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, | 54 | extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, |
| 53 | struct gnttab_map_grant_ref *kmap_ops, | 55 | struct gnttab_map_grant_ref *kmap_ops, |
| 54 | struct page **pages, unsigned int count); | 56 | struct page **pages, unsigned int count); |
| 55 | extern int m2p_add_override(unsigned long mfn, struct page *page, | ||
| 56 | struct gnttab_map_grant_ref *kmap_op); | ||
| 57 | extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, | 57 | extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, |
| 58 | struct gnttab_map_grant_ref *kmap_ops, | 58 | struct gnttab_map_grant_ref *kmap_ops, |
| 59 | struct page **pages, unsigned int count); | 59 | struct page **pages, unsigned int count); |
| 60 | extern int m2p_remove_override(struct page *page, | ||
| 61 | struct gnttab_map_grant_ref *kmap_op, | ||
| 62 | unsigned long mfn); | ||
| 63 | extern struct page *m2p_find_override(unsigned long mfn); | ||
| 64 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); | 60 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); |
| 65 | 61 | ||
| 62 | /* | ||
| 63 | * Helper functions to write or read unsigned long values to/from | ||
| 64 | * memory, when the access may fault. | ||
| 65 | */ | ||
| 66 | static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) | ||
| 67 | { | ||
| 68 | return __put_user(val, (unsigned long __user *)addr); | ||
| 69 | } | ||
| 70 | |||
| 71 | static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val) | ||
| 72 | { | ||
| 73 | return __get_user(*val, (unsigned long __user *)addr); | ||
| 74 | } | ||
| 75 | |||
| 76 | /* | ||
| 77 | * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine(): | ||
| 78 | * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator | ||
| 79 | * bits (identity or foreign) are set. | ||
| 80 | * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set | ||
| 81 | * identity or foreign indicator will be still set. __pfn_to_mfn() is | ||
| 82 | * encapsulating get_phys_to_machine() which is called in special cases only. | ||
| 83 | * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special | ||
| 84 | * cases needing an extended handling. | ||
| 85 | */ | ||
| 86 | static inline unsigned long __pfn_to_mfn(unsigned long pfn) | ||
| 87 | { | ||
| 88 | unsigned long mfn; | ||
| 89 | |||
| 90 | if (pfn < xen_p2m_size) | ||
| 91 | mfn = xen_p2m_addr[pfn]; | ||
| 92 | else if (unlikely(pfn < xen_max_p2m_pfn)) | ||
| 93 | return get_phys_to_machine(pfn); | ||
| 94 | else | ||
| 95 | return IDENTITY_FRAME(pfn); | ||
| 96 | |||
| 97 | if (unlikely(mfn == INVALID_P2M_ENTRY)) | ||
| 98 | return get_phys_to_machine(pfn); | ||
| 99 | |||
| 100 | return mfn; | ||
| 101 | } | ||
| 102 | |||
| 66 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | 103 | static inline unsigned long pfn_to_mfn(unsigned long pfn) |
| 67 | { | 104 | { |
| 68 | unsigned long mfn; | 105 | unsigned long mfn; |
| @@ -70,7 +107,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) | |||
| 70 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 107 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
| 71 | return pfn; | 108 | return pfn; |
| 72 | 109 | ||
| 73 | mfn = get_phys_to_machine(pfn); | 110 | mfn = __pfn_to_mfn(pfn); |
| 74 | 111 | ||
| 75 | if (mfn != INVALID_P2M_ENTRY) | 112 | if (mfn != INVALID_P2M_ENTRY) |
| 76 | mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); | 113 | mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); |
| @@ -83,7 +120,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) | |||
| 83 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 120 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
| 84 | return 1; | 121 | return 1; |
| 85 | 122 | ||
| 86 | return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; | 123 | return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY; |
| 87 | } | 124 | } |
| 88 | 125 | ||
| 89 | static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) | 126 | static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) |
| @@ -102,7 +139,7 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) | |||
| 102 | * In such cases it doesn't matter what we return (we return garbage), | 139 | * In such cases it doesn't matter what we return (we return garbage), |
| 103 | * but we must handle the fault without crashing! | 140 | * but we must handle the fault without crashing! |
| 104 | */ | 141 | */ |
| 105 | ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); | 142 | ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn); |
| 106 | if (ret < 0) | 143 | if (ret < 0) |
| 107 | return ~0; | 144 | return ~0; |
| 108 | 145 | ||
| @@ -117,7 +154,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
| 117 | return mfn; | 154 | return mfn; |
| 118 | 155 | ||
| 119 | pfn = mfn_to_pfn_no_overrides(mfn); | 156 | pfn = mfn_to_pfn_no_overrides(mfn); |
| 120 | if (get_phys_to_machine(pfn) != mfn) { | 157 | if (__pfn_to_mfn(pfn) != mfn) { |
| 121 | /* | 158 | /* |
| 122 | * If this appears to be a foreign mfn (because the pfn | 159 | * If this appears to be a foreign mfn (because the pfn |
| 123 | * doesn't map back to the mfn), then check the local override | 160 | * doesn't map back to the mfn), then check the local override |
| @@ -133,8 +170,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
| 133 | * entry doesn't map back to the mfn and m2p_override doesn't have a | 170 | * entry doesn't map back to the mfn and m2p_override doesn't have a |
| 134 | * valid entry for it. | 171 | * valid entry for it. |
| 135 | */ | 172 | */ |
| 136 | if (pfn == ~0 && | 173 | if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn)) |
| 137 | get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn)) | ||
| 138 | pfn = mfn; | 174 | pfn = mfn; |
| 139 | 175 | ||
| 140 | return pfn; | 176 | return pfn; |
| @@ -180,7 +216,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | |||
| 180 | return mfn; | 216 | return mfn; |
| 181 | 217 | ||
| 182 | pfn = mfn_to_pfn(mfn); | 218 | pfn = mfn_to_pfn(mfn); |
| 183 | if (get_phys_to_machine(pfn) != mfn) | 219 | if (__pfn_to_mfn(pfn) != mfn) |
| 184 | return -1; /* force !pfn_valid() */ | 220 | return -1; /* force !pfn_valid() */ |
| 185 | return pfn; | 221 | return pfn; |
| 186 | } | 222 | } |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dfaf2e0f5f8f..536ea2fb6e33 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
| @@ -384,6 +384,26 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, | |||
| 384 | } | 384 | } |
| 385 | 385 | ||
| 386 | /* | 386 | /* |
| 387 | * Lookup the PMD entry for a virtual address. Return a pointer to the entry | ||
| 388 | * or NULL if not present. | ||
| 389 | */ | ||
| 390 | pmd_t *lookup_pmd_address(unsigned long address) | ||
| 391 | { | ||
| 392 | pgd_t *pgd; | ||
| 393 | pud_t *pud; | ||
| 394 | |||
| 395 | pgd = pgd_offset_k(address); | ||
| 396 | if (pgd_none(*pgd)) | ||
| 397 | return NULL; | ||
| 398 | |||
| 399 | pud = pud_offset(pgd, address); | ||
| 400 | if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud)) | ||
| 401 | return NULL; | ||
| 402 | |||
| 403 | return pmd_offset(pud, address); | ||
| 404 | } | ||
| 405 | |||
| 406 | /* | ||
| 387 | * This is necessary because __pa() does not work on some | 407 | * This is necessary because __pa() does not work on some |
| 388 | * kinds of memory, like vmalloc() or the alloc_remap() | 408 | * kinds of memory, like vmalloc() or the alloc_remap() |
| 389 | * areas on 32-bit NUMA systems. The percpu areas can | 409 | * areas on 32-bit NUMA systems. The percpu areas can |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 8c8298d78185..5c1f9ace7ae7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
| @@ -387,7 +387,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) | |||
| 387 | unsigned long mfn; | 387 | unsigned long mfn; |
| 388 | 388 | ||
| 389 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 389 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
| 390 | mfn = get_phys_to_machine(pfn); | 390 | mfn = __pfn_to_mfn(pfn); |
| 391 | else | 391 | else |
| 392 | mfn = pfn; | 392 | mfn = pfn; |
| 393 | /* | 393 | /* |
| @@ -1113,20 +1113,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr, | |||
| 1113 | * instead of somewhere later and be confusing. */ | 1113 | * instead of somewhere later and be confusing. */ |
| 1114 | xen_mc_flush(); | 1114 | xen_mc_flush(); |
| 1115 | } | 1115 | } |
| 1116 | static void __init xen_pagetable_p2m_copy(void) | 1116 | |
| 1117 | static void __init xen_pagetable_p2m_free(void) | ||
| 1117 | { | 1118 | { |
| 1118 | unsigned long size; | 1119 | unsigned long size; |
| 1119 | unsigned long addr; | 1120 | unsigned long addr; |
| 1120 | unsigned long new_mfn_list; | ||
| 1121 | |||
| 1122 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
| 1123 | return; | ||
| 1124 | 1121 | ||
| 1125 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1122 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
| 1126 | 1123 | ||
| 1127 | new_mfn_list = xen_revector_p2m_tree(); | ||
| 1128 | /* No memory or already called. */ | 1124 | /* No memory or already called. */ |
| 1129 | if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) | 1125 | if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list) |
| 1130 | return; | 1126 | return; |
| 1131 | 1127 | ||
| 1132 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ | 1128 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ |
| @@ -1144,8 +1140,6 @@ static void __init xen_pagetable_p2m_copy(void) | |||
| 1144 | 1140 | ||
| 1145 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1141 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
| 1146 | memblock_free(__pa(xen_start_info->mfn_list), size); | 1142 | memblock_free(__pa(xen_start_info->mfn_list), size); |
| 1147 | /* And revector! Bye bye old array */ | ||
| 1148 | xen_start_info->mfn_list = new_mfn_list; | ||
| 1149 | 1143 | ||
| 1150 | /* At this stage, cleanup_highmap has already cleaned __ka space | 1144 | /* At this stage, cleanup_highmap has already cleaned __ka space |
| 1151 | * from _brk_limit way up to the max_pfn_mapped (which is the end of | 1145 | * from _brk_limit way up to the max_pfn_mapped (which is the end of |
| @@ -1169,17 +1163,35 @@ static void __init xen_pagetable_p2m_copy(void) | |||
| 1169 | } | 1163 | } |
| 1170 | #endif | 1164 | #endif |
| 1171 | 1165 | ||
| 1172 | static void __init xen_pagetable_init(void) | 1166 | static void __init xen_pagetable_p2m_setup(void) |
| 1173 | { | 1167 | { |
| 1174 | paging_init(); | 1168 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
| 1169 | return; | ||
| 1170 | |||
| 1171 | xen_vmalloc_p2m_tree(); | ||
| 1172 | |||
| 1175 | #ifdef CONFIG_X86_64 | 1173 | #ifdef CONFIG_X86_64 |
| 1176 | xen_pagetable_p2m_copy(); | 1174 | xen_pagetable_p2m_free(); |
| 1177 | #endif | 1175 | #endif |
| 1176 | /* And revector! Bye bye old array */ | ||
| 1177 | xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; | ||
| 1178 | } | ||
| 1179 | |||
| 1180 | static void __init xen_pagetable_init(void) | ||
| 1181 | { | ||
| 1182 | paging_init(); | ||
| 1183 | xen_post_allocator_init(); | ||
| 1184 | |||
| 1185 | xen_pagetable_p2m_setup(); | ||
| 1186 | |||
| 1178 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | 1187 | /* Allocate and initialize top and mid mfn levels for p2m structure */ |
| 1179 | xen_build_mfn_list_list(); | 1188 | xen_build_mfn_list_list(); |
| 1180 | 1189 | ||
| 1190 | /* Remap memory freed due to conflicts with E820 map */ | ||
| 1191 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
| 1192 | xen_remap_memory(); | ||
| 1193 | |||
| 1181 | xen_setup_shared_info(); | 1194 | xen_setup_shared_info(); |
| 1182 | xen_post_allocator_init(); | ||
| 1183 | } | 1195 | } |
| 1184 | static void xen_write_cr2(unsigned long cr2) | 1196 | static void xen_write_cr2(unsigned long cr2) |
| 1185 | { | 1197 | { |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b456b048eca9..edbc7a63fd73 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
| @@ -3,21 +3,22 @@ | |||
| 3 | * guests themselves, but it must also access and update the p2m array | 3 | * guests themselves, but it must also access and update the p2m array |
| 4 | * during suspend/resume when all the pages are reallocated. | 4 | * during suspend/resume when all the pages are reallocated. |
| 5 | * | 5 | * |
| 6 | * The p2m table is logically a flat array, but we implement it as a | 6 | * The logical flat p2m table is mapped to a linear kernel memory area. |
| 7 | * three-level tree to allow the address space to be sparse. | 7 | * For accesses by Xen a three-level tree linked via mfns only is set up to |
| 8 | * allow the address space to be sparse. | ||
| 8 | * | 9 | * |
| 9 | * Xen | 10 | * Xen |
| 10 | * | | 11 | * | |
| 11 | * p2m_top p2m_top_mfn | 12 | * p2m_top_mfn |
| 12 | * / \ / \ | 13 | * / \ |
| 13 | * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | 14 | * p2m_mid_mfn p2m_mid_mfn |
| 14 | * / \ / \ / / | 15 | * / / |
| 15 | * p2m p2m p2m p2m p2m p2m p2m ... | 16 | * p2m p2m p2m ... |
| 16 | * | 17 | * |
| 17 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | 18 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. |
| 18 | * | 19 | * |
| 19 | * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | 20 | * The p2m_top_mfn level is limited to 1 page, so the maximum representable |
| 20 | * maximum representable pseudo-physical address space is: | 21 | * pseudo-physical address space is: |
| 21 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages | 22 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages |
| 22 | * | 23 | * |
| 23 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | 24 | * P2M_PER_PAGE depends on the architecture, as a mfn is always |
| @@ -30,6 +31,9 @@ | |||
| 30 | * leaf entries, or for the top root, or middle one, for which there is a void | 31 | * leaf entries, or for the top root, or middle one, for which there is a void |
| 31 | * entry, we assume it is "missing". So (for example) | 32 | * entry, we assume it is "missing". So (for example) |
| 32 | * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. | 33 | * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. |
| 34 | * We have a dedicated page p2m_missing with all entries being | ||
| 35 | * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m | ||
| 36 | * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. | ||
| 33 | * | 37 | * |
| 34 | * We also have the possibility of setting 1-1 mappings on certain regions, so | 38 | * We also have the possibility of setting 1-1 mappings on certain regions, so |
| 35 | * that: | 39 | * that: |
| @@ -39,122 +43,20 @@ | |||
| 39 | * PCI BARs, or ACPI spaces), we can create mappings easily because we | 43 | * PCI BARs, or ACPI spaces), we can create mappings easily because we |
| 40 | * get the PFN value to match the MFN. | 44 | * get the PFN value to match the MFN. |
| 41 | * | 45 | * |
| 42 | * For this to work efficiently we have one new page p2m_identity and | 46 | * For this to work efficiently we have one new page p2m_identity. All entries |
| 43 | * allocate (via reserved_brk) any other pages we need to cover the sides | 47 | * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only |
| 44 | * (1GB or 4MB boundary violations). All entries in p2m_identity are set to | 48 | * recognizes that and MFNs, no other fancy value). |
| 45 | * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, | ||
| 46 | * no other fancy value). | ||
| 47 | * | 49 | * |
| 48 | * On lookup we spot that the entry points to p2m_identity and return the | 50 | * On lookup we spot that the entry points to p2m_identity and return the |
| 49 | * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. | 51 | * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. |
| 50 | * If the entry points to an allocated page, we just proceed as before and | 52 | * If the entry points to an allocated page, we just proceed as before and |
| 51 | * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in | 53 | * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in |
| 52 | * appropriate functions (pfn_to_mfn). | 54 | * appropriate functions (pfn_to_mfn). |
| 53 | * | 55 | * |
| 54 | * The reason for having the IDENTITY_FRAME_BIT instead of just returning the | 56 | * The reason for having the IDENTITY_FRAME_BIT instead of just returning the |
| 55 | * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a | 57 | * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a |
| 56 | * non-identity pfn. To protect ourselves against we elect to set (and get) the | 58 | * non-identity pfn. To protect ourselves against we elect to set (and get) the |
| 57 | * IDENTITY_FRAME_BIT on all identity mapped PFNs. | 59 | * IDENTITY_FRAME_BIT on all identity mapped PFNs. |
| 58 | * | ||
| 59 | * This simplistic diagram is used to explain the more subtle piece of code. | ||
| 60 | * There is also a digram of the P2M at the end that can help. | ||
| 61 | * Imagine your E820 looking as so: | ||
| 62 | * | ||
| 63 | * 1GB 2GB 4GB | ||
| 64 | * /-------------------+---------\/----\ /----------\ /---+-----\ | ||
| 65 | * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | | ||
| 66 | * \-------------------+---------/\----/ \----------/ \---+-----/ | ||
| 67 | * ^- 1029MB ^- 2001MB | ||
| 68 | * | ||
| 69 | * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), | ||
| 70 | * 2048MB = 524288 (0x80000)] | ||
| 71 | * | ||
| 72 | * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB | ||
| 73 | * is actually not present (would have to kick the balloon driver to put it in). | ||
| 74 | * | ||
| 75 | * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: | ||
| 76 | * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start | ||
| 77 | * of the PFN and the end PFN (263424 and 512256 respectively). The first step | ||
| 78 | * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page | ||
| 79 | * covers 512^2 of page estate (1GB) and in case the start or end PFN is not | ||
| 80 | * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as | ||
| 81 | * required to split any existing p2m_mid_missing middle pages. | ||
| 82 | * | ||
| 83 | * With the E820 example above, 263424 is not 1GB aligned so we allocate a | ||
| 84 | * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. | ||
| 85 | * Each entry in the allocate page is "missing" (points to p2m_missing). | ||
| 86 | * | ||
| 87 | * Next stage is to determine if we need to do a more granular boundary check | ||
| 88 | * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. | ||
| 89 | * We check if the start pfn and end pfn violate that boundary check, and if | ||
| 90 | * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer | ||
| 91 | * granularity of setting which PFNs are missing and which ones are identity. | ||
| 92 | * In our example 263424 and 512256 both fail the check so we reserve_brk two | ||
| 93 | * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" | ||
| 94 | * values) and assign them to p2m[1][2] and p2m[1][488] respectively. | ||
| 95 | * | ||
| 96 | * At this point we would at minimum reserve_brk one page, but could be up to | ||
| 97 | * three. Each call to set_phys_range_identity has at maximum a three page | ||
| 98 | * cost. If we were to query the P2M at this stage, all those entries from | ||
| 99 | * start PFN through end PFN (so 1029MB -> 2001MB) would return | ||
| 100 | * INVALID_P2M_ENTRY ("missing"). | ||
| 101 | * | ||
| 102 | * The next step is to walk from the start pfn to the end pfn setting | ||
| 103 | * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. | ||
| 104 | * If we find that the middle entry is pointing to p2m_missing we can swap it | ||
| 105 | * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and | ||
| 106 | * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions). | ||
| 107 | * At this point we do not need to worry about boundary aligment (so no need to | ||
| 108 | * reserve_brk a middle page, figure out which PFNs are "missing" and which | ||
| 109 | * ones are identity), as that has been done earlier. If we find that the | ||
| 110 | * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference | ||
| 111 | * that page (which covers 512 PFNs) and set the appropriate PFN with | ||
| 112 | * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we | ||
| 113 | * set from p2m[1][2][256->511] and p2m[1][488][0->256] with | ||
| 114 | * IDENTITY_FRAME_BIT set. | ||
| 115 | * | ||
| 116 | * All other regions that are void (or not filled) either point to p2m_missing | ||
| 117 | * (considered missing) or have the default value of INVALID_P2M_ENTRY (also | ||
| 118 | * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] | ||
| 119 | * contain the INVALID_P2M_ENTRY value and are considered "missing." | ||
| 120 | * | ||
| 121 | * Finally, the region beyond the end of of the E820 (4 GB in this example) | ||
| 122 | * is set to be identity (in case there are MMIO regions placed here). | ||
| 123 | * | ||
| 124 | * This is what the p2m ends up looking (for the E820 above) with this | ||
| 125 | * fabulous drawing: | ||
| 126 | * | ||
| 127 | * p2m /--------------\ | ||
| 128 | * /-----\ | &mfn_list[0],| /-----------------\ | ||
| 129 | * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. | | ||
| 130 | * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | | ||
| 131 | * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | | ||
| 132 | * |-----| \ | [p2m_identity]+\\ | .... | | ||
| 133 | * | 2 |--\ \-------------------->| ... | \\ \----------------/ | ||
| 134 | * |-----| \ \---------------/ \\ | ||
| 135 | * | 3 |-\ \ \\ p2m_identity [1] | ||
| 136 | * |-----| \ \-------------------->/---------------\ /-----------------\ | ||
| 137 | * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... | | ||
| 138 | * \-----/ | | | [p2m_identity]+-->| ..., ~0 | | ||
| 139 | * | | | .... | \-----------------/ | ||
| 140 | * | | +-[x], ~0, ~0.. +\ | ||
| 141 | * | | \---------------/ \ | ||
| 142 | * | | \-> /---------------\ | ||
| 143 | * | V p2m_mid_missing p2m_missing | IDENTITY[@0] | | ||
| 144 | * | /-----------------\ /------------\ | IDENTITY[@256]| | ||
| 145 | * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... | | ||
| 146 | * | | [p2m_missing] +---->| ..., ~0 | \---------------/ | ||
| 147 | * | | ... | \------------/ | ||
| 148 | * | \-----------------/ | ||
| 149 | * | | ||
| 150 | * | p2m_mid_identity | ||
| 151 | * | /-----------------\ | ||
| 152 | * \-->| [p2m_identity] +---->[1] | ||
| 153 | * | [p2m_identity] +---->[1] | ||
| 154 | * | ... | | ||
| 155 | * \-----------------/ | ||
| 156 | * | ||
| 157 | * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) | ||
| 158 | */ | 60 | */ |
| 159 | 61 | ||
| 160 | #include <linux/init.h> | 62 | #include <linux/init.h> |
| @@ -164,9 +66,11 @@ | |||
| 164 | #include <linux/sched.h> | 66 | #include <linux/sched.h> |
| 165 | #include <linux/seq_file.h> | 67 | #include <linux/seq_file.h> |
| 166 | #include <linux/bootmem.h> | 68 | #include <linux/bootmem.h> |
| 69 | #include <linux/slab.h> | ||
| 167 | 70 | ||
| 168 | #include <asm/cache.h> | 71 | #include <asm/cache.h> |
| 169 | #include <asm/setup.h> | 72 | #include <asm/setup.h> |
| 73 | #include <asm/uaccess.h> | ||
| 170 | 74 | ||
| 171 | #include <asm/xen/page.h> | 75 | #include <asm/xen/page.h> |
| 172 | #include <asm/xen/hypercall.h> | 76 | #include <asm/xen/hypercall.h> |
| @@ -178,31 +82,26 @@ | |||
| 178 | #include "multicalls.h" | 82 | #include "multicalls.h" |
| 179 | #include "xen-ops.h" | 83 | #include "xen-ops.h" |
| 180 | 84 | ||
| 85 | #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) | ||
| 86 | |||
| 181 | static void __init m2p_override_init(void); | 87 | static void __init m2p_override_init(void); |
| 182 | 88 | ||
| 89 | unsigned long *xen_p2m_addr __read_mostly; | ||
| 90 | EXPORT_SYMBOL_GPL(xen_p2m_addr); | ||
| 91 | unsigned long xen_p2m_size __read_mostly; | ||
| 92 | EXPORT_SYMBOL_GPL(xen_p2m_size); | ||
| 183 | unsigned long xen_max_p2m_pfn __read_mostly; | 93 | unsigned long xen_max_p2m_pfn __read_mostly; |
| 94 | EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); | ||
| 95 | |||
| 96 | static DEFINE_SPINLOCK(p2m_update_lock); | ||
| 184 | 97 | ||
| 185 | static unsigned long *p2m_mid_missing_mfn; | 98 | static unsigned long *p2m_mid_missing_mfn; |
| 186 | static unsigned long *p2m_top_mfn; | 99 | static unsigned long *p2m_top_mfn; |
| 187 | static unsigned long **p2m_top_mfn_p; | 100 | static unsigned long **p2m_top_mfn_p; |
| 188 | 101 | static unsigned long *p2m_missing; | |
| 189 | /* Placeholders for holes in the address space */ | 102 | static unsigned long *p2m_identity; |
| 190 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | 103 | static pte_t *p2m_missing_pte; |
| 191 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | 104 | static pte_t *p2m_identity_pte; |
| 192 | |||
| 193 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | ||
| 194 | |||
| 195 | static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); | ||
| 196 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); | ||
| 197 | |||
| 198 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
| 199 | |||
| 200 | /* For each I/O range remapped we may lose up to two leaf pages for the boundary | ||
| 201 | * violations and three mid pages to cover up to 3GB. With | ||
| 202 | * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the | ||
| 203 | * remapped region. | ||
| 204 | */ | ||
| 205 | RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES); | ||
| 206 | 105 | ||
| 207 | static inline unsigned p2m_top_index(unsigned long pfn) | 106 | static inline unsigned p2m_top_index(unsigned long pfn) |
| 208 | { | 107 | { |
| @@ -220,14 +119,6 @@ static inline unsigned p2m_index(unsigned long pfn) | |||
| 220 | return pfn % P2M_PER_PAGE; | 119 | return pfn % P2M_PER_PAGE; |
| 221 | } | 120 | } |
| 222 | 121 | ||
| 223 | static void p2m_top_init(unsigned long ***top) | ||
| 224 | { | ||
| 225 | unsigned i; | ||
| 226 | |||
| 227 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
| 228 | top[i] = p2m_mid_missing; | ||
| 229 | } | ||
| 230 | |||
| 231 | static void p2m_top_mfn_init(unsigned long *top) | 122 | static void p2m_top_mfn_init(unsigned long *top) |
| 232 | { | 123 | { |
| 233 | unsigned i; | 124 | unsigned i; |
| @@ -244,28 +135,43 @@ static void p2m_top_mfn_p_init(unsigned long **top) | |||
| 244 | top[i] = p2m_mid_missing_mfn; | 135 | top[i] = p2m_mid_missing_mfn; |
| 245 | } | 136 | } |
| 246 | 137 | ||
| 247 | static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) | 138 | static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) |
| 248 | { | 139 | { |
| 249 | unsigned i; | 140 | unsigned i; |
| 250 | 141 | ||
| 251 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | 142 | for (i = 0; i < P2M_MID_PER_PAGE; i++) |
| 252 | mid[i] = leaf; | 143 | mid[i] = virt_to_mfn(leaf); |
| 253 | } | 144 | } |
| 254 | 145 | ||
| 255 | static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) | 146 | static void p2m_init(unsigned long *p2m) |
| 256 | { | 147 | { |
| 257 | unsigned i; | 148 | unsigned i; |
| 258 | 149 | ||
| 259 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | 150 | for (i = 0; i < P2M_PER_PAGE; i++) |
| 260 | mid[i] = virt_to_mfn(leaf); | 151 | p2m[i] = INVALID_P2M_ENTRY; |
| 261 | } | 152 | } |
| 262 | 153 | ||
| 263 | static void p2m_init(unsigned long *p2m) | 154 | static void p2m_init_identity(unsigned long *p2m, unsigned long pfn) |
| 264 | { | 155 | { |
| 265 | unsigned i; | 156 | unsigned i; |
| 266 | 157 | ||
| 267 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | 158 | for (i = 0; i < P2M_PER_PAGE; i++) |
| 268 | p2m[i] = INVALID_P2M_ENTRY; | 159 | p2m[i] = IDENTITY_FRAME(pfn + i); |
| 160 | } | ||
| 161 | |||
| 162 | static void * __ref alloc_p2m_page(void) | ||
| 163 | { | ||
| 164 | if (unlikely(!slab_is_available())) | ||
| 165 | return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | ||
| 166 | |||
| 167 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | ||
| 168 | } | ||
| 169 | |||
| 170 | /* Only to be called in case of a race for a page just allocated! */ | ||
| 171 | static void free_p2m_page(void *p) | ||
| 172 | { | ||
| 173 | BUG_ON(!slab_is_available()); | ||
| 174 | free_page((unsigned long)p); | ||
| 269 | } | 175 | } |
| 270 | 176 | ||
| 271 | /* | 177 | /* |
| @@ -280,40 +186,46 @@ static void p2m_init(unsigned long *p2m) | |||
| 280 | */ | 186 | */ |
| 281 | void __ref xen_build_mfn_list_list(void) | 187 | void __ref xen_build_mfn_list_list(void) |
| 282 | { | 188 | { |
| 283 | unsigned long pfn; | 189 | unsigned long pfn, mfn; |
| 190 | pte_t *ptep; | ||
| 191 | unsigned int level, topidx, mididx; | ||
| 192 | unsigned long *mid_mfn_p; | ||
| 284 | 193 | ||
| 285 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 194 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
| 286 | return; | 195 | return; |
| 287 | 196 | ||
| 288 | /* Pre-initialize p2m_top_mfn to be completely missing */ | 197 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
| 289 | if (p2m_top_mfn == NULL) { | 198 | if (p2m_top_mfn == NULL) { |
| 290 | p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | 199 | p2m_mid_missing_mfn = alloc_p2m_page(); |
| 291 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); | 200 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
| 292 | 201 | ||
| 293 | p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | 202 | p2m_top_mfn_p = alloc_p2m_page(); |
| 294 | p2m_top_mfn_p_init(p2m_top_mfn_p); | 203 | p2m_top_mfn_p_init(p2m_top_mfn_p); |
| 295 | 204 | ||
| 296 | p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | 205 | p2m_top_mfn = alloc_p2m_page(); |
| 297 | p2m_top_mfn_init(p2m_top_mfn); | 206 | p2m_top_mfn_init(p2m_top_mfn); |
| 298 | } else { | 207 | } else { |
| 299 | /* Reinitialise, mfn's all change after migration */ | 208 | /* Reinitialise, mfn's all change after migration */ |
| 300 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); | 209 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
| 301 | } | 210 | } |
| 302 | 211 | ||
| 303 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | 212 | for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN; |
| 304 | unsigned topidx = p2m_top_index(pfn); | 213 | pfn += P2M_PER_PAGE) { |
| 305 | unsigned mididx = p2m_mid_index(pfn); | 214 | topidx = p2m_top_index(pfn); |
| 306 | unsigned long **mid; | 215 | mididx = p2m_mid_index(pfn); |
| 307 | unsigned long *mid_mfn_p; | ||
| 308 | 216 | ||
| 309 | mid = p2m_top[topidx]; | ||
| 310 | mid_mfn_p = p2m_top_mfn_p[topidx]; | 217 | mid_mfn_p = p2m_top_mfn_p[topidx]; |
| 218 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), | ||
| 219 | &level); | ||
| 220 | BUG_ON(!ptep || level != PG_LEVEL_4K); | ||
| 221 | mfn = pte_mfn(*ptep); | ||
| 222 | ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | ||
| 311 | 223 | ||
| 312 | /* Don't bother allocating any mfn mid levels if | 224 | /* Don't bother allocating any mfn mid levels if |
| 313 | * they're just missing, just update the stored mfn, | 225 | * they're just missing, just update the stored mfn, |
| 314 | * since all could have changed over a migrate. | 226 | * since all could have changed over a migrate. |
| 315 | */ | 227 | */ |
| 316 | if (mid == p2m_mid_missing) { | 228 | if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) { |
| 317 | BUG_ON(mididx); | 229 | BUG_ON(mididx); |
| 318 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | 230 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); |
| 319 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | 231 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); |
| @@ -322,19 +234,14 @@ void __ref xen_build_mfn_list_list(void) | |||
| 322 | } | 234 | } |
| 323 | 235 | ||
| 324 | if (mid_mfn_p == p2m_mid_missing_mfn) { | 236 | if (mid_mfn_p == p2m_mid_missing_mfn) { |
| 325 | /* | 237 | mid_mfn_p = alloc_p2m_page(); |
| 326 | * XXX boot-time only! We should never find | ||
| 327 | * missing parts of the mfn tree after | ||
| 328 | * runtime. | ||
| 329 | */ | ||
| 330 | mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | ||
| 331 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); | 238 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); |
| 332 | 239 | ||
| 333 | p2m_top_mfn_p[topidx] = mid_mfn_p; | 240 | p2m_top_mfn_p[topidx] = mid_mfn_p; |
| 334 | } | 241 | } |
| 335 | 242 | ||
| 336 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | 243 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); |
| 337 | mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | 244 | mid_mfn_p[mididx] = mfn; |
| 338 | } | 245 | } |
| 339 | } | 246 | } |
| 340 | 247 | ||
| @@ -353,171 +260,235 @@ void xen_setup_mfn_list_list(void) | |||
| 353 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | 260 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ |
| 354 | void __init xen_build_dynamic_phys_to_machine(void) | 261 | void __init xen_build_dynamic_phys_to_machine(void) |
| 355 | { | 262 | { |
| 356 | unsigned long *mfn_list; | ||
| 357 | unsigned long max_pfn; | ||
| 358 | unsigned long pfn; | 263 | unsigned long pfn; |
| 359 | 264 | ||
| 360 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 265 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
| 361 | return; | 266 | return; |
| 362 | 267 | ||
| 363 | mfn_list = (unsigned long *)xen_start_info->mfn_list; | 268 | xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; |
| 364 | max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | 269 | xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE); |
| 365 | xen_max_p2m_pfn = max_pfn; | ||
| 366 | 270 | ||
| 367 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | 271 | for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++) |
| 368 | p2m_init(p2m_missing); | 272 | xen_p2m_addr[pfn] = INVALID_P2M_ENTRY; |
| 369 | p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
| 370 | p2m_init(p2m_identity); | ||
| 371 | 273 | ||
| 372 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | 274 | xen_max_p2m_pfn = xen_p2m_size; |
| 373 | p2m_mid_init(p2m_mid_missing, p2m_missing); | 275 | } |
| 374 | p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
| 375 | p2m_mid_init(p2m_mid_identity, p2m_identity); | ||
| 376 | 276 | ||
| 377 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | 277 | #define P2M_TYPE_IDENTITY 0 |
| 378 | p2m_top_init(p2m_top); | 278 | #define P2M_TYPE_MISSING 1 |
| 279 | #define P2M_TYPE_PFN 2 | ||
| 280 | #define P2M_TYPE_UNKNOWN 3 | ||
| 379 | 281 | ||
| 380 | /* | 282 | static int xen_p2m_elem_type(unsigned long pfn) |
| 381 | * The domain builder gives us a pre-constructed p2m array in | 283 | { |
| 382 | * mfn_list for all the pages initially given to us, so we just | 284 | unsigned long mfn; |
| 383 | * need to graft that into our tree structure. | ||
| 384 | */ | ||
| 385 | for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | ||
| 386 | unsigned topidx = p2m_top_index(pfn); | ||
| 387 | unsigned mididx = p2m_mid_index(pfn); | ||
| 388 | 285 | ||
| 389 | if (p2m_top[topidx] == p2m_mid_missing) { | 286 | if (pfn >= xen_p2m_size) |
| 390 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | 287 | return P2M_TYPE_IDENTITY; |
| 391 | p2m_mid_init(mid, p2m_missing); | ||
| 392 | 288 | ||
| 393 | p2m_top[topidx] = mid; | 289 | mfn = xen_p2m_addr[pfn]; |
| 394 | } | ||
| 395 | 290 | ||
| 396 | /* | 291 | if (mfn == INVALID_P2M_ENTRY) |
| 397 | * As long as the mfn_list has enough entries to completely | 292 | return P2M_TYPE_MISSING; |
| 398 | * fill a p2m page, pointing into the array is ok. But if | ||
| 399 | * not the entries beyond the last pfn will be undefined. | ||
| 400 | */ | ||
| 401 | if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) { | ||
| 402 | unsigned long p2midx; | ||
| 403 | 293 | ||
| 404 | p2midx = max_pfn % P2M_PER_PAGE; | 294 | if (mfn & IDENTITY_FRAME_BIT) |
| 405 | for ( ; p2midx < P2M_PER_PAGE; p2midx++) | 295 | return P2M_TYPE_IDENTITY; |
| 406 | mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY; | ||
| 407 | } | ||
| 408 | p2m_top[topidx][mididx] = &mfn_list[pfn]; | ||
| 409 | } | ||
| 410 | 296 | ||
| 411 | m2p_override_init(); | 297 | return P2M_TYPE_PFN; |
| 412 | } | 298 | } |
| 413 | #ifdef CONFIG_X86_64 | 299 | |
| 414 | unsigned long __init xen_revector_p2m_tree(void) | 300 | static void __init xen_rebuild_p2m_list(unsigned long *p2m) |
| 415 | { | 301 | { |
| 416 | unsigned long va_start; | 302 | unsigned int i, chunk; |
| 417 | unsigned long va_end; | ||
| 418 | unsigned long pfn; | 303 | unsigned long pfn; |
| 419 | unsigned long pfn_free = 0; | 304 | unsigned long *mfns; |
| 420 | unsigned long *mfn_list = NULL; | 305 | pte_t *ptep; |
| 421 | unsigned long size; | 306 | pmd_t *pmdp; |
| 422 | 307 | int type; | |
| 423 | va_start = xen_start_info->mfn_list; | ||
| 424 | /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), | ||
| 425 | * so make sure it is rounded up to that */ | ||
| 426 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | ||
| 427 | va_end = va_start + size; | ||
| 428 | |||
| 429 | /* If we were revectored already, don't do it again. */ | ||
| 430 | if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) | ||
| 431 | return 0; | ||
| 432 | 308 | ||
| 433 | mfn_list = alloc_bootmem_align(size, PAGE_SIZE); | 309 | p2m_missing = alloc_p2m_page(); |
| 434 | if (!mfn_list) { | 310 | p2m_init(p2m_missing); |
| 435 | pr_warn("Could not allocate space for a new P2M tree!\n"); | 311 | p2m_identity = alloc_p2m_page(); |
| 436 | return xen_start_info->mfn_list; | 312 | p2m_init(p2m_identity); |
| 437 | } | ||
| 438 | /* Fill it out with INVALID_P2M_ENTRY value */ | ||
| 439 | memset(mfn_list, 0xFF, size); | ||
| 440 | 313 | ||
| 441 | for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { | 314 | p2m_missing_pte = alloc_p2m_page(); |
| 442 | unsigned topidx = p2m_top_index(pfn); | 315 | paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT); |
| 443 | unsigned mididx; | 316 | p2m_identity_pte = alloc_p2m_page(); |
| 444 | unsigned long *mid_p; | 317 | paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT); |
| 318 | for (i = 0; i < PTRS_PER_PTE; i++) { | ||
| 319 | set_pte(p2m_missing_pte + i, | ||
| 320 | pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO)); | ||
| 321 | set_pte(p2m_identity_pte + i, | ||
| 322 | pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO)); | ||
| 323 | } | ||
| 445 | 324 | ||
| 446 | if (!p2m_top[topidx]) | 325 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) { |
| 326 | /* | ||
| 327 | * Try to map missing/identity PMDs or p2m-pages if possible. | ||
| 328 | * We have to respect the structure of the mfn_list_list | ||
| 329 | * which will be built just afterwards. | ||
| 330 | * Chunk size to test is one p2m page if we are in the middle | ||
| 331 | * of a mfn_list_list mid page and the complete mid page area | ||
| 332 | * if we are at index 0 of the mid page. Please note that a | ||
| 333 | * mid page might cover more than one PMD, e.g. on 32 bit PAE | ||
| 334 | * kernels. | ||
| 335 | */ | ||
| 336 | chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ? | ||
| 337 | P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE; | ||
| 338 | |||
| 339 | type = xen_p2m_elem_type(pfn); | ||
| 340 | i = 0; | ||
| 341 | if (type != P2M_TYPE_PFN) | ||
| 342 | for (i = 1; i < chunk; i++) | ||
| 343 | if (xen_p2m_elem_type(pfn + i) != type) | ||
| 344 | break; | ||
| 345 | if (i < chunk) | ||
| 346 | /* Reset to minimal chunk size. */ | ||
| 347 | chunk = P2M_PER_PAGE; | ||
| 348 | |||
| 349 | if (type == P2M_TYPE_PFN || i < chunk) { | ||
| 350 | /* Use initial p2m page contents. */ | ||
| 351 | #ifdef CONFIG_X86_64 | ||
| 352 | mfns = alloc_p2m_page(); | ||
| 353 | copy_page(mfns, xen_p2m_addr + pfn); | ||
| 354 | #else | ||
| 355 | mfns = xen_p2m_addr + pfn; | ||
| 356 | #endif | ||
| 357 | ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | ||
| 358 | set_pte(ptep, | ||
| 359 | pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); | ||
| 447 | continue; | 360 | continue; |
| 361 | } | ||
| 448 | 362 | ||
| 449 | if (p2m_top[topidx] == p2m_mid_missing) | 363 | if (chunk == P2M_PER_PAGE) { |
| 364 | /* Map complete missing or identity p2m-page. */ | ||
| 365 | mfns = (type == P2M_TYPE_MISSING) ? | ||
| 366 | p2m_missing : p2m_identity; | ||
| 367 | ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | ||
| 368 | set_pte(ptep, | ||
| 369 | pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO)); | ||
| 450 | continue; | 370 | continue; |
| 371 | } | ||
| 451 | 372 | ||
| 452 | mididx = p2m_mid_index(pfn); | 373 | /* Complete missing or identity PMD(s) can be mapped. */ |
| 453 | mid_p = p2m_top[topidx][mididx]; | 374 | ptep = (type == P2M_TYPE_MISSING) ? |
| 454 | if (!mid_p) | 375 | p2m_missing_pte : p2m_identity_pte; |
| 455 | continue; | 376 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { |
| 456 | if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) | 377 | pmdp = populate_extra_pmd( |
| 457 | continue; | 378 | (unsigned long)(p2m + pfn + i * PTRS_PER_PTE)); |
| 379 | set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); | ||
| 380 | } | ||
| 381 | } | ||
| 382 | } | ||
| 458 | 383 | ||
| 459 | if ((unsigned long)mid_p == INVALID_P2M_ENTRY) | 384 | void __init xen_vmalloc_p2m_tree(void) |
| 460 | continue; | 385 | { |
| 386 | static struct vm_struct vm; | ||
| 461 | 387 | ||
| 462 | /* The old va. Rebase it on mfn_list */ | 388 | vm.flags = VM_ALLOC; |
| 463 | if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { | 389 | vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn, |
| 464 | unsigned long *new; | 390 | PMD_SIZE * PMDS_PER_MID_PAGE); |
| 391 | vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); | ||
| 392 | pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); | ||
| 465 | 393 | ||
| 466 | if (pfn_free > (size / sizeof(unsigned long))) { | 394 | xen_max_p2m_pfn = vm.size / sizeof(unsigned long); |
| 467 | WARN(1, "Only allocated for %ld pages, but we want %ld!\n", | ||
| 468 | size / sizeof(unsigned long), pfn_free); | ||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | new = &mfn_list[pfn_free]; | ||
| 472 | 395 | ||
| 473 | copy_page(new, mid_p); | 396 | xen_rebuild_p2m_list(vm.addr); |
| 474 | p2m_top[topidx][mididx] = &mfn_list[pfn_free]; | ||
| 475 | 397 | ||
| 476 | pfn_free += P2M_PER_PAGE; | 398 | xen_p2m_addr = vm.addr; |
| 399 | xen_p2m_size = xen_max_p2m_pfn; | ||
| 477 | 400 | ||
| 478 | } | 401 | xen_inv_extra_mem(); |
| 479 | /* This should be the leafs allocated for identity from _brk. */ | ||
| 480 | } | ||
| 481 | return (unsigned long)mfn_list; | ||
| 482 | 402 | ||
| 403 | m2p_override_init(); | ||
| 483 | } | 404 | } |
| 484 | #else | 405 | |
| 485 | unsigned long __init xen_revector_p2m_tree(void) | ||
| 486 | { | ||
| 487 | return 0; | ||
| 488 | } | ||
| 489 | #endif | ||
| 490 | unsigned long get_phys_to_machine(unsigned long pfn) | 406 | unsigned long get_phys_to_machine(unsigned long pfn) |
| 491 | { | 407 | { |
| 492 | unsigned topidx, mididx, idx; | 408 | pte_t *ptep; |
| 409 | unsigned int level; | ||
| 410 | |||
| 411 | if (unlikely(pfn >= xen_p2m_size)) { | ||
| 412 | if (pfn < xen_max_p2m_pfn) | ||
| 413 | return xen_chk_extra_mem(pfn); | ||
| 493 | 414 | ||
| 494 | if (unlikely(pfn >= MAX_P2M_PFN)) | ||
| 495 | return IDENTITY_FRAME(pfn); | 415 | return IDENTITY_FRAME(pfn); |
| 416 | } | ||
| 496 | 417 | ||
| 497 | topidx = p2m_top_index(pfn); | 418 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
| 498 | mididx = p2m_mid_index(pfn); | 419 | BUG_ON(!ptep || level != PG_LEVEL_4K); |
| 499 | idx = p2m_index(pfn); | ||
| 500 | 420 | ||
| 501 | /* | 421 | /* |
| 502 | * The INVALID_P2M_ENTRY is filled in both p2m_*identity | 422 | * The INVALID_P2M_ENTRY is filled in both p2m_*identity |
| 503 | * and in p2m_*missing, so returning the INVALID_P2M_ENTRY | 423 | * and in p2m_*missing, so returning the INVALID_P2M_ENTRY |
| 504 | * would be wrong. | 424 | * would be wrong. |
| 505 | */ | 425 | */ |
| 506 | if (p2m_top[topidx][mididx] == p2m_identity) | 426 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
| 507 | return IDENTITY_FRAME(pfn); | 427 | return IDENTITY_FRAME(pfn); |
| 508 | 428 | ||
| 509 | return p2m_top[topidx][mididx][idx]; | 429 | return xen_p2m_addr[pfn]; |
| 510 | } | 430 | } |
| 511 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | 431 | EXPORT_SYMBOL_GPL(get_phys_to_machine); |
| 512 | 432 | ||
| 513 | static void *alloc_p2m_page(void) | 433 | /* |
| 434 | * Allocate new pmd(s). It is checked whether the old pmd is still in place. | ||
| 435 | * If not, nothing is changed. This is okay as the only reason for allocating | ||
| 436 | * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual | ||
| 437 | * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! | ||
| 438 | */ | ||
| 439 | static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) | ||
| 514 | { | 440 | { |
| 515 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | 441 | pte_t *ptechk; |
| 516 | } | 442 | pte_t *pteret = ptep; |
| 443 | pte_t *pte_newpg[PMDS_PER_MID_PAGE]; | ||
| 444 | pmd_t *pmdp; | ||
| 445 | unsigned int level; | ||
| 446 | unsigned long flags; | ||
| 447 | unsigned long vaddr; | ||
| 448 | int i; | ||
| 517 | 449 | ||
| 518 | static void free_p2m_page(void *p) | 450 | /* Do all allocations first to bail out in error case. */ |
| 519 | { | 451 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { |
| 520 | free_page((unsigned long)p); | 452 | pte_newpg[i] = alloc_p2m_page(); |
| 453 | if (!pte_newpg[i]) { | ||
| 454 | for (i--; i >= 0; i--) | ||
| 455 | free_p2m_page(pte_newpg[i]); | ||
| 456 | |||
| 457 | return NULL; | ||
| 458 | } | ||
| 459 | } | ||
| 460 | |||
| 461 | vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1); | ||
| 462 | |||
| 463 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | ||
| 464 | copy_page(pte_newpg[i], pte_pg); | ||
| 465 | paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT); | ||
| 466 | |||
| 467 | pmdp = lookup_pmd_address(vaddr); | ||
| 468 | BUG_ON(!pmdp); | ||
| 469 | |||
| 470 | spin_lock_irqsave(&p2m_update_lock, flags); | ||
| 471 | |||
| 472 | ptechk = lookup_address(vaddr, &level); | ||
| 473 | if (ptechk == pte_pg) { | ||
| 474 | set_pmd(pmdp, | ||
| 475 | __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); | ||
| 476 | if (vaddr == (addr & ~(PMD_SIZE - 1))) | ||
| 477 | pteret = pte_offset_kernel(pmdp, addr); | ||
| 478 | pte_newpg[i] = NULL; | ||
| 479 | } | ||
| 480 | |||
| 481 | spin_unlock_irqrestore(&p2m_update_lock, flags); | ||
| 482 | |||
| 483 | if (pte_newpg[i]) { | ||
| 484 | paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT); | ||
| 485 | free_p2m_page(pte_newpg[i]); | ||
| 486 | } | ||
| 487 | |||
| 488 | vaddr += PMD_SIZE; | ||
| 489 | } | ||
| 490 | |||
| 491 | return pteret; | ||
| 521 | } | 492 | } |
| 522 | 493 | ||
| 523 | /* | 494 | /* |
| @@ -530,58 +501,62 @@ static void free_p2m_page(void *p) | |||
| 530 | static bool alloc_p2m(unsigned long pfn) | 501 | static bool alloc_p2m(unsigned long pfn) |
| 531 | { | 502 | { |
| 532 | unsigned topidx, mididx; | 503 | unsigned topidx, mididx; |
| 533 | unsigned long ***top_p, **mid; | ||
| 534 | unsigned long *top_mfn_p, *mid_mfn; | 504 | unsigned long *top_mfn_p, *mid_mfn; |
| 535 | unsigned long *p2m_orig; | 505 | pte_t *ptep, *pte_pg; |
| 506 | unsigned int level; | ||
| 507 | unsigned long flags; | ||
| 508 | unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); | ||
| 509 | unsigned long p2m_pfn; | ||
| 536 | 510 | ||
| 537 | topidx = p2m_top_index(pfn); | 511 | topidx = p2m_top_index(pfn); |
| 538 | mididx = p2m_mid_index(pfn); | 512 | mididx = p2m_mid_index(pfn); |
| 539 | 513 | ||
| 540 | top_p = &p2m_top[topidx]; | 514 | ptep = lookup_address(addr, &level); |
| 541 | mid = ACCESS_ONCE(*top_p); | 515 | BUG_ON(!ptep || level != PG_LEVEL_4K); |
| 516 | pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | ||
| 542 | 517 | ||
| 543 | if (mid == p2m_mid_missing) { | 518 | if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { |
| 544 | /* Mid level is missing, allocate a new one */ | 519 | /* PMD level is missing, allocate a new one */ |
| 545 | mid = alloc_p2m_page(); | 520 | ptep = alloc_p2m_pmd(addr, ptep, pte_pg); |
| 546 | if (!mid) | 521 | if (!ptep) |
| 547 | return false; | 522 | return false; |
| 548 | |||
| 549 | p2m_mid_init(mid, p2m_missing); | ||
| 550 | |||
| 551 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | ||
| 552 | free_p2m_page(mid); | ||
| 553 | } | 523 | } |
| 554 | 524 | ||
| 555 | top_mfn_p = &p2m_top_mfn[topidx]; | 525 | if (p2m_top_mfn) { |
| 556 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); | 526 | top_mfn_p = &p2m_top_mfn[topidx]; |
| 527 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); | ||
| 557 | 528 | ||
| 558 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | 529 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); |
| 559 | 530 | ||
| 560 | if (mid_mfn == p2m_mid_missing_mfn) { | 531 | if (mid_mfn == p2m_mid_missing_mfn) { |
| 561 | /* Separately check the mid mfn level */ | 532 | /* Separately check the mid mfn level */ |
| 562 | unsigned long missing_mfn; | 533 | unsigned long missing_mfn; |
| 563 | unsigned long mid_mfn_mfn; | 534 | unsigned long mid_mfn_mfn; |
| 564 | unsigned long old_mfn; | 535 | unsigned long old_mfn; |
| 565 | 536 | ||
| 566 | mid_mfn = alloc_p2m_page(); | 537 | mid_mfn = alloc_p2m_page(); |
| 567 | if (!mid_mfn) | 538 | if (!mid_mfn) |
| 568 | return false; | 539 | return false; |
| 569 | 540 | ||
| 570 | p2m_mid_mfn_init(mid_mfn, p2m_missing); | 541 | p2m_mid_mfn_init(mid_mfn, p2m_missing); |
| 571 | 542 | ||
| 572 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | 543 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); |
| 573 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | 544 | mid_mfn_mfn = virt_to_mfn(mid_mfn); |
| 574 | old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); | 545 | old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); |
| 575 | if (old_mfn != missing_mfn) { | 546 | if (old_mfn != missing_mfn) { |
| 576 | free_p2m_page(mid_mfn); | 547 | free_p2m_page(mid_mfn); |
| 577 | mid_mfn = mfn_to_virt(old_mfn); | 548 | mid_mfn = mfn_to_virt(old_mfn); |
| 578 | } else { | 549 | } else { |
| 579 | p2m_top_mfn_p[topidx] = mid_mfn; | 550 | p2m_top_mfn_p[topidx] = mid_mfn; |
| 551 | } | ||
| 580 | } | 552 | } |
| 553 | } else { | ||
| 554 | mid_mfn = NULL; | ||
| 581 | } | 555 | } |
| 582 | 556 | ||
| 583 | p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); | 557 | p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep)); |
| 584 | if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { | 558 | if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) || |
| 559 | p2m_pfn == PFN_DOWN(__pa(p2m_missing))) { | ||
| 585 | /* p2m leaf page is missing */ | 560 | /* p2m leaf page is missing */ |
| 586 | unsigned long *p2m; | 561 | unsigned long *p2m; |
| 587 | 562 | ||
| @@ -589,183 +564,36 @@ static bool alloc_p2m(unsigned long pfn) | |||
| 589 | if (!p2m) | 564 | if (!p2m) |
| 590 | return false; | 565 | return false; |
| 591 | 566 | ||
| 592 | p2m_init(p2m); | 567 | if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) |
| 593 | 568 | p2m_init(p2m); | |
| 594 | if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) | ||
| 595 | free_p2m_page(p2m); | ||
| 596 | else | 569 | else |
| 597 | mid_mfn[mididx] = virt_to_mfn(p2m); | 570 | p2m_init_identity(p2m, pfn); |
| 598 | } | ||
| 599 | |||
| 600 | return true; | ||
| 601 | } | ||
| 602 | |||
| 603 | static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) | ||
| 604 | { | ||
| 605 | unsigned topidx, mididx, idx; | ||
| 606 | unsigned long *p2m; | ||
| 607 | |||
| 608 | topidx = p2m_top_index(pfn); | ||
| 609 | mididx = p2m_mid_index(pfn); | ||
| 610 | idx = p2m_index(pfn); | ||
| 611 | |||
| 612 | /* Pfff.. No boundary cross-over, lets get out. */ | ||
| 613 | if (!idx && check_boundary) | ||
| 614 | return false; | ||
| 615 | |||
| 616 | WARN(p2m_top[topidx][mididx] == p2m_identity, | ||
| 617 | "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", | ||
| 618 | topidx, mididx); | ||
| 619 | |||
| 620 | /* | ||
| 621 | * Could be done by xen_build_dynamic_phys_to_machine.. | ||
| 622 | */ | ||
| 623 | if (p2m_top[topidx][mididx] != p2m_missing) | ||
| 624 | return false; | ||
| 625 | |||
| 626 | /* Boundary cross-over for the edges: */ | ||
| 627 | p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
| 628 | |||
| 629 | p2m_init(p2m); | ||
| 630 | 571 | ||
| 631 | p2m_top[topidx][mididx] = p2m; | 572 | spin_lock_irqsave(&p2m_update_lock, flags); |
| 632 | 573 | ||
| 633 | return true; | 574 | if (pte_pfn(*ptep) == p2m_pfn) { |
| 634 | } | 575 | set_pte(ptep, |
| 635 | 576 | pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); | |
| 636 | static bool __init early_alloc_p2m_middle(unsigned long pfn) | 577 | if (mid_mfn) |
| 637 | { | 578 | mid_mfn[mididx] = virt_to_mfn(p2m); |
| 638 | unsigned topidx = p2m_top_index(pfn); | 579 | p2m = NULL; |
| 639 | unsigned long **mid; | ||
| 640 | |||
| 641 | mid = p2m_top[topidx]; | ||
| 642 | if (mid == p2m_mid_missing) { | ||
| 643 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
| 644 | |||
| 645 | p2m_mid_init(mid, p2m_missing); | ||
| 646 | |||
| 647 | p2m_top[topidx] = mid; | ||
| 648 | } | ||
| 649 | return true; | ||
| 650 | } | ||
| 651 | |||
| 652 | /* | ||
| 653 | * Skim over the P2M tree looking at pages that are either filled with | ||
| 654 | * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and | ||
| 655 | * replace the P2M leaf with a p2m_missing or p2m_identity. | ||
| 656 | * Stick the old page in the new P2M tree location. | ||
| 657 | */ | ||
| 658 | static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn) | ||
| 659 | { | ||
| 660 | unsigned topidx; | ||
| 661 | unsigned mididx; | ||
| 662 | unsigned ident_pfns; | ||
| 663 | unsigned inv_pfns; | ||
| 664 | unsigned long *p2m; | ||
| 665 | unsigned idx; | ||
| 666 | unsigned long pfn; | ||
| 667 | |||
| 668 | /* We only look when this entails a P2M middle layer */ | ||
| 669 | if (p2m_index(set_pfn)) | ||
| 670 | return false; | ||
| 671 | |||
| 672 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { | ||
| 673 | topidx = p2m_top_index(pfn); | ||
| 674 | |||
| 675 | if (!p2m_top[topidx]) | ||
| 676 | continue; | ||
| 677 | |||
| 678 | if (p2m_top[topidx] == p2m_mid_missing) | ||
| 679 | continue; | ||
| 680 | |||
| 681 | mididx = p2m_mid_index(pfn); | ||
| 682 | p2m = p2m_top[topidx][mididx]; | ||
| 683 | if (!p2m) | ||
| 684 | continue; | ||
| 685 | |||
| 686 | if ((p2m == p2m_missing) || (p2m == p2m_identity)) | ||
| 687 | continue; | ||
| 688 | |||
| 689 | if ((unsigned long)p2m == INVALID_P2M_ENTRY) | ||
| 690 | continue; | ||
| 691 | |||
| 692 | ident_pfns = 0; | ||
| 693 | inv_pfns = 0; | ||
| 694 | for (idx = 0; idx < P2M_PER_PAGE; idx++) { | ||
| 695 | /* IDENTITY_PFNs are 1:1 */ | ||
| 696 | if (p2m[idx] == IDENTITY_FRAME(pfn + idx)) | ||
| 697 | ident_pfns++; | ||
| 698 | else if (p2m[idx] == INVALID_P2M_ENTRY) | ||
| 699 | inv_pfns++; | ||
| 700 | else | ||
| 701 | break; | ||
| 702 | } | 580 | } |
| 703 | if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE)) | ||
| 704 | goto found; | ||
| 705 | } | ||
| 706 | return false; | ||
| 707 | found: | ||
| 708 | /* Found one, replace old with p2m_identity or p2m_missing */ | ||
| 709 | p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); | ||
| 710 | |||
| 711 | /* Reset where we want to stick the old page in. */ | ||
| 712 | topidx = p2m_top_index(set_pfn); | ||
| 713 | mididx = p2m_mid_index(set_pfn); | ||
| 714 | |||
| 715 | /* This shouldn't happen */ | ||
| 716 | if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) | ||
| 717 | early_alloc_p2m_middle(set_pfn); | ||
| 718 | |||
| 719 | if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) | ||
| 720 | return false; | ||
| 721 | |||
| 722 | p2m_init(p2m); | ||
| 723 | p2m_top[topidx][mididx] = p2m; | ||
| 724 | 581 | ||
| 725 | return true; | 582 | spin_unlock_irqrestore(&p2m_update_lock, flags); |
| 726 | } | ||
| 727 | bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
| 728 | { | ||
| 729 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
| 730 | if (!early_alloc_p2m_middle(pfn)) | ||
| 731 | return false; | ||
| 732 | |||
| 733 | if (early_can_reuse_p2m_middle(pfn)) | ||
| 734 | return __set_phys_to_machine(pfn, mfn); | ||
| 735 | |||
| 736 | if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) | ||
| 737 | return false; | ||
| 738 | 583 | ||
| 739 | if (!__set_phys_to_machine(pfn, mfn)) | 584 | if (p2m) |
| 740 | return false; | 585 | free_p2m_page(p2m); |
| 741 | } | 586 | } |
| 742 | 587 | ||
| 743 | return true; | 588 | return true; |
| 744 | } | 589 | } |
| 745 | 590 | ||
| 746 | static void __init early_split_p2m(unsigned long pfn) | ||
| 747 | { | ||
| 748 | unsigned long mididx, idx; | ||
| 749 | |||
| 750 | mididx = p2m_mid_index(pfn); | ||
| 751 | idx = p2m_index(pfn); | ||
| 752 | |||
| 753 | /* | ||
| 754 | * Allocate new middle and leaf pages if this pfn lies in the | ||
| 755 | * middle of one. | ||
| 756 | */ | ||
| 757 | if (mididx || idx) | ||
| 758 | early_alloc_p2m_middle(pfn); | ||
| 759 | if (idx) | ||
| 760 | early_alloc_p2m(pfn, false); | ||
| 761 | } | ||
| 762 | |||
| 763 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, | 591 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
| 764 | unsigned long pfn_e) | 592 | unsigned long pfn_e) |
| 765 | { | 593 | { |
| 766 | unsigned long pfn; | 594 | unsigned long pfn; |
| 767 | 595 | ||
| 768 | if (unlikely(pfn_s >= MAX_P2M_PFN)) | 596 | if (unlikely(pfn_s >= xen_p2m_size)) |
| 769 | return 0; | 597 | return 0; |
| 770 | 598 | ||
| 771 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) | 599 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) |
| @@ -774,101 +602,51 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, | |||
| 774 | if (pfn_s > pfn_e) | 602 | if (pfn_s > pfn_e) |
| 775 | return 0; | 603 | return 0; |
| 776 | 604 | ||
| 777 | if (pfn_e > MAX_P2M_PFN) | 605 | if (pfn_e > xen_p2m_size) |
| 778 | pfn_e = MAX_P2M_PFN; | 606 | pfn_e = xen_p2m_size; |
| 779 | |||
| 780 | early_split_p2m(pfn_s); | ||
| 781 | early_split_p2m(pfn_e); | ||
| 782 | |||
| 783 | for (pfn = pfn_s; pfn < pfn_e;) { | ||
| 784 | unsigned topidx = p2m_top_index(pfn); | ||
| 785 | unsigned mididx = p2m_mid_index(pfn); | ||
| 786 | |||
| 787 | if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) | ||
| 788 | break; | ||
| 789 | pfn++; | ||
| 790 | |||
| 791 | /* | ||
| 792 | * If the PFN was set to a middle or leaf identity | ||
| 793 | * page the remainder must also be identity, so skip | ||
| 794 | * ahead to the next middle or leaf entry. | ||
| 795 | */ | ||
| 796 | if (p2m_top[topidx] == p2m_mid_identity) | ||
| 797 | pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE); | ||
| 798 | else if (p2m_top[topidx][mididx] == p2m_identity) | ||
| 799 | pfn = ALIGN(pfn, P2M_PER_PAGE); | ||
| 800 | } | ||
| 801 | 607 | ||
| 802 | WARN((pfn - pfn_s) != (pfn_e - pfn_s), | 608 | for (pfn = pfn_s; pfn < pfn_e; pfn++) |
| 803 | "Identity mapping failed. We are %ld short of 1-1 mappings!\n", | 609 | xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn); |
| 804 | (pfn_e - pfn_s) - (pfn - pfn_s)); | ||
| 805 | 610 | ||
| 806 | return pfn - pfn_s; | 611 | return pfn - pfn_s; |
| 807 | } | 612 | } |
| 808 | 613 | ||
| 809 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
| 810 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 614 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
| 811 | { | 615 | { |
| 812 | unsigned topidx, mididx, idx; | 616 | pte_t *ptep; |
| 617 | unsigned int level; | ||
| 813 | 618 | ||
| 814 | /* don't track P2M changes in autotranslate guests */ | 619 | /* don't track P2M changes in autotranslate guests */ |
| 815 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) | 620 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) |
| 816 | return true; | 621 | return true; |
| 817 | 622 | ||
| 818 | if (unlikely(pfn >= MAX_P2M_PFN)) { | 623 | if (unlikely(pfn >= xen_p2m_size)) { |
| 819 | BUG_ON(mfn != INVALID_P2M_ENTRY); | 624 | BUG_ON(mfn != INVALID_P2M_ENTRY); |
| 820 | return true; | 625 | return true; |
| 821 | } | 626 | } |
| 822 | 627 | ||
| 823 | topidx = p2m_top_index(pfn); | 628 | if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) |
| 824 | mididx = p2m_mid_index(pfn); | 629 | return true; |
| 825 | idx = p2m_index(pfn); | ||
| 826 | |||
| 827 | /* For sparse holes were the p2m leaf has real PFN along with | ||
| 828 | * PCI holes, stick in the PFN as the MFN value. | ||
| 829 | * | ||
| 830 | * set_phys_range_identity() will have allocated new middle | ||
| 831 | * and leaf pages as required so an existing p2m_mid_missing | ||
| 832 | * or p2m_missing mean that whole range will be identity so | ||
| 833 | * these can be switched to p2m_mid_identity or p2m_identity. | ||
| 834 | */ | ||
| 835 | if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { | ||
| 836 | if (p2m_top[topidx] == p2m_mid_identity) | ||
| 837 | return true; | ||
| 838 | |||
| 839 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
| 840 | WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing, | ||
| 841 | p2m_mid_identity) != p2m_mid_missing); | ||
| 842 | return true; | ||
| 843 | } | ||
| 844 | |||
| 845 | if (p2m_top[topidx][mididx] == p2m_identity) | ||
| 846 | return true; | ||
| 847 | 630 | ||
| 848 | /* Swap over from MISSING to IDENTITY if needed. */ | 631 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
| 849 | if (p2m_top[topidx][mididx] == p2m_missing) { | 632 | BUG_ON(!ptep || level != PG_LEVEL_4K); |
| 850 | WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, | ||
| 851 | p2m_identity) != p2m_missing); | ||
| 852 | return true; | ||
| 853 | } | ||
| 854 | } | ||
| 855 | 633 | ||
| 856 | if (p2m_top[topidx][mididx] == p2m_missing) | 634 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing))) |
| 857 | return mfn == INVALID_P2M_ENTRY; | 635 | return mfn == INVALID_P2M_ENTRY; |
| 858 | 636 | ||
| 859 | p2m_top[topidx][mididx][idx] = mfn; | 637 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
| 638 | return mfn == IDENTITY_FRAME(pfn); | ||
| 860 | 639 | ||
| 861 | return true; | 640 | return false; |
| 862 | } | 641 | } |
| 863 | 642 | ||
| 864 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 643 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
| 865 | { | 644 | { |
| 866 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | 645 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
| 867 | if (!alloc_p2m(pfn)) | 646 | if (!alloc_p2m(pfn)) |
| 868 | return false; | 647 | return false; |
| 869 | 648 | ||
| 870 | if (!__set_phys_to_machine(pfn, mfn)) | 649 | return __set_phys_to_machine(pfn, mfn); |
| 871 | return false; | ||
| 872 | } | 650 | } |
| 873 | 651 | ||
| 874 | return true; | 652 | return true; |
| @@ -877,15 +655,16 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
| 877 | #define M2P_OVERRIDE_HASH_SHIFT 10 | 655 | #define M2P_OVERRIDE_HASH_SHIFT 10 |
| 878 | #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) | 656 | #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) |
| 879 | 657 | ||
| 880 | static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH); | 658 | static struct list_head *m2p_overrides; |
| 881 | static DEFINE_SPINLOCK(m2p_override_lock); | 659 | static DEFINE_SPINLOCK(m2p_override_lock); |
| 882 | 660 | ||
| 883 | static void __init m2p_override_init(void) | 661 | static void __init m2p_override_init(void) |
| 884 | { | 662 | { |
| 885 | unsigned i; | 663 | unsigned i; |
| 886 | 664 | ||
| 887 | m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, | 665 | m2p_overrides = alloc_bootmem_align( |
| 888 | sizeof(unsigned long)); | 666 | sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, |
| 667 | sizeof(unsigned long)); | ||
| 889 | 668 | ||
| 890 | for (i = 0; i < M2P_OVERRIDE_HASH; i++) | 669 | for (i = 0; i < M2P_OVERRIDE_HASH; i++) |
| 891 | INIT_LIST_HEAD(&m2p_overrides[i]); | 670 | INIT_LIST_HEAD(&m2p_overrides[i]); |
| @@ -896,68 +675,9 @@ static unsigned long mfn_hash(unsigned long mfn) | |||
| 896 | return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); | 675 | return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); |
| 897 | } | 676 | } |
| 898 | 677 | ||
| 899 | int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, | ||
| 900 | struct gnttab_map_grant_ref *kmap_ops, | ||
| 901 | struct page **pages, unsigned int count) | ||
| 902 | { | ||
| 903 | int i, ret = 0; | ||
| 904 | bool lazy = false; | ||
| 905 | pte_t *pte; | ||
| 906 | |||
| 907 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
| 908 | return 0; | ||
| 909 | |||
| 910 | if (kmap_ops && | ||
| 911 | !in_interrupt() && | ||
| 912 | paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { | ||
| 913 | arch_enter_lazy_mmu_mode(); | ||
| 914 | lazy = true; | ||
| 915 | } | ||
| 916 | |||
| 917 | for (i = 0; i < count; i++) { | ||
| 918 | unsigned long mfn, pfn; | ||
| 919 | |||
| 920 | /* Do not add to override if the map failed. */ | ||
| 921 | if (map_ops[i].status) | ||
| 922 | continue; | ||
| 923 | |||
| 924 | if (map_ops[i].flags & GNTMAP_contains_pte) { | ||
| 925 | pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | ||
| 926 | (map_ops[i].host_addr & ~PAGE_MASK)); | ||
| 927 | mfn = pte_mfn(*pte); | ||
| 928 | } else { | ||
| 929 | mfn = PFN_DOWN(map_ops[i].dev_bus_addr); | ||
| 930 | } | ||
| 931 | pfn = page_to_pfn(pages[i]); | ||
| 932 | |||
| 933 | WARN_ON(PagePrivate(pages[i])); | ||
| 934 | SetPagePrivate(pages[i]); | ||
| 935 | set_page_private(pages[i], mfn); | ||
| 936 | pages[i]->index = pfn_to_mfn(pfn); | ||
| 937 | |||
| 938 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { | ||
| 939 | ret = -ENOMEM; | ||
| 940 | goto out; | ||
| 941 | } | ||
| 942 | |||
| 943 | if (kmap_ops) { | ||
| 944 | ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); | ||
| 945 | if (ret) | ||
| 946 | goto out; | ||
| 947 | } | ||
| 948 | } | ||
| 949 | |||
| 950 | out: | ||
| 951 | if (lazy) | ||
| 952 | arch_leave_lazy_mmu_mode(); | ||
| 953 | |||
| 954 | return ret; | ||
| 955 | } | ||
| 956 | EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); | ||
| 957 | |||
| 958 | /* Add an MFN override for a particular page */ | 678 | /* Add an MFN override for a particular page */ |
| 959 | int m2p_add_override(unsigned long mfn, struct page *page, | 679 | static int m2p_add_override(unsigned long mfn, struct page *page, |
| 960 | struct gnttab_map_grant_ref *kmap_op) | 680 | struct gnttab_map_grant_ref *kmap_op) |
| 961 | { | 681 | { |
| 962 | unsigned long flags; | 682 | unsigned long flags; |
| 963 | unsigned long pfn; | 683 | unsigned long pfn; |
| @@ -970,7 +690,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, | |||
| 970 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | 690 | address = (unsigned long)__va(pfn << PAGE_SHIFT); |
| 971 | ptep = lookup_address(address, &level); | 691 | ptep = lookup_address(address, &level); |
| 972 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | 692 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, |
| 973 | "m2p_add_override: pfn %lx not mapped", pfn)) | 693 | "m2p_add_override: pfn %lx not mapped", pfn)) |
| 974 | return -EINVAL; | 694 | return -EINVAL; |
| 975 | } | 695 | } |
| 976 | 696 | ||
| @@ -1004,19 +724,19 @@ int m2p_add_override(unsigned long mfn, struct page *page, | |||
| 1004 | * because mfn_to_pfn (that ends up being called by GUPF) will | 724 | * because mfn_to_pfn (that ends up being called by GUPF) will |
| 1005 | * return the backend pfn rather than the frontend pfn. */ | 725 | * return the backend pfn rather than the frontend pfn. */ |
| 1006 | pfn = mfn_to_pfn_no_overrides(mfn); | 726 | pfn = mfn_to_pfn_no_overrides(mfn); |
| 1007 | if (get_phys_to_machine(pfn) == mfn) | 727 | if (__pfn_to_mfn(pfn) == mfn) |
| 1008 | set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); | 728 | set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); |
| 1009 | 729 | ||
| 1010 | return 0; | 730 | return 0; |
| 1011 | } | 731 | } |
| 1012 | EXPORT_SYMBOL_GPL(m2p_add_override); | ||
| 1013 | 732 | ||
| 1014 | int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, | 733 | int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, |
| 1015 | struct gnttab_map_grant_ref *kmap_ops, | 734 | struct gnttab_map_grant_ref *kmap_ops, |
| 1016 | struct page **pages, unsigned int count) | 735 | struct page **pages, unsigned int count) |
| 1017 | { | 736 | { |
| 1018 | int i, ret = 0; | 737 | int i, ret = 0; |
| 1019 | bool lazy = false; | 738 | bool lazy = false; |
| 739 | pte_t *pte; | ||
| 1020 | 740 | ||
| 1021 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 741 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
| 1022 | return 0; | 742 | return 0; |
| @@ -1029,35 +749,75 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, | |||
| 1029 | } | 749 | } |
| 1030 | 750 | ||
| 1031 | for (i = 0; i < count; i++) { | 751 | for (i = 0; i < count; i++) { |
| 1032 | unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i])); | 752 | unsigned long mfn, pfn; |
| 1033 | unsigned long pfn = page_to_pfn(pages[i]); | ||
| 1034 | 753 | ||
| 1035 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { | 754 | /* Do not add to override if the map failed. */ |
| 1036 | ret = -EINVAL; | 755 | if (map_ops[i].status) |
| 1037 | goto out; | 756 | continue; |
| 757 | |||
| 758 | if (map_ops[i].flags & GNTMAP_contains_pte) { | ||
| 759 | pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | ||
| 760 | (map_ops[i].host_addr & ~PAGE_MASK)); | ||
| 761 | mfn = pte_mfn(*pte); | ||
| 762 | } else { | ||
| 763 | mfn = PFN_DOWN(map_ops[i].dev_bus_addr); | ||
| 1038 | } | 764 | } |
| 765 | pfn = page_to_pfn(pages[i]); | ||
| 1039 | 766 | ||
| 1040 | set_page_private(pages[i], INVALID_P2M_ENTRY); | 767 | WARN_ON(PagePrivate(pages[i])); |
| 1041 | WARN_ON(!PagePrivate(pages[i])); | 768 | SetPagePrivate(pages[i]); |
| 1042 | ClearPagePrivate(pages[i]); | 769 | set_page_private(pages[i], mfn); |
| 1043 | set_phys_to_machine(pfn, pages[i]->index); | 770 | pages[i]->index = pfn_to_mfn(pfn); |
| 1044 | 771 | ||
| 1045 | if (kmap_ops) | 772 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { |
| 1046 | ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); | 773 | ret = -ENOMEM; |
| 1047 | if (ret) | ||
| 1048 | goto out; | 774 | goto out; |
| 775 | } | ||
| 776 | |||
| 777 | if (kmap_ops) { | ||
| 778 | ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); | ||
| 779 | if (ret) | ||
| 780 | goto out; | ||
| 781 | } | ||
| 1049 | } | 782 | } |
| 1050 | 783 | ||
| 1051 | out: | 784 | out: |
| 1052 | if (lazy) | 785 | if (lazy) |
| 1053 | arch_leave_lazy_mmu_mode(); | 786 | arch_leave_lazy_mmu_mode(); |
| 787 | |||
| 1054 | return ret; | 788 | return ret; |
| 1055 | } | 789 | } |
| 1056 | EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); | 790 | EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); |
| 1057 | 791 | ||
| 1058 | int m2p_remove_override(struct page *page, | 792 | static struct page *m2p_find_override(unsigned long mfn) |
| 1059 | struct gnttab_map_grant_ref *kmap_op, | 793 | { |
| 1060 | unsigned long mfn) | 794 | unsigned long flags; |
| 795 | struct list_head *bucket; | ||
| 796 | struct page *p, *ret; | ||
| 797 | |||
| 798 | if (unlikely(!m2p_overrides)) | ||
| 799 | return NULL; | ||
| 800 | |||
| 801 | ret = NULL; | ||
| 802 | bucket = &m2p_overrides[mfn_hash(mfn)]; | ||
| 803 | |||
| 804 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
| 805 | |||
| 806 | list_for_each_entry(p, bucket, lru) { | ||
| 807 | if (page_private(p) == mfn) { | ||
| 808 | ret = p; | ||
| 809 | break; | ||
| 810 | } | ||
| 811 | } | ||
| 812 | |||
| 813 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
| 814 | |||
| 815 | return ret; | ||
| 816 | } | ||
| 817 | |||
| 818 | static int m2p_remove_override(struct page *page, | ||
| 819 | struct gnttab_map_grant_ref *kmap_op, | ||
| 820 | unsigned long mfn) | ||
| 1061 | { | 821 | { |
| 1062 | unsigned long flags; | 822 | unsigned long flags; |
| 1063 | unsigned long pfn; | 823 | unsigned long pfn; |
| @@ -1072,7 +832,7 @@ int m2p_remove_override(struct page *page, | |||
| 1072 | ptep = lookup_address(address, &level); | 832 | ptep = lookup_address(address, &level); |
| 1073 | 833 | ||
| 1074 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | 834 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, |
| 1075 | "m2p_remove_override: pfn %lx not mapped", pfn)) | 835 | "m2p_remove_override: pfn %lx not mapped", pfn)) |
| 1076 | return -EINVAL; | 836 | return -EINVAL; |
| 1077 | } | 837 | } |
| 1078 | 838 | ||
| @@ -1102,9 +862,8 @@ int m2p_remove_override(struct page *page, | |||
| 1102 | * hypercall actually returned an error. | 862 | * hypercall actually returned an error. |
| 1103 | */ | 863 | */ |
| 1104 | if (kmap_op->handle == GNTST_general_error) { | 864 | if (kmap_op->handle == GNTST_general_error) { |
| 1105 | printk(KERN_WARNING "m2p_remove_override: " | 865 | pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings", |
| 1106 | "pfn %lx mfn %lx, failed to modify kernel mappings", | 866 | pfn, mfn); |
| 1107 | pfn, mfn); | ||
| 1108 | put_balloon_scratch_page(); | 867 | put_balloon_scratch_page(); |
| 1109 | return -1; | 868 | return -1; |
| 1110 | } | 869 | } |
| @@ -1112,14 +871,14 @@ int m2p_remove_override(struct page *page, | |||
| 1112 | xen_mc_batch(); | 871 | xen_mc_batch(); |
| 1113 | 872 | ||
| 1114 | mcs = __xen_mc_entry( | 873 | mcs = __xen_mc_entry( |
| 1115 | sizeof(struct gnttab_unmap_and_replace)); | 874 | sizeof(struct gnttab_unmap_and_replace)); |
| 1116 | unmap_op = mcs.args; | 875 | unmap_op = mcs.args; |
| 1117 | unmap_op->host_addr = kmap_op->host_addr; | 876 | unmap_op->host_addr = kmap_op->host_addr; |
| 1118 | unmap_op->new_addr = scratch_page_address; | 877 | unmap_op->new_addr = scratch_page_address; |
| 1119 | unmap_op->handle = kmap_op->handle; | 878 | unmap_op->handle = kmap_op->handle; |
| 1120 | 879 | ||
| 1121 | MULTI_grant_table_op(mcs.mc, | 880 | MULTI_grant_table_op(mcs.mc, |
| 1122 | GNTTABOP_unmap_and_replace, unmap_op, 1); | 881 | GNTTABOP_unmap_and_replace, unmap_op, 1); |
| 1123 | 882 | ||
| 1124 | mcs = __xen_mc_entry(0); | 883 | mcs = __xen_mc_entry(0); |
| 1125 | MULTI_update_va_mapping(mcs.mc, scratch_page_address, | 884 | MULTI_update_va_mapping(mcs.mc, scratch_page_address, |
| @@ -1145,35 +904,56 @@ int m2p_remove_override(struct page *page, | |||
| 1145 | * pfn again. */ | 904 | * pfn again. */ |
| 1146 | mfn &= ~FOREIGN_FRAME_BIT; | 905 | mfn &= ~FOREIGN_FRAME_BIT; |
| 1147 | pfn = mfn_to_pfn_no_overrides(mfn); | 906 | pfn = mfn_to_pfn_no_overrides(mfn); |
| 1148 | if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && | 907 | if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) && |
| 1149 | m2p_find_override(mfn) == NULL) | 908 | m2p_find_override(mfn) == NULL) |
| 1150 | set_phys_to_machine(pfn, mfn); | 909 | set_phys_to_machine(pfn, mfn); |
| 1151 | 910 | ||
| 1152 | return 0; | 911 | return 0; |
| 1153 | } | 912 | } |
| 1154 | EXPORT_SYMBOL_GPL(m2p_remove_override); | ||
| 1155 | 913 | ||
| 1156 | struct page *m2p_find_override(unsigned long mfn) | 914 | int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, |
| 915 | struct gnttab_map_grant_ref *kmap_ops, | ||
| 916 | struct page **pages, unsigned int count) | ||
| 1157 | { | 917 | { |
| 1158 | unsigned long flags; | 918 | int i, ret = 0; |
| 1159 | struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; | 919 | bool lazy = false; |
| 1160 | struct page *p, *ret; | ||
| 1161 | 920 | ||
| 1162 | ret = NULL; | 921 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
| 922 | return 0; | ||
| 1163 | 923 | ||
| 1164 | spin_lock_irqsave(&m2p_override_lock, flags); | 924 | if (kmap_ops && |
| 925 | !in_interrupt() && | ||
| 926 | paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { | ||
| 927 | arch_enter_lazy_mmu_mode(); | ||
| 928 | lazy = true; | ||
| 929 | } | ||
| 1165 | 930 | ||
| 1166 | list_for_each_entry(p, bucket, lru) { | 931 | for (i = 0; i < count; i++) { |
| 1167 | if (page_private(p) == mfn) { | 932 | unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); |
| 1168 | ret = p; | 933 | unsigned long pfn = page_to_pfn(pages[i]); |
| 1169 | break; | 934 | |
| 935 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { | ||
| 936 | ret = -EINVAL; | ||
| 937 | goto out; | ||
| 1170 | } | 938 | } |
| 1171 | } | ||
| 1172 | 939 | ||
| 1173 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 940 | set_page_private(pages[i], INVALID_P2M_ENTRY); |
| 941 | WARN_ON(!PagePrivate(pages[i])); | ||
| 942 | ClearPagePrivate(pages[i]); | ||
| 943 | set_phys_to_machine(pfn, pages[i]->index); | ||
| 944 | |||
| 945 | if (kmap_ops) | ||
| 946 | ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); | ||
| 947 | if (ret) | ||
| 948 | goto out; | ||
| 949 | } | ||
| 1174 | 950 | ||
| 951 | out: | ||
| 952 | if (lazy) | ||
| 953 | arch_leave_lazy_mmu_mode(); | ||
| 1175 | return ret; | 954 | return ret; |
| 1176 | } | 955 | } |
| 956 | EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); | ||
| 1177 | 957 | ||
| 1178 | unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) | 958 | unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) |
| 1179 | { | 959 | { |
| @@ -1192,79 +972,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn); | |||
| 1192 | #include "debugfs.h" | 972 | #include "debugfs.h" |
| 1193 | static int p2m_dump_show(struct seq_file *m, void *v) | 973 | static int p2m_dump_show(struct seq_file *m, void *v) |
| 1194 | { | 974 | { |
| 1195 | static const char * const level_name[] = { "top", "middle", | ||
| 1196 | "entry", "abnormal", "error"}; | ||
| 1197 | #define TYPE_IDENTITY 0 | ||
| 1198 | #define TYPE_MISSING 1 | ||
| 1199 | #define TYPE_PFN 2 | ||
| 1200 | #define TYPE_UNKNOWN 3 | ||
| 1201 | static const char * const type_name[] = { | 975 | static const char * const type_name[] = { |
| 1202 | [TYPE_IDENTITY] = "identity", | 976 | [P2M_TYPE_IDENTITY] = "identity", |
| 1203 | [TYPE_MISSING] = "missing", | 977 | [P2M_TYPE_MISSING] = "missing", |
| 1204 | [TYPE_PFN] = "pfn", | 978 | [P2M_TYPE_PFN] = "pfn", |
| 1205 | [TYPE_UNKNOWN] = "abnormal"}; | 979 | [P2M_TYPE_UNKNOWN] = "abnormal"}; |
| 1206 | unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; | 980 | unsigned long pfn, first_pfn; |
| 1207 | unsigned int uninitialized_var(prev_level); | 981 | int type, prev_type; |
| 1208 | unsigned int uninitialized_var(prev_type); | 982 | |
| 1209 | 983 | prev_type = xen_p2m_elem_type(0); | |
| 1210 | if (!p2m_top) | 984 | first_pfn = 0; |
| 1211 | return 0; | 985 | |
| 1212 | 986 | for (pfn = 0; pfn < xen_p2m_size; pfn++) { | |
| 1213 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { | 987 | type = xen_p2m_elem_type(pfn); |
| 1214 | unsigned topidx = p2m_top_index(pfn); | 988 | if (type != prev_type) { |
| 1215 | unsigned mididx = p2m_mid_index(pfn); | 989 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, |
| 1216 | unsigned idx = p2m_index(pfn); | 990 | type_name[prev_type]); |
| 1217 | unsigned lvl, type; | ||
| 1218 | |||
| 1219 | lvl = 4; | ||
| 1220 | type = TYPE_UNKNOWN; | ||
| 1221 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
| 1222 | lvl = 0; type = TYPE_MISSING; | ||
| 1223 | } else if (p2m_top[topidx] == NULL) { | ||
| 1224 | lvl = 0; type = TYPE_UNKNOWN; | ||
| 1225 | } else if (p2m_top[topidx][mididx] == NULL) { | ||
| 1226 | lvl = 1; type = TYPE_UNKNOWN; | ||
| 1227 | } else if (p2m_top[topidx][mididx] == p2m_identity) { | ||
| 1228 | lvl = 1; type = TYPE_IDENTITY; | ||
| 1229 | } else if (p2m_top[topidx][mididx] == p2m_missing) { | ||
| 1230 | lvl = 1; type = TYPE_MISSING; | ||
| 1231 | } else if (p2m_top[topidx][mididx][idx] == 0) { | ||
| 1232 | lvl = 2; type = TYPE_UNKNOWN; | ||
| 1233 | } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { | ||
| 1234 | lvl = 2; type = TYPE_IDENTITY; | ||
| 1235 | } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { | ||
| 1236 | lvl = 2; type = TYPE_MISSING; | ||
| 1237 | } else if (p2m_top[topidx][mididx][idx] == pfn) { | ||
| 1238 | lvl = 2; type = TYPE_PFN; | ||
| 1239 | } else if (p2m_top[topidx][mididx][idx] != pfn) { | ||
| 1240 | lvl = 2; type = TYPE_PFN; | ||
| 1241 | } | ||
| 1242 | if (pfn == 0) { | ||
| 1243 | prev_level = lvl; | ||
| 1244 | prev_type = type; | ||
| 1245 | } | ||
| 1246 | if (pfn == MAX_DOMAIN_PAGES-1) { | ||
| 1247 | lvl = 3; | ||
| 1248 | type = TYPE_UNKNOWN; | ||
| 1249 | } | ||
| 1250 | if (prev_type != type) { | ||
| 1251 | seq_printf(m, " [0x%lx->0x%lx] %s\n", | ||
| 1252 | prev_pfn_type, pfn, type_name[prev_type]); | ||
| 1253 | prev_pfn_type = pfn; | ||
| 1254 | prev_type = type; | 991 | prev_type = type; |
| 1255 | } | 992 | first_pfn = pfn; |
| 1256 | if (prev_level != lvl) { | ||
| 1257 | seq_printf(m, " [0x%lx->0x%lx] level %s\n", | ||
| 1258 | prev_pfn_level, pfn, level_name[prev_level]); | ||
| 1259 | prev_pfn_level = pfn; | ||
| 1260 | prev_level = lvl; | ||
| 1261 | } | 993 | } |
| 1262 | } | 994 | } |
| 995 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, | ||
| 996 | type_name[prev_type]); | ||
| 1263 | return 0; | 997 | return 0; |
| 1264 | #undef TYPE_IDENTITY | ||
| 1265 | #undef TYPE_MISSING | ||
| 1266 | #undef TYPE_PFN | ||
| 1267 | #undef TYPE_UNKNOWN | ||
| 1268 | } | 998 | } |
| 1269 | 999 | ||
| 1270 | static int p2m_dump_open(struct inode *inode, struct file *filp) | 1000 | static int p2m_dump_open(struct inode *inode, struct file *filp) |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 29834b3fd87f..dfd77dec8e2b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include "xen-ops.h" | 30 | #include "xen-ops.h" |
| 31 | #include "vdso.h" | 31 | #include "vdso.h" |
| 32 | #include "p2m.h" | 32 | #include "p2m.h" |
| 33 | #include "mmu.h" | ||
| 33 | 34 | ||
| 34 | /* These are code, but not functions. Defined in entry.S */ | 35 | /* These are code, but not functions. Defined in entry.S */ |
| 35 | extern const char xen_hypervisor_callback[]; | 36 | extern const char xen_hypervisor_callback[]; |
| @@ -47,8 +48,19 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; | |||
| 47 | /* Number of pages released from the initial allocation. */ | 48 | /* Number of pages released from the initial allocation. */ |
| 48 | unsigned long xen_released_pages; | 49 | unsigned long xen_released_pages; |
| 49 | 50 | ||
| 50 | /* Buffer used to remap identity mapped pages */ | 51 | /* |
| 51 | unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata; | 52 | * Buffer used to remap identity mapped pages. We only need the virtual space. |
| 53 | * The physical page behind this address is remapped as needed to different | ||
| 54 | * buffer pages. | ||
| 55 | */ | ||
| 56 | #define REMAP_SIZE (P2M_PER_PAGE - 3) | ||
| 57 | static struct { | ||
| 58 | unsigned long next_area_mfn; | ||
| 59 | unsigned long target_pfn; | ||
| 60 | unsigned long size; | ||
| 61 | unsigned long mfns[REMAP_SIZE]; | ||
| 62 | } xen_remap_buf __initdata __aligned(PAGE_SIZE); | ||
| 63 | static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; | ||
| 52 | 64 | ||
| 53 | /* | 65 | /* |
| 54 | * The maximum amount of extra memory compared to the base size. The | 66 | * The maximum amount of extra memory compared to the base size. The |
| @@ -64,7 +76,6 @@ unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata; | |||
| 64 | 76 | ||
| 65 | static void __init xen_add_extra_mem(u64 start, u64 size) | 77 | static void __init xen_add_extra_mem(u64 start, u64 size) |
| 66 | { | 78 | { |
| 67 | unsigned long pfn; | ||
| 68 | int i; | 79 | int i; |
| 69 | 80 | ||
| 70 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { | 81 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
| @@ -84,75 +95,76 @@ static void __init xen_add_extra_mem(u64 start, u64 size) | |||
| 84 | printk(KERN_WARNING "Warning: not enough extra memory regions\n"); | 95 | printk(KERN_WARNING "Warning: not enough extra memory regions\n"); |
| 85 | 96 | ||
| 86 | memblock_reserve(start, size); | 97 | memblock_reserve(start, size); |
| 98 | } | ||
| 87 | 99 | ||
| 88 | xen_max_p2m_pfn = PFN_DOWN(start + size); | 100 | static void __init xen_del_extra_mem(u64 start, u64 size) |
| 89 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { | 101 | { |
| 90 | unsigned long mfn = pfn_to_mfn(pfn); | 102 | int i; |
| 91 | 103 | u64 start_r, size_r; | |
| 92 | if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) | ||
| 93 | continue; | ||
| 94 | WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", | ||
| 95 | pfn, mfn); | ||
| 96 | 104 | ||
| 97 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 105 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
| 106 | start_r = xen_extra_mem[i].start; | ||
| 107 | size_r = xen_extra_mem[i].size; | ||
| 108 | |||
| 109 | /* Start of region. */ | ||
| 110 | if (start_r == start) { | ||
| 111 | BUG_ON(size > size_r); | ||
| 112 | xen_extra_mem[i].start += size; | ||
| 113 | xen_extra_mem[i].size -= size; | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | /* End of region. */ | ||
| 117 | if (start_r + size_r == start + size) { | ||
| 118 | BUG_ON(size > size_r); | ||
| 119 | xen_extra_mem[i].size -= size; | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | /* Mid of region. */ | ||
| 123 | if (start > start_r && start < start_r + size_r) { | ||
| 124 | BUG_ON(start + size > start_r + size_r); | ||
| 125 | xen_extra_mem[i].size = start - start_r; | ||
| 126 | /* Calling memblock_reserve() again is okay. */ | ||
| 127 | xen_add_extra_mem(start + size, start_r + size_r - | ||
| 128 | (start + size)); | ||
| 129 | break; | ||
| 130 | } | ||
| 98 | } | 131 | } |
| 132 | memblock_free(start, size); | ||
| 99 | } | 133 | } |
| 100 | 134 | ||
| 101 | static unsigned long __init xen_do_chunk(unsigned long start, | 135 | /* |
| 102 | unsigned long end, bool release) | 136 | * Called during boot before the p2m list can take entries beyond the |
| 137 | * hypervisor supplied p2m list. Entries in extra mem are to be regarded as | ||
| 138 | * invalid. | ||
| 139 | */ | ||
| 140 | unsigned long __ref xen_chk_extra_mem(unsigned long pfn) | ||
| 103 | { | 141 | { |
| 104 | struct xen_memory_reservation reservation = { | 142 | int i; |
| 105 | .address_bits = 0, | 143 | unsigned long addr = PFN_PHYS(pfn); |
| 106 | .extent_order = 0, | ||
| 107 | .domid = DOMID_SELF | ||
| 108 | }; | ||
| 109 | unsigned long len = 0; | ||
| 110 | unsigned long pfn; | ||
| 111 | int ret; | ||
| 112 | 144 | ||
| 113 | for (pfn = start; pfn < end; pfn++) { | 145 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
| 114 | unsigned long frame; | 146 | if (addr >= xen_extra_mem[i].start && |
| 115 | unsigned long mfn = pfn_to_mfn(pfn); | 147 | addr < xen_extra_mem[i].start + xen_extra_mem[i].size) |
| 148 | return INVALID_P2M_ENTRY; | ||
| 149 | } | ||
| 116 | 150 | ||
| 117 | if (release) { | 151 | return IDENTITY_FRAME(pfn); |
| 118 | /* Make sure pfn exists to start with */ | 152 | } |
| 119 | if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) | ||
| 120 | continue; | ||
| 121 | frame = mfn; | ||
| 122 | } else { | ||
| 123 | if (mfn != INVALID_P2M_ENTRY) | ||
| 124 | continue; | ||
| 125 | frame = pfn; | ||
| 126 | } | ||
| 127 | set_xen_guest_handle(reservation.extent_start, &frame); | ||
| 128 | reservation.nr_extents = 1; | ||
| 129 | 153 | ||
| 130 | ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap, | 154 | /* |
| 131 | &reservation); | 155 | * Mark all pfns of extra mem as invalid in p2m list. |
| 132 | WARN(ret != 1, "Failed to %s pfn %lx err=%d\n", | 156 | */ |
| 133 | release ? "release" : "populate", pfn, ret); | 157 | void __init xen_inv_extra_mem(void) |
| 158 | { | ||
| 159 | unsigned long pfn, pfn_s, pfn_e; | ||
| 160 | int i; | ||
| 134 | 161 | ||
| 135 | if (ret == 1) { | 162 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
| 136 | if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) { | 163 | pfn_s = PFN_DOWN(xen_extra_mem[i].start); |
| 137 | if (release) | 164 | pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size); |
| 138 | break; | 165 | for (pfn = pfn_s; pfn < pfn_e; pfn++) |
| 139 | set_xen_guest_handle(reservation.extent_start, &frame); | 166 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
| 140 | reservation.nr_extents = 1; | ||
| 141 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
| 142 | &reservation); | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | len++; | ||
| 146 | } else | ||
| 147 | break; | ||
| 148 | } | 167 | } |
| 149 | if (len) | ||
| 150 | printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n", | ||
| 151 | release ? "Freeing" : "Populating", | ||
| 152 | start, end, len, | ||
| 153 | release ? "freed" : "added"); | ||
| 154 | |||
| 155 | return len; | ||
| 156 | } | 168 | } |
| 157 | 169 | ||
| 158 | /* | 170 | /* |
| @@ -198,26 +210,62 @@ static unsigned long __init xen_find_pfn_range( | |||
| 198 | return done; | 210 | return done; |
| 199 | } | 211 | } |
| 200 | 212 | ||
| 213 | static int __init xen_free_mfn(unsigned long mfn) | ||
| 214 | { | ||
| 215 | struct xen_memory_reservation reservation = { | ||
| 216 | .address_bits = 0, | ||
| 217 | .extent_order = 0, | ||
| 218 | .domid = DOMID_SELF | ||
| 219 | }; | ||
| 220 | |||
| 221 | set_xen_guest_handle(reservation.extent_start, &mfn); | ||
| 222 | reservation.nr_extents = 1; | ||
| 223 | |||
| 224 | return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | ||
| 225 | } | ||
| 226 | |||
| 201 | /* | 227 | /* |
| 202 | * This releases a chunk of memory and then does the identity map. It's used as | 228 | * This releases a chunk of memory and then does the identity map. It's used |
| 203 | * as a fallback if the remapping fails. | 229 | * as a fallback if the remapping fails. |
| 204 | */ | 230 | */ |
| 205 | static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, | 231 | static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, |
| 206 | unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, | 232 | unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, |
| 207 | unsigned long *released) | 233 | unsigned long *released) |
| 208 | { | 234 | { |
| 235 | unsigned long len = 0; | ||
| 236 | unsigned long pfn, end; | ||
| 237 | int ret; | ||
| 238 | |||
| 209 | WARN_ON(start_pfn > end_pfn); | 239 | WARN_ON(start_pfn > end_pfn); |
| 210 | 240 | ||
| 241 | end = min(end_pfn, nr_pages); | ||
| 242 | for (pfn = start_pfn; pfn < end; pfn++) { | ||
| 243 | unsigned long mfn = pfn_to_mfn(pfn); | ||
| 244 | |||
| 245 | /* Make sure pfn exists to start with */ | ||
| 246 | if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) | ||
| 247 | continue; | ||
| 248 | |||
| 249 | ret = xen_free_mfn(mfn); | ||
| 250 | WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); | ||
| 251 | |||
| 252 | if (ret == 1) { | ||
| 253 | if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY)) | ||
| 254 | break; | ||
| 255 | len++; | ||
| 256 | } else | ||
| 257 | break; | ||
| 258 | } | ||
| 259 | |||
| 211 | /* Need to release pages first */ | 260 | /* Need to release pages first */ |
| 212 | *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true); | 261 | *released += len; |
| 213 | *identity += set_phys_range_identity(start_pfn, end_pfn); | 262 | *identity += set_phys_range_identity(start_pfn, end_pfn); |
| 214 | } | 263 | } |
| 215 | 264 | ||
| 216 | /* | 265 | /* |
| 217 | * Helper function to update both the p2m and m2p tables. | 266 | * Helper function to update the p2m and m2p tables and kernel mapping. |
| 218 | */ | 267 | */ |
| 219 | static unsigned long __init xen_update_mem_tables(unsigned long pfn, | 268 | static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) |
| 220 | unsigned long mfn) | ||
| 221 | { | 269 | { |
| 222 | struct mmu_update update = { | 270 | struct mmu_update update = { |
| 223 | .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, | 271 | .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, |
| @@ -225,161 +273,88 @@ static unsigned long __init xen_update_mem_tables(unsigned long pfn, | |||
| 225 | }; | 273 | }; |
| 226 | 274 | ||
| 227 | /* Update p2m */ | 275 | /* Update p2m */ |
| 228 | if (!early_set_phys_to_machine(pfn, mfn)) { | 276 | if (!set_phys_to_machine(pfn, mfn)) { |
| 229 | WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n", | 277 | WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n", |
| 230 | pfn, mfn); | 278 | pfn, mfn); |
| 231 | return false; | 279 | BUG(); |
| 232 | } | 280 | } |
| 233 | 281 | ||
| 234 | /* Update m2p */ | 282 | /* Update m2p */ |
| 235 | if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) { | 283 | if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) { |
| 236 | WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n", | 284 | WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n", |
| 237 | mfn, pfn); | 285 | mfn, pfn); |
| 238 | return false; | 286 | BUG(); |
| 239 | } | 287 | } |
| 240 | 288 | ||
| 241 | return true; | 289 | /* Update kernel mapping, but not for highmem. */ |
| 290 | if ((pfn << PAGE_SHIFT) >= __pa(high_memory)) | ||
| 291 | return; | ||
| 292 | |||
| 293 | if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT), | ||
| 294 | mfn_pte(mfn, PAGE_KERNEL), 0)) { | ||
| 295 | WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n", | ||
| 296 | mfn, pfn); | ||
| 297 | BUG(); | ||
| 298 | } | ||
| 242 | } | 299 | } |
| 243 | 300 | ||
| 244 | /* | 301 | /* |
| 245 | * This function updates the p2m and m2p tables with an identity map from | 302 | * This function updates the p2m and m2p tables with an identity map from |
| 246 | * start_pfn to start_pfn+size and remaps the underlying RAM of the original | 303 | * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the |
| 247 | * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks | 304 | * original allocation at remap_pfn. The information needed for remapping is |
| 248 | * to not exhaust the reserved brk space. Doing it in properly aligned blocks | 305 | * saved in the memory itself to avoid the need for allocating buffers. The |
| 249 | * ensures we only allocate the minimum required leaf pages in the p2m table. It | 306 | * complete remap information is contained in a list of MFNs each containing |
| 250 | * copies the existing mfns from the p2m table under the 1:1 map, overwrites | 307 | * up to REMAP_SIZE MFNs and the start target PFN for doing the remap. |
| 251 | * them with the identity map and then updates the p2m and m2p tables with the | 308 | * This enables us to preserve the original mfn sequence while doing the |
| 252 | * remapped memory. | 309 | * remapping at a time when the memory management is capable of allocating |
| 310 | * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and | ||
| 311 | * its callers. | ||
| 253 | */ | 312 | */ |
| 254 | static unsigned long __init xen_do_set_identity_and_remap_chunk( | 313 | static void __init xen_do_set_identity_and_remap_chunk( |
| 255 | unsigned long start_pfn, unsigned long size, unsigned long remap_pfn) | 314 | unsigned long start_pfn, unsigned long size, unsigned long remap_pfn) |
| 256 | { | 315 | { |
| 316 | unsigned long buf = (unsigned long)&xen_remap_buf; | ||
| 317 | unsigned long mfn_save, mfn; | ||
| 257 | unsigned long ident_pfn_iter, remap_pfn_iter; | 318 | unsigned long ident_pfn_iter, remap_pfn_iter; |
| 258 | unsigned long ident_start_pfn_align, remap_start_pfn_align; | 319 | unsigned long ident_end_pfn = start_pfn + size; |
| 259 | unsigned long ident_end_pfn_align, remap_end_pfn_align; | ||
| 260 | unsigned long ident_boundary_pfn, remap_boundary_pfn; | ||
| 261 | unsigned long ident_cnt = 0; | ||
| 262 | unsigned long remap_cnt = 0; | ||
| 263 | unsigned long left = size; | 320 | unsigned long left = size; |
| 264 | unsigned long mod; | 321 | unsigned long ident_cnt = 0; |
| 265 | int i; | 322 | unsigned int i, chunk; |
| 266 | 323 | ||
| 267 | WARN_ON(size == 0); | 324 | WARN_ON(size == 0); |
| 268 | 325 | ||
| 269 | BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); | 326 | BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); |
| 270 | 327 | ||
| 271 | /* | 328 | mfn_save = virt_to_mfn(buf); |
| 272 | * Determine the proper alignment to remap memory in P2M_PER_PAGE sized | ||
| 273 | * blocks. We need to keep track of both the existing pfn mapping and | ||
| 274 | * the new pfn remapping. | ||
| 275 | */ | ||
| 276 | mod = start_pfn % P2M_PER_PAGE; | ||
| 277 | ident_start_pfn_align = | ||
| 278 | mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn; | ||
| 279 | mod = remap_pfn % P2M_PER_PAGE; | ||
| 280 | remap_start_pfn_align = | ||
| 281 | mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn; | ||
| 282 | mod = (start_pfn + size) % P2M_PER_PAGE; | ||
| 283 | ident_end_pfn_align = start_pfn + size - mod; | ||
| 284 | mod = (remap_pfn + size) % P2M_PER_PAGE; | ||
| 285 | remap_end_pfn_align = remap_pfn + size - mod; | ||
| 286 | |||
| 287 | /* Iterate over each p2m leaf node in each range */ | ||
| 288 | for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align; | ||
| 289 | ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align; | ||
| 290 | ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) { | ||
| 291 | /* Check we aren't past the end */ | ||
| 292 | BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size); | ||
| 293 | BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size); | ||
| 294 | |||
| 295 | /* Save p2m mappings */ | ||
| 296 | for (i = 0; i < P2M_PER_PAGE; i++) | ||
| 297 | xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i); | ||
| 298 | |||
| 299 | /* Set identity map which will free a p2m leaf */ | ||
| 300 | ident_cnt += set_phys_range_identity(ident_pfn_iter, | ||
| 301 | ident_pfn_iter + P2M_PER_PAGE); | ||
| 302 | 329 | ||
| 303 | #ifdef DEBUG | 330 | for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn; |
| 304 | /* Helps verify a p2m leaf has been freed */ | 331 | ident_pfn_iter < ident_end_pfn; |
| 305 | for (i = 0; i < P2M_PER_PAGE; i++) { | 332 | ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) { |
| 306 | unsigned int pfn = ident_pfn_iter + i; | 333 | chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE; |
| 307 | BUG_ON(pfn_to_mfn(pfn) != pfn); | ||
| 308 | } | ||
| 309 | #endif | ||
| 310 | /* Now remap memory */ | ||
| 311 | for (i = 0; i < P2M_PER_PAGE; i++) { | ||
| 312 | unsigned long mfn = xen_remap_buf[i]; | ||
| 313 | |||
| 314 | /* This will use the p2m leaf freed above */ | ||
| 315 | if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) { | ||
| 316 | WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n", | ||
| 317 | remap_pfn_iter + i, mfn); | ||
| 318 | return 0; | ||
| 319 | } | ||
| 320 | |||
| 321 | remap_cnt++; | ||
| 322 | } | ||
| 323 | |||
| 324 | left -= P2M_PER_PAGE; | ||
| 325 | } | ||
| 326 | |||
| 327 | /* Max boundary space possible */ | ||
| 328 | BUG_ON(left > (P2M_PER_PAGE - 1) * 2); | ||
| 329 | 334 | ||
| 330 | /* Now handle the boundary conditions */ | 335 | /* Map first pfn to xen_remap_buf */ |
| 331 | ident_boundary_pfn = start_pfn; | 336 | mfn = pfn_to_mfn(ident_pfn_iter); |
| 332 | remap_boundary_pfn = remap_pfn; | 337 | set_pte_mfn(buf, mfn, PAGE_KERNEL); |
| 333 | for (i = 0; i < left; i++) { | ||
| 334 | unsigned long mfn; | ||
| 335 | 338 | ||
| 336 | /* These two checks move from the start to end boundaries */ | 339 | /* Save mapping information in page */ |
| 337 | if (ident_boundary_pfn == ident_start_pfn_align) | 340 | xen_remap_buf.next_area_mfn = xen_remap_mfn; |
| 338 | ident_boundary_pfn = ident_pfn_iter; | 341 | xen_remap_buf.target_pfn = remap_pfn_iter; |
| 339 | if (remap_boundary_pfn == remap_start_pfn_align) | 342 | xen_remap_buf.size = chunk; |
| 340 | remap_boundary_pfn = remap_pfn_iter; | 343 | for (i = 0; i < chunk; i++) |
| 344 | xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i); | ||
| 341 | 345 | ||
| 342 | /* Check we aren't past the end */ | 346 | /* Put remap buf into list. */ |
| 343 | BUG_ON(ident_boundary_pfn >= start_pfn + size); | 347 | xen_remap_mfn = mfn; |
| 344 | BUG_ON(remap_boundary_pfn >= remap_pfn + size); | ||
| 345 | |||
| 346 | mfn = pfn_to_mfn(ident_boundary_pfn); | ||
| 347 | |||
| 348 | if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) { | ||
| 349 | WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n", | ||
| 350 | remap_pfn_iter + i, mfn); | ||
| 351 | return 0; | ||
| 352 | } | ||
| 353 | remap_cnt++; | ||
| 354 | 348 | ||
| 355 | ident_boundary_pfn++; | 349 | /* Set identity map */ |
| 356 | remap_boundary_pfn++; | 350 | ident_cnt += set_phys_range_identity(ident_pfn_iter, |
| 357 | } | 351 | ident_pfn_iter + chunk); |
| 358 | 352 | ||
| 359 | /* Finish up the identity map */ | 353 | left -= chunk; |
| 360 | if (ident_start_pfn_align >= ident_end_pfn_align) { | ||
| 361 | /* | ||
| 362 | * In this case we have an identity range which does not span an | ||
| 363 | * aligned block so everything needs to be identity mapped here. | ||
| 364 | * If we didn't check this we might remap too many pages since | ||
| 365 | * the align boundaries are not meaningful in this case. | ||
| 366 | */ | ||
| 367 | ident_cnt += set_phys_range_identity(start_pfn, | ||
| 368 | start_pfn + size); | ||
| 369 | } else { | ||
| 370 | /* Remapped above so check each end of the chunk */ | ||
| 371 | if (start_pfn < ident_start_pfn_align) | ||
| 372 | ident_cnt += set_phys_range_identity(start_pfn, | ||
| 373 | ident_start_pfn_align); | ||
| 374 | if (start_pfn + size > ident_pfn_iter) | ||
| 375 | ident_cnt += set_phys_range_identity(ident_pfn_iter, | ||
| 376 | start_pfn + size); | ||
| 377 | } | 354 | } |
| 378 | 355 | ||
| 379 | BUG_ON(ident_cnt != size); | 356 | /* Restore old xen_remap_buf mapping */ |
| 380 | BUG_ON(remap_cnt != size); | 357 | set_pte_mfn(buf, mfn_save, PAGE_KERNEL); |
| 381 | |||
| 382 | return size; | ||
| 383 | } | 358 | } |
| 384 | 359 | ||
| 385 | /* | 360 | /* |
| @@ -396,8 +371,7 @@ static unsigned long __init xen_do_set_identity_and_remap_chunk( | |||
| 396 | static unsigned long __init xen_set_identity_and_remap_chunk( | 371 | static unsigned long __init xen_set_identity_and_remap_chunk( |
| 397 | const struct e820entry *list, size_t map_size, unsigned long start_pfn, | 372 | const struct e820entry *list, size_t map_size, unsigned long start_pfn, |
| 398 | unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, | 373 | unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, |
| 399 | unsigned long *identity, unsigned long *remapped, | 374 | unsigned long *identity, unsigned long *released) |
| 400 | unsigned long *released) | ||
| 401 | { | 375 | { |
| 402 | unsigned long pfn; | 376 | unsigned long pfn; |
| 403 | unsigned long i = 0; | 377 | unsigned long i = 0; |
| @@ -431,19 +405,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk( | |||
| 431 | if (size > remap_range_size) | 405 | if (size > remap_range_size) |
| 432 | size = remap_range_size; | 406 | size = remap_range_size; |
| 433 | 407 | ||
| 434 | if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) { | 408 | xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn); |
| 435 | WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n", | ||
| 436 | cur_pfn, size, remap_pfn); | ||
| 437 | xen_set_identity_and_release_chunk(cur_pfn, | ||
| 438 | cur_pfn + left, nr_pages, identity, released); | ||
| 439 | break; | ||
| 440 | } | ||
| 441 | 409 | ||
| 442 | /* Update variables to reflect new mappings. */ | 410 | /* Update variables to reflect new mappings. */ |
| 443 | i += size; | 411 | i += size; |
| 444 | remap_pfn += size; | 412 | remap_pfn += size; |
| 445 | *identity += size; | 413 | *identity += size; |
| 446 | *remapped += size; | ||
| 447 | } | 414 | } |
| 448 | 415 | ||
| 449 | /* | 416 | /* |
| @@ -458,13 +425,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk( | |||
| 458 | return remap_pfn; | 425 | return remap_pfn; |
| 459 | } | 426 | } |
| 460 | 427 | ||
| 461 | static unsigned long __init xen_set_identity_and_remap( | 428 | static void __init xen_set_identity_and_remap( |
| 462 | const struct e820entry *list, size_t map_size, unsigned long nr_pages, | 429 | const struct e820entry *list, size_t map_size, unsigned long nr_pages, |
| 463 | unsigned long *released) | 430 | unsigned long *released) |
| 464 | { | 431 | { |
| 465 | phys_addr_t start = 0; | 432 | phys_addr_t start = 0; |
| 466 | unsigned long identity = 0; | 433 | unsigned long identity = 0; |
| 467 | unsigned long remapped = 0; | ||
| 468 | unsigned long last_pfn = nr_pages; | 434 | unsigned long last_pfn = nr_pages; |
| 469 | const struct e820entry *entry; | 435 | const struct e820entry *entry; |
| 470 | unsigned long num_released = 0; | 436 | unsigned long num_released = 0; |
| @@ -494,8 +460,7 @@ static unsigned long __init xen_set_identity_and_remap( | |||
| 494 | last_pfn = xen_set_identity_and_remap_chunk( | 460 | last_pfn = xen_set_identity_and_remap_chunk( |
| 495 | list, map_size, start_pfn, | 461 | list, map_size, start_pfn, |
| 496 | end_pfn, nr_pages, last_pfn, | 462 | end_pfn, nr_pages, last_pfn, |
| 497 | &identity, &remapped, | 463 | &identity, &num_released); |
| 498 | &num_released); | ||
| 499 | start = end; | 464 | start = end; |
| 500 | } | 465 | } |
| 501 | } | 466 | } |
| @@ -503,12 +468,63 @@ static unsigned long __init xen_set_identity_and_remap( | |||
| 503 | *released = num_released; | 468 | *released = num_released; |
| 504 | 469 | ||
| 505 | pr_info("Set %ld page(s) to 1-1 mapping\n", identity); | 470 | pr_info("Set %ld page(s) to 1-1 mapping\n", identity); |
| 506 | pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped, | ||
| 507 | last_pfn); | ||
| 508 | pr_info("Released %ld page(s)\n", num_released); | 471 | pr_info("Released %ld page(s)\n", num_released); |
| 472 | } | ||
| 473 | |||
| 474 | /* | ||
| 475 | * Remap the memory prepared in xen_do_set_identity_and_remap_chunk(). | ||
| 476 | * The remap information (which mfn remap to which pfn) is contained in the | ||
| 477 | * to be remapped memory itself in a linked list anchored at xen_remap_mfn. | ||
| 478 | * This scheme allows to remap the different chunks in arbitrary order while | ||
| 479 | * the resulting mapping will be independant from the order. | ||
| 480 | */ | ||
| 481 | void __init xen_remap_memory(void) | ||
| 482 | { | ||
| 483 | unsigned long buf = (unsigned long)&xen_remap_buf; | ||
| 484 | unsigned long mfn_save, mfn, pfn; | ||
| 485 | unsigned long remapped = 0; | ||
| 486 | unsigned int i; | ||
| 487 | unsigned long pfn_s = ~0UL; | ||
| 488 | unsigned long len = 0; | ||
| 489 | |||
| 490 | mfn_save = virt_to_mfn(buf); | ||
| 491 | |||
| 492 | while (xen_remap_mfn != INVALID_P2M_ENTRY) { | ||
| 493 | /* Map the remap information */ | ||
| 494 | set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL); | ||
| 509 | 495 | ||
| 510 | return last_pfn; | 496 | BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]); |
| 497 | |||
| 498 | pfn = xen_remap_buf.target_pfn; | ||
| 499 | for (i = 0; i < xen_remap_buf.size; i++) { | ||
| 500 | mfn = xen_remap_buf.mfns[i]; | ||
| 501 | xen_update_mem_tables(pfn, mfn); | ||
| 502 | remapped++; | ||
| 503 | pfn++; | ||
| 504 | } | ||
| 505 | if (pfn_s == ~0UL || pfn == pfn_s) { | ||
| 506 | pfn_s = xen_remap_buf.target_pfn; | ||
| 507 | len += xen_remap_buf.size; | ||
| 508 | } else if (pfn_s + len == xen_remap_buf.target_pfn) { | ||
| 509 | len += xen_remap_buf.size; | ||
| 510 | } else { | ||
| 511 | xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len)); | ||
| 512 | pfn_s = xen_remap_buf.target_pfn; | ||
| 513 | len = xen_remap_buf.size; | ||
| 514 | } | ||
| 515 | |||
| 516 | mfn = xen_remap_mfn; | ||
| 517 | xen_remap_mfn = xen_remap_buf.next_area_mfn; | ||
| 518 | } | ||
| 519 | |||
| 520 | if (pfn_s != ~0UL && len) | ||
| 521 | xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len)); | ||
| 522 | |||
| 523 | set_pte_mfn(buf, mfn_save, PAGE_KERNEL); | ||
| 524 | |||
| 525 | pr_info("Remapped %ld page(s)\n", remapped); | ||
| 511 | } | 526 | } |
| 527 | |||
| 512 | static unsigned long __init xen_get_max_pages(void) | 528 | static unsigned long __init xen_get_max_pages(void) |
| 513 | { | 529 | { |
| 514 | unsigned long max_pages = MAX_DOMAIN_PAGES; | 530 | unsigned long max_pages = MAX_DOMAIN_PAGES; |
| @@ -569,7 +585,6 @@ char * __init xen_memory_setup(void) | |||
| 569 | int rc; | 585 | int rc; |
| 570 | struct xen_memory_map memmap; | 586 | struct xen_memory_map memmap; |
| 571 | unsigned long max_pages; | 587 | unsigned long max_pages; |
| 572 | unsigned long last_pfn = 0; | ||
| 573 | unsigned long extra_pages = 0; | 588 | unsigned long extra_pages = 0; |
| 574 | int i; | 589 | int i; |
| 575 | int op; | 590 | int op; |
| @@ -616,17 +631,14 @@ char * __init xen_memory_setup(void) | |||
| 616 | extra_pages += max_pages - max_pfn; | 631 | extra_pages += max_pages - max_pfn; |
| 617 | 632 | ||
| 618 | /* | 633 | /* |
| 619 | * Set identity map on non-RAM pages and remap the underlying RAM. | 634 | * Set identity map on non-RAM pages and prepare remapping the |
| 635 | * underlying RAM. | ||
| 620 | */ | 636 | */ |
| 621 | last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, | 637 | xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, |
| 622 | &xen_released_pages); | 638 | &xen_released_pages); |
| 623 | 639 | ||
| 624 | extra_pages += xen_released_pages; | 640 | extra_pages += xen_released_pages; |
| 625 | 641 | ||
| 626 | if (last_pfn > max_pfn) { | ||
| 627 | max_pfn = min(MAX_DOMAIN_PAGES, last_pfn); | ||
| 628 | mem_end = PFN_PHYS(max_pfn); | ||
| 629 | } | ||
| 630 | /* | 642 | /* |
| 631 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO | 643 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO |
| 632 | * factor the base size. On non-highmem systems, the base | 644 | * factor the base size. On non-highmem systems, the base |
| @@ -653,6 +665,7 @@ char * __init xen_memory_setup(void) | |||
| 653 | size = min(size, (u64)extra_pages * PAGE_SIZE); | 665 | size = min(size, (u64)extra_pages * PAGE_SIZE); |
| 654 | extra_pages -= size / PAGE_SIZE; | 666 | extra_pages -= size / PAGE_SIZE; |
| 655 | xen_add_extra_mem(addr, size); | 667 | xen_add_extra_mem(addr, size); |
| 668 | xen_max_p2m_pfn = PFN_DOWN(addr + size); | ||
| 656 | } else | 669 | } else |
| 657 | type = E820_UNUSABLE; | 670 | type = E820_UNUSABLE; |
| 658 | } | 671 | } |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 4ab9298c5e17..5686bd9d58cc 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
| @@ -29,11 +29,13 @@ void xen_build_mfn_list_list(void); | |||
| 29 | void xen_setup_machphys_mapping(void); | 29 | void xen_setup_machphys_mapping(void); |
| 30 | void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | 30 | void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); |
| 31 | void xen_reserve_top(void); | 31 | void xen_reserve_top(void); |
| 32 | extern unsigned long xen_max_p2m_pfn; | ||
| 33 | 32 | ||
| 34 | void xen_mm_pin_all(void); | 33 | void xen_mm_pin_all(void); |
| 35 | void xen_mm_unpin_all(void); | 34 | void xen_mm_unpin_all(void); |
| 36 | 35 | ||
| 36 | unsigned long __ref xen_chk_extra_mem(unsigned long pfn); | ||
| 37 | void __init xen_inv_extra_mem(void); | ||
| 38 | void __init xen_remap_memory(void); | ||
| 37 | char * __init xen_memory_setup(void); | 39 | char * __init xen_memory_setup(void); |
| 38 | char * xen_auto_xlated_memory_setup(void); | 40 | char * xen_auto_xlated_memory_setup(void); |
| 39 | void __init xen_arch_setup(void); | 41 | void __init xen_arch_setup(void); |
| @@ -46,7 +48,7 @@ void xen_hvm_init_shared_info(void); | |||
| 46 | void xen_unplug_emulated_devices(void); | 48 | void xen_unplug_emulated_devices(void); |
| 47 | 49 | ||
| 48 | void __init xen_build_dynamic_phys_to_machine(void); | 50 | void __init xen_build_dynamic_phys_to_machine(void); |
| 49 | unsigned long __init xen_revector_p2m_tree(void); | 51 | void __init xen_vmalloc_p2m_tree(void); |
| 50 | 52 | ||
| 51 | void xen_init_irq_ops(void); | 53 | void xen_init_irq_ops(void); |
| 52 | void xen_setup_timer(int cpu); | 54 | void xen_setup_timer(int cpu); |
