author     Juergen Gross <jgross@suse.com>         2014-11-28 05:53:58 -0500
committer  David Vrabel <david.vrabel@citrix.com>  2014-12-04 09:09:15 -0500
commit     054954eb051f35e74b75a566a96fe756015352c8
tree       df657656b63e19dbfa2bbf7bb21f87e45d3d3ddc  /arch/x86/xen
parent     0aad5689837c882d2539f50f42f686b74046c0a0
xen: switch to linear virtual mapped sparse p2m list
At start of day the Xen hypervisor presents a contiguous mfn list to a pv-domain. In order to support sparse memory, this mfn list is accessed via a three-level p2m tree built early in the boot process. Whenever the system needs the mfn associated with a pfn, this tree is walked to find it.

Instead of using a software-walked tree to access a specific mfn list entry, this patch creates a virtual address area for the entire possible mfn list, including memory holes. The holes are covered by mapping a pre-defined page consisting only of "invalid mfn" entries. An mfn entry can then be accessed by using the virtual base address of the mfn list with the pfn as index into that list. This speeds up the (hot) path of determining the mfn of a pfn.

A kernel build on a Dell Latitude E6440 (2 cores, HT) in a 64 bit Dom0 showed the following improvements:

Elapsed time: 32:50 ->  32:35
System:       18:07 ->  17:47
User:        104:00 -> 103:30

Tested with the following configurations:

- 64 bit dom0, 8 GB RAM
- 64 bit dom0, 128 GB RAM, PCI-area above 4 GB
- 32 bit domU, 512 MB, 8 GB, 43 GB (more wouldn't work even without the patch)
- 32 bit domU, ballooning up and down
- 32 bit domU, save and restore
- 32 bit domU with PCI passthrough
- 64 bit domU, 8 GB, 2049 MB, 5000 MB
- 64 bit domU, ballooning up and down
- 64 bit domU, save and restore
- 64 bit domU with PCI passthrough

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
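To make the change concrete, here is a minimal sketch of the two lookup schemes. It is an illustration only, not the patch's code: the constants and the xen_p2m_addr/p2m_top names come from arch/x86/xen/p2m.c, while the two helper functions are invented for the comparison.

	#define P2M_PER_PAGE		512	/* leaf entries per page (64 bit: 4096 / 8) */
	#define P2M_MID_PER_PAGE	512	/* leaf pages per mid page */

	/* Old scheme: software-walked three-level tree (top -> mid -> leaf). */
	static unsigned long ***p2m_top;

	static unsigned long pfn_to_mfn_tree(unsigned long pfn)	/* hypothetical helper */
	{
		unsigned long topidx = pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
		unsigned long mididx = (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
		unsigned long idx    = pfn % P2M_PER_PAGE;

		return p2m_top[topidx][mididx][idx];	/* three dependent loads */
	}

	/* New scheme: the whole mfn list is mapped linearly at xen_p2m_addr;
	 * holes are backed by a shared page of "invalid mfn" entries, so
	 * indexing is always safe. */
	static unsigned long *xen_p2m_addr;

	static unsigned long pfn_to_mfn_linear(unsigned long pfn)	/* hypothetical helper */
	{
		return xen_p2m_addr[pfn];	/* one load, no tree walk */
	}

The win comes from replacing three dependent pointer loads (and the cache footprint of the tree) with a single array access; the sparseness problem is pushed into the page tables instead of into software.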
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--  arch/x86/xen/mmu.c      |  34
-rw-r--r--  arch/x86/xen/p2m.c      | 735
-rw-r--r--  arch/x86/xen/xen-ops.h  |   2
3 files changed, 331 insertions(+), 440 deletions(-)
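The hole-covering trick (one pre-defined page of "invalid mfn" entries mapped under every hole) can be demonstrated in user space. The sketch below is only an analogy under POSIX/Linux assumptions (memfd_create, MAP_FIXED; error handling omitted), not kernel code; in the patch the same effect is obtained with set_pte()/set_pmd() pointing at p2m_missing and p2m_missing_pte.

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#define INVALID_ENTRY	(~0UL)

	int main(void)
	{
		long page = sysconf(_SC_PAGESIZE);
		size_t per_page = page / sizeof(unsigned long);
		size_t npages = 8;			/* a tiny "mfn list" */

		/* Reserve the whole virtual range of the list, initially unbacked. */
		unsigned long *p2m = mmap(NULL, npages * page, PROT_NONE,
					  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		/* One shared page holding nothing but the invalid marker. */
		int fd = memfd_create("p2m_missing", 0);
		ftruncate(fd, page);
		unsigned long *missing = mmap(NULL, page, PROT_READ | PROT_WRITE,
					      MAP_SHARED, fd, 0);
		for (size_t i = 0; i < per_page; i++)
			missing[i] = INVALID_ENTRY;

		/* Back pages 2..5 of the range (a "memory hole") with that same page. */
		for (size_t i = 2; i <= 5; i++)
			mmap((char *)p2m + i * page, page, PROT_READ,
			     MAP_SHARED | MAP_FIXED, fd, 0);

		/* A lookup inside the hole just indexes the array and sees "invalid". */
		printf("entry in hole: %lx\n", p2m[3 * per_page]);
		return 0;
	}

In the kernel the same idea is expressed in xen_rebuild_p2m_list() below, which points whole PTEs or PMDs at p2m_missing/p2m_identity rather than allocating a backing page per hole.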
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3e3f8f8c3a30..6ab6150c8560 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1158,20 +1158,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
1158 * instead of somewhere later and be confusing. */ 1158 * instead of somewhere later and be confusing. */
1159 xen_mc_flush(); 1159 xen_mc_flush();
1160} 1160}
1161static void __init xen_pagetable_p2m_copy(void) 1161
1162static void __init xen_pagetable_p2m_free(void)
1162{ 1163{
1163 unsigned long size; 1164 unsigned long size;
1164 unsigned long addr; 1165 unsigned long addr;
1165 unsigned long new_mfn_list;
1166
1167 if (xen_feature(XENFEAT_auto_translated_physmap))
1168 return;
1169 1166
1170 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1167 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1171 1168
1172 new_mfn_list = xen_revector_p2m_tree();
1173 /* No memory or already called. */ 1169 /* No memory or already called. */
1174 if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) 1170 if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
1175 return; 1171 return;
1176 1172
1177 /* using __ka address and sticking INVALID_P2M_ENTRY! */ 1173 /* using __ka address and sticking INVALID_P2M_ENTRY! */
@@ -1189,8 +1185,6 @@ static void __init xen_pagetable_p2m_copy(void)
1189 1185
1190 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1186 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1191 memblock_free(__pa(xen_start_info->mfn_list), size); 1187 memblock_free(__pa(xen_start_info->mfn_list), size);
1192 /* And revector! Bye bye old array */
1193 xen_start_info->mfn_list = new_mfn_list;
1194 1188
1195 /* At this stage, cleanup_highmap has already cleaned __ka space 1189 /* At this stage, cleanup_highmap has already cleaned __ka space
1196 * from _brk_limit way up to the max_pfn_mapped (which is the end of 1190 * from _brk_limit way up to the max_pfn_mapped (which is the end of
@@ -1214,14 +1208,26 @@ static void __init xen_pagetable_p2m_copy(void)
1214} 1208}
1215#endif 1209#endif
1216 1210
1217static void __init xen_pagetable_init(void) 1211static void __init xen_pagetable_p2m_setup(void)
1218{ 1212{
1219 paging_init(); 1213 if (xen_feature(XENFEAT_auto_translated_physmap))
1214 return;
1215
1216 xen_vmalloc_p2m_tree();
1217
1220#ifdef CONFIG_X86_64 1218#ifdef CONFIG_X86_64
1221 xen_pagetable_p2m_copy(); 1219 xen_pagetable_p2m_free();
1222#else
1223 xen_revector_p2m_tree();
1224#endif 1220#endif
1221 /* And revector! Bye bye old array */
1222 xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
1223}
1224
1225static void __init xen_pagetable_init(void)
1226{
1227 paging_init();
1228
1229 xen_pagetable_p2m_setup();
1230
1225 /* Allocate and initialize top and mid mfn levels for p2m structure */ 1231 /* Allocate and initialize top and mid mfn levels for p2m structure */
1226 xen_build_mfn_list_list(); 1232 xen_build_mfn_list_list();
1227 1233
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 8c3d8fbbba93..7d844739e513 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -3,21 +3,22 @@
3 * guests themselves, but it must also access and update the p2m array 3 * guests themselves, but it must also access and update the p2m array
4 * during suspend/resume when all the pages are reallocated. 4 * during suspend/resume when all the pages are reallocated.
5 * 5 *
6 * The p2m table is logically a flat array, but we implement it as a 6 * The logical flat p2m table is mapped to a linear kernel memory area.
7 * three-level tree to allow the address space to be sparse. 7 * For accesses by Xen a three-level tree linked via mfns only is set up to
8 * allow the address space to be sparse.
8 * 9 *
9 * Xen 10 * Xen
10 * | 11 * |
11 * p2m_top p2m_top_mfn 12 * p2m_top_mfn
12 * / \ / \ 13 * / \
13 * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn 14 * p2m_mid_mfn p2m_mid_mfn
14 * / \ / \ / / 15 * / /
15 * p2m p2m p2m p2m p2m p2m p2m ... 16 * p2m p2m p2m ...
16 * 17 *
17 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. 18 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
18 * 19 *
19 * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the 20 * The p2m_top_mfn level is limited to 1 page, so the maximum representable
20 * maximum representable pseudo-physical address space is: 21 * pseudo-physical address space is:
21 * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages 22 * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
22 * 23 *
23 * P2M_PER_PAGE depends on the architecture, as a mfn is always 24 * P2M_PER_PAGE depends on the architecture, as a mfn is always
@@ -30,6 +31,9 @@
30 * leaf entries, or for the top root, or middle one, for which there is a void 31 * leaf entries, or for the top root, or middle one, for which there is a void
31 * entry, we assume it is "missing". So (for example) 32 * entry, we assume it is "missing". So (for example)
32 * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. 33 * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
34 * We have a dedicated page p2m_missing with all entries being
35 * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
36 * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
33 * 37 *
34 * We also have the possibility of setting 1-1 mappings on certain regions, so 38 * We also have the possibility of setting 1-1 mappings on certain regions, so
35 * that: 39 * that:
@@ -39,122 +43,20 @@
39 * PCI BARs, or ACPI spaces), we can create mappings easily because we 43 * PCI BARs, or ACPI spaces), we can create mappings easily because we
40 * get the PFN value to match the MFN. 44 * get the PFN value to match the MFN.
41 * 45 *
42 * For this to work efficiently we have one new page p2m_identity and 46 * For this to work efficiently we have one new page p2m_identity. All entries
43 * allocate (via reserved_brk) any other pages we need to cover the sides 47 * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only
44 * (1GB or 4MB boundary violations). All entries in p2m_identity are set to 48 * recognizes that and MFNs, no other fancy value).
45 * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
46 * no other fancy value).
47 * 49 *
48 * On lookup we spot that the entry points to p2m_identity and return the 50 * On lookup we spot that the entry points to p2m_identity and return the
49 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. 51 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
50 * If the entry points to an allocated page, we just proceed as before and 52 * If the entry points to an allocated page, we just proceed as before and
51 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in 53 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
52 * appropriate functions (pfn_to_mfn). 54 * appropriate functions (pfn_to_mfn).
53 * 55 *
54 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the 56 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
55 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a 57 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
56 * non-identity pfn. To protect ourselves against we elect to set (and get) the 58 * non-identity pfn. To protect ourselves against we elect to set (and get) the
57 * IDENTITY_FRAME_BIT on all identity mapped PFNs. 59 * IDENTITY_FRAME_BIT on all identity mapped PFNs.
58 *
59 * This simplistic diagram is used to explain the more subtle piece of code.
60 * There is also a digram of the P2M at the end that can help.
61 * Imagine your E820 looking as so:
62 *
63 * 1GB 2GB 4GB
64 * /-------------------+---------\/----\ /----------\ /---+-----\
65 * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
66 * \-------------------+---------/\----/ \----------/ \---+-----/
67 * ^- 1029MB ^- 2001MB
68 *
69 * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
70 * 2048MB = 524288 (0x80000)]
71 *
72 * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
73 * is actually not present (would have to kick the balloon driver to put it in).
74 *
75 * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
76 * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
77 * of the PFN and the end PFN (263424 and 512256 respectively). The first step
78 * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
79 * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
80 * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as
81 * required to split any existing p2m_mid_missing middle pages.
82 *
83 * With the E820 example above, 263424 is not 1GB aligned so we allocate a
84 * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
85 * Each entry in the allocate page is "missing" (points to p2m_missing).
86 *
87 * Next stage is to determine if we need to do a more granular boundary check
88 * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
89 * We check if the start pfn and end pfn violate that boundary check, and if
90 * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer
91 * granularity of setting which PFNs are missing and which ones are identity.
92 * In our example 263424 and 512256 both fail the check so we reserve_brk two
93 * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
94 * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
95 *
96 * At this point we would at minimum reserve_brk one page, but could be up to
97 * three. Each call to set_phys_range_identity has at maximum a three page
98 * cost. If we were to query the P2M at this stage, all those entries from
99 * start PFN through end PFN (so 1029MB -> 2001MB) would return
100 * INVALID_P2M_ENTRY ("missing").
101 *
102 * The next step is to walk from the start pfn to the end pfn setting
103 * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
104 * If we find that the middle entry is pointing to p2m_missing we can swap it
105 * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and
106 * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions).
107 * At this point we do not need to worry about boundary aligment (so no need to
108 * reserve_brk a middle page, figure out which PFNs are "missing" and which
109 * ones are identity), as that has been done earlier. If we find that the
110 * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
111 * that page (which covers 512 PFNs) and set the appropriate PFN with
112 * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
113 * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
114 * IDENTITY_FRAME_BIT set.
115 *
116 * All other regions that are void (or not filled) either point to p2m_missing
117 * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
118 * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
119 * contain the INVALID_P2M_ENTRY value and are considered "missing."
120 *
121 * Finally, the region beyond the end of of the E820 (4 GB in this example)
122 * is set to be identity (in case there are MMIO regions placed here).
123 *
124 * This is what the p2m ends up looking (for the E820 above) with this
125 * fabulous drawing:
126 *
127 * p2m /--------------\
128 * /-----\ | &mfn_list[0],| /-----------------\
129 * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
130 * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
131 * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
132 * |-----| \ | [p2m_identity]+\\ | .... |
133 * | 2 |--\ \-------------------->| ... | \\ \----------------/
134 * |-----| \ \---------------/ \\
135 * | 3 |-\ \ \\ p2m_identity [1]
136 * |-----| \ \-------------------->/---------------\ /-----------------\
137 * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... |
138 * \-----/ | | | [p2m_identity]+-->| ..., ~0 |
139 * | | | .... | \-----------------/
140 * | | +-[x], ~0, ~0.. +\
141 * | | \---------------/ \
142 * | | \-> /---------------\
143 * | V p2m_mid_missing p2m_missing | IDENTITY[@0] |
144 * | /-----------------\ /------------\ | IDENTITY[@256]|
145 * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... |
146 * | | [p2m_missing] +---->| ..., ~0 | \---------------/
147 * | | ... | \------------/
148 * | \-----------------/
149 * |
150 * | p2m_mid_identity
151 * | /-----------------\
152 * \-->| [p2m_identity] +---->[1]
153 * | [p2m_identity] +---->[1]
154 * | ... |
155 * \-----------------/
156 *
157 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
158 */ 60 */
159 61
160#include <linux/init.h> 62#include <linux/init.h>
@@ -179,6 +81,8 @@
179#include "multicalls.h" 81#include "multicalls.h"
180#include "xen-ops.h" 82#include "xen-ops.h"
181 83
84#define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE)
85
182static void __init m2p_override_init(void); 86static void __init m2p_override_init(void);
183 87
184unsigned long *xen_p2m_addr __read_mostly; 88unsigned long *xen_p2m_addr __read_mostly;
@@ -188,22 +92,15 @@ EXPORT_SYMBOL_GPL(xen_p2m_size);
188unsigned long xen_max_p2m_pfn __read_mostly; 92unsigned long xen_max_p2m_pfn __read_mostly;
189EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); 93EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
190 94
95static DEFINE_SPINLOCK(p2m_update_lock);
96
191static unsigned long *p2m_mid_missing_mfn; 97static unsigned long *p2m_mid_missing_mfn;
192static unsigned long *p2m_top_mfn; 98static unsigned long *p2m_top_mfn;
193static unsigned long **p2m_top_mfn_p; 99static unsigned long **p2m_top_mfn_p;
194 100static unsigned long *p2m_missing;
195/* Placeholders for holes in the address space */ 101static unsigned long *p2m_identity;
196static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); 102static pte_t *p2m_missing_pte;
197static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); 103static pte_t *p2m_identity_pte;
198
199static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
200
201static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
202static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
203
204RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
205
206static int use_brk = 1;
207 104
208static inline unsigned p2m_top_index(unsigned long pfn) 105static inline unsigned p2m_top_index(unsigned long pfn)
209{ 106{
@@ -221,14 +118,6 @@ static inline unsigned p2m_index(unsigned long pfn)
221 return pfn % P2M_PER_PAGE; 118 return pfn % P2M_PER_PAGE;
222} 119}
223 120
224static void p2m_top_init(unsigned long ***top)
225{
226 unsigned i;
227
228 for (i = 0; i < P2M_TOP_PER_PAGE; i++)
229 top[i] = p2m_mid_missing;
230}
231
232static void p2m_top_mfn_init(unsigned long *top) 121static void p2m_top_mfn_init(unsigned long *top)
233{ 122{
234 unsigned i; 123 unsigned i;
@@ -245,35 +134,32 @@ static void p2m_top_mfn_p_init(unsigned long **top)
245 top[i] = p2m_mid_missing_mfn; 134 top[i] = p2m_mid_missing_mfn;
246} 135}
247 136
248static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) 137static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
249{ 138{
250 unsigned i; 139 unsigned i;
251 140
252 for (i = 0; i < P2M_MID_PER_PAGE; i++) 141 for (i = 0; i < P2M_MID_PER_PAGE; i++)
253 mid[i] = leaf; 142 mid[i] = virt_to_mfn(leaf);
254} 143}
255 144
256static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) 145static void p2m_init(unsigned long *p2m)
257{ 146{
258 unsigned i; 147 unsigned i;
259 148
260 for (i = 0; i < P2M_MID_PER_PAGE; i++) 149 for (i = 0; i < P2M_PER_PAGE; i++)
261 mid[i] = virt_to_mfn(leaf); 150 p2m[i] = INVALID_P2M_ENTRY;
262} 151}
263 152
264static void p2m_init(unsigned long *p2m) 153static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
265{ 154{
266 unsigned i; 155 unsigned i;
267 156
268 for (i = 0; i < P2M_MID_PER_PAGE; i++) 157 for (i = 0; i < P2M_PER_PAGE; i++)
269 p2m[i] = INVALID_P2M_ENTRY; 158 p2m[i] = IDENTITY_FRAME(pfn + i);
270} 159}
271 160
272static void * __ref alloc_p2m_page(void) 161static void * __ref alloc_p2m_page(void)
273{ 162{
274 if (unlikely(use_brk))
275 return extend_brk(PAGE_SIZE, PAGE_SIZE);
276
277 if (unlikely(!slab_is_available())) 163 if (unlikely(!slab_is_available()))
278 return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); 164 return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
279 165
@@ -299,7 +185,10 @@ static void free_p2m_page(void *p)
299 */ 185 */
300void __ref xen_build_mfn_list_list(void) 186void __ref xen_build_mfn_list_list(void)
301{ 187{
302 unsigned long pfn; 188 unsigned long pfn, mfn;
189 pte_t *ptep;
190 unsigned int level, topidx, mididx;
191 unsigned long *mid_mfn_p;
303 192
304 if (xen_feature(XENFEAT_auto_translated_physmap)) 193 if (xen_feature(XENFEAT_auto_translated_physmap))
305 return; 194 return;
@@ -319,20 +208,23 @@ void __ref xen_build_mfn_list_list(void)
319 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 208 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
320 } 209 }
321 210
322 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { 211 for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
323 unsigned topidx = p2m_top_index(pfn); 212 pfn += P2M_PER_PAGE) {
324 unsigned mididx = p2m_mid_index(pfn); 213 topidx = p2m_top_index(pfn);
325 unsigned long **mid; 214 mididx = p2m_mid_index(pfn);
326 unsigned long *mid_mfn_p;
327 215
328 mid = p2m_top[topidx];
329 mid_mfn_p = p2m_top_mfn_p[topidx]; 216 mid_mfn_p = p2m_top_mfn_p[topidx];
217 ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
218 &level);
219 BUG_ON(!ptep || level != PG_LEVEL_4K);
220 mfn = pte_mfn(*ptep);
221 ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
330 222
331 /* Don't bother allocating any mfn mid levels if 223 /* Don't bother allocating any mfn mid levels if
332 * they're just missing, just update the stored mfn, 224 * they're just missing, just update the stored mfn,
333 * since all could have changed over a migrate. 225 * since all could have changed over a migrate.
334 */ 226 */
335 if (mid == p2m_mid_missing) { 227 if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
336 BUG_ON(mididx); 228 BUG_ON(mididx);
337 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); 229 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
338 p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); 230 p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
@@ -341,11 +233,6 @@ void __ref xen_build_mfn_list_list(void)
341 } 233 }
342 234
343 if (mid_mfn_p == p2m_mid_missing_mfn) { 235 if (mid_mfn_p == p2m_mid_missing_mfn) {
344 /*
345 * XXX boot-time only! We should never find
346 * missing parts of the mfn tree after
347 * runtime.
348 */
349 mid_mfn_p = alloc_p2m_page(); 236 mid_mfn_p = alloc_p2m_page();
350 p2m_mid_mfn_init(mid_mfn_p, p2m_missing); 237 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
351 238
@@ -353,7 +240,7 @@ void __ref xen_build_mfn_list_list(void)
353 } 240 }
354 241
355 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); 242 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
356 mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); 243 mid_mfn_p[mididx] = mfn;
357 } 244 }
358} 245}
359 246
@@ -372,154 +259,153 @@ void xen_setup_mfn_list_list(void)
372/* Set up p2m_top to point to the domain-builder provided p2m pages */ 259/* Set up p2m_top to point to the domain-builder provided p2m pages */
373void __init xen_build_dynamic_phys_to_machine(void) 260void __init xen_build_dynamic_phys_to_machine(void)
374{ 261{
375 unsigned long *mfn_list;
376 unsigned long max_pfn;
377 unsigned long pfn; 262 unsigned long pfn;
378 263
379 if (xen_feature(XENFEAT_auto_translated_physmap)) 264 if (xen_feature(XENFEAT_auto_translated_physmap))
380 return; 265 return;
381 266
382 xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; 267 xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
383 mfn_list = (unsigned long *)xen_start_info->mfn_list; 268 xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);
384 max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
385 xen_max_p2m_pfn = max_pfn;
386 xen_p2m_size = max_pfn;
387 269
388 p2m_missing = alloc_p2m_page(); 270 for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++)
389 p2m_init(p2m_missing); 271 xen_p2m_addr[pfn] = INVALID_P2M_ENTRY;
390 p2m_identity = alloc_p2m_page();
391 p2m_init(p2m_identity);
392 272
393 p2m_mid_missing = alloc_p2m_page(); 273 xen_max_p2m_pfn = xen_p2m_size;
394 p2m_mid_init(p2m_mid_missing, p2m_missing); 274}
395 p2m_mid_identity = alloc_p2m_page();
396 p2m_mid_init(p2m_mid_identity, p2m_identity);
397 275
398 p2m_top = alloc_p2m_page(); 276#define P2M_TYPE_IDENTITY 0
399 p2m_top_init(p2m_top); 277#define P2M_TYPE_MISSING 1
278#define P2M_TYPE_PFN 2
279#define P2M_TYPE_UNKNOWN 3
400 280
401 /* 281static int xen_p2m_elem_type(unsigned long pfn)
402 * The domain builder gives us a pre-constructed p2m array in 282{
403 * mfn_list for all the pages initially given to us, so we just 283 unsigned long mfn;
404 * need to graft that into our tree structure.
405 */
406 for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
407 unsigned topidx = p2m_top_index(pfn);
408 unsigned mididx = p2m_mid_index(pfn);
409 284
410 if (p2m_top[topidx] == p2m_mid_missing) { 285 if (pfn >= xen_p2m_size)
411 unsigned long **mid = alloc_p2m_page(); 286 return P2M_TYPE_IDENTITY;
412 p2m_mid_init(mid, p2m_missing);
413 287
414 p2m_top[topidx] = mid; 288 mfn = xen_p2m_addr[pfn];
415 }
416 289
417 /* 290 if (mfn == INVALID_P2M_ENTRY)
418 * As long as the mfn_list has enough entries to completely 291 return P2M_TYPE_MISSING;
419 * fill a p2m page, pointing into the array is ok. But if
420 * not the entries beyond the last pfn will be undefined.
421 */
422 if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
423 unsigned long p2midx;
424 292
425 p2midx = max_pfn % P2M_PER_PAGE; 293 if (mfn & IDENTITY_FRAME_BIT)
426 for ( ; p2midx < P2M_PER_PAGE; p2midx++) 294 return P2M_TYPE_IDENTITY;
427 mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY; 295
428 } 296 return P2M_TYPE_PFN;
429 p2m_top[topidx][mididx] = &mfn_list[pfn];
430 }
431} 297}
432#ifdef CONFIG_X86_64 298
433unsigned long __init xen_revector_p2m_tree(void) 299static void __init xen_rebuild_p2m_list(unsigned long *p2m)
434{ 300{
435 unsigned long va_start; 301 unsigned int i, chunk;
436 unsigned long va_end;
437 unsigned long pfn; 302 unsigned long pfn;
438 unsigned long pfn_free = 0; 303 unsigned long *mfns;
439 unsigned long *mfn_list = NULL; 304 pte_t *ptep;
440 unsigned long size; 305 pmd_t *pmdp;
441 306 int type;
442 use_brk = 0;
443 va_start = xen_start_info->mfn_list;
444 /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
445 * so make sure it is rounded up to that */
446 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
447 va_end = va_start + size;
448
449 /* If we were revectored already, don't do it again. */
450 if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
451 return 0;
452
453 mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
454 if (!mfn_list) {
455 pr_warn("Could not allocate space for a new P2M tree!\n");
456 return xen_start_info->mfn_list;
457 }
458 /* Fill it out with INVALID_P2M_ENTRY value */
459 memset(mfn_list, 0xFF, size);
460 307
461 for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { 308 p2m_missing = alloc_p2m_page();
462 unsigned topidx = p2m_top_index(pfn); 309 p2m_init(p2m_missing);
463 unsigned mididx; 310 p2m_identity = alloc_p2m_page();
464 unsigned long *mid_p; 311 p2m_init(p2m_identity);
465
466 if (!p2m_top[topidx])
467 continue;
468 312
469 if (p2m_top[topidx] == p2m_mid_missing) 313 p2m_missing_pte = alloc_p2m_page();
470 continue; 314 paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
315 p2m_identity_pte = alloc_p2m_page();
316 paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
317 for (i = 0; i < PTRS_PER_PTE; i++) {
318 set_pte(p2m_missing_pte + i,
319 pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL));
320 set_pte(p2m_identity_pte + i,
321 pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL));
322 }
471 323
472 mididx = p2m_mid_index(pfn); 324 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
473 mid_p = p2m_top[topidx][mididx]; 325 /*
474 if (!mid_p) 326 * Try to map missing/identity PMDs or p2m-pages if possible.
475 continue; 327 * We have to respect the structure of the mfn_list_list
476 if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) 328 * which will be built just afterwards.
329 * Chunk size to test is one p2m page if we are in the middle
330 * of a mfn_list_list mid page and the complete mid page area
331 * if we are at index 0 of the mid page. Please note that a
332 * mid page might cover more than one PMD, e.g. on 32 bit PAE
333 * kernels.
334 */
335 chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
336 P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;
337
338 type = xen_p2m_elem_type(pfn);
339 i = 0;
340 if (type != P2M_TYPE_PFN)
341 for (i = 1; i < chunk; i++)
342 if (xen_p2m_elem_type(pfn + i) != type)
343 break;
344 if (i < chunk)
345 /* Reset to minimal chunk size. */
346 chunk = P2M_PER_PAGE;
347
348 if (type == P2M_TYPE_PFN || i < chunk) {
349 /* Use initial p2m page contents. */
350#ifdef CONFIG_X86_64
351 mfns = alloc_p2m_page();
352 copy_page(mfns, xen_p2m_addr + pfn);
353#else
354 mfns = xen_p2m_addr + pfn;
355#endif
356 ptep = populate_extra_pte((unsigned long)(p2m + pfn));
357 set_pte(ptep,
358 pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
477 continue; 359 continue;
360 }
478 361
479 if ((unsigned long)mid_p == INVALID_P2M_ENTRY) 362 if (chunk == P2M_PER_PAGE) {
363 /* Map complete missing or identity p2m-page. */
364 mfns = (type == P2M_TYPE_MISSING) ?
365 p2m_missing : p2m_identity;
366 ptep = populate_extra_pte((unsigned long)(p2m + pfn));
367 set_pte(ptep,
368 pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
480 continue; 369 continue;
370 }
481 371
482 /* The old va. Rebase it on mfn_list */ 372 /* Complete missing or identity PMD(s) can be mapped. */
483 if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { 373 ptep = (type == P2M_TYPE_MISSING) ?
484 unsigned long *new; 374 p2m_missing_pte : p2m_identity_pte;
375 for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
376 pmdp = populate_extra_pmd(
377 (unsigned long)(p2m + pfn + i * PTRS_PER_PTE));
378 set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
379 }
380 }
381}
485 382
486 if (pfn_free > (size / sizeof(unsigned long))) { 383void __init xen_vmalloc_p2m_tree(void)
487 WARN(1, "Only allocated for %ld pages, but we want %ld!\n", 384{
488 size / sizeof(unsigned long), pfn_free); 385 static struct vm_struct vm;
489 return 0;
490 }
491 new = &mfn_list[pfn_free];
492 386
493 copy_page(new, mid_p); 387 vm.flags = VM_ALLOC;
494 p2m_top[topidx][mididx] = &mfn_list[pfn_free]; 388 vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
389 PMD_SIZE * PMDS_PER_MID_PAGE);
390 vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
391 pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
495 392
496 pfn_free += P2M_PER_PAGE; 393 xen_max_p2m_pfn = vm.size / sizeof(unsigned long);
497 394
498 } 395 xen_rebuild_p2m_list(vm.addr);
499 /* This should be the leafs allocated for identity from _brk. */
500 }
501 396
397 xen_p2m_addr = vm.addr;
502 xen_p2m_size = xen_max_p2m_pfn; 398 xen_p2m_size = xen_max_p2m_pfn;
503 xen_p2m_addr = mfn_list;
504 399
505 xen_inv_extra_mem(); 400 xen_inv_extra_mem();
506 401
507 m2p_override_init(); 402 m2p_override_init();
508 return (unsigned long)mfn_list;
509} 403}
510#else 404
511unsigned long __init xen_revector_p2m_tree(void)
512{
513 use_brk = 0;
514 xen_p2m_size = xen_max_p2m_pfn;
515 xen_inv_extra_mem();
516 m2p_override_init();
517 return 0;
518}
519#endif
520unsigned long get_phys_to_machine(unsigned long pfn) 405unsigned long get_phys_to_machine(unsigned long pfn)
521{ 406{
522 unsigned topidx, mididx, idx; 407 pte_t *ptep;
408 unsigned int level;
523 409
524 if (unlikely(pfn >= xen_p2m_size)) { 410 if (unlikely(pfn >= xen_p2m_size)) {
525 if (pfn < xen_max_p2m_pfn) 411 if (pfn < xen_max_p2m_pfn)
@@ -528,23 +414,83 @@ unsigned long get_phys_to_machine(unsigned long pfn)
528 return IDENTITY_FRAME(pfn); 414 return IDENTITY_FRAME(pfn);
529 } 415 }
530 416
531 topidx = p2m_top_index(pfn); 417 ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
532 mididx = p2m_mid_index(pfn); 418 BUG_ON(!ptep || level != PG_LEVEL_4K);
533 idx = p2m_index(pfn);
534 419
535 /* 420 /*
536 * The INVALID_P2M_ENTRY is filled in both p2m_*identity 421 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
537 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY 422 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
538 * would be wrong. 423 * would be wrong.
539 */ 424 */
540 if (p2m_top[topidx][mididx] == p2m_identity) 425 if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
541 return IDENTITY_FRAME(pfn); 426 return IDENTITY_FRAME(pfn);
542 427
543 return p2m_top[topidx][mididx][idx]; 428 return xen_p2m_addr[pfn];
544} 429}
545EXPORT_SYMBOL_GPL(get_phys_to_machine); 430EXPORT_SYMBOL_GPL(get_phys_to_machine);
546 431
547/* 432/*
433 * Allocate new pmd(s). It is checked whether the old pmd is still in place.
434 * If not, nothing is changed. This is okay as the only reason for allocating
435 * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
436 * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
437 */
438static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
439{
440 pte_t *ptechk;
441 pte_t *pteret = ptep;
442 pte_t *pte_newpg[PMDS_PER_MID_PAGE];
443 pmd_t *pmdp;
444 unsigned int level;
445 unsigned long flags;
446 unsigned long vaddr;
447 int i;
448
449 /* Do all allocations first to bail out in error case. */
450 for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
451 pte_newpg[i] = alloc_p2m_page();
452 if (!pte_newpg[i]) {
453 for (i--; i >= 0; i--)
454 free_p2m_page(pte_newpg[i]);
455
456 return NULL;
457 }
458 }
459
460 vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);
461
462 for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
463 copy_page(pte_newpg[i], pte_pg);
464 paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);
465
466 pmdp = lookup_pmd_address(vaddr);
467 BUG_ON(!pmdp);
468
469 spin_lock_irqsave(&p2m_update_lock, flags);
470
471 ptechk = lookup_address(vaddr, &level);
472 if (ptechk == pte_pg) {
473 set_pmd(pmdp,
474 __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
475 if (vaddr == (addr & ~(PMD_SIZE - 1)))
476 pteret = pte_offset_kernel(pmdp, addr);
477 pte_newpg[i] = NULL;
478 }
479
480 spin_unlock_irqrestore(&p2m_update_lock, flags);
481
482 if (pte_newpg[i]) {
483 paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
484 free_p2m_page(pte_newpg[i]);
485 }
486
487 vaddr += PMD_SIZE;
488 }
489
490 return pteret;
491}
492
493/*
548 * Fully allocate the p2m structure for a given pfn. We need to check 494 * Fully allocate the p2m structure for a given pfn. We need to check
549 * that both the top and mid levels are allocated, and make sure the 495 * that both the top and mid levels are allocated, and make sure the
550 * parallel mfn tree is kept in sync. We may race with other cpus, so 496 * parallel mfn tree is kept in sync. We may race with other cpus, so
@@ -554,58 +500,62 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine);
554static bool alloc_p2m(unsigned long pfn) 500static bool alloc_p2m(unsigned long pfn)
555{ 501{
556 unsigned topidx, mididx; 502 unsigned topidx, mididx;
557 unsigned long ***top_p, **mid;
558 unsigned long *top_mfn_p, *mid_mfn; 503 unsigned long *top_mfn_p, *mid_mfn;
559 unsigned long *p2m_orig; 504 pte_t *ptep, *pte_pg;
505 unsigned int level;
506 unsigned long flags;
507 unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
508 unsigned long p2m_pfn;
560 509
561 topidx = p2m_top_index(pfn); 510 topidx = p2m_top_index(pfn);
562 mididx = p2m_mid_index(pfn); 511 mididx = p2m_mid_index(pfn);
563 512
564 top_p = &p2m_top[topidx]; 513 ptep = lookup_address(addr, &level);
565 mid = ACCESS_ONCE(*top_p); 514 BUG_ON(!ptep || level != PG_LEVEL_4K);
515 pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
566 516
567 if (mid == p2m_mid_missing) { 517 if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
568 /* Mid level is missing, allocate a new one */ 518 /* PMD level is missing, allocate a new one */
569 mid = alloc_p2m_page(); 519 ptep = alloc_p2m_pmd(addr, ptep, pte_pg);
570 if (!mid) 520 if (!ptep)
571 return false; 521 return false;
572
573 p2m_mid_init(mid, p2m_missing);
574
575 if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
576 free_p2m_page(mid);
577 } 522 }
578 523
579 top_mfn_p = &p2m_top_mfn[topidx]; 524 if (p2m_top_mfn) {
580 mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); 525 top_mfn_p = &p2m_top_mfn[topidx];
526 mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
581 527
582 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); 528 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
583 529
584 if (mid_mfn == p2m_mid_missing_mfn) { 530 if (mid_mfn == p2m_mid_missing_mfn) {
585 /* Separately check the mid mfn level */ 531 /* Separately check the mid mfn level */
586 unsigned long missing_mfn; 532 unsigned long missing_mfn;
587 unsigned long mid_mfn_mfn; 533 unsigned long mid_mfn_mfn;
588 unsigned long old_mfn; 534 unsigned long old_mfn;
589 535
590 mid_mfn = alloc_p2m_page(); 536 mid_mfn = alloc_p2m_page();
591 if (!mid_mfn) 537 if (!mid_mfn)
592 return false; 538 return false;
593 539
594 p2m_mid_mfn_init(mid_mfn, p2m_missing); 540 p2m_mid_mfn_init(mid_mfn, p2m_missing);
595 541
596 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); 542 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
597 mid_mfn_mfn = virt_to_mfn(mid_mfn); 543 mid_mfn_mfn = virt_to_mfn(mid_mfn);
598 old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); 544 old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
599 if (old_mfn != missing_mfn) { 545 if (old_mfn != missing_mfn) {
600 free_p2m_page(mid_mfn); 546 free_p2m_page(mid_mfn);
601 mid_mfn = mfn_to_virt(old_mfn); 547 mid_mfn = mfn_to_virt(old_mfn);
602 } else { 548 } else {
603 p2m_top_mfn_p[topidx] = mid_mfn; 549 p2m_top_mfn_p[topidx] = mid_mfn;
550 }
604 } 551 }
552 } else {
553 mid_mfn = NULL;
605 } 554 }
606 555
607 p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); 556 p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep));
608 if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { 557 if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
558 p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
609 /* p2m leaf page is missing */ 559 /* p2m leaf page is missing */
610 unsigned long *p2m; 560 unsigned long *p2m;
611 561
@@ -613,12 +563,25 @@ static bool alloc_p2m(unsigned long pfn)
613 if (!p2m) 563 if (!p2m)
614 return false; 564 return false;
615 565
616 p2m_init(p2m); 566 if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
567 p2m_init(p2m);
568 else
569 p2m_init_identity(p2m, pfn);
570
571 spin_lock_irqsave(&p2m_update_lock, flags);
572
573 if (pte_pfn(*ptep) == p2m_pfn) {
574 set_pte(ptep,
575 pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
576 if (mid_mfn)
577 mid_mfn[mididx] = virt_to_mfn(p2m);
578 p2m = NULL;
579 }
580
581 spin_unlock_irqrestore(&p2m_update_lock, flags);
617 582
618 if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) 583 if (p2m)
619 free_p2m_page(p2m); 584 free_p2m_page(p2m);
620 else
621 mid_mfn[mididx] = virt_to_mfn(p2m);
622 } 585 }
623 586
624 return true; 587 return true;
@@ -647,10 +610,10 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
647 return pfn - pfn_s; 610 return pfn - pfn_s;
648} 611}
649 612
650/* Try to install p2m mapping; fail if intermediate bits missing */
651bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) 613bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
652{ 614{
653 unsigned topidx, mididx, idx; 615 pte_t *ptep;
616 unsigned int level;
654 617
655 /* don't track P2M changes in autotranslate guests */ 618 /* don't track P2M changes in autotranslate guests */
656 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) 619 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
@@ -661,55 +624,27 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
661 return true; 624 return true;
662 } 625 }
663 626
664 topidx = p2m_top_index(pfn); 627 ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
665 mididx = p2m_mid_index(pfn); 628 BUG_ON(!ptep || level != PG_LEVEL_4K);
666 idx = p2m_index(pfn);
667
668 /* For sparse holes were the p2m leaf has real PFN along with
669 * PCI holes, stick in the PFN as the MFN value.
670 *
671 * set_phys_range_identity() will have allocated new middle
672 * and leaf pages as required so an existing p2m_mid_missing
673 * or p2m_missing mean that whole range will be identity so
674 * these can be switched to p2m_mid_identity or p2m_identity.
675 */
676 if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
677 if (p2m_top[topidx] == p2m_mid_identity)
678 return true;
679
680 if (p2m_top[topidx] == p2m_mid_missing) {
681 WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing,
682 p2m_mid_identity) != p2m_mid_missing);
683 return true;
684 }
685
686 if (p2m_top[topidx][mididx] == p2m_identity)
687 return true;
688
689 /* Swap over from MISSING to IDENTITY if needed. */
690 if (p2m_top[topidx][mididx] == p2m_missing) {
691 WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
692 p2m_identity) != p2m_missing);
693 return true;
694 }
695 }
696 629
697 if (p2m_top[topidx][mididx] == p2m_missing) 630 if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing)))
698 return mfn == INVALID_P2M_ENTRY; 631 return mfn == INVALID_P2M_ENTRY;
699 632
700 p2m_top[topidx][mididx][idx] = mfn; 633 if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
634 return mfn == IDENTITY_FRAME(pfn);
635
636 xen_p2m_addr[pfn] = mfn;
701 637
702 return true; 638 return true;
703} 639}
704 640
705bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) 641bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
706{ 642{
707 if (unlikely(!__set_phys_to_machine(pfn, mfn))) { 643 if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
708 if (!alloc_p2m(pfn)) 644 if (!alloc_p2m(pfn))
709 return false; 645 return false;
710 646
711 if (!__set_phys_to_machine(pfn, mfn)) 647 return __set_phys_to_machine(pfn, mfn);
712 return false;
713 } 648 }
714 649
715 return true; 650 return true;
@@ -1035,79 +970,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
1035#include "debugfs.h" 970#include "debugfs.h"
1036static int p2m_dump_show(struct seq_file *m, void *v) 971static int p2m_dump_show(struct seq_file *m, void *v)
1037{ 972{
1038 static const char * const level_name[] = { "top", "middle",
1039 "entry", "abnormal", "error"};
1040#define TYPE_IDENTITY 0
1041#define TYPE_MISSING 1
1042#define TYPE_PFN 2
1043#define TYPE_UNKNOWN 3
1044 static const char * const type_name[] = { 973 static const char * const type_name[] = {
1045 [TYPE_IDENTITY] = "identity", 974 [P2M_TYPE_IDENTITY] = "identity",
1046 [TYPE_MISSING] = "missing", 975 [P2M_TYPE_MISSING] = "missing",
1047 [TYPE_PFN] = "pfn", 976 [P2M_TYPE_PFN] = "pfn",
1048 [TYPE_UNKNOWN] = "abnormal"}; 977 [P2M_TYPE_UNKNOWN] = "abnormal"};
1049 unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; 978 unsigned long pfn, first_pfn;
1050 unsigned int uninitialized_var(prev_level); 979 int type, prev_type;
1051 unsigned int uninitialized_var(prev_type); 980
1052 981 prev_type = xen_p2m_elem_type(0);
1053 if (!p2m_top) 982 first_pfn = 0;
1054 return 0; 983
1055 984 for (pfn = 0; pfn < xen_p2m_size; pfn++) {
1056 for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { 985 type = xen_p2m_elem_type(pfn);
1057 unsigned topidx = p2m_top_index(pfn); 986 if (type != prev_type) {
1058 unsigned mididx = p2m_mid_index(pfn); 987 seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
1059 unsigned idx = p2m_index(pfn); 988 type_name[prev_type]);
1060 unsigned lvl, type;
1061
1062 lvl = 4;
1063 type = TYPE_UNKNOWN;
1064 if (p2m_top[topidx] == p2m_mid_missing) {
1065 lvl = 0; type = TYPE_MISSING;
1066 } else if (p2m_top[topidx] == NULL) {
1067 lvl = 0; type = TYPE_UNKNOWN;
1068 } else if (p2m_top[topidx][mididx] == NULL) {
1069 lvl = 1; type = TYPE_UNKNOWN;
1070 } else if (p2m_top[topidx][mididx] == p2m_identity) {
1071 lvl = 1; type = TYPE_IDENTITY;
1072 } else if (p2m_top[topidx][mididx] == p2m_missing) {
1073 lvl = 1; type = TYPE_MISSING;
1074 } else if (p2m_top[topidx][mididx][idx] == 0) {
1075 lvl = 2; type = TYPE_UNKNOWN;
1076 } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
1077 lvl = 2; type = TYPE_IDENTITY;
1078 } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
1079 lvl = 2; type = TYPE_MISSING;
1080 } else if (p2m_top[topidx][mididx][idx] == pfn) {
1081 lvl = 2; type = TYPE_PFN;
1082 } else if (p2m_top[topidx][mididx][idx] != pfn) {
1083 lvl = 2; type = TYPE_PFN;
1084 }
1085 if (pfn == 0) {
1086 prev_level = lvl;
1087 prev_type = type; 989 prev_type = type;
1088 } 990 first_pfn = pfn;
1089 if (pfn == MAX_DOMAIN_PAGES-1) {
1090 lvl = 3;
1091 type = TYPE_UNKNOWN;
1092 }
1093 if (prev_type != type) {
1094 seq_printf(m, " [0x%lx->0x%lx] %s\n",
1095 prev_pfn_type, pfn, type_name[prev_type]);
1096 prev_pfn_type = pfn;
1097 prev_type = type;
1098 }
1099 if (prev_level != lvl) {
1100 seq_printf(m, " [0x%lx->0x%lx] level %s\n",
1101 prev_pfn_level, pfn, level_name[prev_level]);
1102 prev_pfn_level = pfn;
1103 prev_level = lvl;
1104 } 991 }
1105 } 992 }
993 seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
994 type_name[prev_type]);
1106 return 0; 995 return 0;
1107#undef TYPE_IDENTITY
1108#undef TYPE_MISSING
1109#undef TYPE_PFN
1110#undef TYPE_UNKNOWN
1111} 996}
1112 997
1113static int p2m_dump_open(struct inode *inode, struct file *filp) 998static int p2m_dump_open(struct inode *inode, struct file *filp)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 02b0b0fba041..f92921fa54f9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ void xen_hvm_init_shared_info(void);
49void xen_unplug_emulated_devices(void); 49void xen_unplug_emulated_devices(void);
50 50
51void __init xen_build_dynamic_phys_to_machine(void); 51void __init xen_build_dynamic_phys_to_machine(void);
52unsigned long __init xen_revector_p2m_tree(void); 52void __init xen_vmalloc_p2m_tree(void);
53 53
54void xen_init_irq_ops(void); 54void xen_init_irq_ops(void);
55void xen_setup_timer(int cpu); 55void xen_setup_timer(int cpu);