-rw-r--r--  arch/x86/include/asm/xen/page.h  |  20
-rw-r--r--  arch/x86/xen/mmu.c               |  34
-rw-r--r--  arch/x86/xen/p2m.c               | 735
-rw-r--r--  arch/x86/xen/xen-ops.h           |   2
4 files changed, 347 insertions, 444 deletions
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 57aba6ba6f92..b54a3d20d6b2 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -65,13 +65,25 @@ extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
65 * bits (identity or foreign) are set. 65 * bits (identity or foreign) are set.
66 * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set 66 * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set
67 * identity or foreign indicator will be still set. __pfn_to_mfn() is 67 * identity or foreign indicator will be still set. __pfn_to_mfn() is
68 * encapsulating get_phys_to_machine(). 68 * encapsulating get_phys_to_machine() which is called in special cases only.
69 * - get_phys_to_machine() is to be called by __pfn_to_mfn() only to allow 69 * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special
70 * for future optimizations. 70 * cases needing an extended handling.
71 */ 71 */
72static inline unsigned long __pfn_to_mfn(unsigned long pfn) 72static inline unsigned long __pfn_to_mfn(unsigned long pfn)
73{ 73{
74 return get_phys_to_machine(pfn); 74 unsigned long mfn;
75
76 if (pfn < xen_p2m_size)
77 mfn = xen_p2m_addr[pfn];
78 else if (unlikely(pfn < xen_max_p2m_pfn))
79 return get_phys_to_machine(pfn);
80 else
81 return IDENTITY_FRAME(pfn);
82
83 if (unlikely(mfn == INVALID_P2M_ENTRY))
84 return get_phys_to_machine(pfn);
85
86 return mfn;
75} 87}
76 88
77static inline unsigned long pfn_to_mfn(unsigned long pfn) 89static inline unsigned long pfn_to_mfn(unsigned long pfn)
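The rewritten __pfn_to_mfn() above is a fast path: it indexes the linear xen_p2m_addr[] array directly and only falls back to get_phys_to_machine() for pfns beyond xen_p2m_size or for slots holding INVALID_P2M_ENTRY. Callers still have to strip the identity/foreign indicator bits. Below is a minimal sketch of how a caller such as pfn_to_mfn() (whose signature is the context line above) typically consumes it, assuming the usual IDENTITY_FRAME_BIT/FOREIGN_FRAME_BIT definitions from this header; it is a sketch, not a quote of the unchanged code:

static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
	unsigned long mfn;

	/* Autotranslated guests have no p2m; the pfn already is the frame. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return pfn;

	/* Fast path into xen_p2m_addr[]; the slow path is handled inside. */
	mfn = __pfn_to_mfn(pfn);

	/* Strip the type bits so callers see only the raw machine frame. */
	if (mfn != INVALID_P2M_ENTRY)
		mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);

	return mfn;
}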
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3e3f8f8c3a30..6ab6150c8560 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1158,20 +1158,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
1158 * instead of somewhere later and be confusing. */ 1158 * instead of somewhere later and be confusing. */
1159 xen_mc_flush(); 1159 xen_mc_flush();
1160} 1160}
1161static void __init xen_pagetable_p2m_copy(void) 1161
1162static void __init xen_pagetable_p2m_free(void)
1162{ 1163{
1163 unsigned long size; 1164 unsigned long size;
1164 unsigned long addr; 1165 unsigned long addr;
1165 unsigned long new_mfn_list;
1166
1167 if (xen_feature(XENFEAT_auto_translated_physmap))
1168 return;
1169 1166
1170 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1167 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1171 1168
1172 new_mfn_list = xen_revector_p2m_tree();
1173 /* No memory or already called. */ 1169 /* No memory or already called. */
1174 if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) 1170 if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
1175 return; 1171 return;
1176 1172
1177 /* using __ka address and sticking INVALID_P2M_ENTRY! */ 1173 /* using __ka address and sticking INVALID_P2M_ENTRY! */
@@ -1189,8 +1185,6 @@ static void __init xen_pagetable_p2m_copy(void)
1189 1185
1190 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1186 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1191 memblock_free(__pa(xen_start_info->mfn_list), size); 1187 memblock_free(__pa(xen_start_info->mfn_list), size);
1192 /* And revector! Bye bye old array */
1193 xen_start_info->mfn_list = new_mfn_list;
1194 1188
1195 /* At this stage, cleanup_highmap has already cleaned __ka space 1189 /* At this stage, cleanup_highmap has already cleaned __ka space
1196 * from _brk_limit way up to the max_pfn_mapped (which is the end of 1190 * from _brk_limit way up to the max_pfn_mapped (which is the end of
@@ -1214,14 +1208,26 @@ static void __init xen_pagetable_p2m_copy(void)
1214} 1208}
1215#endif 1209#endif
1216 1210
1217static void __init xen_pagetable_init(void) 1211static void __init xen_pagetable_p2m_setup(void)
1218{ 1212{
1219 paging_init(); 1213 if (xen_feature(XENFEAT_auto_translated_physmap))
1214 return;
1215
1216 xen_vmalloc_p2m_tree();
1217
1220#ifdef CONFIG_X86_64 1218#ifdef CONFIG_X86_64
1221 xen_pagetable_p2m_copy(); 1219 xen_pagetable_p2m_free();
1222#else
1223 xen_revector_p2m_tree();
1224#endif 1220#endif
1221 /* And revector! Bye bye old array */
1222 xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
1223}
1224
1225static void __init xen_pagetable_init(void)
1226{
1227 paging_init();
1228
1229 xen_pagetable_p2m_setup();
1230
1225 /* Allocate and initialize top and mid mfn levels for p2m structure */ 1231 /* Allocate and initialize top and mid mfn levels for p2m structure */
1226 xen_build_mfn_list_list(); 1232 xen_build_mfn_list_list();
1227 1233
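Pieced together from the mmu.c hunks above, the resulting boot-time ordering is roughly the following. This is a sketch for orientation only: xen_pagetable_init_flow is a hypothetical name, the real code keeps the function names shown in the diff, and the autotranslate early-return is omitted.

static void __init xen_pagetable_init_flow(void)
{
	paging_init();				/* kernel page tables are ready */

	/* xen_pagetable_p2m_setup(): */
	xen_vmalloc_p2m_tree();			/* build the linear p2m mapping */
#ifdef CONFIG_X86_64
	xen_pagetable_p2m_free();		/* return the old mfn_list pages to memblock */
#endif
	/* revector: the start-of-day mfn_list now points at the linear array */
	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;

	xen_build_mfn_list_list();		/* rebuild the mfn tree handed to Xen */
}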
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 8c3d8fbbba93..7d844739e513 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -3,21 +3,22 @@
3 * guests themselves, but it must also access and update the p2m array 3 * guests themselves, but it must also access and update the p2m array
4 * during suspend/resume when all the pages are reallocated. 4 * during suspend/resume when all the pages are reallocated.
5 * 5 *
6 * The p2m table is logically a flat array, but we implement it as a 6 * The logical flat p2m table is mapped to a linear kernel memory area.
7 * three-level tree to allow the address space to be sparse. 7 * For accesses by Xen a three-level tree linked via mfns only is set up to
8 * allow the address space to be sparse.
8 * 9 *
9 * Xen 10 * Xen
10 * | 11 * |
11 * p2m_top p2m_top_mfn 12 * p2m_top_mfn
12 * / \ / \ 13 * / \
13 * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn 14 * p2m_mid_mfn p2m_mid_mfn
14 * / \ / \ / / 15 * / /
15 * p2m p2m p2m p2m p2m p2m p2m ... 16 * p2m p2m p2m ...
16 * 17 *
17 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. 18 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
18 * 19 *
19 * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the 20 * The p2m_top_mfn level is limited to 1 page, so the maximum representable
20 * maximum representable pseudo-physical address space is: 21 * pseudo-physical address space is:
21 * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages 22 * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
22 * 23 *
23 * P2M_PER_PAGE depends on the architecture, as a mfn is always 24 * P2M_PER_PAGE depends on the architecture, as a mfn is always
@@ -30,6 +31,9 @@
30 * leaf entries, or for the top root, or middle one, for which there is a void 31 * leaf entries, or for the top root, or middle one, for which there is a void
31 * entry, we assume it is "missing". So (for example) 32 * entry, we assume it is "missing". So (for example)
32 * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. 33 * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
34 * We have a dedicated page p2m_missing with all entries being
35 * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
36 * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
33 * 37 *
34 * We also have the possibility of setting 1-1 mappings on certain regions, so 38 * We also have the possibility of setting 1-1 mappings on certain regions, so
35 * that: 39 * that:
@@ -39,122 +43,20 @@
39 * PCI BARs, or ACPI spaces), we can create mappings easily because we 43 * PCI BARs, or ACPI spaces), we can create mappings easily because we
40 * get the PFN value to match the MFN. 44 * get the PFN value to match the MFN.
41 * 45 *
42 * For this to work efficiently we have one new page p2m_identity and 46 * For this to work efficiently we have one new page p2m_identity. All entries
43 * allocate (via reserved_brk) any other pages we need to cover the sides 47 * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only
44 * (1GB or 4MB boundary violations). All entries in p2m_identity are set to 48 * recognizes that and MFNs, no other fancy value).
45 * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
46 * no other fancy value).
47 * 49 *
48 * On lookup we spot that the entry points to p2m_identity and return the 50 * On lookup we spot that the entry points to p2m_identity and return the
49 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. 51 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
50 * If the entry points to an allocated page, we just proceed as before and 52 * If the entry points to an allocated page, we just proceed as before and
51 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in 53 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
52 * appropriate functions (pfn_to_mfn). 54 * appropriate functions (pfn_to_mfn).
53 * 55 *
54 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the 56 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
55 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a 57 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
56 * non-identity pfn. To protect ourselves against we elect to set (and get) the 58 * non-identity pfn. To protect ourselves against we elect to set (and get) the
57 * IDENTITY_FRAME_BIT on all identity mapped PFNs. 59 * IDENTITY_FRAME_BIT on all identity mapped PFNs.
58 *
59 * This simplistic diagram is used to explain the more subtle piece of code.
60 * There is also a digram of the P2M at the end that can help.
61 * Imagine your E820 looking as so:
62 *
63 * 1GB 2GB 4GB
64 * /-------------------+---------\/----\ /----------\ /---+-----\
65 * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
66 * \-------------------+---------/\----/ \----------/ \---+-----/
67 * ^- 1029MB ^- 2001MB
68 *
69 * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
70 * 2048MB = 524288 (0x80000)]
71 *
72 * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
73 * is actually not present (would have to kick the balloon driver to put it in).
74 *
75 * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
76 * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
77 * of the PFN and the end PFN (263424 and 512256 respectively). The first step
78 * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
79 * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
80 * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as
81 * required to split any existing p2m_mid_missing middle pages.
82 *
83 * With the E820 example above, 263424 is not 1GB aligned so we allocate a
84 * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
85 * Each entry in the allocate page is "missing" (points to p2m_missing).
86 *
87 * Next stage is to determine if we need to do a more granular boundary check
88 * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
89 * We check if the start pfn and end pfn violate that boundary check, and if
90 * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer
91 * granularity of setting which PFNs are missing and which ones are identity.
92 * In our example 263424 and 512256 both fail the check so we reserve_brk two
93 * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
94 * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
95 *
96 * At this point we would at minimum reserve_brk one page, but could be up to
97 * three. Each call to set_phys_range_identity has at maximum a three page
98 * cost. If we were to query the P2M at this stage, all those entries from
99 * start PFN through end PFN (so 1029MB -> 2001MB) would return
100 * INVALID_P2M_ENTRY ("missing").
101 *
102 * The next step is to walk from the start pfn to the end pfn setting
103 * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
104 * If we find that the middle entry is pointing to p2m_missing we can swap it
105 * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and
106 * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions).
107 * At this point we do not need to worry about boundary aligment (so no need to
108 * reserve_brk a middle page, figure out which PFNs are "missing" and which
109 * ones are identity), as that has been done earlier. If we find that the
110 * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
111 * that page (which covers 512 PFNs) and set the appropriate PFN with
112 * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
113 * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
114 * IDENTITY_FRAME_BIT set.
115 *
116 * All other regions that are void (or not filled) either point to p2m_missing
117 * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
118 * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
119 * contain the INVALID_P2M_ENTRY value and are considered "missing."
120 *
121 * Finally, the region beyond the end of of the E820 (4 GB in this example)
122 * is set to be identity (in case there are MMIO regions placed here).
123 *
124 * This is what the p2m ends up looking (for the E820 above) with this
125 * fabulous drawing:
126 *
127 * p2m /--------------\
128 * /-----\ | &mfn_list[0],| /-----------------\
129 * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
130 * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
131 * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
132 * |-----| \ | [p2m_identity]+\\ | .... |
133 * | 2 |--\ \-------------------->| ... | \\ \----------------/
134 * |-----| \ \---------------/ \\
135 * | 3 |-\ \ \\ p2m_identity [1]
136 * |-----| \ \-------------------->/---------------\ /-----------------\
137 * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... |
138 * \-----/ | | | [p2m_identity]+-->| ..., ~0 |
139 * | | | .... | \-----------------/
140 * | | +-[x], ~0, ~0.. +\
141 * | | \---------------/ \
142 * | | \-> /---------------\
143 * | V p2m_mid_missing p2m_missing | IDENTITY[@0] |
144 * | /-----------------\ /------------\ | IDENTITY[@256]|
145 * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... |
146 * | | [p2m_missing] +---->| ..., ~0 | \---------------/
147 * | | ... | \------------/
148 * | \-----------------/
149 * |
150 * | p2m_mid_identity
151 * | /-----------------\
152 * \-->| [p2m_identity] +---->[1]
153 * | [p2m_identity] +---->[1]
154 * | ... |
155 * \-----------------/
156 *
157 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
158 */ 60 */
159 61
160#include <linux/init.h> 62#include <linux/init.h>
@@ -179,6 +81,8 @@
179#include "multicalls.h" 81#include "multicalls.h"
180#include "xen-ops.h" 82#include "xen-ops.h"
181 83
84#define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE)
85
182static void __init m2p_override_init(void); 86static void __init m2p_override_init(void);
183 87
184unsigned long *xen_p2m_addr __read_mostly; 88unsigned long *xen_p2m_addr __read_mostly;
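PMDS_PER_MID_PAGE, introduced in the hunk above, is how many PMD entries of the linear p2m area one mid page of the mfn_list_list spans. A worked example with the usual definitions, assuming 4 KiB pages (the concrete numbers are illustrative and not taken from the patch):

/*
 * P2M_PER_PAGE     = PAGE_SIZE / sizeof(unsigned long)   = 512 (x86-64), 1024 (x86-32)
 * P2M_MID_PER_PAGE = PAGE_SIZE / sizeof(unsigned long *) = 512 (x86-64), 1024 (x86-32)
 * PTRS_PER_PTE     = 512 on x86-64 and 32-bit PAE, 1024 on 32-bit non-PAE
 *
 * PMDS_PER_MID_PAGE = P2M_MID_PER_PAGE / PTRS_PER_PTE
 *                   = 1 on x86-64 and 32-bit non-PAE
 *                   = 2 on 32-bit PAE
 *
 * So on 32-bit PAE one mid page covers two PMDs of the linear p2m, which is
 * why xen_rebuild_p2m_list() and alloc_p2m_pmd() further down loop over
 * PMDS_PER_MID_PAGE rather than handling a single PMD.
 */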
@@ -188,22 +92,15 @@ EXPORT_SYMBOL_GPL(xen_p2m_size);
188unsigned long xen_max_p2m_pfn __read_mostly; 92unsigned long xen_max_p2m_pfn __read_mostly;
189EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); 93EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
190 94
95static DEFINE_SPINLOCK(p2m_update_lock);
96
191static unsigned long *p2m_mid_missing_mfn; 97static unsigned long *p2m_mid_missing_mfn;
192static unsigned long *p2m_top_mfn; 98static unsigned long *p2m_top_mfn;
193static unsigned long **p2m_top_mfn_p; 99static unsigned long **p2m_top_mfn_p;
194 100static unsigned long *p2m_missing;
195/* Placeholders for holes in the address space */ 101static unsigned long *p2m_identity;
196static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); 102static pte_t *p2m_missing_pte;
197static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); 103static pte_t *p2m_identity_pte;
198
199static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
200
201static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
202static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
203
204RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
205
206static int use_brk = 1;
207 104
208static inline unsigned p2m_top_index(unsigned long pfn) 105static inline unsigned p2m_top_index(unsigned long pfn)
209{ 106{
@@ -221,14 +118,6 @@ static inline unsigned p2m_index(unsigned long pfn)
221 return pfn % P2M_PER_PAGE; 118 return pfn % P2M_PER_PAGE;
222} 119}
223 120
224static void p2m_top_init(unsigned long ***top)
225{
226 unsigned i;
227
228 for (i = 0; i < P2M_TOP_PER_PAGE; i++)
229 top[i] = p2m_mid_missing;
230}
231
232static void p2m_top_mfn_init(unsigned long *top) 121static void p2m_top_mfn_init(unsigned long *top)
233{ 122{
234 unsigned i; 123 unsigned i;
@@ -245,35 +134,32 @@ static void p2m_top_mfn_p_init(unsigned long **top)
245 top[i] = p2m_mid_missing_mfn; 134 top[i] = p2m_mid_missing_mfn;
246} 135}
247 136
248static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) 137static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
249{ 138{
250 unsigned i; 139 unsigned i;
251 140
252 for (i = 0; i < P2M_MID_PER_PAGE; i++) 141 for (i = 0; i < P2M_MID_PER_PAGE; i++)
253 mid[i] = leaf; 142 mid[i] = virt_to_mfn(leaf);
254} 143}
255 144
256static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) 145static void p2m_init(unsigned long *p2m)
257{ 146{
258 unsigned i; 147 unsigned i;
259 148
260 for (i = 0; i < P2M_MID_PER_PAGE; i++) 149 for (i = 0; i < P2M_PER_PAGE; i++)
261 mid[i] = virt_to_mfn(leaf); 150 p2m[i] = INVALID_P2M_ENTRY;
262} 151}
263 152
264static void p2m_init(unsigned long *p2m) 153static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
265{ 154{
266 unsigned i; 155 unsigned i;
267 156
268 for (i = 0; i < P2M_MID_PER_PAGE; i++) 157 for (i = 0; i < P2M_PER_PAGE; i++)
269 p2m[i] = INVALID_P2M_ENTRY; 158 p2m[i] = IDENTITY_FRAME(pfn + i);
270} 159}
271 160
272static void * __ref alloc_p2m_page(void) 161static void * __ref alloc_p2m_page(void)
273{ 162{
274 if (unlikely(use_brk))
275 return extend_brk(PAGE_SIZE, PAGE_SIZE);
276
277 if (unlikely(!slab_is_available())) 163 if (unlikely(!slab_is_available()))
278 return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); 164 return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
279 165
@@ -299,7 +185,10 @@ static void free_p2m_page(void *p)
299 */ 185 */
300void __ref xen_build_mfn_list_list(void) 186void __ref xen_build_mfn_list_list(void)
301{ 187{
302 unsigned long pfn; 188 unsigned long pfn, mfn;
189 pte_t *ptep;
190 unsigned int level, topidx, mididx;
191 unsigned long *mid_mfn_p;
303 192
304 if (xen_feature(XENFEAT_auto_translated_physmap)) 193 if (xen_feature(XENFEAT_auto_translated_physmap))
305 return; 194 return;
@@ -319,20 +208,23 @@ void __ref xen_build_mfn_list_list(void)
319 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 208 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
320 } 209 }
321 210
322 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { 211 for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
323 unsigned topidx = p2m_top_index(pfn); 212 pfn += P2M_PER_PAGE) {
324 unsigned mididx = p2m_mid_index(pfn); 213 topidx = p2m_top_index(pfn);
325 unsigned long **mid; 214 mididx = p2m_mid_index(pfn);
326 unsigned long *mid_mfn_p;
327 215
328 mid = p2m_top[topidx];
329 mid_mfn_p = p2m_top_mfn_p[topidx]; 216 mid_mfn_p = p2m_top_mfn_p[topidx];
217 ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
218 &level);
219 BUG_ON(!ptep || level != PG_LEVEL_4K);
220 mfn = pte_mfn(*ptep);
221 ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
330 222
331 /* Don't bother allocating any mfn mid levels if 223 /* Don't bother allocating any mfn mid levels if
332 * they're just missing, just update the stored mfn, 224 * they're just missing, just update the stored mfn,
333 * since all could have changed over a migrate. 225 * since all could have changed over a migrate.
334 */ 226 */
335 if (mid == p2m_mid_missing) { 227 if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
336 BUG_ON(mididx); 228 BUG_ON(mididx);
337 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); 229 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
338 p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); 230 p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
@@ -341,11 +233,6 @@ void __ref xen_build_mfn_list_list(void)
341 } 233 }
342 234
343 if (mid_mfn_p == p2m_mid_missing_mfn) { 235 if (mid_mfn_p == p2m_mid_missing_mfn) {
344 /*
345 * XXX boot-time only! We should never find
346 * missing parts of the mfn tree after
347 * runtime.
348 */
349 mid_mfn_p = alloc_p2m_page(); 236 mid_mfn_p = alloc_p2m_page();
350 p2m_mid_mfn_init(mid_mfn_p, p2m_missing); 237 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
351 238
@@ -353,7 +240,7 @@ void __ref xen_build_mfn_list_list(void)
353 } 240 }
354 241
355 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); 242 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
356 mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); 243 mid_mfn_p[mididx] = mfn;
357 } 244 }
358} 245}
359 246
@@ -372,154 +259,153 @@ void xen_setup_mfn_list_list(void)
372/* Set up p2m_top to point to the domain-builder provided p2m pages */ 259/* Set up p2m_top to point to the domain-builder provided p2m pages */
373void __init xen_build_dynamic_phys_to_machine(void) 260void __init xen_build_dynamic_phys_to_machine(void)
374{ 261{
375 unsigned long *mfn_list;
376 unsigned long max_pfn;
377 unsigned long pfn; 262 unsigned long pfn;
378 263
379 if (xen_feature(XENFEAT_auto_translated_physmap)) 264 if (xen_feature(XENFEAT_auto_translated_physmap))
380 return; 265 return;
381 266
382 xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; 267 xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
383 mfn_list = (unsigned long *)xen_start_info->mfn_list; 268 xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);
384 max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
385 xen_max_p2m_pfn = max_pfn;
386 xen_p2m_size = max_pfn;
387 269
388 p2m_missing = alloc_p2m_page(); 270 for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++)
389 p2m_init(p2m_missing); 271 xen_p2m_addr[pfn] = INVALID_P2M_ENTRY;
390 p2m_identity = alloc_p2m_page();
391 p2m_init(p2m_identity);
392 272
393 p2m_mid_missing = alloc_p2m_page(); 273 xen_max_p2m_pfn = xen_p2m_size;
394 p2m_mid_init(p2m_mid_missing, p2m_missing); 274}
395 p2m_mid_identity = alloc_p2m_page();
396 p2m_mid_init(p2m_mid_identity, p2m_identity);
397 275
398 p2m_top = alloc_p2m_page(); 276#define P2M_TYPE_IDENTITY 0
399 p2m_top_init(p2m_top); 277#define P2M_TYPE_MISSING 1
278#define P2M_TYPE_PFN 2
279#define P2M_TYPE_UNKNOWN 3
400 280
401 /* 281static int xen_p2m_elem_type(unsigned long pfn)
402 * The domain builder gives us a pre-constructed p2m array in 282{
403 * mfn_list for all the pages initially given to us, so we just 283 unsigned long mfn;
404 * need to graft that into our tree structure.
405 */
406 for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
407 unsigned topidx = p2m_top_index(pfn);
408 unsigned mididx = p2m_mid_index(pfn);
409 284
410 if (p2m_top[topidx] == p2m_mid_missing) { 285 if (pfn >= xen_p2m_size)
411 unsigned long **mid = alloc_p2m_page(); 286 return P2M_TYPE_IDENTITY;
412 p2m_mid_init(mid, p2m_missing);
413 287
414 p2m_top[topidx] = mid; 288 mfn = xen_p2m_addr[pfn];
415 }
416 289
417 /* 290 if (mfn == INVALID_P2M_ENTRY)
418 * As long as the mfn_list has enough entries to completely 291 return P2M_TYPE_MISSING;
419 * fill a p2m page, pointing into the array is ok. But if
420 * not the entries beyond the last pfn will be undefined.
421 */
422 if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
423 unsigned long p2midx;
424 292
425 p2midx = max_pfn % P2M_PER_PAGE; 293 if (mfn & IDENTITY_FRAME_BIT)
426 for ( ; p2midx < P2M_PER_PAGE; p2midx++) 294 return P2M_TYPE_IDENTITY;
427 mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY; 295
428 } 296 return P2M_TYPE_PFN;
429 p2m_top[topidx][mididx] = &mfn_list[pfn];
430 }
431} 297}
432#ifdef CONFIG_X86_64 298
433unsigned long __init xen_revector_p2m_tree(void) 299static void __init xen_rebuild_p2m_list(unsigned long *p2m)
434{ 300{
435 unsigned long va_start; 301 unsigned int i, chunk;
436 unsigned long va_end;
437 unsigned long pfn; 302 unsigned long pfn;
438 unsigned long pfn_free = 0; 303 unsigned long *mfns;
439 unsigned long *mfn_list = NULL; 304 pte_t *ptep;
440 unsigned long size; 305 pmd_t *pmdp;
441 306 int type;
442 use_brk = 0;
443 va_start = xen_start_info->mfn_list;
444 /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
445 * so make sure it is rounded up to that */
446 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
447 va_end = va_start + size;
448
449 /* If we were revectored already, don't do it again. */
450 if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
451 return 0;
452
453 mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
454 if (!mfn_list) {
455 pr_warn("Could not allocate space for a new P2M tree!\n");
456 return xen_start_info->mfn_list;
457 }
458 /* Fill it out with INVALID_P2M_ENTRY value */
459 memset(mfn_list, 0xFF, size);
460 307
461 for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { 308 p2m_missing = alloc_p2m_page();
462 unsigned topidx = p2m_top_index(pfn); 309 p2m_init(p2m_missing);
463 unsigned mididx; 310 p2m_identity = alloc_p2m_page();
464 unsigned long *mid_p; 311 p2m_init(p2m_identity);
465
466 if (!p2m_top[topidx])
467 continue;
468 312
469 if (p2m_top[topidx] == p2m_mid_missing) 313 p2m_missing_pte = alloc_p2m_page();
470 continue; 314 paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
315 p2m_identity_pte = alloc_p2m_page();
316 paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
317 for (i = 0; i < PTRS_PER_PTE; i++) {
318 set_pte(p2m_missing_pte + i,
319 pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL));
320 set_pte(p2m_identity_pte + i,
321 pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL));
322 }
471 323
472 mididx = p2m_mid_index(pfn); 324 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
473 mid_p = p2m_top[topidx][mididx]; 325 /*
474 if (!mid_p) 326 * Try to map missing/identity PMDs or p2m-pages if possible.
475 continue; 327 * We have to respect the structure of the mfn_list_list
476 if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) 328 * which will be built just afterwards.
329 * Chunk size to test is one p2m page if we are in the middle
330 * of a mfn_list_list mid page and the complete mid page area
331 * if we are at index 0 of the mid page. Please note that a
332 * mid page might cover more than one PMD, e.g. on 32 bit PAE
333 * kernels.
334 */
335 chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
336 P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;
337
338 type = xen_p2m_elem_type(pfn);
339 i = 0;
340 if (type != P2M_TYPE_PFN)
341 for (i = 1; i < chunk; i++)
342 if (xen_p2m_elem_type(pfn + i) != type)
343 break;
344 if (i < chunk)
345 /* Reset to minimal chunk size. */
346 chunk = P2M_PER_PAGE;
347
348 if (type == P2M_TYPE_PFN || i < chunk) {
349 /* Use initial p2m page contents. */
350#ifdef CONFIG_X86_64
351 mfns = alloc_p2m_page();
352 copy_page(mfns, xen_p2m_addr + pfn);
353#else
354 mfns = xen_p2m_addr + pfn;
355#endif
356 ptep = populate_extra_pte((unsigned long)(p2m + pfn));
357 set_pte(ptep,
358 pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
477 continue; 359 continue;
360 }
478 361
479 if ((unsigned long)mid_p == INVALID_P2M_ENTRY) 362 if (chunk == P2M_PER_PAGE) {
363 /* Map complete missing or identity p2m-page. */
364 mfns = (type == P2M_TYPE_MISSING) ?
365 p2m_missing : p2m_identity;
366 ptep = populate_extra_pte((unsigned long)(p2m + pfn));
367 set_pte(ptep,
368 pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
480 continue; 369 continue;
370 }
481 371
482 /* The old va. Rebase it on mfn_list */ 372 /* Complete missing or identity PMD(s) can be mapped. */
483 if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { 373 ptep = (type == P2M_TYPE_MISSING) ?
484 unsigned long *new; 374 p2m_missing_pte : p2m_identity_pte;
375 for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
376 pmdp = populate_extra_pmd(
377 (unsigned long)(p2m + pfn + i * PTRS_PER_PTE));
378 set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
379 }
380 }
381}
485 382
486 if (pfn_free > (size / sizeof(unsigned long))) { 383void __init xen_vmalloc_p2m_tree(void)
487 WARN(1, "Only allocated for %ld pages, but we want %ld!\n", 384{
488 size / sizeof(unsigned long), pfn_free); 385 static struct vm_struct vm;
489 return 0;
490 }
491 new = &mfn_list[pfn_free];
492 386
493 copy_page(new, mid_p); 387 vm.flags = VM_ALLOC;
494 p2m_top[topidx][mididx] = &mfn_list[pfn_free]; 388 vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
389 PMD_SIZE * PMDS_PER_MID_PAGE);
390 vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
391 pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
495 392
496 pfn_free += P2M_PER_PAGE; 393 xen_max_p2m_pfn = vm.size / sizeof(unsigned long);
497 394
498 } 395 xen_rebuild_p2m_list(vm.addr);
499 /* This should be the leafs allocated for identity from _brk. */
500 }
501 396
397 xen_p2m_addr = vm.addr;
502 xen_p2m_size = xen_max_p2m_pfn; 398 xen_p2m_size = xen_max_p2m_pfn;
503 xen_p2m_addr = mfn_list;
504 399
505 xen_inv_extra_mem(); 400 xen_inv_extra_mem();
506 401
507 m2p_override_init(); 402 m2p_override_init();
508 return (unsigned long)mfn_list;
509} 403}
510#else 404
511unsigned long __init xen_revector_p2m_tree(void)
512{
513 use_brk = 0;
514 xen_p2m_size = xen_max_p2m_pfn;
515 xen_inv_extra_mem();
516 m2p_override_init();
517 return 0;
518}
519#endif
520unsigned long get_phys_to_machine(unsigned long pfn) 405unsigned long get_phys_to_machine(unsigned long pfn)
521{ 406{
522 unsigned topidx, mididx, idx; 407 pte_t *ptep;
408 unsigned int level;
523 409
524 if (unlikely(pfn >= xen_p2m_size)) { 410 if (unlikely(pfn >= xen_p2m_size)) {
525 if (pfn < xen_max_p2m_pfn) 411 if (pfn < xen_max_p2m_pfn)
@@ -528,23 +414,83 @@ unsigned long get_phys_to_machine(unsigned long pfn)
528 return IDENTITY_FRAME(pfn); 414 return IDENTITY_FRAME(pfn);
529 } 415 }
530 416
531 topidx = p2m_top_index(pfn); 417 ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
532 mididx = p2m_mid_index(pfn); 418 BUG_ON(!ptep || level != PG_LEVEL_4K);
533 idx = p2m_index(pfn);
534 419
535 /* 420 /*
536 * The INVALID_P2M_ENTRY is filled in both p2m_*identity 421 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
537 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY 422 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
538 * would be wrong. 423 * would be wrong.
539 */ 424 */
540 if (p2m_top[topidx][mididx] == p2m_identity) 425 if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
541 return IDENTITY_FRAME(pfn); 426 return IDENTITY_FRAME(pfn);
542 427
543 return p2m_top[topidx][mididx][idx]; 428 return xen_p2m_addr[pfn];
544} 429}
545EXPORT_SYMBOL_GPL(get_phys_to_machine); 430EXPORT_SYMBOL_GPL(get_phys_to_machine);
546 431
547/* 432/*
433 * Allocate new pmd(s). It is checked whether the old pmd is still in place.
434 * If not, nothing is changed. This is okay as the only reason for allocating
435 * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
436 * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
437 */
438static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
439{
440 pte_t *ptechk;
441 pte_t *pteret = ptep;
442 pte_t *pte_newpg[PMDS_PER_MID_PAGE];
443 pmd_t *pmdp;
444 unsigned int level;
445 unsigned long flags;
446 unsigned long vaddr;
447 int i;
448
449 /* Do all allocations first to bail out in error case. */
450 for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
451 pte_newpg[i] = alloc_p2m_page();
452 if (!pte_newpg[i]) {
453 for (i--; i >= 0; i--)
454 free_p2m_page(pte_newpg[i]);
455
456 return NULL;
457 }
458 }
459
460 vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);
461
462 for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
463 copy_page(pte_newpg[i], pte_pg);
464 paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);
465
466 pmdp = lookup_pmd_address(vaddr);
467 BUG_ON(!pmdp);
468
469 spin_lock_irqsave(&p2m_update_lock, flags);
470
471 ptechk = lookup_address(vaddr, &level);
472 if (ptechk == pte_pg) {
473 set_pmd(pmdp,
474 __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
475 if (vaddr == (addr & ~(PMD_SIZE - 1)))
476 pteret = pte_offset_kernel(pmdp, addr);
477 pte_newpg[i] = NULL;
478 }
479
480 spin_unlock_irqrestore(&p2m_update_lock, flags);
481
482 if (pte_newpg[i]) {
483 paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
484 free_p2m_page(pte_newpg[i]);
485 }
486
487 vaddr += PMD_SIZE;
488 }
489
490 return pteret;
491}
492
493/*
548 * Fully allocate the p2m structure for a given pfn. We need to check 494 * Fully allocate the p2m structure for a given pfn. We need to check
549 * that both the top and mid levels are allocated, and make sure the 495 * that both the top and mid levels are allocated, and make sure the
550 * parallel mfn tree is kept in sync. We may race with other cpus, so 496 * parallel mfn tree is kept in sync. We may race with other cpus, so
@@ -554,58 +500,62 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine);
554static bool alloc_p2m(unsigned long pfn) 500static bool alloc_p2m(unsigned long pfn)
555{ 501{
556 unsigned topidx, mididx; 502 unsigned topidx, mididx;
557 unsigned long ***top_p, **mid;
558 unsigned long *top_mfn_p, *mid_mfn; 503 unsigned long *top_mfn_p, *mid_mfn;
559 unsigned long *p2m_orig; 504 pte_t *ptep, *pte_pg;
505 unsigned int level;
506 unsigned long flags;
507 unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
508 unsigned long p2m_pfn;
560 509
561 topidx = p2m_top_index(pfn); 510 topidx = p2m_top_index(pfn);
562 mididx = p2m_mid_index(pfn); 511 mididx = p2m_mid_index(pfn);
563 512
564 top_p = &p2m_top[topidx]; 513 ptep = lookup_address(addr, &level);
565 mid = ACCESS_ONCE(*top_p); 514 BUG_ON(!ptep || level != PG_LEVEL_4K);
515 pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
566 516
567 if (mid == p2m_mid_missing) { 517 if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
568 /* Mid level is missing, allocate a new one */ 518 /* PMD level is missing, allocate a new one */
569 mid = alloc_p2m_page(); 519 ptep = alloc_p2m_pmd(addr, ptep, pte_pg);
570 if (!mid) 520 if (!ptep)
571 return false; 521 return false;
572
573 p2m_mid_init(mid, p2m_missing);
574
575 if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
576 free_p2m_page(mid);
577 } 522 }
578 523
579 top_mfn_p = &p2m_top_mfn[topidx]; 524 if (p2m_top_mfn) {
580 mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); 525 top_mfn_p = &p2m_top_mfn[topidx];
526 mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
581 527
582 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); 528 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
583 529
584 if (mid_mfn == p2m_mid_missing_mfn) { 530 if (mid_mfn == p2m_mid_missing_mfn) {
585 /* Separately check the mid mfn level */ 531 /* Separately check the mid mfn level */
586 unsigned long missing_mfn; 532 unsigned long missing_mfn;
587 unsigned long mid_mfn_mfn; 533 unsigned long mid_mfn_mfn;
588 unsigned long old_mfn; 534 unsigned long old_mfn;
589 535
590 mid_mfn = alloc_p2m_page(); 536 mid_mfn = alloc_p2m_page();
591 if (!mid_mfn) 537 if (!mid_mfn)
592 return false; 538 return false;
593 539
594 p2m_mid_mfn_init(mid_mfn, p2m_missing); 540 p2m_mid_mfn_init(mid_mfn, p2m_missing);
595 541
596 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); 542 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
597 mid_mfn_mfn = virt_to_mfn(mid_mfn); 543 mid_mfn_mfn = virt_to_mfn(mid_mfn);
598 old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); 544 old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
599 if (old_mfn != missing_mfn) { 545 if (old_mfn != missing_mfn) {
600 free_p2m_page(mid_mfn); 546 free_p2m_page(mid_mfn);
601 mid_mfn = mfn_to_virt(old_mfn); 547 mid_mfn = mfn_to_virt(old_mfn);
602 } else { 548 } else {
603 p2m_top_mfn_p[topidx] = mid_mfn; 549 p2m_top_mfn_p[topidx] = mid_mfn;
550 }
604 } 551 }
552 } else {
553 mid_mfn = NULL;
605 } 554 }
606 555
607 p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); 556 p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep));
608 if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { 557 if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
558 p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
609 /* p2m leaf page is missing */ 559 /* p2m leaf page is missing */
610 unsigned long *p2m; 560 unsigned long *p2m;
611 561
@@ -613,12 +563,25 @@ static bool alloc_p2m(unsigned long pfn)
613 if (!p2m) 563 if (!p2m)
614 return false; 564 return false;
615 565
616 p2m_init(p2m); 566 if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
567 p2m_init(p2m);
568 else
569 p2m_init_identity(p2m, pfn);
570
571 spin_lock_irqsave(&p2m_update_lock, flags);
572
573 if (pte_pfn(*ptep) == p2m_pfn) {
574 set_pte(ptep,
575 pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
576 if (mid_mfn)
577 mid_mfn[mididx] = virt_to_mfn(p2m);
578 p2m = NULL;
579 }
580
581 spin_unlock_irqrestore(&p2m_update_lock, flags);
617 582
618 if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) 583 if (p2m)
619 free_p2m_page(p2m); 584 free_p2m_page(p2m);
620 else
621 mid_mfn[mididx] = virt_to_mfn(p2m);
622 } 585 }
623 586
624 return true; 587 return true;
@@ -647,10 +610,10 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
647 return pfn - pfn_s; 610 return pfn - pfn_s;
648} 611}
649 612
650/* Try to install p2m mapping; fail if intermediate bits missing */
651bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) 613bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
652{ 614{
653 unsigned topidx, mididx, idx; 615 pte_t *ptep;
616 unsigned int level;
654 617
655 /* don't track P2M changes in autotranslate guests */ 618 /* don't track P2M changes in autotranslate guests */
656 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) 619 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
@@ -661,55 +624,27 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
661 return true; 624 return true;
662 } 625 }
663 626
664 topidx = p2m_top_index(pfn); 627 ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
665 mididx = p2m_mid_index(pfn); 628 BUG_ON(!ptep || level != PG_LEVEL_4K);
666 idx = p2m_index(pfn);
667
668 /* For sparse holes were the p2m leaf has real PFN along with
669 * PCI holes, stick in the PFN as the MFN value.
670 *
671 * set_phys_range_identity() will have allocated new middle
672 * and leaf pages as required so an existing p2m_mid_missing
673 * or p2m_missing mean that whole range will be identity so
674 * these can be switched to p2m_mid_identity or p2m_identity.
675 */
676 if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
677 if (p2m_top[topidx] == p2m_mid_identity)
678 return true;
679
680 if (p2m_top[topidx] == p2m_mid_missing) {
681 WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing,
682 p2m_mid_identity) != p2m_mid_missing);
683 return true;
684 }
685
686 if (p2m_top[topidx][mididx] == p2m_identity)
687 return true;
688
689 /* Swap over from MISSING to IDENTITY if needed. */
690 if (p2m_top[topidx][mididx] == p2m_missing) {
691 WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
692 p2m_identity) != p2m_missing);
693 return true;
694 }
695 }
696 629
697 if (p2m_top[topidx][mididx] == p2m_missing) 630 if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing)))
698 return mfn == INVALID_P2M_ENTRY; 631 return mfn == INVALID_P2M_ENTRY;
699 632
700 p2m_top[topidx][mididx][idx] = mfn; 633 if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
634 return mfn == IDENTITY_FRAME(pfn);
635
636 xen_p2m_addr[pfn] = mfn;
701 637
702 return true; 638 return true;
703} 639}
704 640
705bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) 641bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
706{ 642{
707 if (unlikely(!__set_phys_to_machine(pfn, mfn))) { 643 if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
708 if (!alloc_p2m(pfn)) 644 if (!alloc_p2m(pfn))
709 return false; 645 return false;
710 646
711 if (!__set_phys_to_machine(pfn, mfn)) 647 return __set_phys_to_machine(pfn, mfn);
712 return false;
713 } 648 }
714 649
715 return true; 650 return true;
@@ -1035,79 +970,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
1035#include "debugfs.h" 970#include "debugfs.h"
1036static int p2m_dump_show(struct seq_file *m, void *v) 971static int p2m_dump_show(struct seq_file *m, void *v)
1037{ 972{
1038 static const char * const level_name[] = { "top", "middle",
1039 "entry", "abnormal", "error"};
1040#define TYPE_IDENTITY 0
1041#define TYPE_MISSING 1
1042#define TYPE_PFN 2
1043#define TYPE_UNKNOWN 3
1044 static const char * const type_name[] = { 973 static const char * const type_name[] = {
1045 [TYPE_IDENTITY] = "identity", 974 [P2M_TYPE_IDENTITY] = "identity",
1046 [TYPE_MISSING] = "missing", 975 [P2M_TYPE_MISSING] = "missing",
1047 [TYPE_PFN] = "pfn", 976 [P2M_TYPE_PFN] = "pfn",
1048 [TYPE_UNKNOWN] = "abnormal"}; 977 [P2M_TYPE_UNKNOWN] = "abnormal"};
1049 unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; 978 unsigned long pfn, first_pfn;
1050 unsigned int uninitialized_var(prev_level); 979 int type, prev_type;
1051 unsigned int uninitialized_var(prev_type); 980
1052 981 prev_type = xen_p2m_elem_type(0);
1053 if (!p2m_top) 982 first_pfn = 0;
1054 return 0; 983
1055 984 for (pfn = 0; pfn < xen_p2m_size; pfn++) {
1056 for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { 985 type = xen_p2m_elem_type(pfn);
1057 unsigned topidx = p2m_top_index(pfn); 986 if (type != prev_type) {
1058 unsigned mididx = p2m_mid_index(pfn); 987 seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
1059 unsigned idx = p2m_index(pfn); 988 type_name[prev_type]);
1060 unsigned lvl, type;
1061
1062 lvl = 4;
1063 type = TYPE_UNKNOWN;
1064 if (p2m_top[topidx] == p2m_mid_missing) {
1065 lvl = 0; type = TYPE_MISSING;
1066 } else if (p2m_top[topidx] == NULL) {
1067 lvl = 0; type = TYPE_UNKNOWN;
1068 } else if (p2m_top[topidx][mididx] == NULL) {
1069 lvl = 1; type = TYPE_UNKNOWN;
1070 } else if (p2m_top[topidx][mididx] == p2m_identity) {
1071 lvl = 1; type = TYPE_IDENTITY;
1072 } else if (p2m_top[topidx][mididx] == p2m_missing) {
1073 lvl = 1; type = TYPE_MISSING;
1074 } else if (p2m_top[topidx][mididx][idx] == 0) {
1075 lvl = 2; type = TYPE_UNKNOWN;
1076 } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
1077 lvl = 2; type = TYPE_IDENTITY;
1078 } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
1079 lvl = 2; type = TYPE_MISSING;
1080 } else if (p2m_top[topidx][mididx][idx] == pfn) {
1081 lvl = 2; type = TYPE_PFN;
1082 } else if (p2m_top[topidx][mididx][idx] != pfn) {
1083 lvl = 2; type = TYPE_PFN;
1084 }
1085 if (pfn == 0) {
1086 prev_level = lvl;
1087 prev_type = type; 989 prev_type = type;
1088 } 990 first_pfn = pfn;
1089 if (pfn == MAX_DOMAIN_PAGES-1) {
1090 lvl = 3;
1091 type = TYPE_UNKNOWN;
1092 }
1093 if (prev_type != type) {
1094 seq_printf(m, " [0x%lx->0x%lx] %s\n",
1095 prev_pfn_type, pfn, type_name[prev_type]);
1096 prev_pfn_type = pfn;
1097 prev_type = type;
1098 }
1099 if (prev_level != lvl) {
1100 seq_printf(m, " [0x%lx->0x%lx] level %s\n",
1101 prev_pfn_level, pfn, level_name[prev_level]);
1102 prev_pfn_level = pfn;
1103 prev_level = lvl;
1104 } 991 }
1105 } 992 }
993 seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
994 type_name[prev_type]);
1106 return 0; 995 return 0;
1107#undef TYPE_IDENTITY
1108#undef TYPE_MISSING
1109#undef TYPE_PFN
1110#undef TYPE_UNKNOWN
1111} 996}
1112 997
1113static int p2m_dump_open(struct inode *inode, struct file *filp) 998static int p2m_dump_open(struct inode *inode, struct file *filp)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 02b0b0fba041..f92921fa54f9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ void xen_hvm_init_shared_info(void);
49void xen_unplug_emulated_devices(void); 49void xen_unplug_emulated_devices(void);
50 50
51void __init xen_build_dynamic_phys_to_machine(void); 51void __init xen_build_dynamic_phys_to_machine(void);
52unsigned long __init xen_revector_p2m_tree(void); 52void __init xen_vmalloc_p2m_tree(void);
53 53
54void xen_init_irq_ops(void); 54void xen_init_irq_ops(void);
55void xen_setup_timer(int cpu); 55void xen_setup_timer(int cpu);