-rw-r--r-- | arch/x86/include/asm/xen/page.h |  20
-rw-r--r-- | arch/x86/xen/mmu.c              |  34
-rw-r--r-- | arch/x86/xen/p2m.c              | 735
-rw-r--r-- | arch/x86/xen/xen-ops.h          |   2
4 files changed, 347 insertions, 444 deletions
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 57aba6ba6f92..b54a3d20d6b2 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -65,13 +65,25 @@ extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) | |||
65 | * bits (identity or foreign) are set. | 65 | * bits (identity or foreign) are set. |
66 | * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set | 66 | * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set |
67 | * identity or foreign indicator will be still set. __pfn_to_mfn() is | 67 | * identity or foreign indicator will be still set. __pfn_to_mfn() is |
68 | * encapsulating get_phys_to_machine(). | 68 | * encapsulating get_phys_to_machine() which is called in special cases only. |
69 | * - get_phys_to_machine() is to be called by __pfn_to_mfn() only to allow | 69 | * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special |
70 | * for future optimizations. | 70 | * cases needing an extended handling. |
71 | */ | 71 | */ |
72 | static inline unsigned long __pfn_to_mfn(unsigned long pfn) | 72 | static inline unsigned long __pfn_to_mfn(unsigned long pfn) |
73 | { | 73 | { |
74 | return get_phys_to_machine(pfn); | 74 | unsigned long mfn; |
75 | |||
76 | if (pfn < xen_p2m_size) | ||
77 | mfn = xen_p2m_addr[pfn]; | ||
78 | else if (unlikely(pfn < xen_max_p2m_pfn)) | ||
79 | return get_phys_to_machine(pfn); | ||
80 | else | ||
81 | return IDENTITY_FRAME(pfn); | ||
82 | |||
83 | if (unlikely(mfn == INVALID_P2M_ENTRY)) | ||
84 | return get_phys_to_machine(pfn); | ||
85 | |||
86 | return mfn; | ||
75 | } | 87 | } |
76 | 88 | ||
77 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | 89 | static inline unsigned long pfn_to_mfn(unsigned long pfn) |
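
For orientation, below is a minimal user-space model of the fast path added above in __pfn_to_mfn(). The constants, sizes and the main() driver are illustrative placeholders only (the real INVALID_P2M_ENTRY/IDENTITY_FRAME definitions live in asm/xen/page.h); the point is the three-range decision: direct array read for the common case, slow path for the sparse tail, identity beyond the p2m.

#include <stdio.h>
#include <stdlib.h>

#define INVALID_P2M_ENTRY   (~0UL)
#define IDENTITY_FRAME_BIT  (1UL << 62)              /* placeholder bit position */
#define IDENTITY_FRAME(pfn) ((pfn) | IDENTITY_FRAME_BIT)

static unsigned long *xen_p2m_addr;                  /* linear p2m array */
static unsigned long xen_p2m_size;                   /* entries backed by the array */
static unsigned long xen_max_p2m_pfn;                /* highest pfn covered at all */

/* stand-in for the (rare) slow path */
static unsigned long get_phys_to_machine(unsigned long pfn)
{
	return IDENTITY_FRAME(pfn);
}

static unsigned long model_pfn_to_mfn(unsigned long pfn)
{
	unsigned long mfn;

	if (pfn < xen_p2m_size)
		mfn = xen_p2m_addr[pfn];              /* common case: one array read */
	else if (pfn < xen_max_p2m_pfn)
		return get_phys_to_machine(pfn);      /* sparse tail: take the slow path */
	else
		return IDENTITY_FRAME(pfn);           /* beyond the p2m: identity mapped */

	if (mfn == INVALID_P2M_ENTRY)                 /* invalid entries also go slow */
		return get_phys_to_machine(pfn);

	return mfn;
}

int main(void)
{
	xen_p2m_size = 4;
	xen_max_p2m_pfn = 8;
	xen_p2m_addr = calloc(xen_p2m_size, sizeof(*xen_p2m_addr));
	xen_p2m_addr[0] = 0x1234;                     /* populated entry */
	xen_p2m_addr[1] = INVALID_P2M_ENTRY;          /* ballooned-out page */

	printf("%lx %lx %lx %lx\n", model_pfn_to_mfn(0), model_pfn_to_mfn(1),
	       model_pfn_to_mfn(5), model_pfn_to_mfn(9));
	free(xen_p2m_addr);
	return 0;
}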
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3e3f8f8c3a30..6ab6150c8560 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1158,20 +1158,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr, | |||
1158 | * instead of somewhere later and be confusing. */ | 1158 | * instead of somewhere later and be confusing. */ |
1159 | xen_mc_flush(); | 1159 | xen_mc_flush(); |
1160 | } | 1160 | } |
1161 | static void __init xen_pagetable_p2m_copy(void) | 1161 | |
1162 | static void __init xen_pagetable_p2m_free(void) | ||
1162 | { | 1163 | { |
1163 | unsigned long size; | 1164 | unsigned long size; |
1164 | unsigned long addr; | 1165 | unsigned long addr; |
1165 | unsigned long new_mfn_list; | ||
1166 | |||
1167 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
1168 | return; | ||
1169 | 1166 | ||
1170 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1167 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1171 | 1168 | ||
1172 | new_mfn_list = xen_revector_p2m_tree(); | ||
1173 | /* No memory or already called. */ | 1169 | /* No memory or already called. */ |
1174 | if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) | 1170 | if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list) |
1175 | return; | 1171 | return; |
1176 | 1172 | ||
1177 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ | 1173 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ |
@@ -1189,8 +1185,6 @@ static void __init xen_pagetable_p2m_copy(void) | |||
1189 | 1185 | ||
1190 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1186 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1191 | memblock_free(__pa(xen_start_info->mfn_list), size); | 1187 | memblock_free(__pa(xen_start_info->mfn_list), size); |
1192 | /* And revector! Bye bye old array */ | ||
1193 | xen_start_info->mfn_list = new_mfn_list; | ||
1194 | 1188 | ||
1195 | /* At this stage, cleanup_highmap has already cleaned __ka space | 1189 | /* At this stage, cleanup_highmap has already cleaned __ka space |
1196 | * from _brk_limit way up to the max_pfn_mapped (which is the end of | 1190 | * from _brk_limit way up to the max_pfn_mapped (which is the end of |
@@ -1214,14 +1208,26 @@ static void __init xen_pagetable_p2m_copy(void) | |||
1214 | } | 1208 | } |
1215 | #endif | 1209 | #endif |
1216 | 1210 | ||
1217 | static void __init xen_pagetable_init(void) | 1211 | static void __init xen_pagetable_p2m_setup(void) |
1218 | { | 1212 | { |
1219 | paging_init(); | 1213 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
1214 | return; | ||
1215 | |||
1216 | xen_vmalloc_p2m_tree(); | ||
1217 | |||
1220 | #ifdef CONFIG_X86_64 | 1218 | #ifdef CONFIG_X86_64 |
1221 | xen_pagetable_p2m_copy(); | 1219 | xen_pagetable_p2m_free(); |
1222 | #else | ||
1223 | xen_revector_p2m_tree(); | ||
1224 | #endif | 1220 | #endif |
1221 | /* And revector! Bye bye old array */ | ||
1222 | xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; | ||
1223 | } | ||
1224 | |||
1225 | static void __init xen_pagetable_init(void) | ||
1226 | { | ||
1227 | paging_init(); | ||
1228 | |||
1229 | xen_pagetable_p2m_setup(); | ||
1230 | |||
1225 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | 1231 | /* Allocate and initialize top and mid mfn levels for p2m structure */ |
1226 | xen_build_mfn_list_list(); | 1232 | xen_build_mfn_list_list(); |
1227 | 1233 | ||
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 8c3d8fbbba93..7d844739e513 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -3,21 +3,22 @@ | |||
3 | * guests themselves, but it must also access and update the p2m array | 3 | * guests themselves, but it must also access and update the p2m array |
4 | * during suspend/resume when all the pages are reallocated. | 4 | * during suspend/resume when all the pages are reallocated. |
5 | * | 5 | * |
6 | * The p2m table is logically a flat array, but we implement it as a | 6 | * The logical flat p2m table is mapped to a linear kernel memory area. |
7 | * three-level tree to allow the address space to be sparse. | 7 | * For accesses by Xen a three-level tree linked via mfns only is set up to |
8 | * allow the address space to be sparse. | ||
8 | * | 9 | * |
9 | * Xen | 10 | * Xen |
10 | * | | 11 | * | |
11 | * p2m_top p2m_top_mfn | 12 | * p2m_top_mfn |
12 | * / \ / \ | 13 | * / \ |
13 | * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | 14 | * p2m_mid_mfn p2m_mid_mfn |
14 | * / \ / \ / / | 15 | * / / |
15 | * p2m p2m p2m p2m p2m p2m p2m ... | 16 | * p2m p2m p2m ... |
16 | * | 17 | * |
17 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | 18 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. |
18 | * | 19 | * |
19 | * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | 20 | * The p2m_top_mfn level is limited to 1 page, so the maximum representable |
20 | * maximum representable pseudo-physical address space is: | 21 | * pseudo-physical address space is: |
21 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages | 22 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages |
22 | * | 23 | * |
23 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | 24 | * P2M_PER_PAGE depends on the architecture, as a mfn is always |
@@ -30,6 +31,9 @@ | |||
30 | * leaf entries, or for the top root, or middle one, for which there is a void | 31 | * leaf entries, or for the top root, or middle one, for which there is a void |
31 | * entry, we assume it is "missing". So (for example) | 32 | * entry, we assume it is "missing". So (for example) |
32 | * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. | 33 | * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. |
34 | * We have a dedicated page p2m_missing with all entries being | ||
35 | * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m | ||
36 | * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. | ||
33 | * | 37 | * |
34 | * We also have the possibility of setting 1-1 mappings on certain regions, so | 38 | * We also have the possibility of setting 1-1 mappings on certain regions, so |
35 | * that: | 39 | * that: |
@@ -39,122 +43,20 @@ | |||
39 | * PCI BARs, or ACPI spaces), we can create mappings easily because we | 43 | * PCI BARs, or ACPI spaces), we can create mappings easily because we |
40 | * get the PFN value to match the MFN. | 44 | * get the PFN value to match the MFN. |
41 | * | 45 | * |
42 | * For this to work efficiently we have one new page p2m_identity and | 46 | * For this to work efficiently we have one new page p2m_identity. All entries |
43 | * allocate (via reserved_brk) any other pages we need to cover the sides | 47 | * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only |
44 | * (1GB or 4MB boundary violations). All entries in p2m_identity are set to | 48 | * recognizes that and MFNs, no other fancy value). |
45 | * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, | ||
46 | * no other fancy value). | ||
47 | * | 49 | * |
48 | * On lookup we spot that the entry points to p2m_identity and return the | 50 | * On lookup we spot that the entry points to p2m_identity and return the |
49 | * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. | 51 | * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. |
50 | * If the entry points to an allocated page, we just proceed as before and | 52 | * If the entry points to an allocated page, we just proceed as before and |
51 | * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in | 53 | * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in |
52 | * appropriate functions (pfn_to_mfn). | 54 | * appropriate functions (pfn_to_mfn). |
53 | * | 55 | * |
54 | * The reason for having the IDENTITY_FRAME_BIT instead of just returning the | 56 | * The reason for having the IDENTITY_FRAME_BIT instead of just returning the |
55 | * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a | 57 | * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a |
56 | * non-identity pfn. To protect ourselves against we elect to set (and get) the | 58 | * non-identity pfn. To protect ourselves against we elect to set (and get) the |
57 | * IDENTITY_FRAME_BIT on all identity mapped PFNs. | 59 | * IDENTITY_FRAME_BIT on all identity mapped PFNs. |
58 | * | ||
59 | * This simplistic diagram is used to explain the more subtle piece of code. | ||
60 | * There is also a digram of the P2M at the end that can help. | ||
61 | * Imagine your E820 looking as so: | ||
62 | * | ||
63 | * 1GB 2GB 4GB | ||
64 | * /-------------------+---------\/----\ /----------\ /---+-----\ | ||
65 | * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | | ||
66 | * \-------------------+---------/\----/ \----------/ \---+-----/ | ||
67 | * ^- 1029MB ^- 2001MB | ||
68 | * | ||
69 | * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), | ||
70 | * 2048MB = 524288 (0x80000)] | ||
71 | * | ||
72 | * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB | ||
73 | * is actually not present (would have to kick the balloon driver to put it in). | ||
74 | * | ||
75 | * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: | ||
76 | * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start | ||
77 | * of the PFN and the end PFN (263424 and 512256 respectively). The first step | ||
78 | * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page | ||
79 | * covers 512^2 of page estate (1GB) and in case the start or end PFN is not | ||
80 | * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as | ||
81 | * required to split any existing p2m_mid_missing middle pages. | ||
82 | * | ||
83 | * With the E820 example above, 263424 is not 1GB aligned so we allocate a | ||
84 | * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. | ||
85 | * Each entry in the allocate page is "missing" (points to p2m_missing). | ||
86 | * | ||
87 | * Next stage is to determine if we need to do a more granular boundary check | ||
88 | * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. | ||
89 | * We check if the start pfn and end pfn violate that boundary check, and if | ||
90 | * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer | ||
91 | * granularity of setting which PFNs are missing and which ones are identity. | ||
92 | * In our example 263424 and 512256 both fail the check so we reserve_brk two | ||
93 | * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" | ||
94 | * values) and assign them to p2m[1][2] and p2m[1][488] respectively. | ||
95 | * | ||
96 | * At this point we would at minimum reserve_brk one page, but could be up to | ||
97 | * three. Each call to set_phys_range_identity has at maximum a three page | ||
98 | * cost. If we were to query the P2M at this stage, all those entries from | ||
99 | * start PFN through end PFN (so 1029MB -> 2001MB) would return | ||
100 | * INVALID_P2M_ENTRY ("missing"). | ||
101 | * | ||
102 | * The next step is to walk from the start pfn to the end pfn setting | ||
103 | * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. | ||
104 | * If we find that the middle entry is pointing to p2m_missing we can swap it | ||
105 | * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and | ||
106 | * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions). | ||
107 | * At this point we do not need to worry about boundary aligment (so no need to | ||
108 | * reserve_brk a middle page, figure out which PFNs are "missing" and which | ||
109 | * ones are identity), as that has been done earlier. If we find that the | ||
110 | * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference | ||
111 | * that page (which covers 512 PFNs) and set the appropriate PFN with | ||
112 | * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we | ||
113 | * set from p2m[1][2][256->511] and p2m[1][488][0->256] with | ||
114 | * IDENTITY_FRAME_BIT set. | ||
115 | * | ||
116 | * All other regions that are void (or not filled) either point to p2m_missing | ||
117 | * (considered missing) or have the default value of INVALID_P2M_ENTRY (also | ||
118 | * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] | ||
119 | * contain the INVALID_P2M_ENTRY value and are considered "missing." | ||
120 | * | ||
121 | * Finally, the region beyond the end of of the E820 (4 GB in this example) | ||
122 | * is set to be identity (in case there are MMIO regions placed here). | ||
123 | * | ||
124 | * This is what the p2m ends up looking (for the E820 above) with this | ||
125 | * fabulous drawing: | ||
126 | * | ||
127 | * p2m /--------------\ | ||
128 | * /-----\ | &mfn_list[0],| /-----------------\ | ||
129 | * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. | | ||
130 | * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | | ||
131 | * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | | ||
132 | * |-----| \ | [p2m_identity]+\\ | .... | | ||
133 | * | 2 |--\ \-------------------->| ... | \\ \----------------/ | ||
134 | * |-----| \ \---------------/ \\ | ||
135 | * | 3 |-\ \ \\ p2m_identity [1] | ||
136 | * |-----| \ \-------------------->/---------------\ /-----------------\ | ||
137 | * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... | | ||
138 | * \-----/ | | | [p2m_identity]+-->| ..., ~0 | | ||
139 | * | | | .... | \-----------------/ | ||
140 | * | | +-[x], ~0, ~0.. +\ | ||
141 | * | | \---------------/ \ | ||
142 | * | | \-> /---------------\ | ||
143 | * | V p2m_mid_missing p2m_missing | IDENTITY[@0] | | ||
144 | * | /-----------------\ /------------\ | IDENTITY[@256]| | ||
145 | * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... | | ||
146 | * | | [p2m_missing] +---->| ..., ~0 | \---------------/ | ||
147 | * | | ... | \------------/ | ||
148 | * | \-----------------/ | ||
149 | * | | ||
150 | * | p2m_mid_identity | ||
151 | * | /-----------------\ | ||
152 | * \-->| [p2m_identity] +---->[1] | ||
153 | * | [p2m_identity] +---->[1] | ||
154 | * | ... | | ||
155 | * \-----------------/ | ||
156 | * | ||
157 | * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) | ||
158 | */ | 60 | */ |
159 | 61 | ||
160 | #include <linux/init.h> | 62 | #include <linux/init.h> |
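
As a quick sanity check of the "maximum representable pseudo-physical address space" limit quoted in the header comment above, here is a standalone calculation assuming x86-64 with 4 KiB pages and 8-byte entries at every level (the kernel derives these sizes from PAGE_SIZE and sizeof(unsigned long)):

#include <stdio.h>

int main(void)
{
	unsigned long per_page = 4096 / sizeof(unsigned long);      /* 512 entries/page */
	unsigned long max_pages = per_page * per_page * per_page;   /* top * mid * leaf */

	/* 134217728 pages, i.e. 512 GiB of pseudo-physical address space */
	printf("%lu pages = %lu GiB\n", max_pages, (max_pages << 12) >> 30);
	return 0;
}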
@@ -179,6 +81,8 @@ | |||
179 | #include "multicalls.h" | 81 | #include "multicalls.h" |
180 | #include "xen-ops.h" | 82 | #include "xen-ops.h" |
181 | 83 | ||
84 | #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) | ||
85 | |||
182 | static void __init m2p_override_init(void); | 86 | static void __init m2p_override_init(void); |
183 | 87 | ||
184 | unsigned long *xen_p2m_addr __read_mostly; | 88 | unsigned long *xen_p2m_addr __read_mostly; |
@@ -188,22 +92,15 @@ EXPORT_SYMBOL_GPL(xen_p2m_size); | |||
188 | unsigned long xen_max_p2m_pfn __read_mostly; | 92 | unsigned long xen_max_p2m_pfn __read_mostly; |
189 | EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); | 93 | EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); |
190 | 94 | ||
95 | static DEFINE_SPINLOCK(p2m_update_lock); | ||
96 | |||
191 | static unsigned long *p2m_mid_missing_mfn; | 97 | static unsigned long *p2m_mid_missing_mfn; |
192 | static unsigned long *p2m_top_mfn; | 98 | static unsigned long *p2m_top_mfn; |
193 | static unsigned long **p2m_top_mfn_p; | 99 | static unsigned long **p2m_top_mfn_p; |
194 | 100 | static unsigned long *p2m_missing; | |
195 | /* Placeholders for holes in the address space */ | 101 | static unsigned long *p2m_identity; |
196 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | 102 | static pte_t *p2m_missing_pte; |
197 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | 103 | static pte_t *p2m_identity_pte; |
198 | |||
199 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | ||
200 | |||
201 | static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); | ||
202 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); | ||
203 | |||
204 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
205 | |||
206 | static int use_brk = 1; | ||
207 | 104 | ||
208 | static inline unsigned p2m_top_index(unsigned long pfn) | 105 | static inline unsigned p2m_top_index(unsigned long pfn) |
209 | { | 106 | { |
@@ -221,14 +118,6 @@ static inline unsigned p2m_index(unsigned long pfn) | |||
221 | return pfn % P2M_PER_PAGE; | 118 | return pfn % P2M_PER_PAGE; |
222 | } | 119 | } |
223 | 120 | ||
224 | static void p2m_top_init(unsigned long ***top) | ||
225 | { | ||
226 | unsigned i; | ||
227 | |||
228 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
229 | top[i] = p2m_mid_missing; | ||
230 | } | ||
231 | |||
232 | static void p2m_top_mfn_init(unsigned long *top) | 121 | static void p2m_top_mfn_init(unsigned long *top) |
233 | { | 122 | { |
234 | unsigned i; | 123 | unsigned i; |
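
A worked example of the index helpers visible in this hunk, using pfn 0x40500 (the 1029 MiB boundary from the documentation block removed earlier in this patch). Sizes assume x86-64 (512 entries per page at every level) and the arithmetic follows the p2m_top_index()/p2m_mid_index()/p2m_index() definitions:

#include <stdio.h>

#define P2M_PER_PAGE      512UL
#define P2M_MID_PER_PAGE  512UL

int main(void)
{
	unsigned long pfn = 0x40500;    /* 263424 = 1029 MiB with 4 KiB pages */

	printf("topidx=%lu mididx=%lu idx=%lu\n",
	       pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE),   /* 1   */
	       (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE,   /* 2   */
	       pfn % P2M_PER_PAGE);                       /* 256 */
	return 0;
}

This is the p2m[1][2][256] slot of the drawing deleted above, so the new code and the old documentation agree on where that pfn lands.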
@@ -245,35 +134,32 @@ static void p2m_top_mfn_p_init(unsigned long **top) | |||
245 | top[i] = p2m_mid_missing_mfn; | 134 | top[i] = p2m_mid_missing_mfn; |
246 | } | 135 | } |
247 | 136 | ||
248 | static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) | 137 | static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) |
249 | { | 138 | { |
250 | unsigned i; | 139 | unsigned i; |
251 | 140 | ||
252 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | 141 | for (i = 0; i < P2M_MID_PER_PAGE; i++) |
253 | mid[i] = leaf; | 142 | mid[i] = virt_to_mfn(leaf); |
254 | } | 143 | } |
255 | 144 | ||
256 | static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) | 145 | static void p2m_init(unsigned long *p2m) |
257 | { | 146 | { |
258 | unsigned i; | 147 | unsigned i; |
259 | 148 | ||
260 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | 149 | for (i = 0; i < P2M_PER_PAGE; i++) |
261 | mid[i] = virt_to_mfn(leaf); | 150 | p2m[i] = INVALID_P2M_ENTRY; |
262 | } | 151 | } |
263 | 152 | ||
264 | static void p2m_init(unsigned long *p2m) | 153 | static void p2m_init_identity(unsigned long *p2m, unsigned long pfn) |
265 | { | 154 | { |
266 | unsigned i; | 155 | unsigned i; |
267 | 156 | ||
268 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | 157 | for (i = 0; i < P2M_PER_PAGE; i++) |
269 | p2m[i] = INVALID_P2M_ENTRY; | 158 | p2m[i] = IDENTITY_FRAME(pfn + i); |
270 | } | 159 | } |
271 | 160 | ||
272 | static void * __ref alloc_p2m_page(void) | 161 | static void * __ref alloc_p2m_page(void) |
273 | { | 162 | { |
274 | if (unlikely(use_brk)) | ||
275 | return extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
276 | |||
277 | if (unlikely(!slab_is_available())) | 163 | if (unlikely(!slab_is_available())) |
278 | return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | 164 | return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); |
279 | 165 | ||
@@ -299,7 +185,10 @@ static void free_p2m_page(void *p) | |||
299 | */ | 185 | */ |
300 | void __ref xen_build_mfn_list_list(void) | 186 | void __ref xen_build_mfn_list_list(void) |
301 | { | 187 | { |
302 | unsigned long pfn; | 188 | unsigned long pfn, mfn; |
189 | pte_t *ptep; | ||
190 | unsigned int level, topidx, mididx; | ||
191 | unsigned long *mid_mfn_p; | ||
303 | 192 | ||
304 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 193 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
305 | return; | 194 | return; |
@@ -319,20 +208,23 @@ void __ref xen_build_mfn_list_list(void) | |||
319 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); | 208 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
320 | } | 209 | } |
321 | 210 | ||
322 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | 211 | for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN; |
323 | unsigned topidx = p2m_top_index(pfn); | 212 | pfn += P2M_PER_PAGE) { |
324 | unsigned mididx = p2m_mid_index(pfn); | 213 | topidx = p2m_top_index(pfn); |
325 | unsigned long **mid; | 214 | mididx = p2m_mid_index(pfn); |
326 | unsigned long *mid_mfn_p; | ||
327 | 215 | ||
328 | mid = p2m_top[topidx]; | ||
329 | mid_mfn_p = p2m_top_mfn_p[topidx]; | 216 | mid_mfn_p = p2m_top_mfn_p[topidx]; |
217 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), | ||
218 | &level); | ||
219 | BUG_ON(!ptep || level != PG_LEVEL_4K); | ||
220 | mfn = pte_mfn(*ptep); | ||
221 | ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | ||
330 | 222 | ||
331 | /* Don't bother allocating any mfn mid levels if | 223 | /* Don't bother allocating any mfn mid levels if |
332 | * they're just missing, just update the stored mfn, | 224 | * they're just missing, just update the stored mfn, |
333 | * since all could have changed over a migrate. | 225 | * since all could have changed over a migrate. |
334 | */ | 226 | */ |
335 | if (mid == p2m_mid_missing) { | 227 | if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) { |
336 | BUG_ON(mididx); | 228 | BUG_ON(mididx); |
337 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | 229 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); |
338 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | 230 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); |
@@ -341,11 +233,6 @@ void __ref xen_build_mfn_list_list(void) | |||
341 | } | 233 | } |
342 | 234 | ||
343 | if (mid_mfn_p == p2m_mid_missing_mfn) { | 235 | if (mid_mfn_p == p2m_mid_missing_mfn) { |
344 | /* | ||
345 | * XXX boot-time only! We should never find | ||
346 | * missing parts of the mfn tree after | ||
347 | * runtime. | ||
348 | */ | ||
349 | mid_mfn_p = alloc_p2m_page(); | 236 | mid_mfn_p = alloc_p2m_page(); |
350 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); | 237 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); |
351 | 238 | ||
@@ -353,7 +240,7 @@ void __ref xen_build_mfn_list_list(void) | |||
353 | } | 240 | } |
354 | 241 | ||
355 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | 242 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); |
356 | mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | 243 | mid_mfn_p[mididx] = mfn; |
357 | } | 244 | } |
358 | } | 245 | } |
359 | 246 | ||
@@ -372,154 +259,153 @@ void xen_setup_mfn_list_list(void) | |||
372 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | 259 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ |
373 | void __init xen_build_dynamic_phys_to_machine(void) | 260 | void __init xen_build_dynamic_phys_to_machine(void) |
374 | { | 261 | { |
375 | unsigned long *mfn_list; | ||
376 | unsigned long max_pfn; | ||
377 | unsigned long pfn; | 262 | unsigned long pfn; |
378 | 263 | ||
379 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 264 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
380 | return; | 265 | return; |
381 | 266 | ||
382 | xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; | 267 | xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; |
383 | mfn_list = (unsigned long *)xen_start_info->mfn_list; | 268 | xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE); |
384 | max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
385 | xen_max_p2m_pfn = max_pfn; | ||
386 | xen_p2m_size = max_pfn; | ||
387 | 269 | ||
388 | p2m_missing = alloc_p2m_page(); | 270 | for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++) |
389 | p2m_init(p2m_missing); | 271 | xen_p2m_addr[pfn] = INVALID_P2M_ENTRY; |
390 | p2m_identity = alloc_p2m_page(); | ||
391 | p2m_init(p2m_identity); | ||
392 | 272 | ||
393 | p2m_mid_missing = alloc_p2m_page(); | 273 | xen_max_p2m_pfn = xen_p2m_size; |
394 | p2m_mid_init(p2m_mid_missing, p2m_missing); | 274 | } |
395 | p2m_mid_identity = alloc_p2m_page(); | ||
396 | p2m_mid_init(p2m_mid_identity, p2m_identity); | ||
397 | 275 | ||
398 | p2m_top = alloc_p2m_page(); | 276 | #define P2M_TYPE_IDENTITY 0 |
399 | p2m_top_init(p2m_top); | 277 | #define P2M_TYPE_MISSING 1 |
278 | #define P2M_TYPE_PFN 2 | ||
279 | #define P2M_TYPE_UNKNOWN 3 | ||
400 | 280 | ||
401 | /* | 281 | static int xen_p2m_elem_type(unsigned long pfn) |
402 | * The domain builder gives us a pre-constructed p2m array in | 282 | { |
403 | * mfn_list for all the pages initially given to us, so we just | 283 | unsigned long mfn; |
404 | * need to graft that into our tree structure. | ||
405 | */ | ||
406 | for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | ||
407 | unsigned topidx = p2m_top_index(pfn); | ||
408 | unsigned mididx = p2m_mid_index(pfn); | ||
409 | 284 | ||
410 | if (p2m_top[topidx] == p2m_mid_missing) { | 285 | if (pfn >= xen_p2m_size) |
411 | unsigned long **mid = alloc_p2m_page(); | 286 | return P2M_TYPE_IDENTITY; |
412 | p2m_mid_init(mid, p2m_missing); | ||
413 | 287 | ||
414 | p2m_top[topidx] = mid; | 288 | mfn = xen_p2m_addr[pfn]; |
415 | } | ||
416 | 289 | ||
417 | /* | 290 | if (mfn == INVALID_P2M_ENTRY) |
418 | * As long as the mfn_list has enough entries to completely | 291 | return P2M_TYPE_MISSING; |
419 | * fill a p2m page, pointing into the array is ok. But if | ||
420 | * not the entries beyond the last pfn will be undefined. | ||
421 | */ | ||
422 | if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) { | ||
423 | unsigned long p2midx; | ||
424 | 292 | ||
425 | p2midx = max_pfn % P2M_PER_PAGE; | 293 | if (mfn & IDENTITY_FRAME_BIT) |
426 | for ( ; p2midx < P2M_PER_PAGE; p2midx++) | 294 | return P2M_TYPE_IDENTITY; |
427 | mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY; | 295 | |
428 | } | 296 | return P2M_TYPE_PFN; |
429 | p2m_top[topidx][mididx] = &mfn_list[pfn]; | ||
430 | } | ||
431 | } | 297 | } |
432 | #ifdef CONFIG_X86_64 | 298 | |
433 | unsigned long __init xen_revector_p2m_tree(void) | 299 | static void __init xen_rebuild_p2m_list(unsigned long *p2m) |
434 | { | 300 | { |
435 | unsigned long va_start; | 301 | unsigned int i, chunk; |
436 | unsigned long va_end; | ||
437 | unsigned long pfn; | 302 | unsigned long pfn; |
438 | unsigned long pfn_free = 0; | 303 | unsigned long *mfns; |
439 | unsigned long *mfn_list = NULL; | 304 | pte_t *ptep; |
440 | unsigned long size; | 305 | pmd_t *pmdp; |
441 | 306 | int type; | |
442 | use_brk = 0; | ||
443 | va_start = xen_start_info->mfn_list; | ||
444 | /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), | ||
445 | * so make sure it is rounded up to that */ | ||
446 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | ||
447 | va_end = va_start + size; | ||
448 | |||
449 | /* If we were revectored already, don't do it again. */ | ||
450 | if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) | ||
451 | return 0; | ||
452 | |||
453 | mfn_list = alloc_bootmem_align(size, PAGE_SIZE); | ||
454 | if (!mfn_list) { | ||
455 | pr_warn("Could not allocate space for a new P2M tree!\n"); | ||
456 | return xen_start_info->mfn_list; | ||
457 | } | ||
458 | /* Fill it out with INVALID_P2M_ENTRY value */ | ||
459 | memset(mfn_list, 0xFF, size); | ||
460 | 307 | ||
461 | for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { | 308 | p2m_missing = alloc_p2m_page(); |
462 | unsigned topidx = p2m_top_index(pfn); | 309 | p2m_init(p2m_missing); |
463 | unsigned mididx; | 310 | p2m_identity = alloc_p2m_page(); |
464 | unsigned long *mid_p; | 311 | p2m_init(p2m_identity); |
465 | |||
466 | if (!p2m_top[topidx]) | ||
467 | continue; | ||
468 | 312 | ||
469 | if (p2m_top[topidx] == p2m_mid_missing) | 313 | p2m_missing_pte = alloc_p2m_page(); |
470 | continue; | 314 | paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT); |
315 | p2m_identity_pte = alloc_p2m_page(); | ||
316 | paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT); | ||
317 | for (i = 0; i < PTRS_PER_PTE; i++) { | ||
318 | set_pte(p2m_missing_pte + i, | ||
319 | pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL)); | ||
320 | set_pte(p2m_identity_pte + i, | ||
321 | pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL)); | ||
322 | } | ||
471 | 323 | ||
472 | mididx = p2m_mid_index(pfn); | 324 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) { |
473 | mid_p = p2m_top[topidx][mididx]; | 325 | /* |
474 | if (!mid_p) | 326 | * Try to map missing/identity PMDs or p2m-pages if possible. |
475 | continue; | 327 | * We have to respect the structure of the mfn_list_list |
476 | if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) | 328 | * which will be built just afterwards. |
329 | * Chunk size to test is one p2m page if we are in the middle | ||
330 | * of a mfn_list_list mid page and the complete mid page area | ||
331 | * if we are at index 0 of the mid page. Please note that a | ||
332 | * mid page might cover more than one PMD, e.g. on 32 bit PAE | ||
333 | * kernels. | ||
334 | */ | ||
335 | chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ? | ||
336 | P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE; | ||
337 | |||
338 | type = xen_p2m_elem_type(pfn); | ||
339 | i = 0; | ||
340 | if (type != P2M_TYPE_PFN) | ||
341 | for (i = 1; i < chunk; i++) | ||
342 | if (xen_p2m_elem_type(pfn + i) != type) | ||
343 | break; | ||
344 | if (i < chunk) | ||
345 | /* Reset to minimal chunk size. */ | ||
346 | chunk = P2M_PER_PAGE; | ||
347 | |||
348 | if (type == P2M_TYPE_PFN || i < chunk) { | ||
349 | /* Use initial p2m page contents. */ | ||
350 | #ifdef CONFIG_X86_64 | ||
351 | mfns = alloc_p2m_page(); | ||
352 | copy_page(mfns, xen_p2m_addr + pfn); | ||
353 | #else | ||
354 | mfns = xen_p2m_addr + pfn; | ||
355 | #endif | ||
356 | ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | ||
357 | set_pte(ptep, | ||
358 | pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); | ||
477 | continue; | 359 | continue; |
360 | } | ||
478 | 361 | ||
479 | if ((unsigned long)mid_p == INVALID_P2M_ENTRY) | 362 | if (chunk == P2M_PER_PAGE) { |
363 | /* Map complete missing or identity p2m-page. */ | ||
364 | mfns = (type == P2M_TYPE_MISSING) ? | ||
365 | p2m_missing : p2m_identity; | ||
366 | ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | ||
367 | set_pte(ptep, | ||
368 | pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); | ||
480 | continue; | 369 | continue; |
370 | } | ||
481 | 371 | ||
482 | /* The old va. Rebase it on mfn_list */ | 372 | /* Complete missing or identity PMD(s) can be mapped. */ |
483 | if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { | 373 | ptep = (type == P2M_TYPE_MISSING) ? |
484 | unsigned long *new; | 374 | p2m_missing_pte : p2m_identity_pte; |
375 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | ||
376 | pmdp = populate_extra_pmd( | ||
377 | (unsigned long)(p2m + pfn + i * PTRS_PER_PTE)); | ||
378 | set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); | ||
379 | } | ||
380 | } | ||
381 | } | ||
485 | 382 | ||
486 | if (pfn_free > (size / sizeof(unsigned long))) { | 383 | void __init xen_vmalloc_p2m_tree(void) |
487 | WARN(1, "Only allocated for %ld pages, but we want %ld!\n", | 384 | { |
488 | size / sizeof(unsigned long), pfn_free); | 385 | static struct vm_struct vm; |
489 | return 0; | ||
490 | } | ||
491 | new = &mfn_list[pfn_free]; | ||
492 | 386 | ||
493 | copy_page(new, mid_p); | 387 | vm.flags = VM_ALLOC; |
494 | p2m_top[topidx][mididx] = &mfn_list[pfn_free]; | 388 | vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn, |
389 | PMD_SIZE * PMDS_PER_MID_PAGE); | ||
390 | vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); | ||
391 | pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); | ||
495 | 392 | ||
496 | pfn_free += P2M_PER_PAGE; | 393 | xen_max_p2m_pfn = vm.size / sizeof(unsigned long); |
497 | 394 | ||
498 | } | 395 | xen_rebuild_p2m_list(vm.addr); |
499 | /* This should be the leafs allocated for identity from _brk. */ | ||
500 | } | ||
501 | 396 | ||
397 | xen_p2m_addr = vm.addr; | ||
502 | xen_p2m_size = xen_max_p2m_pfn; | 398 | xen_p2m_size = xen_max_p2m_pfn; |
503 | xen_p2m_addr = mfn_list; | ||
504 | 399 | ||
505 | xen_inv_extra_mem(); | 400 | xen_inv_extra_mem(); |
506 | 401 | ||
507 | m2p_override_init(); | 402 | m2p_override_init(); |
508 | return (unsigned long)mfn_list; | ||
509 | } | 403 | } |
510 | #else | 404 | |
511 | unsigned long __init xen_revector_p2m_tree(void) | ||
512 | { | ||
513 | use_brk = 0; | ||
514 | xen_p2m_size = xen_max_p2m_pfn; | ||
515 | xen_inv_extra_mem(); | ||
516 | m2p_override_init(); | ||
517 | return 0; | ||
518 | } | ||
519 | #endif | ||
520 | unsigned long get_phys_to_machine(unsigned long pfn) | 405 | unsigned long get_phys_to_machine(unsigned long pfn) |
521 | { | 406 | { |
522 | unsigned topidx, mididx, idx; | 407 | pte_t *ptep; |
408 | unsigned int level; | ||
523 | 409 | ||
524 | if (unlikely(pfn >= xen_p2m_size)) { | 410 | if (unlikely(pfn >= xen_p2m_size)) { |
525 | if (pfn < xen_max_p2m_pfn) | 411 | if (pfn < xen_max_p2m_pfn) |
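
To make the chunk-size comment in xen_rebuild_p2m_list() above concrete, here is a standalone model of the decision; the constants assume x86-64, while the kernel derives them from PAGE_SIZE and sizeof(unsigned long):

#include <stdio.h>

#define P2M_PER_PAGE       512UL
#define P2M_MID_PER_PAGE   512UL

static unsigned long chunk_at(unsigned long pfn)
{
	/* At a mid-page boundary a whole mid area (512 * 512 pfns, i.e. 1 GiB
	 * with 4 KiB pages) may be tested; anywhere else only one p2m page. */
	return (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
		P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;
}

int main(void)
{
	printf("pfn 0x00000: chunk %lu\n", chunk_at(0x00000));    /* 262144 */
	printf("pfn 0x00200: chunk %lu\n", chunk_at(0x00200));    /* 512 */
	return 0;
}

An aligned run of all-missing or all-identity pfns of the large size can then be covered by the shared p2m_missing_pte/p2m_identity_pte PMD mappings instead of individual p2m pages.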
@@ -528,23 +414,83 @@ unsigned long get_phys_to_machine(unsigned long pfn) | |||
528 | return IDENTITY_FRAME(pfn); | 414 | return IDENTITY_FRAME(pfn); |
529 | } | 415 | } |
530 | 416 | ||
531 | topidx = p2m_top_index(pfn); | 417 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
532 | mididx = p2m_mid_index(pfn); | 418 | BUG_ON(!ptep || level != PG_LEVEL_4K); |
533 | idx = p2m_index(pfn); | ||
534 | 419 | ||
535 | /* | 420 | /* |
536 | * The INVALID_P2M_ENTRY is filled in both p2m_*identity | 421 | * The INVALID_P2M_ENTRY is filled in both p2m_*identity |
537 | * and in p2m_*missing, so returning the INVALID_P2M_ENTRY | 422 | * and in p2m_*missing, so returning the INVALID_P2M_ENTRY |
538 | * would be wrong. | 423 | * would be wrong. |
539 | */ | 424 | */ |
540 | if (p2m_top[topidx][mididx] == p2m_identity) | 425 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
541 | return IDENTITY_FRAME(pfn); | 426 | return IDENTITY_FRAME(pfn); |
542 | 427 | ||
543 | return p2m_top[topidx][mididx][idx]; | 428 | return xen_p2m_addr[pfn]; |
544 | } | 429 | } |
545 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | 430 | EXPORT_SYMBOL_GPL(get_phys_to_machine); |
546 | 431 | ||
547 | /* | 432 | /* |
433 | * Allocate new pmd(s). It is checked whether the old pmd is still in place. | ||
434 | * If not, nothing is changed. This is okay as the only reason for allocating | ||
435 | * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an individual ||
436 | * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! | ||
437 | */ | ||
438 | static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) | ||
439 | { | ||
440 | pte_t *ptechk; | ||
441 | pte_t *pteret = ptep; | ||
442 | pte_t *pte_newpg[PMDS_PER_MID_PAGE]; | ||
443 | pmd_t *pmdp; | ||
444 | unsigned int level; | ||
445 | unsigned long flags; | ||
446 | unsigned long vaddr; | ||
447 | int i; | ||
448 | |||
449 | /* Do all allocations first to bail out in error case. */ | ||
450 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | ||
451 | pte_newpg[i] = alloc_p2m_page(); | ||
452 | if (!pte_newpg[i]) { | ||
453 | for (i--; i >= 0; i--) | ||
454 | free_p2m_page(pte_newpg[i]); | ||
455 | |||
456 | return NULL; | ||
457 | } | ||
458 | } | ||
459 | |||
460 | vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1); | ||
461 | |||
462 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | ||
463 | copy_page(pte_newpg[i], pte_pg); | ||
464 | paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT); | ||
465 | |||
466 | pmdp = lookup_pmd_address(vaddr); | ||
467 | BUG_ON(!pmdp); | ||
468 | |||
469 | spin_lock_irqsave(&p2m_update_lock, flags); | ||
470 | |||
471 | ptechk = lookup_address(vaddr, &level); | ||
472 | if (ptechk == pte_pg) { | ||
473 | set_pmd(pmdp, | ||
474 | __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); | ||
475 | if (vaddr == (addr & ~(PMD_SIZE - 1))) | ||
476 | pteret = pte_offset_kernel(pmdp, addr); | ||
477 | pte_newpg[i] = NULL; | ||
478 | } | ||
479 | |||
480 | spin_unlock_irqrestore(&p2m_update_lock, flags); | ||
481 | |||
482 | if (pte_newpg[i]) { | ||
483 | paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT); | ||
484 | free_p2m_page(pte_newpg[i]); | ||
485 | } | ||
486 | |||
487 | vaddr += PMD_SIZE; | ||
488 | } | ||
489 | |||
490 | return pteret; | ||
491 | } | ||
492 | |||
493 | /* | ||
548 | * Fully allocate the p2m structure for a given pfn. We need to check | 494 | * Fully allocate the p2m structure for a given pfn. We need to check |
549 | * that both the top and mid levels are allocated, and make sure the | 495 | * that both the top and mid levels are allocated, and make sure the |
550 | * parallel mfn tree is kept in sync. We may race with other cpus, so | 496 | * parallel mfn tree is kept in sync. We may race with other cpus, so |
@@ -554,58 +500,62 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine); | |||
554 | static bool alloc_p2m(unsigned long pfn) | 500 | static bool alloc_p2m(unsigned long pfn) |
555 | { | 501 | { |
556 | unsigned topidx, mididx; | 502 | unsigned topidx, mididx; |
557 | unsigned long ***top_p, **mid; | ||
558 | unsigned long *top_mfn_p, *mid_mfn; | 503 | unsigned long *top_mfn_p, *mid_mfn; |
559 | unsigned long *p2m_orig; | 504 | pte_t *ptep, *pte_pg; |
505 | unsigned int level; | ||
506 | unsigned long flags; | ||
507 | unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); | ||
508 | unsigned long p2m_pfn; | ||
560 | 509 | ||
561 | topidx = p2m_top_index(pfn); | 510 | topidx = p2m_top_index(pfn); |
562 | mididx = p2m_mid_index(pfn); | 511 | mididx = p2m_mid_index(pfn); |
563 | 512 | ||
564 | top_p = &p2m_top[topidx]; | 513 | ptep = lookup_address(addr, &level); |
565 | mid = ACCESS_ONCE(*top_p); | 514 | BUG_ON(!ptep || level != PG_LEVEL_4K); |
515 | pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | ||
566 | 516 | ||
567 | if (mid == p2m_mid_missing) { | 517 | if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { |
568 | /* Mid level is missing, allocate a new one */ | 518 | /* PMD level is missing, allocate a new one */ |
569 | mid = alloc_p2m_page(); | 519 | ptep = alloc_p2m_pmd(addr, ptep, pte_pg); |
570 | if (!mid) | 520 | if (!ptep) |
571 | return false; | 521 | return false; |
572 | |||
573 | p2m_mid_init(mid, p2m_missing); | ||
574 | |||
575 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | ||
576 | free_p2m_page(mid); | ||
577 | } | 522 | } |
578 | 523 | ||
579 | top_mfn_p = &p2m_top_mfn[topidx]; | 524 | if (p2m_top_mfn) { |
580 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); | 525 | top_mfn_p = &p2m_top_mfn[topidx]; |
526 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); | ||
581 | 527 | ||
582 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | 528 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); |
583 | 529 | ||
584 | if (mid_mfn == p2m_mid_missing_mfn) { | 530 | if (mid_mfn == p2m_mid_missing_mfn) { |
585 | /* Separately check the mid mfn level */ | 531 | /* Separately check the mid mfn level */ |
586 | unsigned long missing_mfn; | 532 | unsigned long missing_mfn; |
587 | unsigned long mid_mfn_mfn; | 533 | unsigned long mid_mfn_mfn; |
588 | unsigned long old_mfn; | 534 | unsigned long old_mfn; |
589 | 535 | ||
590 | mid_mfn = alloc_p2m_page(); | 536 | mid_mfn = alloc_p2m_page(); |
591 | if (!mid_mfn) | 537 | if (!mid_mfn) |
592 | return false; | 538 | return false; |
593 | 539 | ||
594 | p2m_mid_mfn_init(mid_mfn, p2m_missing); | 540 | p2m_mid_mfn_init(mid_mfn, p2m_missing); |
595 | 541 | ||
596 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | 542 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); |
597 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | 543 | mid_mfn_mfn = virt_to_mfn(mid_mfn); |
598 | old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); | 544 | old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); |
599 | if (old_mfn != missing_mfn) { | 545 | if (old_mfn != missing_mfn) { |
600 | free_p2m_page(mid_mfn); | 546 | free_p2m_page(mid_mfn); |
601 | mid_mfn = mfn_to_virt(old_mfn); | 547 | mid_mfn = mfn_to_virt(old_mfn); |
602 | } else { | 548 | } else { |
603 | p2m_top_mfn_p[topidx] = mid_mfn; | 549 | p2m_top_mfn_p[topidx] = mid_mfn; |
550 | } | ||
604 | } | 551 | } |
552 | } else { | ||
553 | mid_mfn = NULL; | ||
605 | } | 554 | } |
606 | 555 | ||
607 | p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); | 556 | p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep)); |
608 | if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { | 557 | if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) || |
558 | p2m_pfn == PFN_DOWN(__pa(p2m_missing))) { | ||
609 | /* p2m leaf page is missing */ | 559 | /* p2m leaf page is missing */ |
610 | unsigned long *p2m; | 560 | unsigned long *p2m; |
611 | 561 | ||
@@ -613,12 +563,25 @@ static bool alloc_p2m(unsigned long pfn) | |||
613 | if (!p2m) | 563 | if (!p2m) |
614 | return false; | 564 | return false; |
615 | 565 | ||
616 | p2m_init(p2m); | 566 | if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) |
567 | p2m_init(p2m); | ||
568 | else | ||
569 | p2m_init_identity(p2m, pfn); | ||
570 | |||
571 | spin_lock_irqsave(&p2m_update_lock, flags); | ||
572 | |||
573 | if (pte_pfn(*ptep) == p2m_pfn) { | ||
574 | set_pte(ptep, | ||
575 | pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); | ||
576 | if (mid_mfn) | ||
577 | mid_mfn[mididx] = virt_to_mfn(p2m); | ||
578 | p2m = NULL; | ||
579 | } | ||
580 | |||
581 | spin_unlock_irqrestore(&p2m_update_lock, flags); | ||
617 | 582 | ||
618 | if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) | 583 | if (p2m) |
619 | free_p2m_page(p2m); | 584 | free_p2m_page(p2m); |
620 | else | ||
621 | mid_mfn[mididx] = virt_to_mfn(p2m); | ||
622 | } | 585 | } |
623 | 586 | ||
624 | return true; | 587 | return true; |
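
Both alloc_p2m_pmd() and the leaf handling above follow the same publish pattern: allocate the new page outside the lock, re-check the slot under p2m_update_lock, install only if it still holds the expected old value, and free the page if another CPU won the race. Below is a generic, self-contained sketch of that pattern; a pthread mutex and a plain pointer stand in for p2m_update_lock and the pte, and all names are illustrative:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;
static char shared_missing;                     /* stands in for p2m_missing */
static void *slot = &shared_missing;            /* stands in for the pte/slot */

static int publish_page(void **slotp, void *expected_old)
{
	void *newpage = malloc(4096);           /* alloc_p2m_page() stand-in */
	int installed = 0;

	if (!newpage)
		return 0;

	pthread_mutex_lock(&update_lock);
	if (*slotp == expected_old) {           /* nobody raced us */
		*slotp = newpage;
		installed = 1;
	}
	pthread_mutex_unlock(&update_lock);

	if (!installed)
		free(newpage);                  /* lost the race: drop our copy */
	return 1;
}

int main(void)
{
	publish_page(&slot, &shared_missing);   /* first caller installs its page */
	publish_page(&slot, &shared_missing);   /* "racer" sees the new value and frees */
	printf("slot now %p\n", slot);
	return 0;
}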
@@ -647,10 +610,10 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, | |||
647 | return pfn - pfn_s; | 610 | return pfn - pfn_s; |
648 | } | 611 | } |
649 | 612 | ||
650 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
651 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 613 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
652 | { | 614 | { |
653 | unsigned topidx, mididx, idx; | 615 | pte_t *ptep; |
616 | unsigned int level; | ||
654 | 617 | ||
655 | /* don't track P2M changes in autotranslate guests */ | 618 | /* don't track P2M changes in autotranslate guests */ |
656 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) | 619 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) |
@@ -661,55 +624,27 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
661 | return true; | 624 | return true; |
662 | } | 625 | } |
663 | 626 | ||
664 | topidx = p2m_top_index(pfn); | 627 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
665 | mididx = p2m_mid_index(pfn); | 628 | BUG_ON(!ptep || level != PG_LEVEL_4K); |
666 | idx = p2m_index(pfn); | ||
667 | |||
668 | /* For sparse holes were the p2m leaf has real PFN along with | ||
669 | * PCI holes, stick in the PFN as the MFN value. | ||
670 | * | ||
671 | * set_phys_range_identity() will have allocated new middle | ||
672 | * and leaf pages as required so an existing p2m_mid_missing | ||
673 | * or p2m_missing mean that whole range will be identity so | ||
674 | * these can be switched to p2m_mid_identity or p2m_identity. | ||
675 | */ | ||
676 | if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { | ||
677 | if (p2m_top[topidx] == p2m_mid_identity) | ||
678 | return true; | ||
679 | |||
680 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
681 | WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing, | ||
682 | p2m_mid_identity) != p2m_mid_missing); | ||
683 | return true; | ||
684 | } | ||
685 | |||
686 | if (p2m_top[topidx][mididx] == p2m_identity) | ||
687 | return true; | ||
688 | |||
689 | /* Swap over from MISSING to IDENTITY if needed. */ | ||
690 | if (p2m_top[topidx][mididx] == p2m_missing) { | ||
691 | WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, | ||
692 | p2m_identity) != p2m_missing); | ||
693 | return true; | ||
694 | } | ||
695 | } | ||
696 | 629 | ||
697 | if (p2m_top[topidx][mididx] == p2m_missing) | 630 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing))) |
698 | return mfn == INVALID_P2M_ENTRY; | 631 | return mfn == INVALID_P2M_ENTRY; |
699 | 632 | ||
700 | p2m_top[topidx][mididx][idx] = mfn; | 633 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
634 | return mfn == IDENTITY_FRAME(pfn); | ||
635 | |||
636 | xen_p2m_addr[pfn] = mfn; | ||
701 | 637 | ||
702 | return true; | 638 | return true; |
703 | } | 639 | } |
704 | 640 | ||
705 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 641 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
706 | { | 642 | { |
707 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | 643 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
708 | if (!alloc_p2m(pfn)) | 644 | if (!alloc_p2m(pfn)) |
709 | return false; | 645 | return false; |
710 | 646 | ||
711 | if (!__set_phys_to_machine(pfn, mfn)) | 647 | return __set_phys_to_machine(pfn, mfn); |
712 | return false; | ||
713 | } | 648 | } |
714 | 649 | ||
715 | return true; | 650 | return true; |
@@ -1035,79 +970,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn); | |||
1035 | #include "debugfs.h" | 970 | #include "debugfs.h" |
1036 | static int p2m_dump_show(struct seq_file *m, void *v) | 971 | static int p2m_dump_show(struct seq_file *m, void *v) |
1037 | { | 972 | { |
1038 | static const char * const level_name[] = { "top", "middle", | ||
1039 | "entry", "abnormal", "error"}; | ||
1040 | #define TYPE_IDENTITY 0 | ||
1041 | #define TYPE_MISSING 1 | ||
1042 | #define TYPE_PFN 2 | ||
1043 | #define TYPE_UNKNOWN 3 | ||
1044 | static const char * const type_name[] = { | 973 | static const char * const type_name[] = { |
1045 | [TYPE_IDENTITY] = "identity", | 974 | [P2M_TYPE_IDENTITY] = "identity", |
1046 | [TYPE_MISSING] = "missing", | 975 | [P2M_TYPE_MISSING] = "missing", |
1047 | [TYPE_PFN] = "pfn", | 976 | [P2M_TYPE_PFN] = "pfn", |
1048 | [TYPE_UNKNOWN] = "abnormal"}; | 977 | [P2M_TYPE_UNKNOWN] = "abnormal"}; |
1049 | unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; | 978 | unsigned long pfn, first_pfn; |
1050 | unsigned int uninitialized_var(prev_level); | 979 | int type, prev_type; |
1051 | unsigned int uninitialized_var(prev_type); | 980 | |
1052 | 981 | prev_type = xen_p2m_elem_type(0); | |
1053 | if (!p2m_top) | 982 | first_pfn = 0; |
1054 | return 0; | 983 | |
1055 | 984 | for (pfn = 0; pfn < xen_p2m_size; pfn++) { | |
1056 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { | 985 | type = xen_p2m_elem_type(pfn); |
1057 | unsigned topidx = p2m_top_index(pfn); | 986 | if (type != prev_type) { |
1058 | unsigned mididx = p2m_mid_index(pfn); | 987 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, |
1059 | unsigned idx = p2m_index(pfn); | 988 | type_name[prev_type]); |
1060 | unsigned lvl, type; | ||
1061 | |||
1062 | lvl = 4; | ||
1063 | type = TYPE_UNKNOWN; | ||
1064 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
1065 | lvl = 0; type = TYPE_MISSING; | ||
1066 | } else if (p2m_top[topidx] == NULL) { | ||
1067 | lvl = 0; type = TYPE_UNKNOWN; | ||
1068 | } else if (p2m_top[topidx][mididx] == NULL) { | ||
1069 | lvl = 1; type = TYPE_UNKNOWN; | ||
1070 | } else if (p2m_top[topidx][mididx] == p2m_identity) { | ||
1071 | lvl = 1; type = TYPE_IDENTITY; | ||
1072 | } else if (p2m_top[topidx][mididx] == p2m_missing) { | ||
1073 | lvl = 1; type = TYPE_MISSING; | ||
1074 | } else if (p2m_top[topidx][mididx][idx] == 0) { | ||
1075 | lvl = 2; type = TYPE_UNKNOWN; | ||
1076 | } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { | ||
1077 | lvl = 2; type = TYPE_IDENTITY; | ||
1078 | } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { | ||
1079 | lvl = 2; type = TYPE_MISSING; | ||
1080 | } else if (p2m_top[topidx][mididx][idx] == pfn) { | ||
1081 | lvl = 2; type = TYPE_PFN; | ||
1082 | } else if (p2m_top[topidx][mididx][idx] != pfn) { | ||
1083 | lvl = 2; type = TYPE_PFN; | ||
1084 | } | ||
1085 | if (pfn == 0) { | ||
1086 | prev_level = lvl; | ||
1087 | prev_type = type; | 989 | prev_type = type; |
1088 | } | 990 | first_pfn = pfn; |
1089 | if (pfn == MAX_DOMAIN_PAGES-1) { | ||
1090 | lvl = 3; | ||
1091 | type = TYPE_UNKNOWN; | ||
1092 | } | ||
1093 | if (prev_type != type) { | ||
1094 | seq_printf(m, " [0x%lx->0x%lx] %s\n", | ||
1095 | prev_pfn_type, pfn, type_name[prev_type]); | ||
1096 | prev_pfn_type = pfn; | ||
1097 | prev_type = type; | ||
1098 | } | ||
1099 | if (prev_level != lvl) { | ||
1100 | seq_printf(m, " [0x%lx->0x%lx] level %s\n", | ||
1101 | prev_pfn_level, pfn, level_name[prev_level]); | ||
1102 | prev_pfn_level = pfn; | ||
1103 | prev_level = lvl; | ||
1104 | } | 991 | } |
1105 | } | 992 | } |
993 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, | ||
994 | type_name[prev_type]); | ||
1106 | return 0; | 995 | return 0; |
1107 | #undef TYPE_IDENTITY | ||
1108 | #undef TYPE_MISSING | ||
1109 | #undef TYPE_PFN | ||
1110 | #undef TYPE_UNKNOWN | ||
1111 | } | 996 | } |
1112 | 997 | ||
1113 | static int p2m_dump_open(struct inode *inode, struct file *filp) | 998 | static int p2m_dump_open(struct inode *inode, struct file *filp) |
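
The reworked p2m_dump_show() above simply run-length encodes the per-pfn type. A standalone model of that loop and its output format follows; the type values and ranges are made up for illustration:

#include <stdio.h>

static const char * const type_name[] = { "identity", "missing", "pfn" };

int main(void)
{
	int types[] = { 2, 2, 2, 1, 1, 0, 0, 0 };     /* fake per-pfn types */
	unsigned long n = sizeof(types) / sizeof(types[0]);
	unsigned long pfn, first_pfn = 0;
	int prev = types[0];

	for (pfn = 0; pfn < n; pfn++) {
		if (types[pfn] != prev) {
			printf(" [0x%lx->0x%lx] %s\n", first_pfn, pfn, type_name[prev]);
			prev = types[pfn];
			first_pfn = pfn;
		}
	}
	printf(" [0x%lx->0x%lx] %s\n", first_pfn, pfn, type_name[prev]);
	return 0;
}

This prints ranges such as " [0x0->0x3] pfn" followed by " [0x3->0x5] missing", matching the format string used by the debugfs file.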
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 02b0b0fba041..f92921fa54f9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ void xen_hvm_init_shared_info(void); | |||
49 | void xen_unplug_emulated_devices(void); | 49 | void xen_unplug_emulated_devices(void); |
50 | 50 | ||
51 | void __init xen_build_dynamic_phys_to_machine(void); | 51 | void __init xen_build_dynamic_phys_to_machine(void); |
52 | unsigned long __init xen_revector_p2m_tree(void); | 52 | void __init xen_vmalloc_p2m_tree(void); |
53 | 53 | ||
54 | void xen_init_irq_ops(void); | 54 | void xen_init_irq_ops(void); |
55 | void xen_setup_timer(int cpu); | 55 | void xen_setup_timer(int cpu); |