author		Linus Torvalds <torvalds@linux-foundation.org>	2011-10-25 03:17:07 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-10-25 03:17:07 -0400
commit		5eef150c1d7e41baaefd00dd56c153debcd86aee (patch)
tree		612723d8b0930ba8c6292cd317231249713c1877 /arch/x86/xen
parent		315eb8a2a1b7f335d40ceeeb11b9e067475eb881 (diff)
parent		f3f436e33b925ead21e3f9b47b1e2aed965511d9 (diff)
Merge branch 'stable/e820-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
* 'stable/e820-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
xen: release all pages within 1-1 p2m mappings
xen: allow extra memory to be in multiple regions
xen: allow balloon driver to use more than one memory region
xen/balloon: simplify test for the end of usable RAM
xen/balloon: account for pages released during memory setup
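
The central change in the series above is that extra (balloon) memory is now tracked as an array of regions, xen_extra_mem[], instead of a single (start, size) pair. A minimal userspace sketch of the add-or-append bookkeeping the diff below introduces in xen_add_extra_mem() (MAX_REGIONS, struct region, and the main() driver are invented stand-ins, not kernel code):

	#include <stdio.h>
	#include <stdint.h>

	#define MAX_REGIONS 8	/* stand-in for XEN_EXTRA_MEM_MAX_REGIONS */

	struct region {
		uint64_t start;
		uint64_t size;
	};

	static struct region extra_mem[MAX_REGIONS];

	/* Same shape as xen_add_extra_mem(): take the first empty slot, or
	 * extend a region that ends exactly where the new range begins. */
	static void add_extra_mem(uint64_t start, uint64_t size)
	{
		int i;

		for (i = 0; i < MAX_REGIONS; i++) {
			if (extra_mem[i].size == 0) {		/* add new region */
				extra_mem[i].start = start;
				extra_mem[i].size = size;
				return;
			}
			if (extra_mem[i].start + extra_mem[i].size == start) {
				extra_mem[i].size += size;	/* append to region */
				return;
			}
		}
		fprintf(stderr, "Warning: not enough extra memory regions\n");
	}

	int main(void)
	{
		add_extra_mem(0x100000000ULL, 0x200000);	/* new region      */
		add_extra_mem(0x100200000ULL, 0x100000);	/* appends to it   */
		add_extra_mem(0x200000000ULL, 0x100000);	/* separate region */

		for (int i = 0; i < MAX_REGIONS && extra_mem[i].size != 0; i++)
			printf("region %d: start=%#llx size=%#llx\n", i,
			       (unsigned long long)extra_mem[i].start,
			       (unsigned long long)extra_mem[i].size);
		return 0;
	}

Contiguous ranges coalesce into one region (the first two calls produce a single 3 MiB region), so the fixed-size array only fills up when the released ranges are genuinely discontiguous.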
Diffstat (limited to 'arch/x86/xen')
 arch/x86/xen/setup.c | 284 +++++++++++---------------
 1 file changed, 123 insertions(+), 161 deletions(-)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 46d6d21dbdbe..38d0af4fefec 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -37,7 +37,10 @@ extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
 
 /* Amount of extra memory space we add to the e820 ranges */
-phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
 
 /*
  * The maximum amount of extra memory compared to the base size. The
@@ -51,48 +54,47 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
  */
 #define EXTRA_MEM_RATIO		(10)
 
-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 start, u64 size)
 {
 	unsigned long pfn;
+	int i;
 
-	u64 size = (u64)pages * PAGE_SIZE;
-	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
-
-	if (!pages)
-		return;
-
-	e820_add_region(extra_start, size, E820_RAM);
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
-	memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		/* Add new region. */
+		if (xen_extra_mem[i].size == 0) {
+			xen_extra_mem[i].start = start;
+			xen_extra_mem[i].size = size;
+			break;
+		}
+		/* Append to existing region. */
+		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
+			xen_extra_mem[i].size += size;
+			break;
+		}
+	}
+	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
 
-	xen_extra_mem_size += size;
+	memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
 
-	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+	xen_max_p2m_pfn = PFN_DOWN(start + size);
 
-	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+	for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
 		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 }
 
-static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
-					      phys_addr_t end_addr)
+static unsigned long __init xen_release_chunk(unsigned long start,
+					      unsigned long end)
 {
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
-	unsigned long start, end;
 	unsigned long len = 0;
 	unsigned long pfn;
 	int ret;
 
-	start = PFN_UP(start_addr);
-	end = PFN_DOWN(end_addr);
-
-	if (end <= start)
-		return 0;
-
 	for(pfn = start; pfn < end; pfn++) {
 		unsigned long mfn = pfn_to_mfn(pfn);
 
@@ -117,72 +119,52 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
 	return len;
 }
 
-static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
-						     const struct e820map *e820)
+static unsigned long __init xen_set_identity_and_release(
+	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
 {
-	phys_addr_t max_addr = PFN_PHYS(max_pfn);
-	phys_addr_t last_end = ISA_END_ADDRESS;
+	phys_addr_t start = 0;
 	unsigned long released = 0;
-	int i;
-
-	/* Free any unused memory above the low 1Mbyte. */
-	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
-		phys_addr_t end = e820->map[i].addr;
-		end = min(max_addr, end);
-
-		if (last_end < end)
-			released += xen_release_chunk(last_end, end);
-		last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
-	}
-
-	if (last_end < max_addr)
-		released += xen_release_chunk(last_end, max_addr);
-
-	printk(KERN_INFO "released %lu pages of unused memory\n", released);
-	return released;
-}
-
-static unsigned long __init xen_set_identity(const struct e820entry *list,
-					     ssize_t map_size)
-{
-	phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
-	phys_addr_t start_pci = last;
-	const struct e820entry *entry;
 	unsigned long identity = 0;
+	const struct e820entry *entry;
 	int i;
 
+	/*
+	 * Combine non-RAM regions and gaps until a RAM region (or the
+	 * end of the map) is reached, then set the 1:1 map and
+	 * release the pages (if available) in those non-RAM regions.
+	 *
+	 * The combined non-RAM regions are rounded to a whole number
+	 * of pages so any partial pages are accessible via the 1:1
+	 * mapping.  This is needed for some BIOSes that put (for
+	 * example) the DMI tables in a reserved region that begins on
+	 * a non-page boundary.
+	 */
 	for (i = 0, entry = list; i < map_size; i++, entry++) {
-		phys_addr_t start = entry->addr;
-		phys_addr_t end = start + entry->size;
+		phys_addr_t end = entry->addr + entry->size;
 
-		if (start < last)
-			start = last;
+		if (entry->type == E820_RAM || i == map_size - 1) {
+			unsigned long start_pfn = PFN_DOWN(start);
+			unsigned long end_pfn = PFN_UP(end);
 
-		if (end <= start)
-			continue;
+			if (entry->type == E820_RAM)
+				end_pfn = PFN_UP(entry->addr);
 
-		/* Skip over the 1MB region. */
-		if (last > end)
-			continue;
+			if (start_pfn < end_pfn) {
+				if (start_pfn < nr_pages)
+					released += xen_release_chunk(
+						start_pfn, min(end_pfn, nr_pages));
 
-		if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
-			if (start > start_pci)
 				identity += set_phys_range_identity(
-						PFN_UP(start_pci), PFN_DOWN(start));
-
-			/* Without saving 'last' we would gooble RAM too
-			 * at the end of the loop. */
-			last = end;
-			start_pci = end;
-			continue;
+					start_pfn, end_pfn);
+			}
+			start = end;
 		}
-		start_pci = min(start, start_pci);
-		last = end;
 	}
-	if (last > start_pci)
-		identity += set_phys_range_identity(
-			PFN_UP(start_pci), PFN_DOWN(last));
-	return identity;
+
+	printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+
+	return released;
 }
 
 static unsigned long __init xen_get_max_pages(void)
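
The new xen_set_identity_and_release() above folds the old release pass and identity pass into one walk over the e820 map. A standalone sketch of that walk, assuming 4 KiB pages; the toy map and the release()/set_identity() stubs are hypothetical stand-ins for xen_release_chunk() and set_phys_range_identity():

	#include <stdio.h>
	#include <stdint.h>
	#include <stddef.h>

	#define PAGE_SHIFT	12	/* assumed 4 KiB pages */
	#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
	#define PFN_UP(x)	(((x) + (1ULL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
	#define MIN(a, b)	((a) < (b) ? (a) : (b))

	enum { E820_RAM = 1, E820_RESERVED = 2 };

	struct e820entry { uint64_t addr, size; uint32_t type; };

	/* Stubs standing in for the hypercall-backed kernel helpers. */
	static unsigned long release(unsigned long s, unsigned long e)
	{
		printf("release  pfns [%#lx, %#lx)\n", s, e);
		return e - s;
	}

	static void set_identity(unsigned long s, unsigned long e)
	{
		printf("identity pfns [%#lx, %#lx)\n", s, e);
	}

	/* Accumulate non-RAM regions and gaps from 'start', flushing the
	 * combined span whenever a RAM region (or the end of the map) is
	 * reached.  Only pages below nr_pages are backed by the initial
	 * allocation, so only those can be released. */
	static unsigned long identity_and_release(const struct e820entry *list,
						  size_t map_size,
						  unsigned long nr_pages)
	{
		uint64_t start = 0;
		unsigned long released = 0;
		size_t i;

		for (i = 0; i < map_size; i++) {
			uint64_t end = list[i].addr + list[i].size;

			if (list[i].type == E820_RAM || i == map_size - 1) {
				unsigned long start_pfn = PFN_DOWN(start);
				unsigned long end_pfn = PFN_UP(end);

				if (list[i].type == E820_RAM)
					end_pfn = PFN_UP(list[i].addr);

				if (start_pfn < end_pfn) {
					if (start_pfn < nr_pages)
						released += release(start_pfn,
							MIN(end_pfn, nr_pages));
					set_identity(start_pfn, end_pfn);
				}
				start = end;
			}
		}
		return released;
	}

	int main(void)
	{
		/* A made-up three-entry map: low RAM, a reserved hole, main RAM. */
		const struct e820entry map[] = {
			{ 0x00000000, 0x0009f000, E820_RAM },
			{ 0x0009f000, 0x00061000, E820_RESERVED },
			{ 0x00100000, 0x1ff00000, E820_RAM },
		};

		printf("released %lu pages\n",
		       identity_and_release(map, 3, 0x8000 /* 128 MiB domain */));
		return 0;
	}

With this toy map, the reserved hole at 0x9f000 plus the gap up to 1 MiB are released as pfns [0x9f, 0x100) and identity-mapped in one step, which is exactly the behaviour described by the comment block in the hunk above.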
@@ -197,21 +179,32 @@ static unsigned long __init xen_get_max_pages(void)
 	return min(max_pages, MAX_DOMAIN_PAGES);
 }
 
+static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
+{
+	u64 end = start + size;
+
+	/* Align RAM regions to page boundaries. */
+	if (type == E820_RAM) {
+		start = PAGE_ALIGN(start);
+		end &= ~((u64)PAGE_SIZE - 1);
+	}
+
+	e820_add_region(start, end - start, type);
+}
+
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
 char * __init xen_memory_setup(void)
 {
 	static struct e820entry map[E820MAX] __initdata;
-	static struct e820entry map_raw[E820MAX] __initdata;
 
 	unsigned long max_pfn = xen_start_info->nr_pages;
 	unsigned long long mem_end;
 	int rc;
 	struct xen_memory_map memmap;
+	unsigned long max_pages;
 	unsigned long extra_pages = 0;
-	unsigned long extra_limit;
-	unsigned long identity_pages = 0;
 	int i;
 	int op;
 
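
The new xen_align_and_add_e820_region() shrinks RAM regions inward to page boundaries (start rounded up, end rounded down) so that partial pages are never reported as RAM. A quick standalone check of that rounding, assuming 4 KiB pages and a made-up ragged region:

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SIZE	4096ULL	/* assumed 4 KiB pages */
	#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

	int main(void)
	{
		/* A RAM region with ragged edges. */
		uint64_t start = 0x9e800, end = 0xa0800;

		/* The E820_RAM case: round start up, round end down. */
		uint64_t astart = PAGE_ALIGN(start);		/* -> 0x9f000 */
		uint64_t aend = end & ~(PAGE_SIZE - 1);		/* -> 0xa0000 */

		printf("RAM [%#llx, %#llx) -> [%#llx, %#llx)\n",
		       (unsigned long long)start, (unsigned long long)end,
		       (unsigned long long)astart, (unsigned long long)aend);
		return 0;
	}

The clipped partial pages are not lost: they remain reachable through the 1:1 mapping set up by xen_set_identity_and_release().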
@@ -237,58 +230,65 @@ char * __init xen_memory_setup(void)
 	}
 	BUG_ON(rc);
 
-	memcpy(map_raw, map, sizeof(map));
-	e820.nr_map = 0;
-	xen_extra_mem_start = mem_end;
-	for (i = 0; i < memmap.nr_entries; i++) {
-		unsigned long long end;
-
-		/* Guard against non-page aligned E820 entries. */
-		if (map[i].type == E820_RAM)
-			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
-		end = map[i].addr + map[i].size;
-		if (map[i].type == E820_RAM && end > mem_end) {
-			/* RAM off the end - may be partially included */
-			u64 delta = min(map[i].size, end - mem_end);
-
-			map[i].size -= delta;
-			end -= delta;
-
-			extra_pages += PFN_DOWN(delta);
-			/*
-			 * Set RAM below 4GB that is not for us to be unusable.
-			 * This prevents "System RAM" address space from being
-			 * used as potential resource for I/O address (happens
-			 * when 'allocate_resource' is called).
-			 */
-			if (delta &&
-				(xen_initial_domain() && end < 0x100000000ULL))
-				e820_add_region(end, delta, E820_UNUSABLE);
+	/* Make sure the Xen-supplied memory map is well-ordered. */
+	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+
+	max_pages = xen_get_max_pages();
+	if (max_pages > max_pfn)
+		extra_pages += max_pages - max_pfn;
+
+	/*
+	 * Set P2M for all non-RAM pages and E820 gaps to be identity
+	 * type PFNs.  Any RAM pages that would be made inaccesible by
+	 * this are first released.
+	 */
+	xen_released_pages = xen_set_identity_and_release(
+		map, memmap.nr_entries, max_pfn);
+	extra_pages += xen_released_pages;
+
+	/*
+	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+	 * factor the base size.  On non-highmem systems, the base
+	 * size is the full initial memory allocation; on highmem it
+	 * is limited to the max size of lowmem, so that it doesn't
+	 * get completely filled.
+	 *
+	 * In principle there could be a problem in lowmem systems if
+	 * the initial memory is also very large with respect to
+	 * lowmem, but we won't try to deal with that here.
+	 */
+	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+			  extra_pages);
+
+	i = 0;
+	while (i < memmap.nr_entries) {
+		u64 addr = map[i].addr;
+		u64 size = map[i].size;
+		u32 type = map[i].type;
+
+		if (type == E820_RAM) {
+			if (addr < mem_end) {
+				size = min(size, mem_end - addr);
+			} else if (extra_pages) {
+				size = min(size, (u64)extra_pages * PAGE_SIZE);
+				extra_pages -= size / PAGE_SIZE;
+				xen_add_extra_mem(addr, size);
+			} else
+				type = E820_UNUSABLE;
 		}
 
-		if (map[i].size > 0 && end > xen_extra_mem_start)
-			xen_extra_mem_start = end;
+		xen_align_and_add_e820_region(addr, size, type);
 
-		/* Add region if any remains */
-		if (map[i].size > 0)
-			e820_add_region(map[i].addr, map[i].size, map[i].type);
+		map[i].addr += size;
+		map[i].size -= size;
+		if (map[i].size == 0)
+			i++;
 	}
-	/* Align the balloon area so that max_low_pfn does not get set
-	 * to be at the _end_ of the PCI gap at the far end (fee01000).
-	 * Note that xen_extra_mem_start gets set in the loop above to be
-	 * past the last E820 region. */
-	if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
-		xen_extra_mem_start = (1ULL<<32);
 
 	/*
 	 * In domU, the ISA region is normal, usable memory, but we
 	 * reserve ISA memory anyway because too many things poke
 	 * about in there.
-	 *
-	 * In Dom0, the host E820 information can leave gaps in the
-	 * ISA range, which would cause us to release those pages.  To
-	 * avoid this, we unconditionally reserve them here.
 	 */
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
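
The rewritten loop above may carve one map entry into several chunks: RAM below mem_end is kept, RAM above it is turned into balloon-backed extra memory while extra_pages lasts, and the remainder becomes E820_UNUSABLE. A compact standalone sketch of that consumption pattern with a made-up one-entry map; add_region() stands in for xen_align_and_add_e820_region():

	#include <stdio.h>
	#include <stdint.h>
	#include <stddef.h>

	#define PAGE_SIZE	4096ULL	/* assumed 4 KiB pages */
	#define MIN(a, b)	((a) < (b) ? (a) : (b))

	enum { E820_RAM = 1, E820_UNUSABLE = 5 };

	struct e820entry { uint64_t addr, size; uint32_t type; };

	/* Stub standing in for xen_align_and_add_e820_region(). */
	static void add_region(uint64_t addr, uint64_t size, uint32_t type)
	{
		printf("e820: [%#llx, %#llx) type %u\n",
		       (unsigned long long)addr,
		       (unsigned long long)(addr + size), type);
	}

	int main(void)
	{
		/* One made-up entry: 1 GiB of RAM starting at 1 MiB. */
		struct e820entry map[] = {
			{ 0x00100000, 0x40000000, E820_RAM },
		};
		uint64_t mem_end = 0x20000000;		/* 512 MiB initial allocation */
		unsigned long extra_pages = 0x8000;	/* 128 MiB of balloon headroom */
		size_t i = 0, nr_entries = 1;

		while (i < nr_entries) {
			uint64_t addr = map[i].addr;
			uint64_t size = map[i].size;
			uint32_t type = map[i].type;

			if (type == E820_RAM) {
				if (addr < mem_end) {
					size = MIN(size, mem_end - addr);
				} else if (extra_pages) {
					size = MIN(size, (uint64_t)extra_pages * PAGE_SIZE);
					extra_pages -= size / PAGE_SIZE;
					/* the kernel calls xen_add_extra_mem(addr, size) here */
				} else
					type = E820_UNUSABLE;
			}

			add_region(addr, size, type);

			map[i].addr += size;	/* consume the chunk just emitted */
			map[i].size -= size;
			if (map[i].size == 0)
				i++;
		}
		return 0;
	}

The single 1 GiB entry comes out as three regions: usable RAM up to 512 MiB, 128 MiB of RAM set aside as extra memory, and the rest marked unusable once extra_pages is exhausted.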
@@ -305,44 +305,6 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-	extra_limit = xen_get_max_pages();
-	if (max_pfn + extra_pages > extra_limit) {
-		if (extra_limit > max_pfn)
-			extra_pages = extra_limit - max_pfn;
-		else
-			extra_pages = 0;
-	}
-
-	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
-	/*
-	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
-	 * factor the base size.  On non-highmem systems, the base
-	 * size is the full initial memory allocation; on highmem it
-	 * is limited to the max size of lowmem, so that it doesn't
-	 * get completely filled.
-	 *
-	 * In principle there could be a problem in lowmem systems if
-	 * the initial memory is also very large with respect to
-	 * lowmem, but we won't try to deal with that here.
-	 */
-	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
-			  max_pfn + extra_pages);
-
-	if (extra_limit >= max_pfn)
-		extra_pages = extra_limit - max_pfn;
-	else
-		extra_pages = 0;
-
-	xen_add_extra_mem(extra_pages);
-
-	/*
-	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-	 * type PFNs. We supply it with the non-sanitized version
-	 * of the E820.
-	 */
-	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
-	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
 	return "Xen";
 }
 