author	David Vrabel <david.vrabel@citrix.com>	2011-09-29 07:26:19 -0400
committer	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2011-09-29 11:12:10 -0400
commit	dc91c728fddc29dfed1ae96f6807216b5f42d3a1 (patch)
tree	ff1b08bd3a06d7a8204766a3f714cf7c2533e8f8
parent	8b5d44a5ac93cd7a1b044db3ff0ba4955b4ba5ec (diff)
xen: allow extra memory to be in multiple regions
Allow the extra memory (used by the balloon driver) to be in multiple
regions (typically two regions, one for low memory and one for high
memory).  This allows the balloon driver to increase the number of
available low pages (if the initial number of pages is small).

As a side effect, the algorithm for building the e820 memory map is
simpler and more obviously correct, as the map supplied by the
hypervisor is (almost) used as is (in particular, all reserved regions
and gaps are preserved).  Only RAM regions are altered, and RAM regions
above max_pfn + extra_pages are marked as unused (the region is split
in two if necessary).

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
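The following standalone sketch (not part of the patch, and not kernel code) illustrates the region-splitting idea the message describes: a RAM region is consumed in chunks, first as normal RAM up to the initial allocation limit, then as "extra" (balloon) memory while the extra-page budget lasts, and finally as unusable.  The names region, mem_end and extra_pages are simplified stand-ins for the variables used by xen_memory_setup() in the diff below.

/* Simplified, user-space illustration of the splitting loop. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

struct region { uint64_t addr, size; };

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

int main(void)
{
	struct region r = { .addr = 0x100000, .size = 64 * PAGE_SIZE };
	uint64_t mem_end = 0x100000 + 16 * PAGE_SIZE;	/* end of initial allocation */
	uint64_t extra_pages = 8;			/* balloon budget in pages */

	/* Walk the region, splitting it the way the new while loop does. */
	while (r.size != 0) {
		uint64_t size = r.size;
		const char *type;

		if (r.addr < mem_end) {
			/* Below the initial allocation: plain RAM. */
			size = min_u64(size, mem_end - r.addr);
			type = "RAM";
		} else if (extra_pages) {
			/* Above it, while budget remains: balloon memory. */
			size = min_u64(size, extra_pages * PAGE_SIZE);
			extra_pages -= size / PAGE_SIZE;
			type = "extra (balloon)";
		} else {
			/* Anything left over is marked unusable. */
			type = "unusable";
		}

		printf("[%#llx-%#llx) %s\n",
		       (unsigned long long)r.addr,
		       (unsigned long long)(r.addr + size), type);

		r.addr += size;
		r.size -= size;
	}
	return 0;
}

Running this prints three ranges: the low part as RAM, the next eight pages as balloon memory, and the remainder as unusable, which is exactly how the rewritten loop in the patch splits a RAM region that extends past max_pfn + extra_pages.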
-rw-r--r--	arch/x86/xen/setup.c	182
1 file changed, 86 insertions(+), 96 deletions(-)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 0c8e974c738a..2ad2fd53bd32 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -54,26 +54,32 @@ unsigned long xen_released_pages;
  */
 #define EXTRA_MEM_RATIO (10)
 
-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 start, u64 size)
 {
 	unsigned long pfn;
+	int i;
 
-	u64 size = (u64)pages * PAGE_SIZE;
-	u64 extra_start = xen_extra_mem[0].start + xen_extra_mem[0].size;
-
-	if (!pages)
-		return;
-
-	e820_add_region(extra_start, size, E820_RAM);
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
-	memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		/* Add new region. */
+		if (xen_extra_mem[i].size == 0) {
+			xen_extra_mem[i].start = start;
+			xen_extra_mem[i].size = size;
+			break;
+		}
+		/* Append to existing region. */
+		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
+			xen_extra_mem[i].size += size;
+			break;
+		}
+	}
+	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
 
-	xen_extra_mem[0].size += size;
+	memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
 
-	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+	xen_max_p2m_pfn = PFN_DOWN(start + size);
 
-	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+	for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
 		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 }
 
@@ -120,8 +126,8 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
 	return len;
 }
 
-static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
-						      const struct e820map *e820)
+static unsigned long __init xen_return_unused_memory(
+	unsigned long max_pfn, const struct e820entry *map, int nr_map)
 {
 	phys_addr_t max_addr = PFN_PHYS(max_pfn);
 	phys_addr_t last_end = ISA_END_ADDRESS;
@@ -129,13 +135,13 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
 	int i;
 
 	/* Free any unused memory above the low 1Mbyte. */
-	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
-		phys_addr_t end = e820->map[i].addr;
+	for (i = 0; i < nr_map && last_end < max_addr; i++) {
+		phys_addr_t end = map[i].addr;
 		end = min(max_addr, end);
 
 		if (last_end < end)
 			released += xen_release_chunk(last_end, end);
-		last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
+		last_end = max(last_end, map[i].addr + map[i].size);
 	}
 
 	if (last_end < max_addr)
@@ -200,20 +206,32 @@ static unsigned long __init xen_get_max_pages(void)
 	return min(max_pages, MAX_DOMAIN_PAGES);
 }
 
+static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
+{
+	u64 end = start + size;
+
+	/* Align RAM regions to page boundaries. */
+	if (type == E820_RAM) {
+		start = PAGE_ALIGN(start);
+		end &= ~((u64)PAGE_SIZE - 1);
+	}
+
+	e820_add_region(start, end - start, type);
+}
+
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
 char * __init xen_memory_setup(void)
 {
 	static struct e820entry map[E820MAX] __initdata;
-	static struct e820entry map_raw[E820MAX] __initdata;
 
 	unsigned long max_pfn = xen_start_info->nr_pages;
 	unsigned long long mem_end;
 	int rc;
 	struct xen_memory_map memmap;
+	unsigned long max_pages;
 	unsigned long extra_pages = 0;
-	unsigned long extra_limit;
 	unsigned long identity_pages = 0;
 	int i;
 	int op;
@@ -240,49 +258,55 @@ char * __init xen_memory_setup(void)
 	}
 	BUG_ON(rc);
 
-	memcpy(map_raw, map, sizeof(map));
-	e820.nr_map = 0;
-	xen_extra_mem[0].start = mem_end;
-	for (i = 0; i < memmap.nr_entries; i++) {
-		unsigned long long end;
-
-		/* Guard against non-page aligned E820 entries. */
-		if (map[i].type == E820_RAM)
-			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
-		end = map[i].addr + map[i].size;
-		if (map[i].type == E820_RAM && end > mem_end) {
-			/* RAM off the end - may be partially included */
-			u64 delta = min(map[i].size, end - mem_end);
-
-			map[i].size -= delta;
-			end -= delta;
-
-			extra_pages += PFN_DOWN(delta);
-			/*
-			 * Set RAM below 4GB that is not for us to be unusable.
-			 * This prevents "System RAM" address space from being
-			 * used as potential resource for I/O address (happens
-			 * when 'allocate_resource' is called).
-			 */
-			if (delta &&
-				(xen_initial_domain() && end < 0x100000000ULL))
-				e820_add_region(end, delta, E820_UNUSABLE);
+	/* Make sure the Xen-supplied memory map is well-ordered. */
+	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+
+	max_pages = xen_get_max_pages();
+	if (max_pages > max_pfn)
+		extra_pages += max_pages - max_pfn;
+
+	xen_released_pages = xen_return_unused_memory(max_pfn, map,
+						      memmap.nr_entries);
+	extra_pages += xen_released_pages;
+
+	/*
+	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+	 * factor the base size.  On non-highmem systems, the base
+	 * size is the full initial memory allocation; on highmem it
+	 * is limited to the max size of lowmem, so that it doesn't
+	 * get completely filled.
+	 *
+	 * In principle there could be a problem in lowmem systems if
+	 * the initial memory is also very large with respect to
+	 * lowmem, but we won't try to deal with that here.
+	 */
+	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+			  extra_pages);
+
+	i = 0;
+	while (i < memmap.nr_entries) {
+		u64 addr = map[i].addr;
+		u64 size = map[i].size;
+		u32 type = map[i].type;
+
+		if (type == E820_RAM) {
+			if (addr < mem_end) {
+				size = min(size, mem_end - addr);
+			} else if (extra_pages) {
+				size = min(size, (u64)extra_pages * PAGE_SIZE);
+				extra_pages -= size / PAGE_SIZE;
+				xen_add_extra_mem(addr, size);
+			} else
+				type = E820_UNUSABLE;
 		}
 
-		if (map[i].size > 0 && end > xen_extra_mem[0].start)
-			xen_extra_mem[0].start = end;
+		xen_align_and_add_e820_region(addr, size, type);
 
-		/* Add region if any remains */
-		if (map[i].size > 0)
-			e820_add_region(map[i].addr, map[i].size, map[i].type);
+		map[i].addr += size;
+		map[i].size -= size;
+		if (map[i].size == 0)
+			i++;
 	}
-	/* Align the balloon area so that max_low_pfn does not get set
-	 * to be at the _end_ of the PCI gap at the far end (fee01000).
-	 * Note that the start of balloon area gets set in the loop above
-	 * to be past the last E820 region. */
-	if (xen_initial_domain() && (xen_extra_mem[0].start < (1ULL<<32)))
-		xen_extra_mem[0].start = (1ULL<<32);
 
 	/*
 	 * In domU, the ISA region is normal, usable memory, but we
@@ -308,45 +332,11 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-	extra_limit = xen_get_max_pages();
-	if (max_pfn + extra_pages > extra_limit) {
-		if (extra_limit > max_pfn)
-			extra_pages = extra_limit - max_pfn;
-		else
-			extra_pages = 0;
-	}
-
-	xen_released_pages = xen_return_unused_memory(xen_start_info->nr_pages,
-						      &e820);
-	extra_pages += xen_released_pages;
-
-	/*
-	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
-	 * factor the base size.  On non-highmem systems, the base
-	 * size is the full initial memory allocation; on highmem it
-	 * is limited to the max size of lowmem, so that it doesn't
-	 * get completely filled.
-	 *
-	 * In principle there could be a problem in lowmem systems if
-	 * the initial memory is also very large with respect to
-	 * lowmem, but we won't try to deal with that here.
-	 */
-	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
-			  max_pfn + extra_pages);
-
-	if (extra_limit >= max_pfn)
-		extra_pages = extra_limit - max_pfn;
-	else
-		extra_pages = 0;
-
-	xen_add_extra_mem(extra_pages);
-
 	/*
 	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-	 * type PFNs. We supply it with the non-sanitized version
-	 * of the E820.
+	 * type PFNs.
 	 */
-	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+	identity_pages = xen_set_identity(e820.map, e820.nr_map);
 	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
 	return "Xen";
 }