author		Linus Torvalds <torvalds@linux-foundation.org>	2011-10-25 03:17:07 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-10-25 03:17:07 -0400
commit		5eef150c1d7e41baaefd00dd56c153debcd86aee (patch)
tree		612723d8b0930ba8c6292cd317231249713c1877 /arch/x86/xen
parent		315eb8a2a1b7f335d40ceeeb11b9e067475eb881 (diff)
parent		f3f436e33b925ead21e3f9b47b1e2aed965511d9 (diff)
Merge branch 'stable/e820-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
* 'stable/e820-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen: release all pages within 1-1 p2m mappings
  xen: allow extra memory to be in multiple regions
  xen: allow balloon driver to use more than one memory region
  xen/balloon: simplify test for the end of usable RAM
  xen/balloon: account for pages released during memory setup
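The central data-structure change in this series replaces the single xen_extra_mem_start/xen_extra_mem_size pair with an array of regions, so extra (ballooned-out) memory no longer has to be one contiguous range. The following standalone sketch mirrors the region bookkeeping that the diff below introduces in xen_add_extra_mem(); the mem_region struct, MAX_REGIONS constant, and the main() driver are simplified stand-ins, not kernel code, so the logic can be compiled and run on its own:

	/*
	 * Sketch of the add-or-append region logic from the new
	 * xen_add_extra_mem(): a chunk either extends an adjacent
	 * existing region or starts a new one.
	 */
	#include <stdio.h>
	#include <stdint.h>

	#define MAX_REGIONS 4	/* stand-in for XEN_EXTRA_MEM_MAX_REGIONS */

	struct mem_region {
		uint64_t start;
		uint64_t size;
	};

	static struct mem_region extra_mem[MAX_REGIONS];

	static void add_extra_mem(uint64_t start, uint64_t size)
	{
		for (int i = 0; i < MAX_REGIONS; i++) {
			/* Free slot: record a new region. */
			if (extra_mem[i].size == 0) {
				extra_mem[i].start = start;
				extra_mem[i].size = size;
				return;
			}
			/* Contiguous with an existing region: extend it. */
			if (extra_mem[i].start + extra_mem[i].size == start) {
				extra_mem[i].size += size;
				return;
			}
		}
		fprintf(stderr, "Warning: not enough extra memory regions\n");
	}

	int main(void)
	{
		add_extra_mem(0x100000000ULL, 0x1000);	/* new region      */
		add_extra_mem(0x100001000ULL, 0x2000);	/* appended to it  */
		add_extra_mem(0x200000000ULL, 0x4000);	/* second region   */

		for (int i = 0; i < MAX_REGIONS; i++)
			if (extra_mem[i].size)
				printf("region %d: start=%#llx size=%#llx\n", i,
				       (unsigned long long)extra_mem[i].start,
				       (unsigned long long)extra_mem[i].size);
		return 0;
	}

The append case matters because chunks handed to xen_add_extra_mem() during the e820 walk are often contiguous, so in practice the array stays short.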
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--	arch/x86/xen/setup.c	284
1 file changed, 123 insertions(+), 161 deletions(-)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 46d6d21dbdbe..38d0af4fefec 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -37,7 +37,10 @@ extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
 
 /* Amount of extra memory space we add to the e820 ranges */
-phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
 
 /*
  * The maximum amount of extra memory compared to the base size.  The
@@ -51,48 +54,47 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
  */
 #define EXTRA_MEM_RATIO (10)
 
-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 start, u64 size)
 {
 	unsigned long pfn;
+	int i;
 
-	u64 size = (u64)pages * PAGE_SIZE;
-	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
-
-	if (!pages)
-		return;
-
-	e820_add_region(extra_start, size, E820_RAM);
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
-	memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		/* Add new region. */
+		if (xen_extra_mem[i].size == 0) {
+			xen_extra_mem[i].start = start;
+			xen_extra_mem[i].size = size;
+			break;
+		}
+		/* Append to existing region. */
+		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
+			xen_extra_mem[i].size += size;
+			break;
+		}
+	}
+	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
 
-	xen_extra_mem_size += size;
+	memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
 
-	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+	xen_max_p2m_pfn = PFN_DOWN(start + size);
 
-	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+	for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
 		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 }
 
-static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
-					      phys_addr_t end_addr)
+static unsigned long __init xen_release_chunk(unsigned long start,
+					      unsigned long end)
 {
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
 		.extent_order = 0,
 		.domid = DOMID_SELF
 	};
-	unsigned long start, end;
 	unsigned long len = 0;
 	unsigned long pfn;
 	int ret;
 
-	start = PFN_UP(start_addr);
-	end = PFN_DOWN(end_addr);
-
-	if (end <= start)
-		return 0;
-
 	for(pfn = start; pfn < end; pfn++) {
 		unsigned long mfn = pfn_to_mfn(pfn);
 
@@ -117,72 +119,52 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
 	return len;
 }
 
-static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
-						     const struct e820map *e820)
+static unsigned long __init xen_set_identity_and_release(
+	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
 {
-	phys_addr_t max_addr = PFN_PHYS(max_pfn);
-	phys_addr_t last_end = ISA_END_ADDRESS;
+	phys_addr_t start = 0;
 	unsigned long released = 0;
-	int i;
-
-	/* Free any unused memory above the low 1Mbyte. */
-	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
-		phys_addr_t end = e820->map[i].addr;
-		end = min(max_addr, end);
-
-		if (last_end < end)
-			released += xen_release_chunk(last_end, end);
-		last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
-	}
-
-	if (last_end < max_addr)
-		released += xen_release_chunk(last_end, max_addr);
-
-	printk(KERN_INFO "released %lu pages of unused memory\n", released);
-	return released;
-}
-
-static unsigned long __init xen_set_identity(const struct e820entry *list,
-					     ssize_t map_size)
-{
-	phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
-	phys_addr_t start_pci = last;
-	const struct e820entry *entry;
 	unsigned long identity = 0;
+	const struct e820entry *entry;
 	int i;
 
+	/*
+	 * Combine non-RAM regions and gaps until a RAM region (or the
+	 * end of the map) is reached, then set the 1:1 map and
+	 * release the pages (if available) in those non-RAM regions.
+	 *
+	 * The combined non-RAM regions are rounded to a whole number
+	 * of pages so any partial pages are accessible via the 1:1
+	 * mapping.  This is needed for some BIOSes that put (for
	 * example) the DMI tables in a reserved region that begins on
+	 * a non-page boundary.
+	 */
 	for (i = 0, entry = list; i < map_size; i++, entry++) {
-		phys_addr_t start = entry->addr;
-		phys_addr_t end = start + entry->size;
+		phys_addr_t end = entry->addr + entry->size;
 
-		if (start < last)
-			start = last;
+		if (entry->type == E820_RAM || i == map_size - 1) {
+			unsigned long start_pfn = PFN_DOWN(start);
+			unsigned long end_pfn = PFN_UP(end);
 
-		if (end <= start)
-			continue;
+			if (entry->type == E820_RAM)
+				end_pfn = PFN_UP(entry->addr);
 
-		/* Skip over the 1MB region. */
-		if (last > end)
-			continue;
+			if (start_pfn < end_pfn) {
+				if (start_pfn < nr_pages)
+					released += xen_release_chunk(
+						start_pfn, min(end_pfn, nr_pages));
 
-		if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
-			if (start > start_pci)
 				identity += set_phys_range_identity(
-						PFN_UP(start_pci), PFN_DOWN(start));
-
-			/* Without saving 'last' we would gooble RAM too
-			 * at the end of the loop. */
-			last = end;
-			start_pci = end;
-			continue;
+					start_pfn, end_pfn);
+			}
+			start = end;
 		}
-		start_pci = min(start, start_pci);
-		last = end;
 	}
-	if (last > start_pci)
-		identity += set_phys_range_identity(
-			PFN_UP(start_pci), PFN_DOWN(last));
-	return identity;
+
+	printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+
+	return released;
 }
 
 static unsigned long __init xen_get_max_pages(void)
@@ -197,21 +179,32 @@ static unsigned long __init xen_get_max_pages(void)
 	return min(max_pages, MAX_DOMAIN_PAGES);
 }
 
+static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
+{
+	u64 end = start + size;
+
+	/* Align RAM regions to page boundaries. */
+	if (type == E820_RAM) {
+		start = PAGE_ALIGN(start);
+		end &= ~((u64)PAGE_SIZE - 1);
+	}
+
+	e820_add_region(start, end - start, type);
+}
+
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
 char * __init xen_memory_setup(void)
 {
 	static struct e820entry map[E820MAX] __initdata;
-	static struct e820entry map_raw[E820MAX] __initdata;
 
 	unsigned long max_pfn = xen_start_info->nr_pages;
 	unsigned long long mem_end;
 	int rc;
 	struct xen_memory_map memmap;
+	unsigned long max_pages;
 	unsigned long extra_pages = 0;
-	unsigned long extra_limit;
-	unsigned long identity_pages = 0;
 	int i;
 	int op;
 
@@ -237,58 +230,65 @@ char * __init xen_memory_setup(void)
 	}
 	BUG_ON(rc);
 
-	memcpy(map_raw, map, sizeof(map));
-	e820.nr_map = 0;
-	xen_extra_mem_start = mem_end;
-	for (i = 0; i < memmap.nr_entries; i++) {
-		unsigned long long end;
-
-		/* Guard against non-page aligned E820 entries. */
-		if (map[i].type == E820_RAM)
-			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
-		end = map[i].addr + map[i].size;
-		if (map[i].type == E820_RAM && end > mem_end) {
-			/* RAM off the end - may be partially included */
-			u64 delta = min(map[i].size, end - mem_end);
-
-			map[i].size -= delta;
-			end -= delta;
-
-			extra_pages += PFN_DOWN(delta);
-			/*
-			 * Set RAM below 4GB that is not for us to be unusable.
-			 * This prevents "System RAM" address space from being
-			 * used as potential resource for I/O address (happens
-			 * when 'allocate_resource' is called).
-			 */
-			if (delta &&
-				(xen_initial_domain() && end < 0x100000000ULL))
-				e820_add_region(end, delta, E820_UNUSABLE);
+	/* Make sure the Xen-supplied memory map is well-ordered. */
+	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+
+	max_pages = xen_get_max_pages();
+	if (max_pages > max_pfn)
+		extra_pages += max_pages - max_pfn;
+
+	/*
+	 * Set P2M for all non-RAM pages and E820 gaps to be identity
+	 * type PFNs.  Any RAM pages that would be made inaccesible by
+	 * this are first released.
+	 */
+	xen_released_pages = xen_set_identity_and_release(
+		map, memmap.nr_entries, max_pfn);
+	extra_pages += xen_released_pages;
+
+	/*
+	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+	 * factor the base size.  On non-highmem systems, the base
+	 * size is the full initial memory allocation; on highmem it
+	 * is limited to the max size of lowmem, so that it doesn't
+	 * get completely filled.
+	 *
+	 * In principle there could be a problem in lowmem systems if
+	 * the initial memory is also very large with respect to
+	 * lowmem, but we won't try to deal with that here.
+	 */
+	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+			  extra_pages);
+
+	i = 0;
+	while (i < memmap.nr_entries) {
+		u64 addr = map[i].addr;
+		u64 size = map[i].size;
+		u32 type = map[i].type;
+
+		if (type == E820_RAM) {
+			if (addr < mem_end) {
+				size = min(size, mem_end - addr);
+			} else if (extra_pages) {
+				size = min(size, (u64)extra_pages * PAGE_SIZE);
+				extra_pages -= size / PAGE_SIZE;
+				xen_add_extra_mem(addr, size);
+			} else
+				type = E820_UNUSABLE;
 		}
 
-		if (map[i].size > 0 && end > xen_extra_mem_start)
-			xen_extra_mem_start = end;
+		xen_align_and_add_e820_region(addr, size, type);
 
-		/* Add region if any remains */
-		if (map[i].size > 0)
-			e820_add_region(map[i].addr, map[i].size, map[i].type);
+		map[i].addr += size;
+		map[i].size -= size;
+		if (map[i].size == 0)
+			i++;
 	}
-	/* Align the balloon area so that max_low_pfn does not get set
-	 * to be at the _end_ of the PCI gap at the far end (fee01000).
-	 * Note that xen_extra_mem_start gets set in the loop above to be
-	 * past the last E820 region. */
-	if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
-		xen_extra_mem_start = (1ULL<<32);
 
 	/*
 	 * In domU, the ISA region is normal, usable memory, but we
 	 * reserve ISA memory anyway because too many things poke
 	 * about in there.
-	 *
-	 * In Dom0, the host E820 information can leave gaps in the
-	 * ISA range, which would cause us to release those pages.  To
-	 * avoid this, we unconditionally reserve them here.
 	 */
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
@@ -305,44 +305,6 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-	extra_limit = xen_get_max_pages();
-	if (max_pfn + extra_pages > extra_limit) {
-		if (extra_limit > max_pfn)
-			extra_pages = extra_limit - max_pfn;
-		else
-			extra_pages = 0;
-	}
-
-	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
-	/*
-	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
-	 * factor the base size.  On non-highmem systems, the base
-	 * size is the full initial memory allocation; on highmem it
-	 * is limited to the max size of lowmem, so that it doesn't
-	 * get completely filled.
-	 *
-	 * In principle there could be a problem in lowmem systems if
-	 * the initial memory is also very large with respect to
-	 * lowmem, but we won't try to deal with that here.
-	 */
-	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
-			  max_pfn + extra_pages);
-
-	if (extra_limit >= max_pfn)
-		extra_pages = extra_limit - max_pfn;
-	else
-		extra_pages = 0;
-
-	xen_add_extra_mem(extra_pages);
-
-	/*
-	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-	 * type PFNs. We supply it with the non-sanitized version
-	 * of the E820.
-	 */
-	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
-	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
 	return "Xen";
 }
 
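For reference, the rewritten xen_memory_setup() above consumes each E820 entry incrementally: RAM below mem_end is kept, RAM above it is handed to xen_add_extra_mem() while extra_pages last, and the remainder is marked unusable; an entry is only advanced once its current prefix has been handled. The following standalone sketch replays that loop on a made-up two-entry memory map; the entry struct, the map contents, and the mem_end/extra_pages values are illustrative assumptions, and printf stands in for xen_align_and_add_e820_region()/xen_add_extra_mem():

	/* Sketch of the incremental e820 walk in the new xen_memory_setup(). */
	#include <stdio.h>
	#include <stdint.h>

	#define E820_RAM	1
	#define E820_UNUSABLE	5
	#define PAGE_SIZE	4096ULL

	struct entry { uint64_t addr, size; uint32_t type; };

	int main(void)
	{
		struct entry map[] = {
			{ 0x0,      0x9f000,    E820_RAM },
			{ 0x100000, 0x3ff00000, E820_RAM },	/* ~1 GiB host RAM */
		};
		unsigned nr_entries = sizeof(map) / sizeof(map[0]);
		uint64_t mem_end = 0x20000000;	/* initial allocation: 512 MiB */
		uint64_t extra_pages = 0x8000;	/* clamped ballooning headroom */
		unsigned i = 0;

		while (i < nr_entries) {
			uint64_t addr = map[i].addr;
			uint64_t size = map[i].size;
			uint32_t type = map[i].type;

			if (type == E820_RAM) {
				if (addr < mem_end) {
					/* RAM backed by the initial allocation. */
					if (size > mem_end - addr)
						size = mem_end - addr;
				} else if (extra_pages) {
					/* Unpopulated RAM: a ballooning target. */
					if (size > extra_pages * PAGE_SIZE)
						size = extra_pages * PAGE_SIZE;
					extra_pages -= size / PAGE_SIZE;
					printf("extra region: %#llx +%#llx\n",
					       (unsigned long long)addr,
					       (unsigned long long)size);
				} else
					type = E820_UNUSABLE;
			}

			printf("e820: %#llx +%#llx type %u\n",
			       (unsigned long long)addr,
			       (unsigned long long)size, type);

			/* Consume the handled prefix; revisit the rest. */
			map[i].addr += size;
			map[i].size -= size;
			if (map[i].size == 0)
				i++;
		}
		return 0;
	}

Running this splits the second map entry into three pieces: populated RAM up to mem_end, an extra-memory region sized by extra_pages, and an unusable tail, which is exactly the per-entry splitting the while loop in the diff performs.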