Diffstat (limited to 'arch/x86/xen/setup.c')
-rw-r--r--	arch/x86/xen/setup.c	294
1 file changed, 138 insertions, 156 deletions
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 73daaf75801a..f5e1362550e7 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/pm.h>
 #include <linux/memblock.h>
+#include <linux/cpuidle.h>
 
 #include <asm/elf.h>
 #include <asm/vdso.h>
@@ -36,7 +37,10 @@ extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
 
 /* Amount of extra memory space we add to the e820 ranges */
-phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
 
 /*
  * The maximum amount of extra memory compared to the base size. The
@@ -50,50 +54,47 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
  */
 #define EXTRA_MEM_RATIO (10)
 
-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 start, u64 size)
 {
 	unsigned long pfn;
+	int i;
 
-	u64 size = (u64)pages * PAGE_SIZE;
-	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
-
-	if (!pages)
-		return;
-
-	e820_add_region(extra_start, size, E820_RAM);
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
-	memblock_reserve(extra_start, size);
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		/* Add new region. */
+		if (xen_extra_mem[i].size == 0) {
+			xen_extra_mem[i].start = start;
+			xen_extra_mem[i].size = size;
+			break;
+		}
+		/* Append to existing region. */
+		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
+			xen_extra_mem[i].size += size;
+			break;
+		}
+	}
+	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
 
-	xen_extra_mem_size += size;
+	memblock_reserve(start, size);
 
-	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+	xen_max_p2m_pfn = PFN_DOWN(start + size);
 
-	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+	for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
 		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 }
 
-static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
-					      phys_addr_t end_addr)
+static unsigned long __init xen_release_chunk(unsigned long start,
+					      unsigned long end)
 {
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
 		.extent_order = 0,
 		.domid = DOMID_SELF
 	};
-	unsigned long start, end;
 	unsigned long len = 0;
 	unsigned long pfn;
 	int ret;
 
-	start = PFN_UP(start_addr);
-	end = PFN_DOWN(end_addr);
-
-	if (end <= start)
-		return 0;
-
-	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
-	       start, end);
 	for(pfn = start; pfn < end; pfn++) {
 		unsigned long mfn = pfn_to_mfn(pfn);
 
@@ -106,100 +107,104 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
 
 		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
 					   &reservation);
-		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
-		     start, end, ret);
+		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
 		if (ret == 1) {
 			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 			len++;
 		}
 	}
-	printk(KERN_CONT "%ld pages freed\n", len);
+	printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n",
+	       start, end, len);
 
 	return len;
 }
 
-static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
-						     const struct e820map *e820)
+static unsigned long __init xen_set_identity_and_release(
+	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
 {
-	phys_addr_t max_addr = PFN_PHYS(max_pfn);
-	phys_addr_t last_end = ISA_END_ADDRESS;
+	phys_addr_t start = 0;
 	unsigned long released = 0;
+	unsigned long identity = 0;
+	const struct e820entry *entry;
 	int i;
 
-	/* Free any unused memory above the low 1Mbyte. */
-	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
-		phys_addr_t end = e820->map[i].addr;
-		end = min(max_addr, end);
+	/*
+	 * Combine non-RAM regions and gaps until a RAM region (or the
+	 * end of the map) is reached, then set the 1:1 map and
+	 * release the pages (if available) in those non-RAM regions.
+	 *
+	 * The combined non-RAM regions are rounded to a whole number
+	 * of pages so any partial pages are accessible via the 1:1
+	 * mapping. This is needed for some BIOSes that put (for
+	 * example) the DMI tables in a reserved region that begins on
+	 * a non-page boundary.
+	 */
+	for (i = 0, entry = list; i < map_size; i++, entry++) {
+		phys_addr_t end = entry->addr + entry->size;
+
+		if (entry->type == E820_RAM || i == map_size - 1) {
+			unsigned long start_pfn = PFN_DOWN(start);
+			unsigned long end_pfn = PFN_UP(end);
 
-		if (last_end < end)
-			released += xen_release_chunk(last_end, end);
-		last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
+			if (entry->type == E820_RAM)
+				end_pfn = PFN_UP(entry->addr);
+
+			if (start_pfn < end_pfn) {
+				if (start_pfn < nr_pages)
+					released += xen_release_chunk(
+						start_pfn, min(end_pfn, nr_pages));
+
+				identity += set_phys_range_identity(
+					start_pfn, end_pfn);
+			}
+			start = end;
+		}
 	}
 
-	if (last_end < max_addr)
-		released += xen_release_chunk(last_end, max_addr);
+	printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
 
-	printk(KERN_INFO "released %ld pages of unused memory\n", released);
 	return released;
 }
 
-static unsigned long __init xen_set_identity(const struct e820entry *list,
-					     ssize_t map_size)
+static unsigned long __init xen_get_max_pages(void)
 {
-	phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
-	phys_addr_t start_pci = last;
-	const struct e820entry *entry;
-	unsigned long identity = 0;
-	int i;
-
-	for (i = 0, entry = list; i < map_size; i++, entry++) {
-		phys_addr_t start = entry->addr;
-		phys_addr_t end = start + entry->size;
-
-		if (start < last)
-			start = last;
-
-		if (end <= start)
-			continue;
+	unsigned long max_pages = MAX_DOMAIN_PAGES;
+	domid_t domid = DOMID_SELF;
+	int ret;
 
-		/* Skip over the 1MB region. */
-		if (last > end)
-			continue;
+	ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+	if (ret > 0)
+		max_pages = ret;
+	return min(max_pages, MAX_DOMAIN_PAGES);
+}
 
-		if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
-			if (start > start_pci)
-				identity += set_phys_range_identity(
-					PFN_UP(start_pci), PFN_DOWN(start));
+static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
+{
+	u64 end = start + size;
 
-			/* Without saving 'last' we would gooble RAM too
-			 * at the end of the loop. */
-			last = end;
-			start_pci = end;
-			continue;
-		}
-		start_pci = min(start, start_pci);
-		last = end;
+	/* Align RAM regions to page boundaries. */
+	if (type == E820_RAM) {
+		start = PAGE_ALIGN(start);
+		end &= ~((u64)PAGE_SIZE - 1);
 	}
-	if (last > start_pci)
-		identity += set_phys_range_identity(
-			PFN_UP(start_pci), PFN_DOWN(last));
-	return identity;
+
+	e820_add_region(start, end - start, type);
 }
+
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
 char * __init xen_memory_setup(void)
 {
 	static struct e820entry map[E820MAX] __initdata;
-	static struct e820entry map_raw[E820MAX] __initdata;
 
 	unsigned long max_pfn = xen_start_info->nr_pages;
 	unsigned long long mem_end;
 	int rc;
 	struct xen_memory_map memmap;
+	unsigned long max_pages;
 	unsigned long extra_pages = 0;
-	unsigned long extra_limit;
-	unsigned long identity_pages = 0;
 	int i;
 	int op;
 
@@ -225,58 +230,65 @@ char * __init xen_memory_setup(void)
 	}
 	BUG_ON(rc);
 
-	memcpy(map_raw, map, sizeof(map));
-	e820.nr_map = 0;
-	xen_extra_mem_start = mem_end;
-	for (i = 0; i < memmap.nr_entries; i++) {
-		unsigned long long end;
-
-		/* Guard against non-page aligned E820 entries. */
-		if (map[i].type == E820_RAM)
-			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
-		end = map[i].addr + map[i].size;
-		if (map[i].type == E820_RAM && end > mem_end) {
-			/* RAM off the end - may be partially included */
-			u64 delta = min(map[i].size, end - mem_end);
-
-			map[i].size -= delta;
-			end -= delta;
-
-			extra_pages += PFN_DOWN(delta);
-			/*
-			 * Set RAM below 4GB that is not for us to be unusable.
-			 * This prevents "System RAM" address space from being
-			 * used as potential resource for I/O address (happens
-			 * when 'allocate_resource' is called).
-			 */
-			if (delta &&
-			    (xen_initial_domain() && end < 0x100000000ULL))
-				e820_add_region(end, delta, E820_UNUSABLE);
+	/* Make sure the Xen-supplied memory map is well-ordered. */
+	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+
+	max_pages = xen_get_max_pages();
+	if (max_pages > max_pfn)
+		extra_pages += max_pages - max_pfn;
+
+	/*
+	 * Set P2M for all non-RAM pages and E820 gaps to be identity
+	 * type PFNs. Any RAM pages that would be made inaccesible by
+	 * this are first released.
+	 */
+	xen_released_pages = xen_set_identity_and_release(
+		map, memmap.nr_entries, max_pfn);
+	extra_pages += xen_released_pages;
+
+	/*
+	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+	 * factor the base size. On non-highmem systems, the base
+	 * size is the full initial memory allocation; on highmem it
+	 * is limited to the max size of lowmem, so that it doesn't
+	 * get completely filled.
+	 *
+	 * In principle there could be a problem in lowmem systems if
+	 * the initial memory is also very large with respect to
+	 * lowmem, but we won't try to deal with that here.
+	 */
+	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+			  extra_pages);
+
+	i = 0;
+	while (i < memmap.nr_entries) {
+		u64 addr = map[i].addr;
+		u64 size = map[i].size;
+		u32 type = map[i].type;
+
+		if (type == E820_RAM) {
+			if (addr < mem_end) {
+				size = min(size, mem_end - addr);
+			} else if (extra_pages) {
+				size = min(size, (u64)extra_pages * PAGE_SIZE);
+				extra_pages -= size / PAGE_SIZE;
+				xen_add_extra_mem(addr, size);
+			} else
+				type = E820_UNUSABLE;
 		}
 
-		if (map[i].size > 0 && end > xen_extra_mem_start)
-			xen_extra_mem_start = end;
+		xen_align_and_add_e820_region(addr, size, type);
 
-		/* Add region if any remains */
-		if (map[i].size > 0)
-			e820_add_region(map[i].addr, map[i].size, map[i].type);
+		map[i].addr += size;
+		map[i].size -= size;
+		if (map[i].size == 0)
+			i++;
 	}
-	/* Align the balloon area so that max_low_pfn does not get set
-	 * to be at the _end_ of the PCI gap at the far end (fee01000).
-	 * Note that xen_extra_mem_start gets set in the loop above to be
-	 * past the last E820 region. */
-	if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
-		xen_extra_mem_start = (1ULL<<32);
 
 	/*
 	 * In domU, the ISA region is normal, usable memory, but we
 	 * reserve ISA memory anyway because too many things poke
 	 * about in there.
-	 *
-	 * In Dom0, the host E820 information can leave gaps in the
-	 * ISA range, which would cause us to release those pages. To
-	 * avoid this, we unconditionally reserve them here.
 	 */
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
@@ -292,36 +304,6 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
-	/*
-	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
-	 * factor the base size. On non-highmem systems, the base
-	 * size is the full initial memory allocation; on highmem it
-	 * is limited to the max size of lowmem, so that it doesn't
-	 * get completely filled.
-	 *
-	 * In principle there could be a problem in lowmem systems if
-	 * the initial memory is also very large with respect to
-	 * lowmem, but we won't try to deal with that here.
-	 */
-	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
-			  max_pfn + extra_pages);
-
-	if (extra_limit >= max_pfn)
-		extra_pages = extra_limit - max_pfn;
-	else
-		extra_pages = 0;
-
-	xen_add_extra_mem(extra_pages);
-
-	/*
-	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-	 * type PFNs. We supply it with the non-sanitized version
-	 * of the E820.
-	 */
-	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
-	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
 	return "Xen";
 }
 
@@ -425,7 +407,7 @@ void __init xen_arch_setup(void)
 #ifdef CONFIG_X86_32
 	boot_cpu_data.hlt_works_ok = 1;
 #endif
-	pm_idle = default_idle;
+	disable_cpuidle();
 	boot_option_idle_override = IDLE_HALT;
 
 	fiddle_vdso();