Diffstat (limited to 'arch/x86/xen/setup.c')
-rw-r--r--  arch/x86/xen/setup.c  241
1 file changed, 204 insertions, 37 deletions
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 328b00305426..60aeeb56948f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/pm.h>
+#include <linux/memblock.h>
 
 #include <asm/elf.h>
 #include <asm/vdso.h>
@@ -17,10 +18,11 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/interface/callback.h>
-#include <xen/interface/physdev.h>
 #include <xen/interface/memory.h>
+#include <xen/interface/physdev.h>
 #include <xen/features.h>
 
 #include "xen-ops.h"
@@ -33,6 +35,44 @@ extern void xen_sysenter_target(void);
 extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
 
+/* Amount of extra memory space we add to the e820 ranges */
+phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+
+/*
+ * The maximum amount of extra memory compared to the base size. The
+ * main scaling factor is the size of struct page. At extreme ratios
+ * of base:extra, all the base memory can be filled with page
+ * structures for the extra memory, leaving no space for anything
+ * else.
+ *
+ * 10x seems like a reasonable balance between scaling flexibility and
+ * leaving a practically usable system.
+ */
+#define EXTRA_MEM_RATIO (10)
+
+static void __init xen_add_extra_mem(unsigned long pages)
+{
+        unsigned long pfn;
+
+        u64 size = (u64)pages * PAGE_SIZE;
+        u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
+
+        if (!pages)
+                return;
+
+        e820_add_region(extra_start, size, E820_RAM);
+        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+
+        memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+
+        xen_extra_mem_size += size;
+
+        xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+
+        for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+                __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+}
+
 static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
                                               phys_addr_t end_addr)
 {
@@ -69,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
                 WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
                      start, end, ret);
                 if (ret == 1) {
-                        set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+                        __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
                         len++;
                 }
         }
@@ -82,16 +122,18 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
                                               const struct e820map *e820)
 {
         phys_addr_t max_addr = PFN_PHYS(max_pfn);
-        phys_addr_t last_end = 0;
+        phys_addr_t last_end = ISA_END_ADDRESS;
         unsigned long released = 0;
         int i;
 
+        /* Free any unused memory above the low 1Mbyte. */
         for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
                 phys_addr_t end = e820->map[i].addr;
                 end = min(max_addr, end);
 
-                released += xen_release_chunk(last_end, end);
-                last_end = e820->map[i].addr + e820->map[i].size;
+                if (last_end < end)
+                        released += xen_release_chunk(last_end, end);
+                last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
         }
 
         if (last_end < max_addr)
@@ -101,24 +143,140 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
         return released;
 }
 
+static unsigned long __init xen_set_identity(const struct e820entry *list,
+                                             ssize_t map_size)
+{
+        phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
+        phys_addr_t start_pci = last;
+        const struct e820entry *entry;
+        unsigned long identity = 0;
+        int i;
+
+        for (i = 0, entry = list; i < map_size; i++, entry++) {
+                phys_addr_t start = entry->addr;
+                phys_addr_t end = start + entry->size;
+
+                if (start < last)
+                        start = last;
+
+                if (end <= start)
+                        continue;
+
+                /* Skip over the 1MB region. */
+                if (last > end)
+                        continue;
+
+                if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
+                        if (start > start_pci)
+                                identity += set_phys_range_identity(
+                                                PFN_UP(start_pci), PFN_DOWN(start));
+
+                        /* Without saving 'last' we would gobble RAM too
+                         * at the end of the loop. */
+                        last = end;
+                        start_pci = end;
+                        continue;
+                }
+                start_pci = min(start, start_pci);
+                last = end;
+        }
+        if (last > start_pci)
+                identity += set_phys_range_identity(
+                                PFN_UP(start_pci), PFN_DOWN(last));
+        return identity;
+}
104/** 188/**
105 * machine_specific_memory_setup - Hook for machine specific memory setup. 189 * machine_specific_memory_setup - Hook for machine specific memory setup.
106 **/ 190 **/
107
108char * __init xen_memory_setup(void) 191char * __init xen_memory_setup(void)
109{ 192{
193 static struct e820entry map[E820MAX] __initdata;
194 static struct e820entry map_raw[E820MAX] __initdata;
195
110 unsigned long max_pfn = xen_start_info->nr_pages; 196 unsigned long max_pfn = xen_start_info->nr_pages;
197 unsigned long long mem_end;
198 int rc;
199 struct xen_memory_map memmap;
200 unsigned long extra_pages = 0;
201 unsigned long extra_limit;
202 unsigned long identity_pages = 0;
203 int i;
204 int op;
111 205
112 max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); 206 max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
207 mem_end = PFN_PHYS(max_pfn);
208
209 memmap.nr_entries = E820MAX;
210 set_xen_guest_handle(memmap.buffer, map);
211
212 op = xen_initial_domain() ?
213 XENMEM_machine_memory_map :
214 XENMEM_memory_map;
215 rc = HYPERVISOR_memory_op(op, &memmap);
216 if (rc == -ENOSYS) {
217 BUG_ON(xen_initial_domain());
218 memmap.nr_entries = 1;
219 map[0].addr = 0ULL;
220 map[0].size = mem_end;
221 /* 8MB slack (to balance backend allocations). */
222 map[0].size += 8ULL << 20;
223 map[0].type = E820_RAM;
224 rc = 0;
225 }
226 BUG_ON(rc);
113 227
228 memcpy(map_raw, map, sizeof(map));
114 e820.nr_map = 0; 229 e820.nr_map = 0;
+        xen_extra_mem_start = mem_end;
+        for (i = 0; i < memmap.nr_entries; i++) {
+                unsigned long long end;
+
+                /* Guard against non-page aligned E820 entries. */
+                if (map[i].type == E820_RAM)
+                        map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
+
+                end = map[i].addr + map[i].size;
+                if (map[i].type == E820_RAM && end > mem_end) {
+                        /* RAM off the end - may be partially included */
+                        u64 delta = min(map[i].size, end - mem_end);
+
+                        map[i].size -= delta;
+                        end -= delta;
+
+                        extra_pages += PFN_DOWN(delta);
+                        /*
+                         * Set RAM below 4GB that is not for us to be unusable.
+                         * This prevents "System RAM" address space from being
+                         * used as potential resource for I/O address (happens
+                         * when 'allocate_resource' is called).
+                         */
+                        if (delta &&
+                                (xen_initial_domain() && end < 0x100000000ULL))
+                                e820_add_region(end, delta, E820_UNUSABLE);
+                }
 
-        e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
+                if (map[i].size > 0 && end > xen_extra_mem_start)
+                        xen_extra_mem_start = end;
+
+                /* Add region if any remains */
+                if (map[i].size > 0)
+                        e820_add_region(map[i].addr, map[i].size, map[i].type);
+        }
+        /* Align the balloon area so that max_low_pfn does not get set
+         * to be at the _end_ of the PCI gap at the far end (fee01000).
+         * Note that xen_extra_mem_start gets set in the loop above to be
+         * past the last E820 region. */
+        if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
+                xen_extra_mem_start = (1ULL<<32);
 
         /*
-         * Even though this is normal, usable memory under Xen, reserve
-         * ISA memory anyway because too many things think they can poke
+         * In domU, the ISA region is normal, usable memory, but we
+         * reserve ISA memory anyway because too many things poke
          * about in there.
+         *
+         * In Dom0, the host E820 information can leave gaps in the
+         * ISA range, which would cause us to release those pages. To
+         * avoid this, we unconditionally reserve them here.
          */
         e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
                         E820_RESERVED);
@@ -129,29 +287,43 @@ char * __init xen_memory_setup(void)
  * - xen_start_info
  * See comment above "struct start_info" in <xen/interface/xen.h>
  */
-        reserve_early(__pa(xen_start_info->mfn_list),
+        memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
                       __pa(xen_start_info->pt_base),
                       "XEN START INFO");
 
         sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-        xen_return_unused_memory(xen_start_info->nr_pages, &e820);
+        extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
 
-        return "Xen";
-}
+        /*
+         * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+         * factor the base size. On non-highmem systems, the base
+         * size is the full initial memory allocation; on highmem it
+         * is limited to the max size of lowmem, so that it doesn't
+         * get completely filled.
+         *
+         * In principle there could be a problem in lowmem systems if
+         * the initial memory is also very large with respect to
+         * lowmem, but we won't try to deal with that here.
+         */
+        extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+                          max_pfn + extra_pages);
 
-static void xen_idle(void)
-{
-        local_irq_disable();
-
-        if (need_resched())
-                local_irq_enable();
-        else {
-                current_thread_info()->status &= ~TS_POLLING;
-                smp_mb__after_clear_bit();
-                safe_halt();
-                current_thread_info()->status |= TS_POLLING;
-        }
+        if (extra_limit >= max_pfn)
+                extra_pages = extra_limit - max_pfn;
+        else
+                extra_pages = 0;
+
+        xen_add_extra_mem(extra_pages);
+
+        /*
+         * Set P2M for all non-RAM pages and E820 gaps to be identity
+         * type PFNs. We supply it with the non-sanitized version
+         * of the E820.
+         */
+        identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+        printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
+        return "Xen";
 }
 
 /*
@@ -170,7 +342,7 @@ static void __init fiddle_vdso(void)
 #endif
 }
 
-static __cpuinit int register_callback(unsigned type, const void *func)
+static int __cpuinit register_callback(unsigned type, const void *func)
 {
         struct callback_register callback = {
                 .type = type,
@@ -223,9 +395,6 @@ void __cpuinit xen_enable_syscall(void)
 
 void __init xen_arch_setup(void)
 {
-        struct physdev_set_iopl set_iopl;
-        int rc;
-
         xen_panic_handler_init();
 
         HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
@@ -242,11 +411,6 @@ void __init xen_arch_setup(void)
         xen_enable_sysenter();
         xen_enable_syscall();
 
-        set_iopl.iopl = 1;
-        rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-        if (rc != 0)
-                printk(KERN_INFO "physdev_op failed %d\n", rc);
-
 #ifdef CONFIG_ACPI
         if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
                 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
@@ -258,9 +422,12 @@ void __init xen_arch_setup(void)
                MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
                COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
 
-        pm_idle = xen_idle;
-
-        paravirt_disable_iospace();
+        /* Set up idle, making sure it calls safe_halt() pvop */
+#ifdef CONFIG_X86_32
+        boot_cpu_data.hlt_works_ok = 1;
+#endif
+        pm_idle = default_idle;
+        boot_option_idle_override = IDLE_HALT;
 
         fiddle_vdso();
 }
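
For readers following the memory accounting above: a minimal user-space sketch of the EXTRA_MEM_RATIO clamp that xen_memory_setup() now applies before calling xen_add_extra_mem(). It is not part of the patch; PAGE_SHIFT, the MAXMEM stand-in, and the sample domain sizes are assumptions chosen so that the cap actually bites.

/* Standalone sketch of the extra-memory clamp; names mirror the patch,
 * but the constants below are illustrative assumptions. */
#include <stdio.h>
#include <stdint.h>

#define EXTRA_MEM_RATIO  10ULL                            /* same 10x cap as the patch */
#define PAGE_SHIFT       12                               /* assumed 4 KiB pages */
#define MAXMEM_PFN       ((896ULL << 20) >> PAGE_SHIFT)   /* stand-in for PFN_DOWN(MAXMEM), ~896 MiB lowmem */

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

int main(void)
{
        /* Example domU: 256 MiB populated, 4 GiB of e820 RAM beyond nr_pages. */
        uint64_t max_pfn = (256ULL << 20) >> PAGE_SHIFT;
        uint64_t extra_pages = (4096ULL << 20) >> PAGE_SHIFT;
        uint64_t extra_limit;

        /* Same shape as the patch: cap extra memory at EXTRA_MEM_RATIO times
         * the smaller of the base allocation and lowmem. */
        extra_limit = min_u64(EXTRA_MEM_RATIO * min_u64(max_pfn, MAXMEM_PFN),
                              max_pfn + extra_pages);

        if (extra_limit >= max_pfn)
                extra_pages = extra_limit - max_pfn;
        else
                extra_pages = 0;

        printf("base %llu pages, clamped extra %llu pages (%llu MiB)\n",
               (unsigned long long)max_pfn,
               (unsigned long long)extra_pages,
               (unsigned long long)((extra_pages << PAGE_SHIFT) >> 20));
        return 0;
}

With these sample numbers the 10x rule trims the requested 4 GiB of balloonable extra area down to 2304 MiB, which is the point of the clamp: keeping struct page arrays for the extra region from consuming the base allocation.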
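
Similarly, a tiny sketch (outside the patch, with a made-up e820 entry) of the "guard against non-page aligned E820 entries" trim used in the loop above: a RAM entry is shrunk so that addr + size lands on a page boundary before it is handed to e820_add_region().

/* Sketch of the end-of-entry alignment trim; the sample entry is invented. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL   /* assumed 4 KiB pages */

int main(void)
{
        uint64_t addr = 0x100000;      /* 1 MiB, already page aligned */
        uint64_t size = 0x7ff00400;    /* ends 0x400 bytes past a page boundary */

        /* Same expression the patch applies to E820_RAM entries. */
        size -= (size + addr) % PAGE_SIZE;

        printf("trimmed end: %#llx (page aligned: %s)\n",
               (unsigned long long)(addr + size),
               ((addr + size) % PAGE_SIZE == 0) ? "yes" : "no");
        return 0;
}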