Diffstat (limited to 'arch/x86/xen/setup.c')
-rw-r--r--  arch/x86/xen/setup.c | 241 ++++++++++++++++++++++++++++++++++++++---------
1 files changed, 204 insertions(+), 37 deletions(-)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 328b00305426..60aeeb56948f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/pm.h>
+#include <linux/memblock.h>
 
 #include <asm/elf.h>
 #include <asm/vdso.h>
@@ -17,10 +18,11 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/interface/callback.h>
-#include <xen/interface/physdev.h>
 #include <xen/interface/memory.h>
+#include <xen/interface/physdev.h>
 #include <xen/features.h>
 
 #include "xen-ops.h"
@@ -33,6 +35,44 @@ extern void xen_sysenter_target(void);
 extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
 
+/* Amount of extra memory space we add to the e820 ranges */
+phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+
+/*
+ * The maximum amount of extra memory compared to the base size.  The
+ * main scaling factor is the size of struct page.  At extreme ratios
+ * of base:extra, all the base memory can be filled with page
+ * structures for the extra memory, leaving no space for anything
+ * else.
+ *
+ * 10x seems like a reasonable balance between scaling flexibility and
+ * leaving a practically usable system.
+ */
+#define EXTRA_MEM_RATIO		(10)
+
+static void __init xen_add_extra_mem(unsigned long pages)
+{
+	unsigned long pfn;
+
+	u64 size = (u64)pages * PAGE_SIZE;
+	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
+
+	if (!pages)
+		return;
+
+	e820_add_region(extra_start, size, E820_RAM);
+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+
+	memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+
+	xen_extra_mem_size += size;
+
+	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+
+	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+}
+
 static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
					      phys_addr_t end_addr)
 {
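Note: to put a number on the EXTRA_MEM_RATIO comment above: with 4 KiB pages, every page of extra memory costs one struct page of base memory. A standalone back-of-the-envelope check (the 56-byte sizeof(struct page) is an assumption for illustration; the real value depends on kernel configuration):

	/* ratio.c - rough check of the EXTRA_MEM_RATIO trade-off.
	 * Userspace sketch, not kernel code; struct page size assumed. */
	#include <stdio.h>

	int main(void)
	{
		const double page_size   = 4096.0;	/* 4 KiB pages */
		const double struct_page = 56.0;	/* assumed sizeof(struct page) */
		const double ratio       = 10.0;	/* EXTRA_MEM_RATIO */

		/* Page structures for ratio-times-base extra memory,
		 * as a fraction of the base memory that must hold them. */
		double fraction = ratio * struct_page / page_size;

		printf("~%.1f%% of base RAM goes to struct pages\n",
		       fraction * 100.0);
		return 0;
	}

At 10x, roughly 14% of base memory ends up holding page structures, which matches the "practically usable system" balance the comment aims for.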
@@ -69,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
 		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
 		     start, end, ret);
 		if (ret == 1) {
-			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 			len++;
 		}
 	}
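Note: the switch to the double-underscore variant matters because, in this kernel's arch/x86/xen/p2m.c, set_phys_to_machine() falls back to allocating p2m pages when an entry is missing, while __set_phys_to_machine() only writes an entry that already exists; the PFNs released here are presumably already covered by the initial p2m. A toy flat-array model of what marking a PFN INVALID_P2M_ENTRY means (the real p2m is a multi-level tree, not a flat array):

	/* p2m-toy.c - illustrative model only; names echo the kernel's
	 * but the behaviour is deliberately simplified. */
	#include <stdio.h>

	#define INVALID_P2M_ENTRY (~0UL)
	#define NR_PFNS 8UL

	static unsigned long p2m[NR_PFNS];

	/* Write an entry that already has a slot; never allocates. */
	static int toy_set_p2m(unsigned long pfn, unsigned long mfn)
	{
		if (pfn >= NR_PFNS)
			return 0;	/* no slot: the non-toy code would allocate */
		p2m[pfn] = mfn;
		return 1;
	}

	int main(void)
	{
		unsigned long pfn;

		for (pfn = 0; pfn < NR_PFNS; pfn++)
			p2m[pfn] = 0x1000UL + pfn;		/* fake MFNs */

		/* "Release" pfns 4..7 back to the hypervisor. */
		for (pfn = 4; pfn < NR_PFNS; pfn++)
			toy_set_p2m(pfn, INVALID_P2M_ENTRY);

		for (pfn = 0; pfn < NR_PFNS; pfn++)
			printf("pfn %lu -> %s\n", pfn,
			       p2m[pfn] == INVALID_P2M_ENTRY ? "invalid"
							     : "backed");
		return 0;
	}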
@@ -82,16 +122,18 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
					     const struct e820map *e820)
 {
 	phys_addr_t max_addr = PFN_PHYS(max_pfn);
-	phys_addr_t last_end = 0;
+	phys_addr_t last_end = ISA_END_ADDRESS;
 	unsigned long released = 0;
 	int i;
 
+	/* Free any unused memory above the low 1Mbyte. */
 	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
 		phys_addr_t end = e820->map[i].addr;
 		end = min(max_addr, end);
 
-		released += xen_release_chunk(last_end, end);
-		last_end = e820->map[i].addr + e820->map[i].size;
+		if (last_end < end)
+			released += xen_release_chunk(last_end, end);
+		last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
 	}
 
 	if (last_end < max_addr)
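Note: two fixes land in this loop: chunks are only released when last_end < end, so overlapping or unsorted E820 entries can no longer produce a reversed range, and last_end never moves backwards. A userspace sketch of the same gap walk over an invented map:

	/* gap-walk.c - sketch of xen_return_unused_memory()'s traversal.
	 * The map below is made up for illustration. */
	#include <stdio.h>

	struct region { unsigned long addr, size; };

	#define ISA_END 0x100000UL		/* 1 MB */

	int main(void)
	{
		/* Sorted toy "E820": [1M,3M) and [5M,6M) are populated. */
		struct region map[] = {
			{ 0x100000UL, 0x200000UL },
			{ 0x500000UL, 0x100000UL },
		};
		unsigned long max_addr = 0x800000UL;	/* 8 MB ceiling */
		unsigned long last_end = ISA_END;
		int i;

		for (i = 0; i < 2 && last_end < max_addr; i++) {
			unsigned long end = map[i].addr;
			if (end > max_addr)
				end = max_addr;
			if (last_end < end)		/* a gap: release it */
				printf("release [%#lx, %#lx)\n", last_end, end);
			if (map[i].addr + map[i].size > last_end)
				last_end = map[i].addr + map[i].size;
		}
		if (last_end < max_addr)
			printf("release [%#lx, %#lx)\n", last_end, max_addr);
		return 0;
	}

This prints the gap [0x300000, 0x500000) and the tail [0x600000, 0x800000), and never touches anything below the 1 MB ISA boundary.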
@@ -101,24 +143,140 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
 	return released;
 }
 
+static unsigned long __init xen_set_identity(const struct e820entry *list,
+					     ssize_t map_size)
+{
+	phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
+	phys_addr_t start_pci = last;
+	const struct e820entry *entry;
+	unsigned long identity = 0;
+	int i;
+
+	for (i = 0, entry = list; i < map_size; i++, entry++) {
+		phys_addr_t start = entry->addr;
+		phys_addr_t end = start + entry->size;
+
+		if (start < last)
+			start = last;
+
+		if (end <= start)
+			continue;
+
+		/* Skip over the 1MB region. */
+		if (last > end)
+			continue;
+
+		if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
+			if (start > start_pci)
+				identity += set_phys_range_identity(
+						PFN_UP(start_pci), PFN_DOWN(start));
+
+			/* Without saving 'last' we would gobble RAM too
+			 * at the end of the loop. */
+			last = end;
+			start_pci = end;
+			continue;
+		}
+		start_pci = min(start, start_pci);
+		last = end;
+	}
+	if (last > start_pci)
+		identity += set_phys_range_identity(
+			PFN_UP(start_pci), PFN_DOWN(last));
+	return identity;
+}
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
-
 char * __init xen_memory_setup(void)
 {
+	static struct e820entry map[E820MAX] __initdata;
+	static struct e820entry map_raw[E820MAX] __initdata;
+
 	unsigned long max_pfn = xen_start_info->nr_pages;
+	unsigned long long mem_end;
+	int rc;
+	struct xen_memory_map memmap;
+	unsigned long extra_pages = 0;
+	unsigned long extra_limit;
+	unsigned long identity_pages = 0;
+	int i;
+	int op;
 
 	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
+	mem_end = PFN_PHYS(max_pfn);
+
+	memmap.nr_entries = E820MAX;
+	set_xen_guest_handle(memmap.buffer, map);
+
+	op = xen_initial_domain() ?
+		XENMEM_machine_memory_map :
+		XENMEM_memory_map;
+	rc = HYPERVISOR_memory_op(op, &memmap);
+	if (rc == -ENOSYS) {
+		BUG_ON(xen_initial_domain());
+		memmap.nr_entries = 1;
+		map[0].addr = 0ULL;
+		map[0].size = mem_end;
+		/* 8MB slack (to balance backend allocations). */
+		map[0].size += 8ULL << 20;
+		map[0].type = E820_RAM;
+		rc = 0;
+	}
+	BUG_ON(rc);
 
+	memcpy(map_raw, map, sizeof(map));
 	e820.nr_map = 0;
+	xen_extra_mem_start = mem_end;
+	for (i = 0; i < memmap.nr_entries; i++) {
+		unsigned long long end;
+
+		/* Guard against non-page aligned E820 entries. */
+		if (map[i].type == E820_RAM)
+			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
+
+		end = map[i].addr + map[i].size;
+		if (map[i].type == E820_RAM && end > mem_end) {
+			/* RAM off the end - may be partially included */
+			u64 delta = min(map[i].size, end - mem_end);
+
+			map[i].size -= delta;
+			end -= delta;
+
+			extra_pages += PFN_DOWN(delta);
+			/*
+			 * Set RAM below 4GB that is not for us to be unusable.
+			 * This prevents "System RAM" address space from being
+			 * used as potential resource for I/O address (happens
+			 * when 'allocate_resource' is called).
+			 */
+			if (delta &&
+				(xen_initial_domain() && end < 0x100000000ULL))
+				e820_add_region(end, delta, E820_UNUSABLE);
+		}
 
-	e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
+		if (map[i].size > 0 && end > xen_extra_mem_start)
+			xen_extra_mem_start = end;
+
+		/* Add region if any remains */
+		if (map[i].size > 0)
+			e820_add_region(map[i].addr, map[i].size, map[i].type);
+	}
+	/* Align the balloon area so that max_low_pfn does not get set
+	 * to be at the _end_ of the PCI gap at the far end (fee01000).
+	 * Note that xen_extra_mem_start gets set in the loop above to be
+	 * past the last E820 region. */
+	if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
+		xen_extra_mem_start = (1ULL<<32);
 
 	/*
-	 * Even though this is normal, usable memory under Xen, reserve
-	 * ISA memory anyway because too many things think they can poke
-	 * about in there.
+	 * In domU, the ISA region is normal, usable memory, but we
+	 * reserve ISA memory anyway because too many things poke
+	 * about in there.
+	 *
+	 * In Dom0, the host E820 information can leave gaps in the
+	 * ISA range, which would cause us to release those pages.  To
+	 * avoid this, we unconditionally reserve them here.
 	 */
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
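Note: two details in the remapping loop above are easy to miss: the alignment guard trims a RAM entry so that it ends on a page boundary, and the clipping logic moves any RAM beyond the domain's nr_pages allocation into the balloon ("extra") area. A worked example with invented addresses:

	/* clip.c - worked example of the page-alignment guard and the
	 * off-the-end clipping in xen_memory_setup().  All numbers
	 * here are made up. */
	#include <stdio.h>

	#define TOY_PAGE_SIZE 4096ULL

	int main(void)
	{
		unsigned long long addr    = 0x100000ULL;	/* entry start  */
		unsigned long long size    = 0x7f0234ULL;	/* unaligned    */
		unsigned long long mem_end = 0x400000ULL;	/* nr_pages end */
		unsigned long long end, delta;

		/* Trim the entry so it ends on a page boundary, mirroring
		 * map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE; */
		size -= (size + addr) % TOY_PAGE_SIZE;
		end = addr + size;
		printf("aligned end: %#llx\n", end);		/* 0x8f0000 */

		/* RAM past mem_end is clipped; the tail is counted in
		 * pages and later offered to xen_add_extra_mem(). */
		if (end > mem_end) {
			delta = end - mem_end;
			if (delta > size)
				delta = size;
			size -= delta;
			end  -= delta;
			printf("kept [%#llx,%#llx), %llu pages become "
			       "extra memory\n", addr, end,
			       delta / TOY_PAGE_SIZE);
		}
		return 0;
	}

Here the entry shrinks to [0x100000, 0x400000) and 1264 pages are carried over in extra_pages instead of being dropped.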
@@ -129,29 +287,43 @@ char * __init xen_memory_setup(void)
 	 *  - xen_start_info
 	 * See comment above "struct start_info" in <xen/interface/xen.h>
 	 */
-	reserve_early(__pa(xen_start_info->mfn_list),
-		      __pa(xen_start_info->pt_base),
-		      "XEN START INFO");
+	memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
+				   __pa(xen_start_info->pt_base),
+				   "XEN START INFO");
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-	xen_return_unused_memory(xen_start_info->nr_pages, &e820);
+	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
 
-	return "Xen";
-}
+	/*
+	 * Clamp the amount of extra memory to an EXTRA_MEM_RATIO
+	 * factor of the base size.  On non-highmem systems, the base
+	 * size is the full initial memory allocation; on highmem it
+	 * is limited to the max size of lowmem, so that it doesn't
+	 * get completely filled.
+	 *
+	 * In principle there could be a problem in lowmem systems if
+	 * the initial memory is also very large with respect to
+	 * lowmem, but we won't try to deal with that here.
+	 */
+	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+			  max_pfn + extra_pages);
 
-static void xen_idle(void)
-{
-	local_irq_disable();
-
-	if (need_resched())
-		local_irq_enable();
-	else {
-		current_thread_info()->status &= ~TS_POLLING;
-		smp_mb__after_clear_bit();
-		safe_halt();
-		current_thread_info()->status |= TS_POLLING;
-	}
+	if (extra_limit >= max_pfn)
+		extra_pages = extra_limit - max_pfn;
+	else
+		extra_pages = 0;
+
+	xen_add_extra_mem(extra_pages);
+
+	/*
+	 * Set P2M for all non-RAM pages and E820 gaps to be identity
+	 * type PFNs.  We supply it with the non-sanitized version
+	 * of the E820.
+	 */
+	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
+	return "Xen";
 }
 
 /*
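Note: the clamp combines both sources of extra pages (pages returned to Xen plus RAM clipped past nr_pages) and bounds them by EXTRA_MEM_RATIO before handing them to xen_add_extra_mem(). Evaluating it for one invented configuration, a 512 MiB 32-bit domain offered 8 GiB of extra memory (the ~896 MiB lowmem figure for PFN_DOWN(MAXMEM) is an assumption):

	/* clamp.c - the extra-memory clamp, evaluated with made-up
	 * numbers.  PFN counts assume 4 KiB pages. */
	#include <stdio.h>

	#define EXTRA_MEM_RATIO 10UL

	static unsigned long min_ul(unsigned long a, unsigned long b)
	{
		return a < b ? a : b;
	}

	int main(void)
	{
		unsigned long max_pfn     = 131072UL;	/* 512 MiB base     */
		unsigned long extra_pages = 2097152UL;	/* 8 GiB on offer   */
		unsigned long maxmem_pfn  = 229376UL;	/* assumed ~896 MiB
							   PFN_DOWN(MAXMEM) */
		unsigned long extra_limit;

		extra_limit = min_ul(EXTRA_MEM_RATIO *
				     min_ul(max_pfn, maxmem_pfn),
				     max_pfn + extra_pages);

		if (extra_limit >= max_pfn)
			extra_pages = extra_limit - max_pfn;
		else
			extra_pages = 0;

		/* The 10x cap binds: 1310720 < 131072 + 2097152, so only
		 * 9x the base survives as balloonable extra memory. */
		printf("extra_pages = %lu (%lu MiB)\n",
		       extra_pages, extra_pages / 256UL);	/* 256 pages/MiB */
		return 0;
	}

So this 512 MiB domain keeps at most 4.5 GiB of balloon space, regardless of how much the hypervisor could supply.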
@@ -170,7 +342,7 @@ static void __init fiddle_vdso(void)
 #endif
 }
 
-static __cpuinit int register_callback(unsigned type, const void *func)
+static int __cpuinit register_callback(unsigned type, const void *func)
 {
 	struct callback_register callback = {
 		.type = type,
@@ -223,9 +395,6 @@ void __cpuinit xen_enable_syscall(void)
 
 void __init xen_arch_setup(void)
 {
-	struct physdev_set_iopl set_iopl;
-	int rc;
-
 	xen_panic_handler_init();
 
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
@@ -242,11 +411,6 @@ void __init xen_arch_setup(void)
 	xen_enable_sysenter();
 	xen_enable_syscall();
 
-	set_iopl.iopl = 1;
-	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-	if (rc != 0)
-		printk(KERN_INFO "physdev_op failed %d\n", rc);
-
 #ifdef CONFIG_ACPI
 	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
 		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
@@ -258,9 +422,12 @@ void __init xen_arch_setup(void)
 	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
 	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
 
-	pm_idle = xen_idle;
-
-	paravirt_disable_iospace();
+	/* Set up idle, making sure it calls safe_halt() pvop */
+#ifdef CONFIG_X86_32
+	boot_cpu_data.hlt_works_ok = 1;
+#endif
+	pm_idle = default_idle;
+	boot_option_idle_override = IDLE_HALT;
 
 	fiddle_vdso();
 }