diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-21 21:03:36 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-23 01:49:54 -0400 |
commit | 47436aa4ad054c1c7c8231618e86ebd9305308dc (patch) | |
tree | a9ba6e0521f9116442144a86e781a3164ec86094 | |
parent | c18acd73ffc209def08003a1927473096f66c5ad (diff) |
Boot with virtual == physical to get closer to native Linux.
1) This allows us to get a lot closer to booting bzImages.
2) It means we don't have to know page_offset.
3) The Guest needs to modify the boot pagetables to create the
PAGE_OFFSET mapping before jumping to C code.
4) guest_pa() walks the page tables rather than using page_offset.
5) We don't use page_offset to figure out whether to emulate: it was
always kinda questionable, and won't work for instructions done
before remapping (bzImage unpacking in particular).
6) We still want the kernel address for tlb flushing: have the initial
hypercall give us that, too.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r-- | Documentation/lguest/lguest.c | 134 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets_32.c | 1 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 7 | ||||
-rw-r--r-- | arch/x86/lguest/i386_head.S | 41 | ||||
-rw-r--r-- | drivers/lguest/hypercalls.c | 8 | ||||
-rw-r--r-- | drivers/lguest/interrupts_and_traps.c | 13 | ||||
-rw-r--r-- | drivers/lguest/lg.h | 8 | ||||
-rw-r--r-- | drivers/lguest/lguest_user.c | 11 | ||||
-rw-r--r-- | drivers/lguest/page_tables.c | 47 | ||||
-rw-r--r-- | drivers/lguest/x86/core.c | 7 | ||||
-rw-r--r-- | include/asm-x86/lguest_hcall.h | 7 | ||||
-rw-r--r-- | include/linux/lguest.h | 5 |
12 files changed, 141 insertions, 148 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 4950b03514e6..32c2eaf94c4d 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -178,19 +178,16 @@ static void *get_pages(unsigned int num) | |||
178 | /* To find out where to start we look for the magic Guest string, which marks | 178 | /* To find out where to start we look for the magic Guest string, which marks |
179 | * the code we see in lguest_asm.S. This is a hack which we are currently | 179 | * the code we see in lguest_asm.S. This is a hack which we are currently |
180 | * plotting to replace with the normal Linux entry point. */ | 180 | * plotting to replace with the normal Linux entry point. */ |
181 | static unsigned long entry_point(const void *start, const void *end, | 181 | static unsigned long entry_point(const void *start, const void *end) |
182 | unsigned long page_offset) | ||
183 | { | 182 | { |
184 | const void *p; | 183 | const void *p; |
185 | 184 | ||
186 | /* The scan gives us the physical starting address. We want the | 185 | /* The scan gives us the physical starting address. We boot with |
187 | * virtual address in this case, and fortunately, we already figured | 186 | * pagetables set up with virtual and physical the same, so that's |
188 | * out the physical-virtual difference and passed it here in | 187 | * OK. */ |
189 | * "page_offset". */ | ||
190 | for (p = start; p < end; p++) | 188 | for (p = start; p < end; p++) |
191 | if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0) | 189 | if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0) |
192 | return to_guest_phys(p + strlen("GenuineLguest")) | 190 | return to_guest_phys(p + strlen("GenuineLguest")); |
193 | + page_offset; | ||
194 | 191 | ||
195 | errx(1, "Is this image a genuine lguest?"); | 192 | errx(1, "Is this image a genuine lguest?"); |
196 | } | 193 | } |
@@ -224,14 +221,11 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) | |||
224 | * by all modern binaries on Linux including the kernel. | 221 | * by all modern binaries on Linux including the kernel. |
225 | * | 222 | * |
226 | * The ELF headers give *two* addresses: a physical address, and a virtual | 223 | * The ELF headers give *two* addresses: a physical address, and a virtual |
227 | * address. The Guest kernel expects to be placed in memory at the physical | 224 | * address. We use the physical address; the Guest will map itself to the |
228 | * address, and the page tables set up so it will correspond to that virtual | 225 | * virtual address. |
229 | * address. We return the difference between the virtual and physical | ||
230 | * addresses in the "page_offset" pointer. | ||
231 | * | 226 | * |
232 | * We return the starting address. */ | 227 | * We return the starting address. */ |
233 | static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, | 228 | static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) |
234 | unsigned long *page_offset) | ||
235 | { | 229 | { |
236 | void *start = (void *)-1, *end = NULL; | 230 | void *start = (void *)-1, *end = NULL; |
237 | Elf32_Phdr phdr[ehdr->e_phnum]; | 231 | Elf32_Phdr phdr[ehdr->e_phnum]; |
@@ -255,9 +249,6 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, | |||
255 | if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) | 249 | if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) |
256 | err(1, "Reading program headers"); | 250 | err(1, "Reading program headers"); |
257 | 251 | ||
258 | /* We don't know page_offset yet. */ | ||
259 | *page_offset = 0; | ||
260 | |||
261 | /* Try all the headers: there are usually only three. A read-only one, | 252 | /* Try all the headers: there are usually only three. A read-only one, |
262 | * a read-write one, and a "note" section which isn't loadable. */ | 253 | * a read-write one, and a "note" section which isn't loadable. */ |
263 | for (i = 0; i < ehdr->e_phnum; i++) { | 254 | for (i = 0; i < ehdr->e_phnum; i++) { |
@@ -268,14 +259,6 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, | |||
268 | verbose("Section %i: size %i addr %p\n", | 259 | verbose("Section %i: size %i addr %p\n", |
269 | i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); | 260 | i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); |
270 | 261 | ||
271 | /* We expect a simple linear address space: every segment must | ||
272 | * have the same difference between virtual (p_vaddr) and | ||
273 | * physical (p_paddr) address. */ | ||
274 | if (!*page_offset) | ||
275 | *page_offset = phdr[i].p_vaddr - phdr[i].p_paddr; | ||
276 | else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr) | ||
277 | errx(1, "Page offset of section %i different", i); | ||
278 | |||
279 | /* We track the first and last address we mapped, so we can | 262 | /* We track the first and last address we mapped, so we can |
280 | * tell entry_point() where to scan. */ | 263 | * tell entry_point() where to scan. */ |
281 | if (from_guest_phys(phdr[i].p_paddr) < start) | 264 | if (from_guest_phys(phdr[i].p_paddr) < start) |
@@ -288,50 +271,13 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, | |||
288 | phdr[i].p_offset, phdr[i].p_filesz); | 271 | phdr[i].p_offset, phdr[i].p_filesz); |
289 | } | 272 | } |
290 | 273 | ||
291 | return entry_point(start, end, *page_offset); | 274 | return entry_point(start, end); |
292 | } | ||
293 | |||
294 | /*L:170 Prepare to be SHOCKED and AMAZED. And possibly a trifle nauseated. | ||
295 | * | ||
296 | * We know that CONFIG_PAGE_OFFSET sets what virtual address the kernel expects | ||
297 | * to be. We don't know what that option was, but we can figure it out | ||
298 | * approximately by looking at the addresses in the code. I chose the common | ||
299 | * case of reading a memory location into the %eax register: | ||
300 | * | ||
301 | * movl <some-address>, %eax | ||
302 | * | ||
303 | * This gets encoded as five bytes: "0xA1 <4-byte-address>". For example, | ||
304 | * "0xA1 0x18 0x60 0x47 0xC0" reads the address 0xC0476018 into %eax. | ||
305 | * | ||
306 | * In this example can guess that the kernel was compiled with | ||
307 | * CONFIG_PAGE_OFFSET set to 0xC0000000 (it's always a round number). If the | ||
308 | * kernel were larger than 16MB, we might see 0xC1 addresses show up, but our | ||
309 | * kernel isn't that bloated yet. | ||
310 | * | ||
311 | * Unfortunately, x86 has variable-length instructions, so finding this | ||
312 | * particular instruction properly involves writing a disassembler. Instead, | ||
313 | * we rely on statistics. We look for "0xA1" and tally the different bytes | ||
314 | * which occur 4 bytes later (the "0xC0" in our example above). When one of | ||
315 | * those bytes appears three times, we can be reasonably confident that it | ||
316 | * forms the start of CONFIG_PAGE_OFFSET. | ||
317 | * | ||
318 | * This is amazingly reliable. */ | ||
319 | static unsigned long intuit_page_offset(unsigned char *img, unsigned long len) | ||
320 | { | ||
321 | unsigned int i, possibilities[256] = { 0 }; | ||
322 | |||
323 | for (i = 0; i + 4 < len; i++) { | ||
324 | /* mov 0xXXXXXXXX,%eax */ | ||
325 | if (img[i] == 0xA1 && ++possibilities[img[i+4]] > 3) | ||
326 | return (unsigned long)img[i+4] << 24; | ||
327 | } | ||
328 | errx(1, "could not determine page offset"); | ||
329 | } | 275 | } |
330 | 276 | ||
331 | /*L:160 Unfortunately the entire ELF image isn't compressed: the segments | 277 | /*L:160 Unfortunately the entire ELF image isn't compressed: the segments |
332 | * which need loading are extracted and compressed raw. This denies us the | 278 | * which need loading are extracted and compressed raw. This denies us the |
333 | * information we need to make a fully-general loader. */ | 279 | * information we need to make a fully-general loader. */ |
334 | static unsigned long unpack_bzimage(int fd, unsigned long *page_offset) | 280 | static unsigned long unpack_bzimage(int fd) |
335 | { | 281 | { |
336 | gzFile f; | 282 | gzFile f; |
337 | int ret, len = 0; | 283 | int ret, len = 0; |
@@ -352,12 +298,7 @@ static unsigned long unpack_bzimage(int fd, unsigned long *page_offset) | |||
352 | 298 | ||
353 | verbose("Unpacked size %i addr %p\n", len, img); | 299 | verbose("Unpacked size %i addr %p\n", len, img); |
354 | 300 | ||
355 | /* Without the ELF header, we can't tell virtual-physical gap. This is | 301 | return entry_point(img, img + len); |
356 | * CONFIG_PAGE_OFFSET, and people do actually change it. Fortunately, | ||
357 | * I have a clever way of figuring it out from the code itself. */ | ||
358 | *page_offset = intuit_page_offset(img, len); | ||
359 | |||
360 | return entry_point(img, img + len, *page_offset); | ||
361 | } | 302 | } |
362 | 303 | ||
363 | /*L:150 A bzImage, unlike an ELF file, is not meant to be loaded. You're | 304 | /*L:150 A bzImage, unlike an ELF file, is not meant to be loaded. You're |
@@ -368,7 +309,7 @@ static unsigned long unpack_bzimage(int fd, unsigned long *page_offset) | |||
368 | * The bzImage is formed by putting the decompressing code in front of the | 309 | * The bzImage is formed by putting the decompressing code in front of the |
369 | * compressed kernel code. So we can simple scan through it looking for the | 310 | * compressed kernel code. So we can simple scan through it looking for the |
370 | * first "gzip" header, and start decompressing from there. */ | 311 | * first "gzip" header, and start decompressing from there. */ |
371 | static unsigned long load_bzimage(int fd, unsigned long *page_offset) | 312 | static unsigned long load_bzimage(int fd) |
372 | { | 313 | { |
373 | unsigned char c; | 314 | unsigned char c; |
374 | int state = 0; | 315 | int state = 0; |
@@ -396,7 +337,7 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset) | |||
396 | if (c != 0x03) | 337 | if (c != 0x03) |
397 | state = -1; | 338 | state = -1; |
398 | else | 339 | else |
399 | return unpack_bzimage(fd, page_offset); | 340 | return unpack_bzimage(fd); |
400 | } | 341 | } |
401 | } | 342 | } |
402 | errx(1, "Could not find kernel in bzImage"); | 343 | errx(1, "Could not find kernel in bzImage"); |
@@ -405,7 +346,7 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset) | |||
405 | /*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels | 346 | /*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels |
406 | * come wrapped up in the self-decompressing "bzImage" format. With some funky | 347 | * come wrapped up in the self-decompressing "bzImage" format. With some funky |
407 | * coding, we can load those, too. */ | 348 | * coding, we can load those, too. */ |
408 | static unsigned long load_kernel(int fd, unsigned long *page_offset) | 349 | static unsigned long load_kernel(int fd) |
409 | { | 350 | { |
410 | Elf32_Ehdr hdr; | 351 | Elf32_Ehdr hdr; |
411 | 352 | ||
@@ -415,10 +356,10 @@ static unsigned long load_kernel(int fd, unsigned long *page_offset) | |||
415 | 356 | ||
416 | /* If it's an ELF file, it starts with "\177ELF" */ | 357 | /* If it's an ELF file, it starts with "\177ELF" */ |
417 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) | 358 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) |
418 | return map_elf(fd, &hdr, page_offset); | 359 | return map_elf(fd, &hdr); |
419 | 360 | ||
420 | /* Otherwise we assume it's a bzImage, and try to unpack it */ | 361 | /* Otherwise we assume it's a bzImage, and try to unpack it */ |
421 | return load_bzimage(fd, page_offset); | 362 | return load_bzimage(fd); |
422 | } | 363 | } |
423 | 364 | ||
424 | /* This is a trivial little helper to align pages. Andi Kleen hated it because | 365 | /* This is a trivial little helper to align pages. Andi Kleen hated it because |
@@ -463,27 +404,20 @@ static unsigned long load_initrd(const char *name, unsigned long mem) | |||
463 | return len; | 404 | return len; |
464 | } | 405 | } |
465 | 406 | ||
466 | /* Once we know the address the Guest kernel expects, we can construct simple | 407 | /* Once we know how much memory we have, we can construct simple linear page |
467 | * linear page tables for all of memory which will get the Guest far enough | 408 | * tables which set virtual == physical which will get the Guest far enough |
468 | * into the boot to create its own. | 409 | * into the boot to create its own. |
469 | * | 410 | * |
470 | * We lay them out of the way, just below the initrd (which is why we need to | 411 | * We lay them out of the way, just below the initrd (which is why we need to |
471 | * know its size). */ | 412 | * know its size). */ |
472 | static unsigned long setup_pagetables(unsigned long mem, | 413 | static unsigned long setup_pagetables(unsigned long mem, |
473 | unsigned long initrd_size, | 414 | unsigned long initrd_size) |
474 | unsigned long page_offset) | ||
475 | { | 415 | { |
476 | unsigned long *pgdir, *linear; | 416 | unsigned long *pgdir, *linear; |
477 | unsigned int mapped_pages, i, linear_pages; | 417 | unsigned int mapped_pages, i, linear_pages; |
478 | unsigned int ptes_per_page = getpagesize()/sizeof(void *); | 418 | unsigned int ptes_per_page = getpagesize()/sizeof(void *); |
479 | 419 | ||
480 | /* Ideally we map all physical memory starting at page_offset. | 420 | mapped_pages = mem/getpagesize(); |
481 | * However, if page_offset is 0xC0000000 we can only map 1G of physical | ||
482 | * (0xC0000000 + 1G overflows). */ | ||
483 | if (mem <= -page_offset) | ||
484 | mapped_pages = mem/getpagesize(); | ||
485 | else | ||
486 | mapped_pages = -page_offset/getpagesize(); | ||
487 | 421 | ||
488 | /* Each PTE page can map ptes_per_page pages: how many do we need? */ | 422 | /* Each PTE page can map ptes_per_page pages: how many do we need? */ |
489 | linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; | 423 | linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; |
@@ -500,11 +434,9 @@ static unsigned long setup_pagetables(unsigned long mem, | |||
500 | for (i = 0; i < mapped_pages; i++) | 434 | for (i = 0; i < mapped_pages; i++) |
501 | linear[i] = ((i * getpagesize()) | PAGE_PRESENT); | 435 | linear[i] = ((i * getpagesize()) | PAGE_PRESENT); |
502 | 436 | ||
503 | /* The top level points to the linear page table pages above. The | 437 | /* The top level points to the linear page table pages above. */ |
504 | * entry representing page_offset points to the first one, and they | ||
505 | * continue from there. */ | ||
506 | for (i = 0; i < mapped_pages; i += ptes_per_page) { | 438 | for (i = 0; i < mapped_pages; i += ptes_per_page) { |
507 | pgdir[(i + page_offset/getpagesize())/ptes_per_page] | 439 | pgdir[i/ptes_per_page] |
508 | = ((to_guest_phys(linear) + i*sizeof(void *)) | 440 | = ((to_guest_phys(linear) + i*sizeof(void *)) |
509 | | PAGE_PRESENT); | 441 | | PAGE_PRESENT); |
510 | } | 442 | } |
@@ -535,15 +467,12 @@ static void concat(char *dst, char *args[]) | |||
535 | /* This is where we actually tell the kernel to initialize the Guest. We saw | 467 | /* This is where we actually tell the kernel to initialize the Guest. We saw |
536 | * the arguments it expects when we looked at initialize() in lguest_user.c: | 468 | * the arguments it expects when we looked at initialize() in lguest_user.c: |
537 | * the base of guest "physical" memory, the top physical page to allow, the | 469 | * the base of guest "physical" memory, the top physical page to allow, the |
538 | * top level pagetable, the entry point and the page_offset constant for the | 470 | * top level pagetable and the entry point for the Guest. */ |
539 | * Guest. */ | 471 | static int tell_kernel(unsigned long pgdir, unsigned long start) |
540 | static int tell_kernel(unsigned long pgdir, unsigned long start, | ||
541 | unsigned long page_offset) | ||
542 | { | 472 | { |
543 | unsigned long args[] = { LHREQ_INITIALIZE, | 473 | unsigned long args[] = { LHREQ_INITIALIZE, |
544 | (unsigned long)guest_base, | 474 | (unsigned long)guest_base, |
545 | guest_limit / getpagesize(), | 475 | guest_limit / getpagesize(), pgdir, start }; |
546 | pgdir, start, page_offset }; | ||
547 | int fd; | 476 | int fd; |
548 | 477 | ||
549 | verbose("Guest: %p - %p (%#lx)\n", | 478 | verbose("Guest: %p - %p (%#lx)\n", |
@@ -1424,9 +1353,9 @@ static void usage(void) | |||
1424 | /*L:105 The main routine is where the real work begins: */ | 1353 | /*L:105 The main routine is where the real work begins: */ |
1425 | int main(int argc, char *argv[]) | 1354 | int main(int argc, char *argv[]) |
1426 | { | 1355 | { |
1427 | /* Memory, top-level pagetable, code startpoint, PAGE_OFFSET and size | 1356 | /* Memory, top-level pagetable, code startpoint and size of the |
1428 | * of the (optional) initrd. */ | 1357 | * (optional) initrd. */ |
1429 | unsigned long mem = 0, pgdir, start, page_offset, initrd_size = 0; | 1358 | unsigned long mem = 0, pgdir, start, initrd_size = 0; |
1430 | /* A temporary and the /dev/lguest file descriptor. */ | 1359 | /* A temporary and the /dev/lguest file descriptor. */ |
1431 | int i, c, lguest_fd; | 1360 | int i, c, lguest_fd; |
1432 | /* The list of Guest devices, based on command line arguments. */ | 1361 | /* The list of Guest devices, based on command line arguments. */ |
@@ -1500,8 +1429,7 @@ int main(int argc, char *argv[]) | |||
1500 | setup_console(&device_list); | 1429 | setup_console(&device_list); |
1501 | 1430 | ||
1502 | /* Now we load the kernel */ | 1431 | /* Now we load the kernel */ |
1503 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY), | 1432 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); |
1504 | &page_offset); | ||
1505 | 1433 | ||
1506 | /* Boot information is stashed at physical address 0 */ | 1434 | /* Boot information is stashed at physical address 0 */ |
1507 | boot = from_guest_phys(0); | 1435 | boot = from_guest_phys(0); |
@@ -1518,7 +1446,7 @@ int main(int argc, char *argv[]) | |||
1518 | } | 1446 | } |
1519 | 1447 | ||
1520 | /* Set up the initial linear pagetables, starting below the initrd. */ | 1448 | /* Set up the initial linear pagetables, starting below the initrd. */ |
1521 | pgdir = setup_pagetables(mem, initrd_size, page_offset); | 1449 | pgdir = setup_pagetables(mem, initrd_size); |
1522 | 1450 | ||
1523 | /* The Linux boot header contains an "E820" memory map: ours is a | 1451 | /* The Linux boot header contains an "E820" memory map: ours is a |
1524 | * simple, single region. */ | 1452 | * simple, single region. */ |
@@ -1535,7 +1463,7 @@ int main(int argc, char *argv[]) | |||
1535 | 1463 | ||
1536 | /* We tell the kernel to initialize the Guest: this returns the open | 1464 | /* We tell the kernel to initialize the Guest: this returns the open |
1537 | * /dev/lguest file descriptor. */ | 1465 | * /dev/lguest file descriptor. */ |
1538 | lguest_fd = tell_kernel(pgdir, start, page_offset); | 1466 | lguest_fd = tell_kernel(pgdir, start); |
1539 | 1467 | ||
1540 | /* We fork off a child process, which wakes the Launcher whenever one | 1468 | /* We fork off a child process, which wakes the Launcher whenever one |
1541 | * of the input file descriptors needs attention. Otherwise we would | 1469 | * of the input file descriptors needs attention. Otherwise we would |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index f8764716b0c0..0e45981b2dd7 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -136,6 +136,7 @@ void foo(void) | |||
136 | #ifdef CONFIG_LGUEST_GUEST | 136 | #ifdef CONFIG_LGUEST_GUEST |
137 | BLANK(); | 137 | BLANK(); |
138 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); | 138 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); |
139 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); | ||
139 | OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); | 140 | OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); |
140 | OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); | 141 | OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); |
141 | OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3); | 142 | OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3); |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 3a06b51c98ad..090f30cbf24c 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -86,6 +86,7 @@ struct lguest_data lguest_data = { | |||
86 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, | 86 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, |
87 | .noirq_start = (u32)lguest_noirq_start, | 87 | .noirq_start = (u32)lguest_noirq_start, |
88 | .noirq_end = (u32)lguest_noirq_end, | 88 | .noirq_end = (u32)lguest_noirq_end, |
89 | .kernel_address = PAGE_OFFSET, | ||
89 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ | 90 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ |
90 | .syscall_vec = SYSCALL_VECTOR, | 91 | .syscall_vec = SYSCALL_VECTOR, |
91 | }; | 92 | }; |
@@ -1033,11 +1034,7 @@ __init void lguest_init(void *boot) | |||
1033 | 1034 | ||
1034 | /*G:070 Now we've seen all the paravirt_ops, we return to | 1035 | /*G:070 Now we've seen all the paravirt_ops, we return to |
1035 | * lguest_init() where the rest of the fairly chaotic boot setup | 1036 | * lguest_init() where the rest of the fairly chaotic boot setup |
1036 | * occurs. | 1037 | * occurs. */ |
1037 | * | ||
1038 | * The Host expects our first hypercall to tell it where our "struct | ||
1039 | * lguest_data" is, so we do that first. */ | ||
1040 | hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0); | ||
1041 | 1038 | ||
1042 | /* The native boot code sets up initial page tables immediately after | 1039 | /* The native boot code sets up initial page tables immediately after |
1043 | * the kernel itself, and sets init_pg_tables_end so they're not | 1040 | * the kernel itself, and sets init_pg_tables_end so they're not |
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 6d7a74f07c41..ba4282eba5bf 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/linkage.h> | 1 | #include <linux/linkage.h> |
2 | #include <linux/lguest.h> | 2 | #include <linux/lguest.h> |
3 | #include <asm/lguest_hcall.h> | ||
3 | #include <asm/asm-offsets.h> | 4 | #include <asm/asm-offsets.h> |
4 | #include <asm/thread_info.h> | 5 | #include <asm/thread_info.h> |
5 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
@@ -8,18 +9,48 @@ | |||
8 | * looks for. The plan is that the Linux boot protocol will be extended with a | 9 | * looks for. The plan is that the Linux boot protocol will be extended with a |
9 | * "platform type" field which will guide us here from the normal entry point, | 10 | * "platform type" field which will guide us here from the normal entry point, |
10 | * but for the moment this suffices. The normal boot code uses %esi for the | 11 | * but for the moment this suffices. The normal boot code uses %esi for the |
11 | * boot header, so we do too. We convert it to a virtual address by adding | 12 | * boot header, so we do too. |
12 | * PAGE_OFFSET, and hand it to lguest_init() as its argument (ie. %eax). | 13 | * |
14 | * WARNING: be very careful here! We're running at addresses equal to physical | ||
15 | * addesses (around 0), not above PAGE_OFFSET as most code expectes | ||
16 | * (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any | ||
17 | * data. | ||
13 | * | 18 | * |
14 | * The .section line puts this code in .init.text so it will be discarded after | 19 | * The .section line puts this code in .init.text so it will be discarded after |
15 | * boot. */ | 20 | * boot. */ |
16 | .section .init.text, "ax", @progbits | 21 | .section .init.text, "ax", @progbits |
17 | .ascii "GenuineLguest" | 22 | .ascii "GenuineLguest" |
18 | /* Set up initial stack. */ | 23 | /* Make initial hypercall now, so we can set up the pagetables. */ |
19 | movl $(init_thread_union+THREAD_SIZE),%esp | 24 | movl $LHCALL_LGUEST_INIT, %eax |
25 | movl $lguest_data - __PAGE_OFFSET, %edx | ||
26 | int $LGUEST_TRAP_ENTRY | ||
27 | |||
28 | /* Set up boot information pointer to hand to lguest_init(): it wants | ||
29 | * a virtual address. */ | ||
20 | movl %esi, %eax | 30 | movl %esi, %eax |
21 | addl $__PAGE_OFFSET, %eax | 31 | addl $__PAGE_OFFSET, %eax |
22 | jmp lguest_init | 32 | |
33 | /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl | ||
34 | * instruction uses %esi, so we needed to save it above. */ | ||
35 | movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi | ||
36 | |||
37 | /* Copy first 32 entries of page directory to __PAGE_OFFSET entries. | ||
38 | * This means the first 128M of kernel memory will be mapped at | ||
39 | * PAGE_OFFSET where the kernel expects to run. This will get it far | ||
40 | * enough through boot to switch to its own pagetables. */ | ||
41 | movl $32, %ecx | ||
42 | movl %esi, %edi | ||
43 | addl $((__PAGE_OFFSET >> 22) * 4), %edi | ||
44 | rep | ||
45 | movsl | ||
46 | |||
47 | /* Set up the initial stack so we can run C code. */ | ||
48 | movl $(init_thread_union+THREAD_SIZE),%esp | ||
49 | |||
50 | |||
51 | /* Jumps are relative, and we're running __PAGE_OFFSET too low at the | ||
52 | * moment. */ | ||
53 | jmp lguest_init+__PAGE_OFFSET | ||
23 | 54 | ||
24 | /*G:055 We create a macro which puts the assembler code between lgstart_ and | 55 | /*G:055 We create a macro which puts the assembler code between lgstart_ and |
25 | * lgend_ markers. These templates are put in the .text section: they can't be | 56 | * lgend_ markers. These templates are put in the .text section: they can't be |
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 02d0ae268267..13b5f2f813de 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
@@ -181,15 +181,15 @@ static void initialize(struct lguest *lg) | |||
181 | /* The Guest tells us where we're not to deliver interrupts by putting | 181 | /* The Guest tells us where we're not to deliver interrupts by putting |
182 | * the range of addresses into "struct lguest_data". */ | 182 | * the range of addresses into "struct lguest_data". */ |
183 | if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) | 183 | if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) |
184 | || get_user(lg->noirq_end, &lg->lguest_data->noirq_end) | 184 | || get_user(lg->noirq_end, &lg->lguest_data->noirq_end)) |
185 | /* We tell the Guest that it can't use the top 4MB of virtual | ||
186 | * addresses used by the Switcher. */ | ||
187 | || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)) | ||
188 | kill_guest(lg, "bad guest page %p", lg->lguest_data); | 185 | kill_guest(lg, "bad guest page %p", lg->lguest_data); |
189 | 186 | ||
190 | /* We write the current time into the Guest's data page once now. */ | 187 | /* We write the current time into the Guest's data page once now. */ |
191 | write_timestamp(lg); | 188 | write_timestamp(lg); |
192 | 189 | ||
190 | /* page_tables.c will also do some setup. */ | ||
191 | page_table_guest_data_init(lg); | ||
192 | |||
193 | /* This is the one case where the above accesses might have been the | 193 | /* This is the one case where the above accesses might have been the |
194 | * first write to a Guest page. This may have caused a copy-on-write | 194 | * first write to a Guest page. This may have caused a copy-on-write |
195 | * fault, but the Guest might be referring to the old (read-only) | 195 | * fault, but the Guest might be referring to the old (read-only) |
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index a57d757eab6e..3271c0031a1b 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
@@ -62,8 +62,9 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) | |||
62 | * it). */ | 62 | * it). */ |
63 | static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) | 63 | static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) |
64 | { | 64 | { |
65 | unsigned long gstack; | 65 | unsigned long gstack, origstack; |
66 | u32 eflags, ss, irq_enable; | 66 | u32 eflags, ss, irq_enable; |
67 | unsigned long virtstack; | ||
67 | 68 | ||
68 | /* There are two cases for interrupts: one where the Guest is already | 69 | /* There are two cases for interrupts: one where the Guest is already |
69 | * in the kernel, and a more complex one where the Guest is in | 70 | * in the kernel, and a more complex one where the Guest is in |
@@ -71,8 +72,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) | |||
71 | if ((lg->regs->ss&0x3) != GUEST_PL) { | 72 | if ((lg->regs->ss&0x3) != GUEST_PL) { |
72 | /* The Guest told us their kernel stack with the SET_STACK | 73 | /* The Guest told us their kernel stack with the SET_STACK |
73 | * hypercall: both the virtual address and the segment */ | 74 | * hypercall: both the virtual address and the segment */ |
74 | gstack = guest_pa(lg, lg->esp1); | 75 | virtstack = lg->esp1; |
75 | ss = lg->ss1; | 76 | ss = lg->ss1; |
77 | |||
78 | origstack = gstack = guest_pa(lg, virtstack); | ||
76 | /* We push the old stack segment and pointer onto the new | 79 | /* We push the old stack segment and pointer onto the new |
77 | * stack: when the Guest does an "iret" back from the interrupt | 80 | * stack: when the Guest does an "iret" back from the interrupt |
78 | * handler the CPU will notice they're dropping privilege | 81 | * handler the CPU will notice they're dropping privilege |
@@ -81,8 +84,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) | |||
81 | push_guest_stack(lg, &gstack, lg->regs->esp); | 84 | push_guest_stack(lg, &gstack, lg->regs->esp); |
82 | } else { | 85 | } else { |
83 | /* We're staying on the same Guest (kernel) stack. */ | 86 | /* We're staying on the same Guest (kernel) stack. */ |
84 | gstack = guest_pa(lg, lg->regs->esp); | 87 | virtstack = lg->regs->esp; |
85 | ss = lg->regs->ss; | 88 | ss = lg->regs->ss; |
89 | |||
90 | origstack = gstack = guest_pa(lg, virtstack); | ||
86 | } | 91 | } |
87 | 92 | ||
88 | /* Remember that we never let the Guest actually disable interrupts, so | 93 | /* Remember that we never let the Guest actually disable interrupts, so |
@@ -108,7 +113,7 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) | |||
108 | /* Now we've pushed all the old state, we change the stack, the code | 113 | /* Now we've pushed all the old state, we change the stack, the code |
109 | * segment and the address to execute. */ | 114 | * segment and the address to execute. */ |
110 | lg->regs->ss = ss; | 115 | lg->regs->ss = ss; |
111 | lg->regs->esp = gstack + lg->page_offset; | 116 | lg->regs->esp = virtstack + (gstack - origstack); |
112 | lg->regs->cs = (__KERNEL_CS|GUEST_PL); | 117 | lg->regs->cs = (__KERNEL_CS|GUEST_PL); |
113 | lg->regs->eip = idt_address(lo, hi); | 118 | lg->regs->eip = idt_address(lo, hi); |
114 | 119 | ||
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 7408cebe995e..e4845d7f0688 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
@@ -63,7 +63,7 @@ struct lguest | |||
63 | /* This provides the offset to the base of guest-physical | 63 | /* This provides the offset to the base of guest-physical |
64 | * memory in the Launcher. */ | 64 | * memory in the Launcher. */ |
65 | void __user *mem_base; | 65 | void __user *mem_base; |
66 | u32 page_offset; | 66 | unsigned long kernel_address; |
67 | u32 cr2; | 67 | u32 cr2; |
68 | int halted; | 68 | int halted; |
69 | int ts; | 69 | int ts; |
@@ -165,6 +165,8 @@ void guest_set_pte(struct lguest *lg, unsigned long gpgdir, | |||
165 | void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); | 165 | void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); |
166 | int demand_page(struct lguest *info, unsigned long cr2, int errcode); | 166 | int demand_page(struct lguest *info, unsigned long cr2, int errcode); |
167 | void pin_page(struct lguest *lg, unsigned long vaddr); | 167 | void pin_page(struct lguest *lg, unsigned long vaddr); |
168 | unsigned long guest_pa(struct lguest *lg, unsigned long vaddr); | ||
169 | void page_table_guest_data_init(struct lguest *lg); | ||
168 | 170 | ||
169 | /* <arch>/core.c: */ | 171 | /* <arch>/core.c: */ |
170 | void lguest_arch_host_init(void); | 172 | void lguest_arch_host_init(void); |
@@ -229,9 +231,5 @@ do { \ | |||
229 | } while(0) | 231 | } while(0) |
230 | /* (End of aside) :*/ | 232 | /* (End of aside) :*/ |
231 | 233 | ||
232 | static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) | ||
233 | { | ||
234 | return vaddr - lg->page_offset; | ||
235 | } | ||
236 | #endif /* __ASSEMBLY__ */ | 234 | #endif /* __ASSEMBLY__ */ |
237 | #endif /* _LGUEST_H */ | 235 | #endif /* _LGUEST_H */ |
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index b184652e45d7..61b177e1e649 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -111,7 +111,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
111 | return run_guest(lg, (unsigned long __user *)user); | 111 | return run_guest(lg, (unsigned long __user *)user); |
112 | } | 112 | } |
113 | 113 | ||
114 | /*L:020 The initialization write supplies 5 pointer sized (32 or 64 bit) | 114 | /*L:020 The initialization write supplies 4 pointer sized (32 or 64 bit) |
115 | * values (in addition to the LHREQ_INITIALIZE value). These are: | 115 | * values (in addition to the LHREQ_INITIALIZE value). These are: |
116 | * | 116 | * |
117 | * base: The start of the Guest-physical memory inside the Launcher memory. | 117 | * base: The start of the Guest-physical memory inside the Launcher memory. |
@@ -124,12 +124,6 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
124 | * pagetables (which are set up by the Launcher). | 124 | * pagetables (which are set up by the Launcher). |
125 | * | 125 | * |
126 | * start: The first instruction to execute ("eip" in x86-speak). | 126 | * start: The first instruction to execute ("eip" in x86-speak). |
127 | * | ||
128 | * page_offset: The PAGE_OFFSET constant in the Guest kernel. We should | ||
129 | * probably wean the code off this, but it's a very useful constant! Any | ||
130 | * address above this is within the Guest kernel, and any kernel address can | ||
131 | * be quickly converted from physical to virtual by adding PAGE_OFFSET. It's | ||
132 | * 0xC0000000 (3G) by default, but it's configurable at kernel build time. | ||
133 | */ | 127 | */ |
134 | static int initialize(struct file *file, const unsigned long __user *input) | 128 | static int initialize(struct file *file, const unsigned long __user *input) |
135 | { | 129 | { |
@@ -137,7 +131,7 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
137 | * Guest. */ | 131 | * Guest. */ |
138 | struct lguest *lg; | 132 | struct lguest *lg; |
139 | int err; | 133 | int err; |
140 | unsigned long args[5]; | 134 | unsigned long args[4]; |
141 | 135 | ||
142 | /* We grab the Big Lguest lock, which protects against multiple | 136 | /* We grab the Big Lguest lock, which protects against multiple |
143 | * simultaneous initializations. */ | 137 | * simultaneous initializations. */ |
@@ -162,7 +156,6 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
162 | /* Populate the easy fields of our "struct lguest" */ | 156 | /* Populate the easy fields of our "struct lguest" */ |
163 | lg->mem_base = (void __user *)(long)args[0]; | 157 | lg->mem_base = (void __user *)(long)args[0]; |
164 | lg->pfn_limit = args[1]; | 158 | lg->pfn_limit = args[1]; |
165 | lg->page_offset = args[4]; | ||
166 | 159 | ||
167 | /* We need a complete page for the Guest registers: they are accessible | 160 | /* We need a complete page for the Guest registers: they are accessible |
168 | * to the Guest and we can only grant it access to whole pages. */ | 161 | * to the Guest and we can only grant it access to whole pages. */ |
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index bfe3650b28d6..fe3c7575647b 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/random.h> | 13 | #include <linux/random.h> |
14 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
15 | #include <asm/tlbflush.h> | 15 | #include <asm/tlbflush.h> |
16 | #include <asm/uaccess.h> | ||
16 | #include "lg.h" | 17 | #include "lg.h" |
17 | 18 | ||
18 | /*M:008 We hold reference to pages, which prevents them from being swapped. | 19 | /*M:008 We hold reference to pages, which prevents them from being swapped. |
@@ -345,7 +346,7 @@ static void flush_user_mappings(struct lguest *lg, int idx) | |||
345 | { | 346 | { |
346 | unsigned int i; | 347 | unsigned int i; |
347 | /* Release every pgd entry up to the kernel's address. */ | 348 | /* Release every pgd entry up to the kernel's address. */ |
348 | for (i = 0; i < pgd_index(lg->page_offset); i++) | 349 | for (i = 0; i < pgd_index(lg->kernel_address); i++) |
349 | release_pgd(lg, lg->pgdirs[idx].pgdir + i); | 350 | release_pgd(lg, lg->pgdirs[idx].pgdir + i); |
350 | } | 351 | } |
351 | 352 | ||
@@ -358,6 +359,25 @@ void guest_pagetable_flush_user(struct lguest *lg) | |||
358 | } | 359 | } |
359 | /*:*/ | 360 | /*:*/ |
360 | 361 | ||
362 | /* We walk down the guest page tables to get a guest-physical address */ | ||
363 | unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) | ||
364 | { | ||
365 | pgd_t gpgd; | ||
366 | pte_t gpte; | ||
367 | |||
368 | /* First step: get the top-level Guest page table entry. */ | ||
369 | gpgd = __pgd(lgread_u32(lg, gpgd_addr(lg, vaddr))); | ||
370 | /* Toplevel not present? We can't map it in. */ | ||
371 | if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) | ||
372 | kill_guest(lg, "Bad address %#lx", vaddr); | ||
373 | |||
374 | gpte = __pte(lgread_u32(lg, gpte_addr(lg, gpgd, vaddr))); | ||
375 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) | ||
376 | kill_guest(lg, "Bad address %#lx", vaddr); | ||
377 | |||
378 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); | ||
379 | } | ||
380 | |||
361 | /* We keep several page tables. This is a simple routine to find the page | 381 | /* We keep several page tables. This is a simple routine to find the page |
362 | * table (if any) corresponding to this top-level address the Guest has given | 382 | * table (if any) corresponding to this top-level address the Guest has given |
363 | * us. */ | 383 | * us. */ |
@@ -500,7 +520,7 @@ void guest_set_pte(struct lguest *lg, | |||
500 | { | 520 | { |
501 | /* Kernel mappings must be changed on all top levels. Slow, but | 521 | /* Kernel mappings must be changed on all top levels. Slow, but |
502 | * doesn't happen often. */ | 522 | * doesn't happen often. */ |
503 | if (vaddr >= lg->page_offset) { | 523 | if (vaddr >= lg->kernel_address) { |
504 | unsigned int i; | 524 | unsigned int i; |
505 | for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) | 525 | for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) |
506 | if (lg->pgdirs[i].pgdir) | 526 | if (lg->pgdirs[i].pgdir) |
@@ -550,11 +570,6 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx) | |||
550 | * its first page table is. We set some things up here: */ | 570 | * its first page table is. We set some things up here: */ |
551 | int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) | 571 | int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) |
552 | { | 572 | { |
553 | /* In flush_user_mappings() we loop from 0 to | ||
554 | * "pgd_index(lg->page_offset)". This assumes it won't hit | ||
555 | * the Switcher mappings, so check that now. */ | ||
556 | if (pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX) | ||
557 | return -EINVAL; | ||
558 | /* We start on the first shadow page table, and give it a blank PGD | 573 | /* We start on the first shadow page table, and give it a blank PGD |
559 | * page. */ | 574 | * page. */ |
560 | lg->pgdidx = 0; | 575 | lg->pgdidx = 0; |
@@ -565,6 +580,24 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) | |||
565 | return 0; | 580 | return 0; |
566 | } | 581 | } |
567 | 582 | ||
583 | /* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ | ||
584 | void page_table_guest_data_init(struct lguest *lg) | ||
585 | { | ||
586 | /* We get the kernel address: above this is all kernel memory. */ | ||
587 | if (get_user(lg->kernel_address, &lg->lguest_data->kernel_address) | ||
588 | /* We tell the Guest that it can't use the top 4MB of virtual | ||
589 | * addresses used by the Switcher. */ | ||
590 | || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) | ||
591 | || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir)) | ||
592 | kill_guest(lg, "bad guest page %p", lg->lguest_data); | ||
593 | |||
594 | /* In flush_user_mappings() we loop from 0 to | ||
595 | * "pgd_index(lg->kernel_address)". This assumes it won't hit the | ||
596 | * Switcher mappings, so check that now. */ | ||
597 | if (pgd_index(lg->kernel_address) >= SWITCHER_PGD_INDEX) | ||
598 | kill_guest(lg, "bad kernel address %#lx", lg->kernel_address); | ||
599 | } | ||
600 | |||
568 | /* When a Guest dies, our cleanup is fairly simple. */ | 601 | /* When a Guest dies, our cleanup is fairly simple. */ |
569 | void free_guest_pagetable(struct lguest *lg) | 602 | void free_guest_pagetable(struct lguest *lg) |
570 | { | 603 | { |
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index a125109446dc..39f64c95de18 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
@@ -216,9 +216,10 @@ static int emulate_insn(struct lguest *lg) | |||
216 | * guest_pa walks the Guest's page tables. */ | 216 | * guest_pa walks the Guest's page tables. */ |
217 | unsigned long physaddr = guest_pa(lg, lg->regs->eip); | 217 | unsigned long physaddr = guest_pa(lg, lg->regs->eip); |
218 | 218 | ||
219 | /* The guest_pa() function only works for Guest kernel addresses, but | 219 | /* This must be the Guest kernel trying to do something, not userspace! |
220 | * that's all we're trying to do anyway. */ | 220 | * The bottom two bits of the CS segment register are the privilege |
221 | if (lg->regs->eip < lg->page_offset) | 221 | * level. */ |
222 | if ((lg->regs->cs & 3) != GUEST_PL) | ||
222 | return 0; | 223 | return 0; |
223 | 224 | ||
224 | /* Decoding x86 instructions is icky. */ | 225 | /* Decoding x86 instructions is icky. */ |
diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h index 8f2a1edc4fe2..0c553ef36240 100644 --- a/include/asm-x86/lguest_hcall.h +++ b/include/asm-x86/lguest_hcall.h | |||
@@ -2,8 +2,6 @@ | |||
2 | #ifndef _X86_LGUEST_HCALL_H | 2 | #ifndef _X86_LGUEST_HCALL_H |
3 | #define _X86_LGUEST_HCALL_H | 3 | #define _X86_LGUEST_HCALL_H |
4 | 4 | ||
5 | #include <asm/hw_irq.h> | ||
6 | |||
7 | #define LHCALL_FLUSH_ASYNC 0 | 5 | #define LHCALL_FLUSH_ASYNC 0 |
8 | #define LHCALL_LGUEST_INIT 1 | 6 | #define LHCALL_LGUEST_INIT 1 |
9 | #define LHCALL_CRASH 2 | 7 | #define LHCALL_CRASH 2 |
@@ -36,6 +34,9 @@ | |||
36 | * definition of a gentleman: "someone who is only rude intentionally". */ | 34 | * definition of a gentleman: "someone who is only rude intentionally". */ |
37 | #define LGUEST_TRAP_ENTRY 0x1F | 35 | #define LGUEST_TRAP_ENTRY 0x1F |
38 | 36 | ||
37 | #ifndef __ASSEMBLY__ | ||
38 | #include <asm/hw_irq.h> | ||
39 | |||
39 | static inline unsigned long | 40 | static inline unsigned long |
40 | hcall(unsigned long call, | 41 | hcall(unsigned long call, |
41 | unsigned long arg1, unsigned long arg2, unsigned long arg3) | 42 | unsigned long arg1, unsigned long arg2, unsigned long arg3) |
@@ -66,4 +67,6 @@ struct hcall_args | |||
66 | /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */ | 67 | /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */ |
67 | unsigned long arg0, arg2, arg3, arg1; | 68 | unsigned long arg0, arg2, arg3, arg1; |
68 | }; | 69 | }; |
70 | |||
71 | #endif /* !__ASSEMBLY__ */ | ||
69 | #endif /* _X86_LGUEST_HCALL_H */ | 72 | #endif /* _X86_LGUEST_HCALL_H */ |
diff --git a/include/linux/lguest.h b/include/linux/lguest.h index 083052236db9..8beb29134626 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h | |||
@@ -44,11 +44,14 @@ struct lguest_data | |||
44 | unsigned long reserve_mem; | 44 | unsigned long reserve_mem; |
45 | /* KHz for the TSC clock. */ | 45 | /* KHz for the TSC clock. */ |
46 | u32 tsc_khz; | 46 | u32 tsc_khz; |
47 | /* Page where the top-level pagetable is */ | ||
48 | unsigned long pgdir; | ||
47 | 49 | ||
48 | /* Fields initialized by the Guest at boot: */ | 50 | /* Fields initialized by the Guest at boot: */ |
49 | /* Instruction range to suppress interrupts even if enabled */ | 51 | /* Instruction range to suppress interrupts even if enabled */ |
50 | unsigned long noirq_start, noirq_end; | 52 | unsigned long noirq_start, noirq_end; |
51 | 53 | /* Address above which page tables are all identical. */ | |
54 | unsigned long kernel_address; | ||
52 | /* The vector to try to use for system calls (0x40 or 0x80). */ | 55 | /* The vector to try to use for system calls (0x40 or 0x80). */ |
53 | unsigned int syscall_vec; | 56 | unsigned int syscall_vec; |
54 | }; | 57 | }; |