aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rusty Russell <rusty@rustcorp.com.au> 2007-10-21 21:03:26 -0400
committer Rusty Russell <rusty@rustcorp.com.au> 2007-10-23 01:49:50 -0400
commit 3c6b5bfa3cf3b4057788e08482a468cc3bc00780 (patch)
tree f0d67890f6f8c9d0840c9b19a483ec06cbf822ef
parent 6649bb7af6a819b675bfcf22ab704737e905645a (diff)
Introduce guest mem offset, static link example launcher
In order to avoid problematic special linking of the Launcher, we give the Host an offset: this means we can use any memory region in the Launcher as Guest memory rather than insisting on mmap() at 0.

The result is quite pleasing: a number of casts are replaced with simple additions.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r-- Documentation/lguest/Makefile 26
-rw-r--r-- Documentation/lguest/lguest.c 189
-rw-r--r-- drivers/lguest/core.c 22
-rw-r--r-- drivers/lguest/hypercalls.c 15
-rw-r--r-- drivers/lguest/io.c 18
-rw-r--r-- drivers/lguest/lg.h 3
-rw-r--r-- drivers/lguest/lguest_user.c 23
-rw-r--r-- drivers/lguest/page_tables.c 7
8 files changed, 163 insertions, 140 deletions
diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index 526c15fd83af..bac037eb1cda 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
@@ -1,28 +1,8 @@
1# This creates the demonstration utility "lguest" which runs a Linux guest. 1# This creates the demonstration utility "lguest" which runs a Linux guest.
2 2CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include
3# For those people that have a separate object dir, look there for .config
4KBUILD_OUTPUT := ../..
5ifdef O
6 ifeq ("$(origin O)", "command line")
7 KBUILD_OUTPUT := $(O)
8 endif
9endif
10# We rely on CONFIG_PAGE_OFFSET to know where to put lguest binary.
11include $(KBUILD_OUTPUT)/.config
12LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000)
13
14CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -Wl,-T,lguest.lds
15LDLIBS:=-lz 3LDLIBS:=-lz
16# Removing this works for some versions of ld.so (eg. Ubuntu Feisty) and
17# not others (eg. FC7).
18LDFLAGS+=-static
19all: lguest.lds lguest
20 4
21# The linker script on x86 is so complex the only way of creating one 5all: lguest
22# which will link our binary in the right place is to mangle the
23# default one.
24lguest.lds:
25 $(LD) --verbose | awk '/^==========/ { PRINT=1; next; } /SIZEOF_HEADERS/ { gsub(/0x[0-9A-F]*/, "$(LGUEST_GUEST_TOP)") } { if (PRINT) print $$0; }' > $@
26 6
27clean: 7clean:
28 rm -f lguest.lds lguest 8 rm -f lguest
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 401d26b464ff..140bd98a8417 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1,10 +1,7 @@
1/*P:100 This is the Launcher code, a simple program which lays out the 1/*P:100 This is the Launcher code, a simple program which lays out the
2 * "physical" memory for the new Guest by mapping the kernel image and the 2 * "physical" memory for the new Guest by mapping the kernel image and the
3 * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. 3 * virtual devices, then reads repeatedly from /dev/lguest to run the Guest.
4 * 4:*/
5 * The only trick: the Makefile links it at a high address so it will be clear
6 * of the guest memory region. It means that each Guest cannot have more than
7 * about 2.5G of memory on a normally configured Host. :*/
8#define _LARGEFILE64_SOURCE 5#define _LARGEFILE64_SOURCE
9#define _GNU_SOURCE 6#define _GNU_SOURCE
10#include <stdio.h> 7#include <stdio.h>
@@ -56,6 +53,8 @@ typedef uint8_t u8;
56#ifndef SIOCBRADDIF 53#ifndef SIOCBRADDIF
57#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ 54#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
58#endif 55#endif
56/* We can have up to 256 pages for devices. */
57#define DEVICE_PAGES 256
59 58
60/*L:120 verbose is both a global flag and a macro. The C preprocessor allows 59/*L:120 verbose is both a global flag and a macro. The C preprocessor allows
61 * this, and although I wouldn't recommend it, it works quite nicely here. */ 60 * this, and although I wouldn't recommend it, it works quite nicely here. */
@@ -66,8 +65,10 @@ static bool verbose;
66 65
67/* The pipe to send commands to the waker process */ 66/* The pipe to send commands to the waker process */
68static int waker_fd; 67static int waker_fd;
69/* The top of guest physical memory. */ 68/* The pointer to the start of guest memory. */
70static u32 top; 69static void *guest_base;
70/* The maximum guest physical address allowed, and maximum possible. */
71static unsigned long guest_limit, guest_max;
71 72
72/* This is our list of devices. */ 73/* This is our list of devices. */
73struct device_list 74struct device_list
@@ -111,6 +112,29 @@ struct device
111 void *priv; 112 void *priv;
112}; 113};
113 114
115/*L:100 The Launcher code itself takes us out into userspace, that scary place
116 * where pointers run wild and free! Unfortunately, like most userspace
117 * programs, it's quite boring (which is why everyone likes to hack on the
118 * kernel!). Perhaps if you make up an Lguest Drinking Game at this point, it
119 * will get you through this section. Or, maybe not.
120 *
121 * The Launcher sets up a big chunk of memory to be the Guest's "physical"
122 * memory and stores it in "guest_base". In other words, Guest physical ==
123 * Launcher virtual with an offset.
124 *
125 * This can be tough to get your head around, but usually it just means that we
126 * use these trivial conversion functions when the Guest gives us it's
127 * "physical" addresses: */
128static void *from_guest_phys(unsigned long addr)
129{
130 return guest_base + addr;
131}
132
133static unsigned long to_guest_phys(const void *addr)
134{
135 return (addr - guest_base);
136}
137
114/*L:130 138/*L:130
115 * Loading the Kernel. 139 * Loading the Kernel.
116 * 140 *
@@ -124,33 +148,40 @@ static int open_or_die(const char *name, int flags)
124 return fd; 148 return fd;
125} 149}
126 150
127/* map_zeroed_pages() takes a (page-aligned) address and a number of pages. */ 151/* map_zeroed_pages() takes a number of pages. */
128static void *map_zeroed_pages(unsigned long addr, unsigned int num) 152static void *map_zeroed_pages(unsigned int num)
129{ 153{
130 /* We cache the /dev/zero file-descriptor so we only open it once. */ 154 int fd = open_or_die("/dev/zero", O_RDONLY);
131 static int fd = -1; 155 void *addr;
132
133 if (fd == -1)
134 fd = open_or_die("/dev/zero", O_RDONLY);
135 156
136 /* We use a private mapping (ie. if we write to the page, it will be 157 /* We use a private mapping (ie. if we write to the page, it will be
137 * copied), and obviously we insist that it be mapped where we ask. */ 158 * copied). */
138 if (mmap((void *)addr, getpagesize() * num, 159 addr = mmap(NULL, getpagesize() * num,
139 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, 0) 160 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0);
140 != (void *)addr) 161 if (addr == MAP_FAILED)
141 err(1, "Mmaping %u pages of /dev/zero @%p", num, (void *)addr); 162 err(1, "Mmaping %u pages of /dev/zero", num);
142 163
143 /* Returning the address is just a courtesy: can simplify callers. */ 164 return addr;
144 return (void *)addr; 165}
166
167/* Get some more pages for a device. */
168static void *get_pages(unsigned int num)
169{
170 void *addr = from_guest_phys(guest_limit);
171
172 guest_limit += num * getpagesize();
173 if (guest_limit > guest_max)
174 errx(1, "Not enough memory for devices");
175 return addr;
145} 176}
146 177
147/* To find out where to start we look for the magic Guest string, which marks 178/* To find out where to start we look for the magic Guest string, which marks
148 * the code we see in lguest_asm.S. This is a hack which we are currently 179 * the code we see in lguest_asm.S. This is a hack which we are currently
149 * plotting to replace with the normal Linux entry point. */ 180 * plotting to replace with the normal Linux entry point. */
150static unsigned long entry_point(void *start, void *end, 181static unsigned long entry_point(const void *start, const void *end,
151 unsigned long page_offset) 182 unsigned long page_offset)
152{ 183{
153 void *p; 184 const void *p;
154 185
155 /* The scan gives us the physical starting address. We want the 186 /* The scan gives us the physical starting address. We want the
156 * virtual address in this case, and fortunately, we already figured 187 * virtual address in this case, and fortunately, we already figured
@@ -158,7 +189,8 @@ static unsigned long entry_point(void *start, void *end,
158 * "page_offset". */ 189 * "page_offset". */
159 for (p = start; p < end; p++) 190 for (p = start; p < end; p++)
160 if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0) 191 if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0)
161 return (long)p + strlen("GenuineLguest") + page_offset; 192 return to_guest_phys(p + strlen("GenuineLguest"))
193 + page_offset;
162 194
163 errx(1, "Is this image a genuine lguest?"); 195 errx(1, "Is this image a genuine lguest?");
164} 196}
@@ -201,9 +233,9 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
201static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, 233static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
202 unsigned long *page_offset) 234 unsigned long *page_offset)
203{ 235{
236 void *start = (void *)-1, *end = NULL;
204 Elf32_Phdr phdr[ehdr->e_phnum]; 237 Elf32_Phdr phdr[ehdr->e_phnum];
205 unsigned int i; 238 unsigned int i;
206 unsigned long start = -1UL, end = 0;
207 239
208 /* Sanity checks on the main ELF header: an x86 executable with a 240 /* Sanity checks on the main ELF header: an x86 executable with a
209 * reasonable number of correctly-sized program headers. */ 241 * reasonable number of correctly-sized program headers. */
@@ -246,17 +278,17 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
246 278
247 /* We track the first and last address we mapped, so we can 279 /* We track the first and last address we mapped, so we can
248 * tell entry_point() where to scan. */ 280 * tell entry_point() where to scan. */
249 if (phdr[i].p_paddr < start) 281 if (from_guest_phys(phdr[i].p_paddr) < start)
250 start = phdr[i].p_paddr; 282 start = from_guest_phys(phdr[i].p_paddr);
251 if (phdr[i].p_paddr + phdr[i].p_filesz > end) 283 if (from_guest_phys(phdr[i].p_paddr) + phdr[i].p_filesz > end)
252 end = phdr[i].p_paddr + phdr[i].p_filesz; 284 end=from_guest_phys(phdr[i].p_paddr)+phdr[i].p_filesz;
253 285
254 /* We map this section of the file at its physical address. */ 286 /* We map this section of the file at its physical address. */
255 map_at(elf_fd, (void *)phdr[i].p_paddr, 287 map_at(elf_fd, from_guest_phys(phdr[i].p_paddr),
256 phdr[i].p_offset, phdr[i].p_filesz); 288 phdr[i].p_offset, phdr[i].p_filesz);
257 } 289 }
258 290
259 return entry_point((void *)start, (void *)end, *page_offset); 291 return entry_point(start, end, *page_offset);
260} 292}
261 293
262/*L:170 Prepare to be SHOCKED and AMAZED. And possibly a trifle nauseated. 294/*L:170 Prepare to be SHOCKED and AMAZED. And possibly a trifle nauseated.
@@ -307,7 +339,7 @@ static unsigned long unpack_bzimage(int fd, unsigned long *page_offset)
307 * actually configurable as CONFIG_PHYSICAL_START, but as the comment 339 * actually configurable as CONFIG_PHYSICAL_START, but as the comment
308 * there says, "Don't change this unless you know what you are doing". 340 * there says, "Don't change this unless you know what you are doing".
309 * Indeed. */ 341 * Indeed. */
310 void *img = (void *)0x100000; 342 void *img = from_guest_phys(0x100000);
311 343
312 /* gzdopen takes our file descriptor (carefully placed at the start of 344 /* gzdopen takes our file descriptor (carefully placed at the start of
313 * the GZIP header we found) and returns a gzFile. */ 345 * the GZIP header we found) and returns a gzFile. */
@@ -421,7 +453,7 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
421 /* We map the initrd at the top of memory, but mmap wants it to be 453 /* We map the initrd at the top of memory, but mmap wants it to be
422 * page-aligned, so we round the size up for that. */ 454 * page-aligned, so we round the size up for that. */
423 len = page_align(st.st_size); 455 len = page_align(st.st_size);
424 map_at(ifd, (void *)mem - len, 0, st.st_size); 456 map_at(ifd, from_guest_phys(mem - len), 0, st.st_size);
425 /* Once a file is mapped, you can close the file descriptor. It's a 457 /* Once a file is mapped, you can close the file descriptor. It's a
426 * little odd, but quite useful. */ 458 * little odd, but quite useful. */
427 close(ifd); 459 close(ifd);
@@ -431,9 +463,9 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
431 return len; 463 return len;
432} 464}
433 465
434/* Once we know how much memory we have, and the address the Guest kernel 466/* Once we know the address the Guest kernel expects, we can construct simple
435 * expects, we can construct simple linear page tables which will get the Guest 467 * linear page tables for all of memory which will get the Guest far enough
436 * far enough into the boot to create its own. 468 * into the boot to create its own.
437 * 469 *
438 * We lay them out of the way, just below the initrd (which is why we need to 470 * We lay them out of the way, just below the initrd (which is why we need to
439 * know its size). */ 471 * know its size). */
@@ -457,7 +489,7 @@ static unsigned long setup_pagetables(unsigned long mem,
457 linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; 489 linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page;
458 490
459 /* We put the toplevel page directory page at the top of memory. */ 491 /* We put the toplevel page directory page at the top of memory. */
460 pgdir = (void *)mem - initrd_size - getpagesize(); 492 pgdir = from_guest_phys(mem) - initrd_size - getpagesize();
461 493
462 /* Now we use the next linear_pages pages as pte pages */ 494 /* Now we use the next linear_pages pages as pte pages */
463 linear = (void *)pgdir - linear_pages*getpagesize(); 495 linear = (void *)pgdir - linear_pages*getpagesize();
@@ -473,15 +505,16 @@ static unsigned long setup_pagetables(unsigned long mem,
473 * continue from there. */ 505 * continue from there. */
474 for (i = 0; i < mapped_pages; i += ptes_per_page) { 506 for (i = 0; i < mapped_pages; i += ptes_per_page) {
475 pgdir[(i + page_offset/getpagesize())/ptes_per_page] 507 pgdir[(i + page_offset/getpagesize())/ptes_per_page]
476 = (((u32)linear + i*sizeof(u32)) | PAGE_PRESENT); 508 = ((to_guest_phys(linear) + i*sizeof(u32))
509 | PAGE_PRESENT);
477 } 510 }
478 511
479 verbose("Linear mapping of %u pages in %u pte pages at %p\n", 512 verbose("Linear mapping of %u pages in %u pte pages at %#lx\n",
480 mapped_pages, linear_pages, linear); 513 mapped_pages, linear_pages, to_guest_phys(linear));
481 514
482 /* We return the top level (guest-physical) address: the kernel needs 515 /* We return the top level (guest-physical) address: the kernel needs
483 * to know where it is. */ 516 * to know where it is. */
484 return (unsigned long)pgdir; 517 return to_guest_phys(pgdir);
485} 518}
486 519
487/* Simple routine to roll all the commandline arguments together with spaces 520/* Simple routine to roll all the commandline arguments together with spaces
@@ -501,14 +534,19 @@ static void concat(char *dst, char *args[])
501 534
502/* This is where we actually tell the kernel to initialize the Guest. We saw 535/* This is where we actually tell the kernel to initialize the Guest. We saw
503 * the arguments it expects when we looked at initialize() in lguest_user.c: 536 * the arguments it expects when we looked at initialize() in lguest_user.c:
504 * the top physical page to allow, the top level pagetable, the entry point and 537 * the base of guest "physical" memory, the top physical page to allow, the
505 * the page_offset constant for the Guest. */ 538 * top level pagetable, the entry point and the page_offset constant for the
539 * Guest. */
506static int tell_kernel(u32 pgdir, u32 start, u32 page_offset) 540static int tell_kernel(u32 pgdir, u32 start, u32 page_offset)
507{ 541{
508 u32 args[] = { LHREQ_INITIALIZE, 542 u32 args[] = { LHREQ_INITIALIZE,
509 top/getpagesize(), pgdir, start, page_offset }; 543 (unsigned long)guest_base,
544 guest_limit / getpagesize(),
545 pgdir, start, page_offset };
510 int fd; 546 int fd;
511 547
548 verbose("Guest: %p - %p (%#lx)\n",
549 guest_base, guest_base + guest_limit, guest_limit);
512 fd = open_or_die("/dev/lguest", O_RDWR); 550 fd = open_or_die("/dev/lguest", O_RDWR);
513 if (write(fd, args, sizeof(args)) < 0) 551 if (write(fd, args, sizeof(args)) < 0)
514 err(1, "Writing to /dev/lguest"); 552 err(1, "Writing to /dev/lguest");
@@ -605,11 +643,11 @@ static void *_check_pointer(unsigned long addr, unsigned int size,
605{ 643{
606 /* We have to separately check addr and addr+size, because size could 644 /* We have to separately check addr and addr+size, because size could
607 * be huge and addr + size might wrap around. */ 645 * be huge and addr + size might wrap around. */
608 if (addr >= top || addr + size >= top) 646 if (addr >= guest_limit || addr + size >= guest_limit)
609 errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr); 647 errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr);
610 /* We return a pointer for the caller's convenience, now we know it's 648 /* We return a pointer for the caller's convenience, now we know it's
611 * safe to use. */ 649 * safe to use. */
612 return (void *)addr; 650 return from_guest_phys(addr);
613} 651}
614/* A macro which transparently hands the line number to the real function. */ 652/* A macro which transparently hands the line number to the real function. */
615#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) 653#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
@@ -646,7 +684,7 @@ static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num)
646static u32 *get_dma_buffer(int fd, void *key, 684static u32 *get_dma_buffer(int fd, void *key,
647 struct iovec iov[], unsigned int *num, u32 *irq) 685 struct iovec iov[], unsigned int *num, u32 *irq)
648{ 686{
649 u32 buf[] = { LHREQ_GETDMA, (u32)key }; 687 u32 buf[] = { LHREQ_GETDMA, to_guest_phys(key) };
650 unsigned long udma; 688 unsigned long udma;
651 u32 *res; 689 u32 *res;
652 690
@@ -998,11 +1036,11 @@ new_dev_desc(struct lguest_device_desc *descs,
998 descs[i].features = features; 1036 descs[i].features = features;
999 descs[i].num_pages = num_pages; 1037 descs[i].num_pages = num_pages;
1000 /* If they said the device needs memory, we allocate 1038 /* If they said the device needs memory, we allocate
1001 * that now, bumping up the top of Guest memory. */ 1039 * that now. */
1002 if (num_pages) { 1040 if (num_pages) {
1003 map_zeroed_pages(top, num_pages); 1041 unsigned long pa;
1004 descs[i].pfn = top/getpagesize(); 1042 pa = to_guest_phys(get_pages(num_pages));
1005 top += num_pages*getpagesize(); 1043 descs[i].pfn = pa / getpagesize();
1006 } 1044 }
1007 return &descs[i]; 1045 return &descs[i];
1008 } 1046 }
@@ -1040,9 +1078,9 @@ static struct device *new_device(struct device_list *devices,
1040 if (handle_input) 1078 if (handle_input)
1041 set_fd(dev->fd, devices); 1079 set_fd(dev->fd, devices);
1042 dev->desc = new_dev_desc(devices->descs, type, features, num_pages); 1080 dev->desc = new_dev_desc(devices->descs, type, features, num_pages);
1043 dev->mem = (void *)(dev->desc->pfn * getpagesize()); 1081 dev->mem = from_guest_phys(dev->desc->pfn * getpagesize());
1044 dev->handle_input = handle_input; 1082 dev->handle_input = handle_input;
1045 dev->watch_key = (unsigned long)dev->mem + watch_off; 1083 dev->watch_key = to_guest_phys(dev->mem) + watch_off;
1046 dev->handle_output = handle_output; 1084 dev->handle_output = handle_output;
1047 return dev; 1085 return dev;
1048} 1086}
@@ -1382,21 +1420,7 @@ static void usage(void)
1382 "<mem-in-mb> vmlinux [args...]"); 1420 "<mem-in-mb> vmlinux [args...]");
1383} 1421}
1384 1422
1385/*L:100 The Launcher code itself takes us out into userspace, that scary place 1423/*L:105 The main routine is where the real work begins: */
1386 * where pointers run wild and free! Unfortunately, like most userspace
1387 * programs, it's quite boring (which is why everyone like to hack on the
1388 * kernel!). Perhaps if you make up an Lguest Drinking Game at this point, it
1389 * will get you through this section. Or, maybe not.
1390 *
1391 * The Launcher binary sits up high, usually starting at address 0xB8000000.
1392 * Everything below this is the "physical" memory for the Guest. For example,
1393 * if the Guest were to write a "1" at physical address 0, we would see a "1"
1394 * in the Launcher at "(int *)0". Guest physical == Launcher virtual.
1395 *
1396 * This can be tough to get your head around, but usually it just means that we
1397 * don't need to do any conversion when the Guest gives us it's "physical"
1398 * addresses.
1399 */
1400int main(int argc, char *argv[]) 1424int main(int argc, char *argv[])
1401{ 1425{
1402 /* Memory, top-level pagetable, code startpoint, PAGE_OFFSET and size 1426 /* Memory, top-level pagetable, code startpoint, PAGE_OFFSET and size
@@ -1406,8 +1430,8 @@ int main(int argc, char *argv[])
1406 int i, c, lguest_fd; 1430 int i, c, lguest_fd;
1407 /* The list of Guest devices, based on command line arguments. */ 1431 /* The list of Guest devices, based on command line arguments. */
1408 struct device_list device_list; 1432 struct device_list device_list;
1409 /* The boot information for the Guest: at guest-physical address 0. */ 1433 /* The boot information for the Guest. */
1410 void *boot = (void *)0; 1434 void *boot;
1411 /* If they specify an initrd file to load. */ 1435 /* If they specify an initrd file to load. */
1412 const char *initrd_name = NULL; 1436 const char *initrd_name = NULL;
1413 1437
@@ -1427,9 +1451,16 @@ int main(int argc, char *argv[])
1427 * of memory now. */ 1451 * of memory now. */
1428 for (i = 1; i < argc; i++) { 1452 for (i = 1; i < argc; i++) {
1429 if (argv[i][0] != '-') { 1453 if (argv[i][0] != '-') {
1430 mem = top = atoi(argv[i]) * 1024 * 1024; 1454 mem = atoi(argv[i]) * 1024 * 1024;
1431 device_list.descs = map_zeroed_pages(top, 1); 1455 /* We start by mapping anonymous pages over all of
1432 top += getpagesize(); 1456 * guest-physical memory range. This fills it with 0,
1457 * and ensures that the Guest won't be killed when it
1458 * tries to access it. */
1459 guest_base = map_zeroed_pages(mem / getpagesize()
1460 + DEVICE_PAGES);
1461 guest_limit = mem;
1462 guest_max = mem + DEVICE_PAGES*getpagesize();
1463 device_list.descs = get_pages(1);
1433 break; 1464 break;
1434 } 1465 }
1435 } 1466 }
@@ -1462,18 +1493,18 @@ int main(int argc, char *argv[])
1462 if (optind + 2 > argc) 1493 if (optind + 2 > argc)
1463 usage(); 1494 usage();
1464 1495
1496 verbose("Guest base is at %p\n", guest_base);
1497
1465 /* We always have a console device */ 1498 /* We always have a console device */
1466 setup_console(&device_list); 1499 setup_console(&device_list);
1467 1500
1468 /* We start by mapping anonymous pages over all of guest-physical
1469 * memory range. This fills it with 0, and ensures that the Guest
1470 * won't be killed when it tries to access it. */
1471 map_zeroed_pages(0, mem / getpagesize());
1472
1473 /* Now we load the kernel */ 1501 /* Now we load the kernel */
1474 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY), 1502 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY),
1475 &page_offset); 1503 &page_offset);
1476 1504
1505 /* Boot information is stashed at physical address 0 */
1506 boot = from_guest_phys(0);
1507
1477 /* Map the initrd image if requested (at top of physical memory) */ 1508 /* Map the initrd image if requested (at top of physical memory) */
1478 if (initrd_name) { 1509 if (initrd_name) {
1479 initrd_size = load_initrd(initrd_name, mem); 1510 initrd_size = load_initrd(initrd_name, mem);
@@ -1495,7 +1526,7 @@ int main(int argc, char *argv[])
1495 = ((struct e820entry) { 0, mem, E820_RAM }); 1526 = ((struct e820entry) { 0, mem, E820_RAM });
1496 /* The boot header contains a command line pointer: we put the command 1527 /* The boot header contains a command line pointer: we put the command
1497 * line after the boot header (at address 4096) */ 1528 * line after the boot header (at address 4096) */
1498 *(void **)(boot + 0x228) = boot + 4096; 1529 *(u32 *)(boot + 0x228) = 4096;
1499 concat(boot + 4096, argv+optind+2); 1530 concat(boot + 4096, argv+optind+2);
1500 1531
1501 /* The guest type value of "1" tells the Guest it's under lguest. */ 1532 /* The guest type value of "1" tells the Guest it's under lguest. */
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index a0788c12b392..eb95860cf098 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -325,8 +325,8 @@ static int emulate_insn(struct lguest *lg)
325 * Dealing With Guest Memory. 325 * Dealing With Guest Memory.
326 * 326 *
327 * When the Guest gives us (what it thinks is) a physical address, we can use 327 * When the Guest gives us (what it thinks is) a physical address, we can use
328 * the normal copy_from_user() & copy_to_user() on that address: remember, 328 * the normal copy_from_user() & copy_to_user() on the corresponding place in
329 * Guest physical == Launcher virtual. 329 * the memory region allocated by the Launcher.
330 * 330 *
331 * But we can't trust the Guest: it might be trying to access the Launcher 331 * But we can't trust the Guest: it might be trying to access the Launcher
332 * code. We have to check that the range is below the pfn_limit the Launcher 332 * code. We have to check that the range is below the pfn_limit the Launcher
@@ -348,8 +348,8 @@ u32 lgread_u32(struct lguest *lg, unsigned long addr)
348 348
349 /* Don't let them access lguest binary. */ 349 /* Don't let them access lguest binary. */
350 if (!lguest_address_ok(lg, addr, sizeof(val)) 350 if (!lguest_address_ok(lg, addr, sizeof(val))
351 || get_user(val, (u32 __user *)addr) != 0) 351 || get_user(val, (u32 *)(lg->mem_base + addr)) != 0)
352 kill_guest(lg, "bad read address %#lx", addr); 352 kill_guest(lg, "bad read address %#lx: pfn_limit=%u membase=%p", addr, lg->pfn_limit, lg->mem_base);
353 return val; 353 return val;
354} 354}
355 355
@@ -357,7 +357,7 @@ u32 lgread_u32(struct lguest *lg, unsigned long addr)
357void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val) 357void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
358{ 358{
359 if (!lguest_address_ok(lg, addr, sizeof(val)) 359 if (!lguest_address_ok(lg, addr, sizeof(val))
360 || put_user(val, (u32 __user *)addr) != 0) 360 || put_user(val, (u32 *)(lg->mem_base + addr)) != 0)
361 kill_guest(lg, "bad write address %#lx", addr); 361 kill_guest(lg, "bad write address %#lx", addr);
362} 362}
363 363
@@ -367,7 +367,7 @@ void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
367void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes) 367void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
368{ 368{
369 if (!lguest_address_ok(lg, addr, bytes) 369 if (!lguest_address_ok(lg, addr, bytes)
370 || copy_from_user(b, (void __user *)addr, bytes) != 0) { 370 || copy_from_user(b, lg->mem_base + addr, bytes) != 0) {
371 /* copy_from_user should do this, but as we rely on it... */ 371 /* copy_from_user should do this, but as we rely on it... */
372 memset(b, 0, bytes); 372 memset(b, 0, bytes);
373 kill_guest(lg, "bad read address %#lx len %u", addr, bytes); 373 kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
@@ -379,7 +379,7 @@ void lgwrite(struct lguest *lg, unsigned long addr, const void *b,
379 unsigned bytes) 379 unsigned bytes)
380{ 380{
381 if (!lguest_address_ok(lg, addr, bytes) 381 if (!lguest_address_ok(lg, addr, bytes)
382 || copy_to_user((void __user *)addr, b, bytes) != 0) 382 || copy_to_user(lg->mem_base + addr, b, bytes) != 0)
383 kill_guest(lg, "bad write address %#lx len %u", addr, bytes); 383 kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
384} 384}
385/* (end of memory access helper routines) :*/ 385/* (end of memory access helper routines) :*/
@@ -616,11 +616,9 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
616 * 616 *
617 * Note that if the Guest were really messed up, this 617 * Note that if the Guest were really messed up, this
618 * could happen before it's done the INITIALIZE 618 * could happen before it's done the INITIALIZE
619 * hypercall, so lg->lguest_data will be NULL, so 619 * hypercall, so lg->lguest_data will be NULL */
620 * &lg->lguest_data->cr2 will be address 8. Writing 620 if (lg->lguest_data
621 * into that address won't hurt the Host at all, 621 && put_user(cr2, &lg->lguest_data->cr2))
622 * though. */
623 if (put_user(cr2, &lg->lguest_data->cr2))
624 kill_guest(lg, "Writing cr2"); 622 kill_guest(lg, "Writing cr2");
625 break; 623 break;
626 case 7: /* We've intercepted a Device Not Available fault. */ 624 case 7: /* We've intercepted a Device Not Available fault. */
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 5ecd60b54201..02e67b49ea4f 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -205,16 +205,19 @@ static void initialize(struct lguest *lg)
205 tsc_speed = 0; 205 tsc_speed = 0;
206 206
207 /* The pointer to the Guest's "struct lguest_data" is the only 207 /* The pointer to the Guest's "struct lguest_data" is the only
208 * argument. */ 208 * argument. We check that address now. */
209 lg->lguest_data = (struct lguest_data __user *)lg->regs->edx;
210 /* If we check the address they gave is OK now, we can simply
211 * copy_to_user/from_user from now on rather than using lgread/lgwrite.
212 * I put this in to show that I'm not immune to writing stupid
213 * optimizations. */
214 if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) { 209 if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) {
215 kill_guest(lg, "bad guest page %p", lg->lguest_data); 210 kill_guest(lg, "bad guest page %p", lg->lguest_data);
216 return; 211 return;
217 } 212 }
213
214 /* Having checked it, we simply set lg->lguest_data to point straight
215 * into the Launcher's memory at the right place and then use
216 * copy_to_user/from_user from now on, instead of lgread/write. I put
217 * this in to show that I'm not immune to writing stupid
218 * optimizations. */
219 lg->lguest_data = lg->mem_base + lg->regs->edx;
220
218 /* The Guest tells us where we're not to deliver interrupts by putting 221 /* The Guest tells us where we're not to deliver interrupts by putting
219 * the range of addresses into "struct lguest_data". */ 222 * the range of addresses into "struct lguest_data". */
220 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) 223 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c
index ea68613b43f6..3a845335fee8 100644
--- a/drivers/lguest/io.c
+++ b/drivers/lguest/io.c
@@ -186,7 +186,7 @@ int bind_dma(struct lguest *lg,
186 * we're doing this. */ 186 * we're doing this. */
187 mutex_lock(&lguest_lock); 187 mutex_lock(&lguest_lock);
188 down_read(fshared); 188 down_read(fshared);
189 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { 189 if (get_futex_key(lg->mem_base + ukey, fshared, &key) != 0) {
190 kill_guest(lg, "bad dma key %#lx", ukey); 190 kill_guest(lg, "bad dma key %#lx", ukey);
191 goto unlock; 191 goto unlock;
192 } 192 }
@@ -247,7 +247,8 @@ static int lgread_other(struct lguest *lg,
247 void *buf, u32 addr, unsigned bytes) 247 void *buf, u32 addr, unsigned bytes)
248{ 248{
249 if (!lguest_address_ok(lg, addr, bytes) 249 if (!lguest_address_ok(lg, addr, bytes)
250 || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) { 250 || access_process_vm(lg->tsk, (unsigned long)lg->mem_base + addr,
251 buf, bytes, 0) != bytes) {
251 memset(buf, 0, bytes); 252 memset(buf, 0, bytes);
252 kill_guest(lg, "bad address in registered DMA struct"); 253 kill_guest(lg, "bad address in registered DMA struct");
253 return 0; 254 return 0;
@@ -261,8 +262,8 @@ static int lgwrite_other(struct lguest *lg, u32 addr,
261 const void *buf, unsigned bytes) 262 const void *buf, unsigned bytes)
262{ 263{
263 if (!lguest_address_ok(lg, addr, bytes) 264 if (!lguest_address_ok(lg, addr, bytes)
264 || (access_process_vm(lg->tsk, addr, (void *)buf, bytes, 1) 265 || access_process_vm(lg->tsk, (unsigned long)lg->mem_base + addr,
265 != bytes)) { 266 (void *)buf, bytes, 1) != bytes) {
266 kill_guest(lg, "bad address writing to registered DMA"); 267 kill_guest(lg, "bad address writing to registered DMA");
267 return 0; 268 return 0;
268 } 269 }
@@ -318,7 +319,7 @@ static u32 copy_data(struct lguest *srclg,
318 * copy_to_user_page(), and some arch's seem to need special 319 * copy_to_user_page(), and some arch's seem to need special
319 * flushes. x86 is fine. */ 320 * flushes. x86 is fine. */
320 if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, 321 if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
321 (void __user *)src->addr[si], len) != 0) { 322 srclg->mem_base+src->addr[si], len) != 0) {
322 /* If a copy failed, it's the source's fault. */ 323 /* If a copy failed, it's the source's fault. */
323 kill_guest(srclg, "bad address in sending DMA"); 324 kill_guest(srclg, "bad address in sending DMA");
324 totlen = 0; 325 totlen = 0;
@@ -377,7 +378,8 @@ static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src,
377 * number of pages. Note that we're holding the destination's 378 * number of pages. Note that we're holding the destination's
378 * mmap_sem, as get_user_pages() requires. */ 379 * mmap_sem, as get_user_pages() requires. */
379 if (get_user_pages(dstlg->tsk, dstlg->mm, 380 if (get_user_pages(dstlg->tsk, dstlg->mm,
380 dst->addr[i], 1, 1, 1, pages+i, NULL) 381 (unsigned long)dstlg->mem_base+dst->addr[i],
382 1, 1, 1, pages+i, NULL)
381 != 1) { 383 != 1) {
382 /* This means the destination gave us a bogus buffer */ 384 /* This means the destination gave us a bogus buffer */
383 kill_guest(dstlg, "Error mapping DMA pages"); 385 kill_guest(dstlg, "Error mapping DMA pages");
@@ -493,7 +495,7 @@ again:
493 mutex_lock(&lguest_lock); 495 mutex_lock(&lguest_lock);
494 down_read(fshared); 496 down_read(fshared);
495 /* Get the futex key for the key the Guest gave us */ 497 /* Get the futex key for the key the Guest gave us */
496 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { 498 if (get_futex_key(lg->mem_base + ukey, fshared, &key) != 0) {
497 kill_guest(lg, "bad sending DMA key"); 499 kill_guest(lg, "bad sending DMA key");
498 goto unlock; 500 goto unlock;
499 } 501 }
@@ -584,7 +586,7 @@ unsigned long get_dma_buffer(struct lguest *lg,
584 586
585 /* This can fail if it's not a valid address, or if the address is not 587 /* This can fail if it's not a valid address, or if the address is not
586 * divisible by 4 (the futex code needs that, we don't really). */ 588 * divisible by 4 (the futex code needs that, we don't really). */
587 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { 589 if (get_futex_key(lg->mem_base + ukey, fshared, &key) != 0) {
588 kill_guest(lg, "bad registered DMA buffer"); 590 kill_guest(lg, "bad registered DMA buffer");
589 goto unlock; 591 goto unlock;
590 } 592 }
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 399eab852ab5..54f2c2472bec 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -142,6 +142,9 @@ struct lguest
142 struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ 142 struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */
143 u16 guestid; 143 u16 guestid;
144 u32 pfn_limit; 144 u32 pfn_limit;
145 /* This provides the offset to the base of guest-physical
146 * memory in the Launcher. */
147 void __user *mem_base;
145 u32 page_offset; 148 u32 page_offset;
146 u32 cr2; 149 u32 cr2;
147 int halted; 150 int halted;
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 80d1b58c7698..816d4d12a801 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -1,9 +1,9 @@
1/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher 1/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher
2 * controls and communicates with the Guest. For example, the first write will 2 * controls and communicates with the Guest. For example, the first write will
3 * tell us the memory size, pagetable, entry point and kernel address offset. 3 * tell us the Guest's memory layout, pagetable, entry point and kernel address
4 * A read will run the Guest until a signal is pending (-EINTR), or the Guest 4 * offset. A read will run the Guest until something happens, such as a signal
5 * does a DMA out to the Launcher. Writes are also used to get a DMA buffer 5 * or the Guest doing a DMA out to the Launcher. Writes are also used to get a
6 * registered by the Guest and to send the Guest an interrupt. :*/ 6 * DMA buffer registered by the Guest and to send the Guest an interrupt. :*/
7#include <linux/uaccess.h> 7#include <linux/uaccess.h>
8#include <linux/miscdevice.h> 8#include <linux/miscdevice.h>
9#include <linux/fs.h> 9#include <linux/fs.h>
@@ -142,9 +142,11 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
142 return run_guest(lg, (unsigned long __user *)user); 142 return run_guest(lg, (unsigned long __user *)user);
143} 143}
144 144
145/*L:020 The initialization write supplies 4 32-bit values (in addition to the 145/*L:020 The initialization write supplies 5 32-bit values (in addition to the
146 * 32-bit LHREQ_INITIALIZE value). These are: 146 * 32-bit LHREQ_INITIALIZE value). These are:
147 * 147 *
148 * base: The start of the Guest-physical memory inside the Launcher memory.
149 *
148 * pfnlimit: The highest (Guest-physical) page number the Guest should be 150 * pfnlimit: The highest (Guest-physical) page number the Guest should be
149 * allowed to access. The Launcher has to live in Guest memory, so it sets 151 * allowed to access. The Launcher has to live in Guest memory, so it sets
150 * this to ensure the Guest can't reach it. 152 * this to ensure the Guest can't reach it.
@@ -166,7 +168,7 @@ static int initialize(struct file *file, const u32 __user *input)
166 * Guest. */ 168 * Guest. */
167 struct lguest *lg; 169 struct lguest *lg;
168 int err, i; 170 int err, i;
169 u32 args[4]; 171 u32 args[5];
170 172
171 /* We grab the Big Lguest lock, which protects the global array 173 /* We grab the Big Lguest lock, which protects the global array
172 * "lguests" and multiple simultaneous initializations. */ 174 * "lguests" and multiple simultaneous initializations. */
@@ -194,8 +196,9 @@ static int initialize(struct file *file, const u32 __user *input)
194 196
195 /* Populate the easy fields of our "struct lguest" */ 197 /* Populate the easy fields of our "struct lguest" */
196 lg->guestid = i; 198 lg->guestid = i;
197 lg->pfn_limit = args[0]; 199 lg->mem_base = (void __user *)(long)args[0];
198 lg->page_offset = args[3]; 200 lg->pfn_limit = args[1];
201 lg->page_offset = args[4];
199 202
200 /* We need a complete page for the Guest registers: they are accessible 203 /* We need a complete page for the Guest registers: they are accessible
201 * to the Guest and we can only grant it access to whole pages. */ 204 * to the Guest and we can only grant it access to whole pages. */
@@ -210,13 +213,13 @@ static int initialize(struct file *file, const u32 __user *input)
210 /* Initialize the Guest's shadow page tables, using the toplevel 213 /* Initialize the Guest's shadow page tables, using the toplevel
211 * address the Launcher gave us. This allocates memory, so can 214 * address the Launcher gave us. This allocates memory, so can
212 * fail. */ 215 * fail. */
213 err = init_guest_pagetable(lg, args[1]); 216 err = init_guest_pagetable(lg, args[2]);
214 if (err) 217 if (err)
215 goto free_regs; 218 goto free_regs;
216 219
217 /* Now we initialize the Guest's registers, handing it the start 220 /* Now we initialize the Guest's registers, handing it the start
218 * address. */ 221 * address. */
219 setup_regs(lg->regs, args[2]); 222 setup_regs(lg->regs, args[3]);
220 223
221 /* There are a couple of GDT entries the Guest expects when first 224 /* There are a couple of GDT entries the Guest expects when first
222 * booting. */ 225 * booting. */
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index b7a924ace684..9cd2faceb87c 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -152,7 +152,7 @@ static unsigned long get_pfn(unsigned long virtpfn, int write)
152static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write) 152static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write)
153{ 153{
154 spte_t spte; 154 spte_t spte;
155 unsigned long pfn; 155 unsigned long pfn, base;
156 156
157 /* The Guest sets the global flag, because it thinks that it is using 157 /* The Guest sets the global flag, because it thinks that it is using
158 * PGE. We only told it to use PGE so it would tell us whether it was 158 * PGE. We only told it to use PGE so it would tell us whether it was
@@ -160,11 +160,14 @@ static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write)
160 * use the global bit, so throw it away. */ 160 * use the global bit, so throw it away. */
161 spte.flags = (gpte.flags & ~_PAGE_GLOBAL); 161 spte.flags = (gpte.flags & ~_PAGE_GLOBAL);
162 162
163 /* The Guest's pages are offset inside the Launcher. */
164 base = (unsigned long)lg->mem_base / PAGE_SIZE;
165
163 /* We need a temporary "unsigned long" variable to hold the answer from 166 /* We need a temporary "unsigned long" variable to hold the answer from
164 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't 167 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
165 * fit in spte.pfn. get_pfn() finds the real physical number of the 168 * fit in spte.pfn. get_pfn() finds the real physical number of the
166 * page, given the virtual number. */ 169 * page, given the virtual number. */
167 pfn = get_pfn(gpte.pfn, write); 170 pfn = get_pfn(base + gpte.pfn, write);
168 if (pfn == -1UL) { 171 if (pfn == -1UL) {
169 kill_guest(lg, "failed to get page %u", gpte.pfn); 172 kill_guest(lg, "failed to get page %u", gpte.pfn);
170 /* When we destroy the Guest, we'll go through the shadow page 173 /* When we destroy the Guest, we'll go through the shadow page