diff options
Diffstat (limited to 'Documentation/lguest')
-rw-r--r-- | Documentation/lguest/lguest.c | 61 |
1 files changed, 32 insertions, 29 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index fa838e2eb65..401d26b464f 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <stdlib.h> | 15 | #include <stdlib.h> |
16 | #include <elf.h> | 16 | #include <elf.h> |
17 | #include <sys/mman.h> | 17 | #include <sys/mman.h> |
18 | #include <sys/param.h> | ||
18 | #include <sys/types.h> | 19 | #include <sys/types.h> |
19 | #include <sys/stat.h> | 20 | #include <sys/stat.h> |
20 | #include <sys/wait.h> | 21 | #include <sys/wait.h> |
@@ -162,6 +163,30 @@ static unsigned long entry_point(void *start, void *end, | |||
162 | errx(1, "Is this image a genuine lguest?"); | 163 | errx(1, "Is this image a genuine lguest?"); |
163 | } | 164 | } |
164 | 165 | ||
166 | /* This routine is used to load the kernel or initrd. It tries mmap, but if | ||
167 | * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries), | ||
168 | * it falls back to reading the memory in. */ | ||
169 | static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) | ||
170 | { | ||
171 | ssize_t r; | ||
172 | |||
173 | /* We map writable even though some segments are marked read-only. | ||
174 | * The kernel really wants to be writable: it patches its own | ||
175 | * instructions. | ||
176 | * | ||
177 | * MAP_PRIVATE means that the page won't be copied until a write is | ||
178 | * done to it. This allows us to share untouched memory between | ||
179 | * Guests. */ | ||
180 | if (mmap(addr, len, PROT_READ|PROT_WRITE|PROT_EXEC, | ||
181 | MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED) | ||
182 | return; | ||
183 | |||
184 | /* pread does a seek and a read in one shot: saves a few lines. */ | ||
185 | r = pread(fd, addr, len, offset); | ||
186 | if (r != len) | ||
187 | err(1, "Reading offset %lu len %lu gave %zi", offset, len, r); | ||
188 | } | ||
189 | |||
165 | /* This routine takes an open vmlinux image, which is in ELF, and maps it into | 190 | /* This routine takes an open vmlinux image, which is in ELF, and maps it into |
166 | * the Guest memory. ELF = Executable and Linking Format, which is the format used | 191 | * the Guest memory. ELF = Executable and Linking Format, which is the format used |
167 | * by all modern binaries on Linux including the kernel. | 192 | * by all modern binaries on Linux including the kernel. |
@@ -176,7 +201,6 @@ static unsigned long entry_point(void *start, void *end, | |||
176 | static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, | 201 | static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, |
177 | unsigned long *page_offset) | 202 | unsigned long *page_offset) |
178 | { | 203 | { |
179 | void *addr; | ||
180 | Elf32_Phdr phdr[ehdr->e_phnum]; | 204 | Elf32_Phdr phdr[ehdr->e_phnum]; |
181 | unsigned int i; | 205 | unsigned int i; |
182 | unsigned long start = -1UL, end = 0; | 206 | unsigned long start = -1UL, end = 0; |
@@ -227,23 +251,9 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, | |||
227 | if (phdr[i].p_paddr + phdr[i].p_filesz > end) | 251 | if (phdr[i].p_paddr + phdr[i].p_filesz > end) |
228 | end = phdr[i].p_paddr + phdr[i].p_filesz; | 252 | end = phdr[i].p_paddr + phdr[i].p_filesz; |
229 | 253 | ||
230 | /* We map this section of the file at its physical address. We | 254 | /* We map this section of the file at its physical address. */ |
231 | * map it read & write even if the header says this segment is | 255 | map_at(elf_fd, (void *)phdr[i].p_paddr, |
232 | * read-only. The kernel really wants to be writable: it | 256 | phdr[i].p_offset, phdr[i].p_filesz); |
233 | * patches its own instructions which would normally be | ||
234 | * read-only. | ||
235 | * | ||
236 | * MAP_PRIVATE means that the page won't be copied until a | ||
237 | * write is done to it. This allows us to share much of the | ||
238 | * kernel memory between Guests. */ | ||
239 | addr = mmap((void *)phdr[i].p_paddr, | ||
240 | phdr[i].p_filesz, | ||
241 | PROT_READ|PROT_WRITE|PROT_EXEC, | ||
242 | MAP_FIXED|MAP_PRIVATE, | ||
243 | elf_fd, phdr[i].p_offset); | ||
244 | if (addr != (void *)phdr[i].p_paddr) | ||
245 | err(1, "Mmaping vmlinux seg %i gave %p not %p", | ||
246 | i, addr, (void *)phdr[i].p_paddr); | ||
247 | } | 257 | } |
248 | 258 | ||
249 | return entry_point((void *)start, (void *)end, *page_offset); | 259 | return entry_point((void *)start, (void *)end, *page_offset); |
@@ -402,27 +412,20 @@ static unsigned long load_initrd(const char *name, unsigned long mem) | |||
402 | int ifd; | 412 | int ifd; |
403 | struct stat st; | 413 | struct stat st; |
404 | unsigned long len; | 414 | unsigned long len; |
405 | void *iaddr; | ||
406 | 415 | ||
407 | ifd = open_or_die(name, O_RDONLY); | 416 | ifd = open_or_die(name, O_RDONLY); |
408 | /* fstat() is needed to get the file size. */ | 417 | /* fstat() is needed to get the file size. */ |
409 | if (fstat(ifd, &st) < 0) | 418 | if (fstat(ifd, &st) < 0) |
410 | err(1, "fstat() on initrd '%s'", name); | 419 | err(1, "fstat() on initrd '%s'", name); |
411 | 420 | ||
412 | /* The length needs to be rounded up to a page size: mmap needs the | 421 | /* We map the initrd at the top of memory, but mmap wants it to be |
413 | * address to be page aligned. */ | 422 | * page-aligned, so we round the size up for that. */ |
414 | len = page_align(st.st_size); | 423 | len = page_align(st.st_size); |
415 | /* We map the initrd at the top of memory. */ | 424 | map_at(ifd, (void *)mem - len, 0, st.st_size); |
416 | iaddr = mmap((void *)mem - len, st.st_size, | ||
417 | PROT_READ|PROT_EXEC|PROT_WRITE, | ||
418 | MAP_FIXED|MAP_PRIVATE, ifd, 0); | ||
419 | if (iaddr != (void *)mem - len) | ||
420 | err(1, "Mmaping initrd '%s' returned %p not %p", | ||
421 | name, iaddr, (void *)mem - len); | ||
422 | /* Once a file is mapped, you can close the file descriptor. It's a | 425 | /* Once a file is mapped, you can close the file descriptor. It's a |
423 | * little odd, but quite useful. */ | 426 | * little odd, but quite useful. */ |
424 | close(ifd); | 427 | close(ifd); |
425 | verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr); | 428 | verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len); |
426 | 429 | ||
427 | /* We return the initrd size. */ | 430 | /* We return the initrd size. */ |
428 | return len; | 431 | return len; |