aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRonald G. Minnich <rminnich@gmail.com>2007-08-28 17:35:59 -0400
committerRusty Russell <rusty@rustcorp.com.au>2007-10-23 01:49:50 -0400
commit6649bb7af6a819b675bfcf22ab704737e905645a (patch)
tree64e3b9e120ff1bafa074db2f76302230b88ac3f6
parent1f4e1de4f23e158abf976a76e1d0fce6e39b532a (diff)
Accept ELF files that are valid but have sections that cannot be mmap'ed for some reason.
Plan9 kernel binaries don't neatly align their ELF sections to our page boundaries. Signed-off-by: Ronald G. Minnich <rminnich@gmail.com> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r--Documentation/lguest/lguest.c61
1 files changed, 32 insertions, 29 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index fa838e2eb656..401d26b464ff 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -15,6 +15,7 @@
15#include <stdlib.h> 15#include <stdlib.h>
16#include <elf.h> 16#include <elf.h>
17#include <sys/mman.h> 17#include <sys/mman.h>
18#include <sys/param.h>
18#include <sys/types.h> 19#include <sys/types.h>
19#include <sys/stat.h> 20#include <sys/stat.h>
20#include <sys/wait.h> 21#include <sys/wait.h>
@@ -162,6 +163,30 @@ static unsigned long entry_point(void *start, void *end,
162 errx(1, "Is this image a genuine lguest?"); 163 errx(1, "Is this image a genuine lguest?");
163} 164}
164 165
/* This routine is used to load the kernel or initrd.  It tries mmap, but if
 * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries),
 * it falls back to reading the data into place with pread.
 *
 *   fd:     open file descriptor to load from
 *   addr:   address at which the data must end up
 *   offset: byte offset of the data within the file
 *   len:    number of bytes to load
 *
 * Returns only on success; any failure terminates the program through
 * err()/errx().
 *
 * NOTE(review): the pread fallback writes straight through addr, so it
 * presumably relies on [addr, addr+len) already being mapped writable by
 * the caller -- confirm against the code that sets up Guest memory. */
static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
{
	ssize_t r;

	/* We map writable even though some segments are marked read-only.
	 * The kernel really wants to be writable: it patches its own
	 * instructions.
	 *
	 * MAP_PRIVATE means that the page won't be copied until a write is
	 * done to it.  This allows us to share untouched memory between
	 * Guests. */
	if (mmap(addr, len, PROT_READ|PROT_WRITE|PROT_EXEC,
		 MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED)
		return;

	/* pread does a seek and a read in one shot: saves a few lines. */
	r = pread(fd, addr, len, offset);

	/* A genuine failure sets errno, so let err() report it. */
	if (r < 0)
		err(1, "Reading offset %lu len %lu gave %zi", offset, len, r);

	/* A short read is not an error as far as pread is concerned, so errno
	 * still holds whatever the failed mmap above left in it.  Use errx()
	 * so we don't print that unrelated error string. */
	if ((unsigned long)r != len)
		errx(1, "Reading offset %lu len %lu gave %zi", offset, len, r);
}
189
165/* This routine takes an open vmlinux image, which is in ELF, and maps it into 190/* This routine takes an open vmlinux image, which is in ELF, and maps it into
166 * the Guest memory. ELF = Executable and Linkable Format, which is the format used 191 * the Guest memory. ELF = Executable and Linkable Format, which is the format used
167 * by all modern binaries on Linux including the kernel. 192 * by all modern binaries on Linux including the kernel.
@@ -176,7 +201,6 @@ static unsigned long entry_point(void *start, void *end,
176static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, 201static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
177 unsigned long *page_offset) 202 unsigned long *page_offset)
178{ 203{
179 void *addr;
180 Elf32_Phdr phdr[ehdr->e_phnum]; 204 Elf32_Phdr phdr[ehdr->e_phnum];
181 unsigned int i; 205 unsigned int i;
182 unsigned long start = -1UL, end = 0; 206 unsigned long start = -1UL, end = 0;
@@ -227,23 +251,9 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
227 if (phdr[i].p_paddr + phdr[i].p_filesz > end) 251 if (phdr[i].p_paddr + phdr[i].p_filesz > end)
228 end = phdr[i].p_paddr + phdr[i].p_filesz; 252 end = phdr[i].p_paddr + phdr[i].p_filesz;
229 253
230 /* We map this section of the file at its physical address. We 254 /* We map this section of the file at its physical address. */
231 * map it read & write even if the header says this segment is 255 map_at(elf_fd, (void *)phdr[i].p_paddr,
232 * read-only. The kernel really wants to be writable: it 256 phdr[i].p_offset, phdr[i].p_filesz);
233 * patches its own instructions which would normally be
234 * read-only.
235 *
236 * MAP_PRIVATE means that the page won't be copied until a
237 * write is done to it. This allows us to share much of the
238 * kernel memory between Guests. */
239 addr = mmap((void *)phdr[i].p_paddr,
240 phdr[i].p_filesz,
241 PROT_READ|PROT_WRITE|PROT_EXEC,
242 MAP_FIXED|MAP_PRIVATE,
243 elf_fd, phdr[i].p_offset);
244 if (addr != (void *)phdr[i].p_paddr)
245 err(1, "Mmaping vmlinux seg %i gave %p not %p",
246 i, addr, (void *)phdr[i].p_paddr);
247 } 257 }
248 258
249 return entry_point((void *)start, (void *)end, *page_offset); 259 return entry_point((void *)start, (void *)end, *page_offset);
@@ -402,27 +412,20 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
402 int ifd; 412 int ifd;
403 struct stat st; 413 struct stat st;
404 unsigned long len; 414 unsigned long len;
405 void *iaddr;
406 415
407 ifd = open_or_die(name, O_RDONLY); 416 ifd = open_or_die(name, O_RDONLY);
408 /* fstat() is needed to get the file size. */ 417 /* fstat() is needed to get the file size. */
409 if (fstat(ifd, &st) < 0) 418 if (fstat(ifd, &st) < 0)
410 err(1, "fstat() on initrd '%s'", name); 419 err(1, "fstat() on initrd '%s'", name);
411 420
412 /* The length needs to be rounded up to a page size: mmap needs the 421 /* We map the initrd at the top of memory, but mmap wants it to be
413 * address to be page aligned. */ 422 * page-aligned, so we round the size up for that. */
414 len = page_align(st.st_size); 423 len = page_align(st.st_size);
415 /* We map the initrd at the top of memory. */ 424 map_at(ifd, (void *)mem - len, 0, st.st_size);
416 iaddr = mmap((void *)mem - len, st.st_size,
417 PROT_READ|PROT_EXEC|PROT_WRITE,
418 MAP_FIXED|MAP_PRIVATE, ifd, 0);
419 if (iaddr != (void *)mem - len)
420 err(1, "Mmaping initrd '%s' returned %p not %p",
421 name, iaddr, (void *)mem - len);
422 /* Once a file is mapped, you can close the file descriptor. It's a 425 /* Once a file is mapped, you can close the file descriptor. It's a
423 * little odd, but quite useful. */ 426 * little odd, but quite useful. */
424 close(ifd); 427 close(ifd);
425 verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr); 428 verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len);
426 429
427 /* We return the initrd size. */ 430 /* We return the initrd size. */
428 return len; 431 return len;