aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/lguest/lguest.c
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/lguest/lguest.c')
-rw-r--r--Documentation/lguest/lguest.c77
1 files changed, 43 insertions, 34 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 0f23d67f958f..4c1fc65a8b3d 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1,7 +1,7 @@
1/*P:100 This is the Launcher code, a simple program which lays out the 1/*P:100 This is the Launcher code, a simple program which lays out the
2 * "physical" memory for the new Guest by mapping the kernel image and the 2 * "physical" memory for the new Guest by mapping the kernel image and
3 * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. 3 * the virtual devices, then opens /dev/lguest to tell the kernel
4:*/ 4 * about the Guest and control it. :*/
5#define _LARGEFILE64_SOURCE 5#define _LARGEFILE64_SOURCE
6#define _GNU_SOURCE 6#define _GNU_SOURCE
7#include <stdio.h> 7#include <stdio.h>
@@ -43,7 +43,7 @@
43#include "linux/virtio_console.h" 43#include "linux/virtio_console.h"
44#include "linux/virtio_ring.h" 44#include "linux/virtio_ring.h"
45#include "asm-x86/bootparam.h" 45#include "asm-x86/bootparam.h"
46/*L:110 We can ignore the 38 include files we need for this program, but I do 46/*L:110 We can ignore the 39 include files we need for this program, but I do
47 * want to draw attention to the use of kernel-style types. 47 * want to draw attention to the use of kernel-style types.
48 * 48 *
49 * As Linus said, "C is a Spartan language, and so should your naming be." I 49 * As Linus said, "C is a Spartan language, and so should your naming be." I
@@ -320,7 +320,7 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
320 err(1, "Reading program headers"); 320 err(1, "Reading program headers");
321 321
322 /* Try all the headers: there are usually only three. A read-only one, 322 /* Try all the headers: there are usually only three. A read-only one,
323 * a read-write one, and a "note" section which isn't loadable. */ 323 * a read-write one, and a "note" section which we don't load. */
324 for (i = 0; i < ehdr->e_phnum; i++) { 324 for (i = 0; i < ehdr->e_phnum; i++) {
325 /* If this isn't a loadable segment, we ignore it */ 325 /* If this isn't a loadable segment, we ignore it */
326 if (phdr[i].p_type != PT_LOAD) 326 if (phdr[i].p_type != PT_LOAD)
@@ -387,7 +387,7 @@ static unsigned long load_kernel(int fd)
387 if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) 387 if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
388 return map_elf(fd, &hdr); 388 return map_elf(fd, &hdr);
389 389
390 /* Otherwise we assume it's a bzImage, and try to unpack it */ 390 /* Otherwise we assume it's a bzImage, and try to load it. */
391 return load_bzimage(fd); 391 return load_bzimage(fd);
392} 392}
393 393
@@ -433,12 +433,12 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
433 return len; 433 return len;
434} 434}
435 435
436/* Once we know how much memory we have, we can construct simple linear page 436/* Once we know how much memory we have we can construct simple linear page
437 * tables which set virtual == physical which will get the Guest far enough 437 * tables which set virtual == physical which will get the Guest far enough
438 * into the boot to create its own. 438 * into the boot to create its own.
439 * 439 *
440 * We lay them out of the way, just below the initrd (which is why we need to 440 * We lay them out of the way, just below the initrd (which is why we need to
441 * know its size). */ 441 * know its size here). */
442static unsigned long setup_pagetables(unsigned long mem, 442static unsigned long setup_pagetables(unsigned long mem,
443 unsigned long initrd_size) 443 unsigned long initrd_size)
444{ 444{
@@ -486,9 +486,12 @@ static void concat(char *dst, char *args[])
486 unsigned int i, len = 0; 486 unsigned int i, len = 0;
487 487
488 for (i = 0; args[i]; i++) { 488 for (i = 0; args[i]; i++) {
489 if (i) {
490 strcat(dst+len, " ");
491 len++;
492 }
489 strcpy(dst+len, args[i]); 493 strcpy(dst+len, args[i]);
490 strcat(dst+len, " "); 494 len += strlen(args[i]);
491 len += strlen(args[i]) + 1;
492 } 495 }
493 /* In case it's empty. */ 496 /* In case it's empty. */
494 dst[len] = '\0'; 497 dst[len] = '\0';
@@ -847,7 +850,8 @@ static void handle_console_output(int fd, struct virtqueue *vq)
847 * 850 *
848 * Handling output for network is also simple: we get all the output buffers 851 * Handling output for network is also simple: we get all the output buffers
849 * and write them (ignoring the first element) to this device's file descriptor 852 * and write them (ignoring the first element) to this device's file descriptor
850 * (stdout). */ 853 * (/dev/net/tun).
854 */
851static void handle_net_output(int fd, struct virtqueue *vq) 855static void handle_net_output(int fd, struct virtqueue *vq)
852{ 856{
853 unsigned int head, out, in; 857 unsigned int head, out, in;
@@ -921,7 +925,7 @@ static void enable_fd(int fd, struct virtqueue *vq)
921 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); 925 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
922} 926}
923 927
924/* Resetting a device is fairly easy. */ 928/* When the Guest asks us to reset a device, it is fairly easy. */
925static void reset_device(struct device *dev) 929static void reset_device(struct device *dev)
926{ 930{
927 struct virtqueue *vq; 931 struct virtqueue *vq;
@@ -1000,8 +1004,8 @@ static void handle_input(int fd)
1000 if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) 1004 if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0)
1001 break; 1005 break;
1002 1006
1003 /* Otherwise, call the device(s) which have readable 1007 /* Otherwise, call the device(s) which have readable file
1004 * file descriptors and a method of handling them. */ 1008 * descriptors and a method of handling them. */
1005 for (i = devices.dev; i; i = i->next) { 1009 for (i = devices.dev; i; i = i->next) {
1006 if (i->handle_input && FD_ISSET(i->fd, &fds)) { 1010 if (i->handle_input && FD_ISSET(i->fd, &fds)) {
1007 int dev_fd; 1011 int dev_fd;
@@ -1012,8 +1016,7 @@ static void handle_input(int fd)
1012 * should no longer service it. Networking and 1016 * should no longer service it. Networking and
1013 * console do this when there's no input 1017 * console do this when there's no input
1014 * buffers to deliver into. Console also uses 1018 * buffers to deliver into. Console also uses
1015 * it when it discovers that stdin is 1019 * it when it discovers that stdin is closed. */
1016 * closed. */
1017 FD_CLR(i->fd, &devices.infds); 1020 FD_CLR(i->fd, &devices.infds);
1018 /* Tell waker to ignore it too, by sending a 1021 /* Tell waker to ignore it too, by sending a
1019 * negative fd number (-1, since 0 is a valid 1022 * negative fd number (-1, since 0 is a valid
@@ -1030,7 +1033,8 @@ static void handle_input(int fd)
1030 * 1033 *
1031 * All devices need a descriptor so the Guest knows it exists, and a "struct 1034 * All devices need a descriptor so the Guest knows it exists, and a "struct
1032 * device" so the Launcher can keep track of it. We have common helper 1035 * device" so the Launcher can keep track of it. We have common helper
1033 * routines to allocate and manage them. */ 1036 * routines to allocate and manage them.
1037 */
1034 1038
1035/* The layout of the device page is a "struct lguest_device_desc" followed by a 1039/* The layout of the device page is a "struct lguest_device_desc" followed by a
1036 * number of virtqueue descriptors, then two sets of feature bits, then an 1040 * number of virtqueue descriptors, then two sets of feature bits, then an
@@ -1075,7 +1079,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1075 struct virtqueue **i, *vq = malloc(sizeof(*vq)); 1079 struct virtqueue **i, *vq = malloc(sizeof(*vq));
1076 void *p; 1080 void *p;
1077 1081
1078 /* First we need some pages for this virtqueue. */ 1082 /* First we need some memory for this virtqueue. */
1079 pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) 1083 pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1)
1080 / getpagesize(); 1084 / getpagesize();
1081 p = get_pages(pages); 1085 p = get_pages(pages);
@@ -1119,7 +1123,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1119} 1123}
1120 1124
1121/* The first half of the feature bitmask is for us to advertise features. The 1125/* The first half of the feature bitmask is for us to advertise features. The
1122 * second half if for the Guest to accept features. */ 1126 * second half is for the Guest to accept features. */
1123static void add_feature(struct device *dev, unsigned bit) 1127static void add_feature(struct device *dev, unsigned bit)
1124{ 1128{
1125 u8 *features = get_feature_bits(dev); 1129 u8 *features = get_feature_bits(dev);
@@ -1148,7 +1152,9 @@ static void set_config(struct device *dev, unsigned len, const void *conf)
1148} 1152}
1149 1153
1150/* This routine does all the creation and setup of a new device, including 1154/* This routine does all the creation and setup of a new device, including
1151 * calling new_dev_desc() to allocate the descriptor and device memory. */ 1155 * calling new_dev_desc() to allocate the descriptor and device memory.
1156 *
1157 * See what I mean about userspace being boring? */
1152static struct device *new_device(const char *name, u16 type, int fd, 1158static struct device *new_device(const char *name, u16 type, int fd,
1153 bool (*handle_input)(int, struct device *)) 1159 bool (*handle_input)(int, struct device *))
1154{ 1160{
@@ -1380,7 +1386,6 @@ struct vblk_info
1380 * Launcher triggers interrupt to Guest. */ 1386 * Launcher triggers interrupt to Guest. */
1381 int done_fd; 1387 int done_fd;
1382}; 1388};
1383/*:*/
1384 1389
1385/*L:210 1390/*L:210
1386 * The Disk 1391 * The Disk
@@ -1490,7 +1495,10 @@ static int io_thread(void *_dev)
1490 while (read(vblk->workpipe[0], &c, 1) == 1) { 1495 while (read(vblk->workpipe[0], &c, 1) == 1) {
1491 /* We acknowledge each request immediately to reduce latency, 1496 /* We acknowledge each request immediately to reduce latency,
1492 * rather than waiting until we've done them all. I haven't 1497 * rather than waiting until we've done them all. I haven't
1493 * measured to see if it makes any difference. */ 1498 * measured to see if it makes any difference.
1499 *
1500 * That would be an interesting test, wouldn't it? You could
1501 * also try having more than one I/O thread. */
1494 while (service_io(dev)) 1502 while (service_io(dev))
1495 write(vblk->done_fd, &c, 1); 1503 write(vblk->done_fd, &c, 1);
1496 } 1504 }
@@ -1498,7 +1506,7 @@ static int io_thread(void *_dev)
1498} 1506}
1499 1507
1500/* Now we've seen the I/O thread, we return to the Launcher to see what happens 1508/* Now we've seen the I/O thread, we return to the Launcher to see what happens
1501 * when the thread tells us it's completed some I/O. */ 1509 * when that thread tells us it's completed some I/O. */
1502static bool handle_io_finish(int fd, struct device *dev) 1510static bool handle_io_finish(int fd, struct device *dev)
1503{ 1511{
1504 char c; 1512 char c;
@@ -1570,11 +1578,12 @@ static void setup_block_file(const char *filename)
1570 * more work. */ 1578 * more work. */
1571 pipe(vblk->workpipe); 1579 pipe(vblk->workpipe);
1572 1580
1573 /* Create stack for thread and run it */ 1581 /* Create stack for thread and run it. Since the stack grows downwards, we
1582 * point the stack pointer to the end of this region. */
1574 stack = malloc(32768); 1583 stack = malloc(32768);
1575 /* SIGCHLD - We don't "wait" for our cloned thread, so prevent it from 1584 /* SIGCHLD - We don't "wait" for our cloned thread, so prevent it from
1576 * becoming a zombie. */ 1585 * becoming a zombie. */
1577 if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) 1586 if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1)
1578 err(1, "Creating clone"); 1587 err(1, "Creating clone");
1579 1588
1580 /* We don't need to keep the I/O thread's end of the pipes open. */ 1589 /* We don't need to keep the I/O thread's end of the pipes open. */
@@ -1584,14 +1593,14 @@ static void setup_block_file(const char *filename)
1584 verbose("device %u: virtblock %llu sectors\n", 1593 verbose("device %u: virtblock %llu sectors\n",
1585 devices.device_num, le64_to_cpu(conf.capacity)); 1594 devices.device_num, le64_to_cpu(conf.capacity));
1586} 1595}
1587/* That's the end of device setup. :*/ 1596/* That's the end of device setup. */
1588 1597
1589/* Reboot */ 1598/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */
1590static void __attribute__((noreturn)) restart_guest(void) 1599static void __attribute__((noreturn)) restart_guest(void)
1591{ 1600{
1592 unsigned int i; 1601 unsigned int i;
1593 1602
1594 /* Closing pipes causes the waker thread and io_threads to die, and 1603 /* Closing pipes causes the Waker thread and io_threads to die, and
1595 * closing /dev/lguest cleans up the Guest. Since we don't track all 1604 * closing /dev/lguest cleans up the Guest. Since we don't track all
1596 * open fds, we simply close everything beyond stderr. */ 1605 * open fds, we simply close everything beyond stderr. */
1597 for (i = 3; i < FD_SETSIZE; i++) 1606 for (i = 3; i < FD_SETSIZE; i++)
@@ -1600,7 +1609,7 @@ static void __attribute__((noreturn)) restart_guest(void)
1600 err(1, "Could not exec %s", main_args[0]); 1609 err(1, "Could not exec %s", main_args[0]);
1601} 1610}
1602 1611
1603/*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves 1612/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves
1604 * its input and output, and finally, lays it to rest. */ 1613 * its input and output, and finally, lays it to rest. */
1605static void __attribute__((noreturn)) run_guest(int lguest_fd) 1614static void __attribute__((noreturn)) run_guest(int lguest_fd)
1606{ 1615{
@@ -1641,7 +1650,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
1641 err(1, "Resetting break"); 1650 err(1, "Resetting break");
1642 } 1651 }
1643} 1652}
1644/* 1653/*L:240
1645 * This is the end of the Launcher. The good news: we are over halfway 1654 * This is the end of the Launcher. The good news: we are over halfway
1646 * through! The bad news: the most fiendish part of the code still lies ahead 1655 * through! The bad news: the most fiendish part of the code still lies ahead
1647 * of us. 1656 * of us.
@@ -1688,8 +1697,8 @@ int main(int argc, char *argv[])
1688 * device receive input from a file descriptor, we keep an fdset 1697 * device receive input from a file descriptor, we keep an fdset
1689 * (infds) and the maximum fd number (max_infd) with the head of the 1698 * (infds) and the maximum fd number (max_infd) with the head of the
1690 * list. We also keep a pointer to the last device. Finally, we keep 1699 * list. We also keep a pointer to the last device. Finally, we keep
1691 * the next interrupt number to hand out (1: remember that 0 is used by 1700 * the next interrupt number to use for devices (1: remember that 0 is
1692 * the timer). */ 1701 * used by the timer). */
1693 FD_ZERO(&devices.infds); 1702 FD_ZERO(&devices.infds);
1694 devices.max_infd = -1; 1703 devices.max_infd = -1;
1695 devices.lastdev = NULL; 1704 devices.lastdev = NULL;
@@ -1790,8 +1799,8 @@ int main(int argc, char *argv[])
1790 lguest_fd = tell_kernel(pgdir, start); 1799 lguest_fd = tell_kernel(pgdir, start);
1791 1800
1792 /* We fork off a child process, which wakes the Launcher whenever one 1801 /* We fork off a child process, which wakes the Launcher whenever one
1793 * of the input file descriptors needs attention. Otherwise we would 1802 * of the input file descriptors needs attention. We call this the
1794 * run the Guest until it tries to output something. */ 1803 * Waker, and we'll cover it in a moment. */
1795 waker_fd = setup_waker(lguest_fd); 1804 waker_fd = setup_waker(lguest_fd);
1796 1805
1797 /* Finally, run the Guest. This doesn't return. */ 1806 /* Finally, run the Guest. This doesn't return. */