aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/lguest/lguest.c
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/lguest/lguest.c')
-rw-r--r--Documentation/lguest/lguest.c69
1 files changed, 38 insertions, 31 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index d45c7f682b1b..4c1fc65a8b3d 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1,7 +1,7 @@
1/*P:100 This is the Launcher code, a simple program which lays out the 1/*P:100 This is the Launcher code, a simple program which lays out the
2 * "physical" memory for the new Guest by mapping the kernel image and the 2 * "physical" memory for the new Guest by mapping the kernel image and
3 * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. 3 * the virtual devices, then opens /dev/lguest to tell the kernel
4:*/ 4 * about the Guest and control it. :*/
5#define _LARGEFILE64_SOURCE 5#define _LARGEFILE64_SOURCE
6#define _GNU_SOURCE 6#define _GNU_SOURCE
7#include <stdio.h> 7#include <stdio.h>
@@ -43,7 +43,7 @@
43#include "linux/virtio_console.h" 43#include "linux/virtio_console.h"
44#include "linux/virtio_ring.h" 44#include "linux/virtio_ring.h"
45#include "asm-x86/bootparam.h" 45#include "asm-x86/bootparam.h"
46/*L:110 We can ignore the 38 include files we need for this program, but I do 46/*L:110 We can ignore the 39 include files we need for this program, but I do
47 * want to draw attention to the use of kernel-style types. 47 * want to draw attention to the use of kernel-style types.
48 * 48 *
49 * As Linus said, "C is a Spartan language, and so should your naming be." I 49 * As Linus said, "C is a Spartan language, and so should your naming be." I
@@ -320,7 +320,7 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
320 err(1, "Reading program headers"); 320 err(1, "Reading program headers");
321 321
322 /* Try all the headers: there are usually only three. A read-only one, 322 /* Try all the headers: there are usually only three. A read-only one,
323 * a read-write one, and a "note" section which isn't loadable. */ 323 * a read-write one, and a "note" section which we don't load. */
324 for (i = 0; i < ehdr->e_phnum; i++) { 324 for (i = 0; i < ehdr->e_phnum; i++) {
325 /* If this isn't a loadable segment, we ignore it */ 325 /* If this isn't a loadable segment, we ignore it */
326 if (phdr[i].p_type != PT_LOAD) 326 if (phdr[i].p_type != PT_LOAD)
@@ -387,7 +387,7 @@ static unsigned long load_kernel(int fd)
387 if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) 387 if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
388 return map_elf(fd, &hdr); 388 return map_elf(fd, &hdr);
389 389
390 /* Otherwise we assume it's a bzImage, and try to unpack it */ 390 /* Otherwise we assume it's a bzImage, and try to load it. */
391 return load_bzimage(fd); 391 return load_bzimage(fd);
392} 392}
393 393
@@ -433,12 +433,12 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
433 return len; 433 return len;
434} 434}
435 435
436/* Once we know how much memory we have, we can construct simple linear page 436/* Once we know how much memory we have we can construct simple linear page
437 * tables which set virtual == physical which will get the Guest far enough 437 * tables which set virtual == physical which will get the Guest far enough
438 * into the boot to create its own. 438 * into the boot to create its own.
439 * 439 *
440 * We lay them out of the way, just below the initrd (which is why we need to 440 * We lay them out of the way, just below the initrd (which is why we need to
441 * know its size). */ 441 * know its size here). */
442static unsigned long setup_pagetables(unsigned long mem, 442static unsigned long setup_pagetables(unsigned long mem,
443 unsigned long initrd_size) 443 unsigned long initrd_size)
444{ 444{
@@ -850,7 +850,8 @@ static void handle_console_output(int fd, struct virtqueue *vq)
850 * 850 *
851 * Handling output for network is also simple: we get all the output buffers 851 * Handling output for network is also simple: we get all the output buffers
852 * and write them (ignoring the first element) to this device's file descriptor 852 * and write them (ignoring the first element) to this device's file descriptor
853 * (stdout). */ 853 * (/dev/net/tun).
854 */
854static void handle_net_output(int fd, struct virtqueue *vq) 855static void handle_net_output(int fd, struct virtqueue *vq)
855{ 856{
856 unsigned int head, out, in; 857 unsigned int head, out, in;
@@ -924,7 +925,7 @@ static void enable_fd(int fd, struct virtqueue *vq)
924 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); 925 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
925} 926}
926 927
927/* Resetting a device is fairly easy. */ 928/* When the Guest asks us to reset a device, it's is fairly easy. */
928static void reset_device(struct device *dev) 929static void reset_device(struct device *dev)
929{ 930{
930 struct virtqueue *vq; 931 struct virtqueue *vq;
@@ -1003,8 +1004,8 @@ static void handle_input(int fd)
1003 if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) 1004 if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0)
1004 break; 1005 break;
1005 1006
1006 /* Otherwise, call the device(s) which have readable 1007 /* Otherwise, call the device(s) which have readable file
1007 * file descriptors and a method of handling them. */ 1008 * descriptors and a method of handling them. */
1008 for (i = devices.dev; i; i = i->next) { 1009 for (i = devices.dev; i; i = i->next) {
1009 if (i->handle_input && FD_ISSET(i->fd, &fds)) { 1010 if (i->handle_input && FD_ISSET(i->fd, &fds)) {
1010 int dev_fd; 1011 int dev_fd;
@@ -1015,8 +1016,7 @@ static void handle_input(int fd)
1015 * should no longer service it. Networking and 1016 * should no longer service it. Networking and
1016 * console do this when there's no input 1017 * console do this when there's no input
1017 * buffers to deliver into. Console also uses 1018 * buffers to deliver into. Console also uses
1018 * it when it discovers that stdin is 1019 * it when it discovers that stdin is closed. */
1019 * closed. */
1020 FD_CLR(i->fd, &devices.infds); 1020 FD_CLR(i->fd, &devices.infds);
1021 /* Tell waker to ignore it too, by sending a 1021 /* Tell waker to ignore it too, by sending a
1022 * negative fd number (-1, since 0 is a valid 1022 * negative fd number (-1, since 0 is a valid
@@ -1033,7 +1033,8 @@ static void handle_input(int fd)
1033 * 1033 *
1034 * All devices need a descriptor so the Guest knows it exists, and a "struct 1034 * All devices need a descriptor so the Guest knows it exists, and a "struct
1035 * device" so the Launcher can keep track of it. We have common helper 1035 * device" so the Launcher can keep track of it. We have common helper
1036 * routines to allocate and manage them. */ 1036 * routines to allocate and manage them.
1037 */
1037 1038
1038/* The layout of the device page is a "struct lguest_device_desc" followed by a 1039/* The layout of the device page is a "struct lguest_device_desc" followed by a
1039 * number of virtqueue descriptors, then two sets of feature bits, then an 1040 * number of virtqueue descriptors, then two sets of feature bits, then an
@@ -1078,7 +1079,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1078 struct virtqueue **i, *vq = malloc(sizeof(*vq)); 1079 struct virtqueue **i, *vq = malloc(sizeof(*vq));
1079 void *p; 1080 void *p;
1080 1081
1081 /* First we need some pages for this virtqueue. */ 1082 /* First we need some memory for this virtqueue. */
1082 pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) 1083 pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1)
1083 / getpagesize(); 1084 / getpagesize();
1084 p = get_pages(pages); 1085 p = get_pages(pages);
@@ -1122,7 +1123,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1122} 1123}
1123 1124
1124/* The first half of the feature bitmask is for us to advertise features. The 1125/* The first half of the feature bitmask is for us to advertise features. The
1125 * second half if for the Guest to accept features. */ 1126 * second half is for the Guest to accept features. */
1126static void add_feature(struct device *dev, unsigned bit) 1127static void add_feature(struct device *dev, unsigned bit)
1127{ 1128{
1128 u8 *features = get_feature_bits(dev); 1129 u8 *features = get_feature_bits(dev);
@@ -1151,7 +1152,9 @@ static void set_config(struct device *dev, unsigned len, const void *conf)
1151} 1152}
1152 1153
1153/* This routine does all the creation and setup of a new device, including 1154/* This routine does all the creation and setup of a new device, including
1154 * calling new_dev_desc() to allocate the descriptor and device memory. */ 1155 * calling new_dev_desc() to allocate the descriptor and device memory.
1156 *
1157 * See what I mean about userspace being boring? */
1155static struct device *new_device(const char *name, u16 type, int fd, 1158static struct device *new_device(const char *name, u16 type, int fd,
1156 bool (*handle_input)(int, struct device *)) 1159 bool (*handle_input)(int, struct device *))
1157{ 1160{
@@ -1492,7 +1495,10 @@ static int io_thread(void *_dev)
1492 while (read(vblk->workpipe[0], &c, 1) == 1) { 1495 while (read(vblk->workpipe[0], &c, 1) == 1) {
1493 /* We acknowledge each request immediately to reduce latency, 1496 /* We acknowledge each request immediately to reduce latency,
1494 * rather than waiting until we've done them all. I haven't 1497 * rather than waiting until we've done them all. I haven't
1495 * measured to see if it makes any difference. */ 1498 * measured to see if it makes any difference.
1499 *
1500 * That would be an interesting test, wouldn't it? You could
1501 * also try having more than one I/O thread. */
1496 while (service_io(dev)) 1502 while (service_io(dev))
1497 write(vblk->done_fd, &c, 1); 1503 write(vblk->done_fd, &c, 1);
1498 } 1504 }
@@ -1500,7 +1506,7 @@ static int io_thread(void *_dev)
1500} 1506}
1501 1507
1502/* Now we've seen the I/O thread, we return to the Launcher to see what happens 1508/* Now we've seen the I/O thread, we return to the Launcher to see what happens
1503 * when the thread tells us it's completed some I/O. */ 1509 * when that thread tells us it's completed some I/O. */
1504static bool handle_io_finish(int fd, struct device *dev) 1510static bool handle_io_finish(int fd, struct device *dev)
1505{ 1511{
1506 char c; 1512 char c;
@@ -1572,11 +1578,12 @@ static void setup_block_file(const char *filename)
1572 * more work. */ 1578 * more work. */
1573 pipe(vblk->workpipe); 1579 pipe(vblk->workpipe);
1574 1580
1575 /* Create stack for thread and run it */ 1581 /* Create stack for thread and run it. Since stack grows upwards, we
1582 * point the stack pointer to the end of this region. */
1576 stack = malloc(32768); 1583 stack = malloc(32768);
1577 /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from 1584 /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from
1578 * becoming a zombie. */ 1585 * becoming a zombie. */
1579 if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) 1586 if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1)
1580 err(1, "Creating clone"); 1587 err(1, "Creating clone");
1581 1588
1582 /* We don't need to keep the I/O thread's end of the pipes open. */ 1589 /* We don't need to keep the I/O thread's end of the pipes open. */
@@ -1586,14 +1593,14 @@ static void setup_block_file(const char *filename)
1586 verbose("device %u: virtblock %llu sectors\n", 1593 verbose("device %u: virtblock %llu sectors\n",
1587 devices.device_num, le64_to_cpu(conf.capacity)); 1594 devices.device_num, le64_to_cpu(conf.capacity));
1588} 1595}
1589/* That's the end of device setup. :*/ 1596/* That's the end of device setup. */
1590 1597
1591/* Reboot */ 1598/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */
1592static void __attribute__((noreturn)) restart_guest(void) 1599static void __attribute__((noreturn)) restart_guest(void)
1593{ 1600{
1594 unsigned int i; 1601 unsigned int i;
1595 1602
1596 /* Closing pipes causes the waker thread and io_threads to die, and 1603 /* Closing pipes causes the Waker thread and io_threads to die, and
1597 * closing /dev/lguest cleans up the Guest. Since we don't track all 1604 * closing /dev/lguest cleans up the Guest. Since we don't track all
1598 * open fds, we simply close everything beyond stderr. */ 1605 * open fds, we simply close everything beyond stderr. */
1599 for (i = 3; i < FD_SETSIZE; i++) 1606 for (i = 3; i < FD_SETSIZE; i++)
@@ -1602,7 +1609,7 @@ static void __attribute__((noreturn)) restart_guest(void)
1602 err(1, "Could not exec %s", main_args[0]); 1609 err(1, "Could not exec %s", main_args[0]);
1603} 1610}
1604 1611
1605/*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves 1612/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves
1606 * its input and output, and finally, lays it to rest. */ 1613 * its input and output, and finally, lays it to rest. */
1607static void __attribute__((noreturn)) run_guest(int lguest_fd) 1614static void __attribute__((noreturn)) run_guest(int lguest_fd)
1608{ 1615{
@@ -1643,7 +1650,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
1643 err(1, "Resetting break"); 1650 err(1, "Resetting break");
1644 } 1651 }
1645} 1652}
1646/* 1653/*L:240
1647 * This is the end of the Launcher. The good news: we are over halfway 1654 * This is the end of the Launcher. The good news: we are over halfway
1648 * through! The bad news: the most fiendish part of the code still lies ahead 1655 * through! The bad news: the most fiendish part of the code still lies ahead
1649 * of us. 1656 * of us.
@@ -1690,8 +1697,8 @@ int main(int argc, char *argv[])
1690 * device receive input from a file descriptor, we keep an fdset 1697 * device receive input from a file descriptor, we keep an fdset
1691 * (infds) and the maximum fd number (max_infd) with the head of the 1698 * (infds) and the maximum fd number (max_infd) with the head of the
1692 * list. We also keep a pointer to the last device. Finally, we keep 1699 * list. We also keep a pointer to the last device. Finally, we keep
1693 * the next interrupt number to hand out (1: remember that 0 is used by 1700 * the next interrupt number to use for devices (1: remember that 0 is
1694 * the timer). */ 1701 * used by the timer). */
1695 FD_ZERO(&devices.infds); 1702 FD_ZERO(&devices.infds);
1696 devices.max_infd = -1; 1703 devices.max_infd = -1;
1697 devices.lastdev = NULL; 1704 devices.lastdev = NULL;
@@ -1792,8 +1799,8 @@ int main(int argc, char *argv[])
1792 lguest_fd = tell_kernel(pgdir, start); 1799 lguest_fd = tell_kernel(pgdir, start);
1793 1800
1794 /* We fork off a child process, which wakes the Launcher whenever one 1801 /* We fork off a child process, which wakes the Launcher whenever one
1795 * of the input file descriptors needs attention. Otherwise we would 1802 * of the input file descriptors needs attention. We call this the
1796 * run the Guest until it tries to output something. */ 1803 * Waker, and we'll cover it in a moment. */
1797 waker_fd = setup_waker(lguest_fd); 1804 waker_fd = setup_waker(lguest_fd);
1798 1805
1799 /* Finally, run the Guest. This doesn't return. */ 1806 /* Finally, run the Guest. This doesn't return. */