diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/lguest/lguest.c | 69 |
1 files changed, 38 insertions, 31 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index d45c7f682b1b..4c1fc65a8b3d 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /*P:100 This is the Launcher code, a simple program which lays out the | 1 | /*P:100 This is the Launcher code, a simple program which lays out the |
2 | * "physical" memory for the new Guest by mapping the kernel image and the | 2 | * "physical" memory for the new Guest by mapping the kernel image and |
3 | * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. | 3 | * the virtual devices, then opens /dev/lguest to tell the kernel |
4 | :*/ | 4 | * about the Guest and control it. :*/ |
5 | #define _LARGEFILE64_SOURCE | 5 | #define _LARGEFILE64_SOURCE |
6 | #define _GNU_SOURCE | 6 | #define _GNU_SOURCE |
7 | #include <stdio.h> | 7 | #include <stdio.h> |
@@ -43,7 +43,7 @@ | |||
43 | #include "linux/virtio_console.h" | 43 | #include "linux/virtio_console.h" |
44 | #include "linux/virtio_ring.h" | 44 | #include "linux/virtio_ring.h" |
45 | #include "asm-x86/bootparam.h" | 45 | #include "asm-x86/bootparam.h" |
46 | /*L:110 We can ignore the 38 include files we need for this program, but I do | 46 | /*L:110 We can ignore the 39 include files we need for this program, but I do |
47 | * want to draw attention to the use of kernel-style types. | 47 | * want to draw attention to the use of kernel-style types. |
48 | * | 48 | * |
49 | * As Linus said, "C is a Spartan language, and so should your naming be." I | 49 | * As Linus said, "C is a Spartan language, and so should your naming be." I |
@@ -320,7 +320,7 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) | |||
320 | err(1, "Reading program headers"); | 320 | err(1, "Reading program headers"); |
321 | 321 | ||
322 | /* Try all the headers: there are usually only three. A read-only one, | 322 | /* Try all the headers: there are usually only three. A read-only one, |
323 | * a read-write one, and a "note" section which isn't loadable. */ | 323 | * a read-write one, and a "note" section which we don't load. */ |
324 | for (i = 0; i < ehdr->e_phnum; i++) { | 324 | for (i = 0; i < ehdr->e_phnum; i++) { |
325 | /* If this isn't a loadable segment, we ignore it */ | 325 | /* If this isn't a loadable segment, we ignore it */ |
326 | if (phdr[i].p_type != PT_LOAD) | 326 | if (phdr[i].p_type != PT_LOAD) |
@@ -387,7 +387,7 @@ static unsigned long load_kernel(int fd) | |||
387 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) | 387 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) |
388 | return map_elf(fd, &hdr); | 388 | return map_elf(fd, &hdr); |
389 | 389 | ||
390 | /* Otherwise we assume it's a bzImage, and try to unpack it */ | 390 | /* Otherwise we assume it's a bzImage, and try to load it. */ |
391 | return load_bzimage(fd); | 391 | return load_bzimage(fd); |
392 | } | 392 | } |
393 | 393 | ||
@@ -433,12 +433,12 @@ static unsigned long load_initrd(const char *name, unsigned long mem) | |||
433 | return len; | 433 | return len; |
434 | } | 434 | } |
435 | 435 | ||
436 | /* Once we know how much memory we have, we can construct simple linear page | 436 | /* Once we know how much memory we have we can construct simple linear page |
437 | * tables which set virtual == physical which will get the Guest far enough | 437 | * tables which set virtual == physical which will get the Guest far enough |
438 | * into the boot to create its own. | 438 | * into the boot to create its own. |
439 | * | 439 | * |
440 | * We lay them out of the way, just below the initrd (which is why we need to | 440 | * We lay them out of the way, just below the initrd (which is why we need to |
441 | * know its size). */ | 441 | * know its size here). */ |
442 | static unsigned long setup_pagetables(unsigned long mem, | 442 | static unsigned long setup_pagetables(unsigned long mem, |
443 | unsigned long initrd_size) | 443 | unsigned long initrd_size) |
444 | { | 444 | { |
@@ -850,7 +850,8 @@ static void handle_console_output(int fd, struct virtqueue *vq) | |||
850 | * | 850 | * |
851 | * Handling output for network is also simple: we get all the output buffers | 851 | * Handling output for network is also simple: we get all the output buffers |
852 | * and write them (ignoring the first element) to this device's file descriptor | 852 | * and write them (ignoring the first element) to this device's file descriptor |
853 | * (stdout). */ | 853 | * (/dev/net/tun). |
854 | */ | ||
854 | static void handle_net_output(int fd, struct virtqueue *vq) | 855 | static void handle_net_output(int fd, struct virtqueue *vq) |
855 | { | 856 | { |
856 | unsigned int head, out, in; | 857 | unsigned int head, out, in; |
@@ -924,7 +925,7 @@ static void enable_fd(int fd, struct virtqueue *vq) | |||
924 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 925 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); |
925 | } | 926 | } |
926 | 927 | ||
927 | /* Resetting a device is fairly easy. */ | 928 | /* When the Guest asks us to reset a device, it's is fairly easy. */ |
928 | static void reset_device(struct device *dev) | 929 | static void reset_device(struct device *dev) |
929 | { | 930 | { |
930 | struct virtqueue *vq; | 931 | struct virtqueue *vq; |
@@ -1003,8 +1004,8 @@ static void handle_input(int fd) | |||
1003 | if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) | 1004 | if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) |
1004 | break; | 1005 | break; |
1005 | 1006 | ||
1006 | /* Otherwise, call the device(s) which have readable | 1007 | /* Otherwise, call the device(s) which have readable file |
1007 | * file descriptors and a method of handling them. */ | 1008 | * descriptors and a method of handling them. */ |
1008 | for (i = devices.dev; i; i = i->next) { | 1009 | for (i = devices.dev; i; i = i->next) { |
1009 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { | 1010 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { |
1010 | int dev_fd; | 1011 | int dev_fd; |
@@ -1015,8 +1016,7 @@ static void handle_input(int fd) | |||
1015 | * should no longer service it. Networking and | 1016 | * should no longer service it. Networking and |
1016 | * console do this when there's no input | 1017 | * console do this when there's no input |
1017 | * buffers to deliver into. Console also uses | 1018 | * buffers to deliver into. Console also uses |
1018 | * it when it discovers that stdin is | 1019 | * it when it discovers that stdin is closed. */ |
1019 | * closed. */ | ||
1020 | FD_CLR(i->fd, &devices.infds); | 1020 | FD_CLR(i->fd, &devices.infds); |
1021 | /* Tell waker to ignore it too, by sending a | 1021 | /* Tell waker to ignore it too, by sending a |
1022 | * negative fd number (-1, since 0 is a valid | 1022 | * negative fd number (-1, since 0 is a valid |
@@ -1033,7 +1033,8 @@ static void handle_input(int fd) | |||
1033 | * | 1033 | * |
1034 | * All devices need a descriptor so the Guest knows it exists, and a "struct | 1034 | * All devices need a descriptor so the Guest knows it exists, and a "struct |
1035 | * device" so the Launcher can keep track of it. We have common helper | 1035 | * device" so the Launcher can keep track of it. We have common helper |
1036 | * routines to allocate and manage them. */ | 1036 | * routines to allocate and manage them. |
1037 | */ | ||
1037 | 1038 | ||
1038 | /* The layout of the device page is a "struct lguest_device_desc" followed by a | 1039 | /* The layout of the device page is a "struct lguest_device_desc" followed by a |
1039 | * number of virtqueue descriptors, then two sets of feature bits, then an | 1040 | * number of virtqueue descriptors, then two sets of feature bits, then an |
@@ -1078,7 +1079,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1078 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); | 1079 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); |
1079 | void *p; | 1080 | void *p; |
1080 | 1081 | ||
1081 | /* First we need some pages for this virtqueue. */ | 1082 | /* First we need some memory for this virtqueue. */ |
1082 | pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) | 1083 | pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) |
1083 | / getpagesize(); | 1084 | / getpagesize(); |
1084 | p = get_pages(pages); | 1085 | p = get_pages(pages); |
@@ -1122,7 +1123,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1122 | } | 1123 | } |
1123 | 1124 | ||
1124 | /* The first half of the feature bitmask is for us to advertise features. The | 1125 | /* The first half of the feature bitmask is for us to advertise features. The |
1125 | * second half if for the Guest to accept features. */ | 1126 | * second half is for the Guest to accept features. */ |
1126 | static void add_feature(struct device *dev, unsigned bit) | 1127 | static void add_feature(struct device *dev, unsigned bit) |
1127 | { | 1128 | { |
1128 | u8 *features = get_feature_bits(dev); | 1129 | u8 *features = get_feature_bits(dev); |
@@ -1151,7 +1152,9 @@ static void set_config(struct device *dev, unsigned len, const void *conf) | |||
1151 | } | 1152 | } |
1152 | 1153 | ||
1153 | /* This routine does all the creation and setup of a new device, including | 1154 | /* This routine does all the creation and setup of a new device, including |
1154 | * calling new_dev_desc() to allocate the descriptor and device memory. */ | 1155 | * calling new_dev_desc() to allocate the descriptor and device memory. |
1156 | * | ||
1157 | * See what I mean about userspace being boring? */ | ||
1155 | static struct device *new_device(const char *name, u16 type, int fd, | 1158 | static struct device *new_device(const char *name, u16 type, int fd, |
1156 | bool (*handle_input)(int, struct device *)) | 1159 | bool (*handle_input)(int, struct device *)) |
1157 | { | 1160 | { |
@@ -1492,7 +1495,10 @@ static int io_thread(void *_dev) | |||
1492 | while (read(vblk->workpipe[0], &c, 1) == 1) { | 1495 | while (read(vblk->workpipe[0], &c, 1) == 1) { |
1493 | /* We acknowledge each request immediately to reduce latency, | 1496 | /* We acknowledge each request immediately to reduce latency, |
1494 | * rather than waiting until we've done them all. I haven't | 1497 | * rather than waiting until we've done them all. I haven't |
1495 | * measured to see if it makes any difference. */ | 1498 | * measured to see if it makes any difference. |
1499 | * | ||
1500 | * That would be an interesting test, wouldn't it? You could | ||
1501 | * also try having more than one I/O thread. */ | ||
1496 | while (service_io(dev)) | 1502 | while (service_io(dev)) |
1497 | write(vblk->done_fd, &c, 1); | 1503 | write(vblk->done_fd, &c, 1); |
1498 | } | 1504 | } |
@@ -1500,7 +1506,7 @@ static int io_thread(void *_dev) | |||
1500 | } | 1506 | } |
1501 | 1507 | ||
1502 | /* Now we've seen the I/O thread, we return to the Launcher to see what happens | 1508 | /* Now we've seen the I/O thread, we return to the Launcher to see what happens |
1503 | * when the thread tells us it's completed some I/O. */ | 1509 | * when that thread tells us it's completed some I/O. */ |
1504 | static bool handle_io_finish(int fd, struct device *dev) | 1510 | static bool handle_io_finish(int fd, struct device *dev) |
1505 | { | 1511 | { |
1506 | char c; | 1512 | char c; |
@@ -1572,11 +1578,12 @@ static void setup_block_file(const char *filename) | |||
1572 | * more work. */ | 1578 | * more work. */ |
1573 | pipe(vblk->workpipe); | 1579 | pipe(vblk->workpipe); |
1574 | 1580 | ||
1575 | /* Create stack for thread and run it */ | 1581 | /* Create stack for thread and run it. Since stack grows upwards, we |
1582 | * point the stack pointer to the end of this region. */ | ||
1576 | stack = malloc(32768); | 1583 | stack = malloc(32768); |
1577 | /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from | 1584 | /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from |
1578 | * becoming a zombie. */ | 1585 | * becoming a zombie. */ |
1579 | if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) | 1586 | if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) |
1580 | err(1, "Creating clone"); | 1587 | err(1, "Creating clone"); |
1581 | 1588 | ||
1582 | /* We don't need to keep the I/O thread's end of the pipes open. */ | 1589 | /* We don't need to keep the I/O thread's end of the pipes open. */ |
@@ -1586,14 +1593,14 @@ static void setup_block_file(const char *filename) | |||
1586 | verbose("device %u: virtblock %llu sectors\n", | 1593 | verbose("device %u: virtblock %llu sectors\n", |
1587 | devices.device_num, le64_to_cpu(conf.capacity)); | 1594 | devices.device_num, le64_to_cpu(conf.capacity)); |
1588 | } | 1595 | } |
1589 | /* That's the end of device setup. :*/ | 1596 | /* That's the end of device setup. */ |
1590 | 1597 | ||
1591 | /* Reboot */ | 1598 | /*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ |
1592 | static void __attribute__((noreturn)) restart_guest(void) | 1599 | static void __attribute__((noreturn)) restart_guest(void) |
1593 | { | 1600 | { |
1594 | unsigned int i; | 1601 | unsigned int i; |
1595 | 1602 | ||
1596 | /* Closing pipes causes the waker thread and io_threads to die, and | 1603 | /* Closing pipes causes the Waker thread and io_threads to die, and |
1597 | * closing /dev/lguest cleans up the Guest. Since we don't track all | 1604 | * closing /dev/lguest cleans up the Guest. Since we don't track all |
1598 | * open fds, we simply close everything beyond stderr. */ | 1605 | * open fds, we simply close everything beyond stderr. */ |
1599 | for (i = 3; i < FD_SETSIZE; i++) | 1606 | for (i = 3; i < FD_SETSIZE; i++) |
@@ -1602,7 +1609,7 @@ static void __attribute__((noreturn)) restart_guest(void) | |||
1602 | err(1, "Could not exec %s", main_args[0]); | 1609 | err(1, "Could not exec %s", main_args[0]); |
1603 | } | 1610 | } |
1604 | 1611 | ||
1605 | /*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves | 1612 | /*L:220 Finally we reach the core of the Launcher which runs the Guest, serves |
1606 | * its input and output, and finally, lays it to rest. */ | 1613 | * its input and output, and finally, lays it to rest. */ |
1607 | static void __attribute__((noreturn)) run_guest(int lguest_fd) | 1614 | static void __attribute__((noreturn)) run_guest(int lguest_fd) |
1608 | { | 1615 | { |
@@ -1643,7 +1650,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) | |||
1643 | err(1, "Resetting break"); | 1650 | err(1, "Resetting break"); |
1644 | } | 1651 | } |
1645 | } | 1652 | } |
1646 | /* | 1653 | /*L:240 |
1647 | * This is the end of the Launcher. The good news: we are over halfway | 1654 | * This is the end of the Launcher. The good news: we are over halfway |
1648 | * through! The bad news: the most fiendish part of the code still lies ahead | 1655 | * through! The bad news: the most fiendish part of the code still lies ahead |
1649 | * of us. | 1656 | * of us. |
@@ -1690,8 +1697,8 @@ int main(int argc, char *argv[]) | |||
1690 | * device receive input from a file descriptor, we keep an fdset | 1697 | * device receive input from a file descriptor, we keep an fdset |
1691 | * (infds) and the maximum fd number (max_infd) with the head of the | 1698 | * (infds) and the maximum fd number (max_infd) with the head of the |
1692 | * list. We also keep a pointer to the last device. Finally, we keep | 1699 | * list. We also keep a pointer to the last device. Finally, we keep |
1693 | * the next interrupt number to hand out (1: remember that 0 is used by | 1700 | * the next interrupt number to use for devices (1: remember that 0 is |
1694 | * the timer). */ | 1701 | * used by the timer). */ |
1695 | FD_ZERO(&devices.infds); | 1702 | FD_ZERO(&devices.infds); |
1696 | devices.max_infd = -1; | 1703 | devices.max_infd = -1; |
1697 | devices.lastdev = NULL; | 1704 | devices.lastdev = NULL; |
@@ -1792,8 +1799,8 @@ int main(int argc, char *argv[]) | |||
1792 | lguest_fd = tell_kernel(pgdir, start); | 1799 | lguest_fd = tell_kernel(pgdir, start); |
1793 | 1800 | ||
1794 | /* We fork off a child process, which wakes the Launcher whenever one | 1801 | /* We fork off a child process, which wakes the Launcher whenever one |
1795 | * of the input file descriptors needs attention. Otherwise we would | 1802 | * of the input file descriptors needs attention. We call this the |
1796 | * run the Guest until it tries to output something. */ | 1803 | * Waker, and we'll cover it in a moment. */ |
1797 | waker_fd = setup_waker(lguest_fd); | 1804 | waker_fd = setup_waker(lguest_fd); |
1798 | 1805 | ||
1799 | /* Finally, run the Guest. This doesn't return. */ | 1806 | /* Finally, run the Guest. This doesn't return. */ |