diff options
Diffstat (limited to 'Documentation/lguest/lguest.c')
-rw-r--r-- | Documentation/lguest/lguest.c | 77 |
1 files changed, 43 insertions, 34 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 0f23d67f958f..4c1fc65a8b3d 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /*P:100 This is the Launcher code, a simple program which lays out the | 1 | /*P:100 This is the Launcher code, a simple program which lays out the |
2 | * "physical" memory for the new Guest by mapping the kernel image and the | 2 | * "physical" memory for the new Guest by mapping the kernel image and |
3 | * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. | 3 | * the virtual devices, then opens /dev/lguest to tell the kernel |
4 | :*/ | 4 | * about the Guest and control it. :*/ |
5 | #define _LARGEFILE64_SOURCE | 5 | #define _LARGEFILE64_SOURCE |
6 | #define _GNU_SOURCE | 6 | #define _GNU_SOURCE |
7 | #include <stdio.h> | 7 | #include <stdio.h> |
@@ -43,7 +43,7 @@ | |||
43 | #include "linux/virtio_console.h" | 43 | #include "linux/virtio_console.h" |
44 | #include "linux/virtio_ring.h" | 44 | #include "linux/virtio_ring.h" |
45 | #include "asm-x86/bootparam.h" | 45 | #include "asm-x86/bootparam.h" |
46 | /*L:110 We can ignore the 38 include files we need for this program, but I do | 46 | /*L:110 We can ignore the 39 include files we need for this program, but I do |
47 | * want to draw attention to the use of kernel-style types. | 47 | * want to draw attention to the use of kernel-style types. |
48 | * | 48 | * |
49 | * As Linus said, "C is a Spartan language, and so should your naming be." I | 49 | * As Linus said, "C is a Spartan language, and so should your naming be." I |
@@ -320,7 +320,7 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) | |||
320 | err(1, "Reading program headers"); | 320 | err(1, "Reading program headers"); |
321 | 321 | ||
322 | /* Try all the headers: there are usually only three. A read-only one, | 322 | /* Try all the headers: there are usually only three. A read-only one, |
323 | * a read-write one, and a "note" section which isn't loadable. */ | 323 | * a read-write one, and a "note" section which we don't load. */ |
324 | for (i = 0; i < ehdr->e_phnum; i++) { | 324 | for (i = 0; i < ehdr->e_phnum; i++) { |
325 | /* If this isn't a loadable segment, we ignore it */ | 325 | /* If this isn't a loadable segment, we ignore it */ |
326 | if (phdr[i].p_type != PT_LOAD) | 326 | if (phdr[i].p_type != PT_LOAD) |
@@ -387,7 +387,7 @@ static unsigned long load_kernel(int fd) | |||
387 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) | 387 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) |
388 | return map_elf(fd, &hdr); | 388 | return map_elf(fd, &hdr); |
389 | 389 | ||
390 | /* Otherwise we assume it's a bzImage, and try to unpack it */ | 390 | /* Otherwise we assume it's a bzImage, and try to load it. */ |
391 | return load_bzimage(fd); | 391 | return load_bzimage(fd); |
392 | } | 392 | } |
393 | 393 | ||
@@ -433,12 +433,12 @@ static unsigned long load_initrd(const char *name, unsigned long mem) | |||
433 | return len; | 433 | return len; |
434 | } | 434 | } |
435 | 435 | ||
436 | /* Once we know how much memory we have, we can construct simple linear page | 436 | /* Once we know how much memory we have we can construct simple linear page |
437 | * tables which set virtual == physical which will get the Guest far enough | 437 | * tables which set virtual == physical which will get the Guest far enough |
438 | * into the boot to create its own. | 438 | * into the boot to create its own. |
439 | * | 439 | * |
440 | * We lay them out of the way, just below the initrd (which is why we need to | 440 | * We lay them out of the way, just below the initrd (which is why we need to |
441 | * know its size). */ | 441 | * know its size here). */ |
442 | static unsigned long setup_pagetables(unsigned long mem, | 442 | static unsigned long setup_pagetables(unsigned long mem, |
443 | unsigned long initrd_size) | 443 | unsigned long initrd_size) |
444 | { | 444 | { |
@@ -486,9 +486,12 @@ static void concat(char *dst, char *args[]) | |||
486 | unsigned int i, len = 0; | 486 | unsigned int i, len = 0; |
487 | 487 | ||
488 | for (i = 0; args[i]; i++) { | 488 | for (i = 0; args[i]; i++) { |
489 | if (i) { | ||
490 | strcat(dst+len, " "); | ||
491 | len++; | ||
492 | } | ||
489 | strcpy(dst+len, args[i]); | 493 | strcpy(dst+len, args[i]); |
490 | strcat(dst+len, " "); | 494 | len += strlen(args[i]); |
491 | len += strlen(args[i]) + 1; | ||
492 | } | 495 | } |
493 | /* In case it's empty. */ | 496 | /* In case it's empty. */ |
494 | dst[len] = '\0'; | 497 | dst[len] = '\0'; |
@@ -847,7 +850,8 @@ static void handle_console_output(int fd, struct virtqueue *vq) | |||
847 | * | 850 | * |
848 | * Handling output for network is also simple: we get all the output buffers | 851 | * Handling output for network is also simple: we get all the output buffers |
849 | * and write them (ignoring the first element) to this device's file descriptor | 852 | * and write them (ignoring the first element) to this device's file descriptor |
850 | * (stdout). */ | 853 | * (/dev/net/tun). |
854 | */ | ||
851 | static void handle_net_output(int fd, struct virtqueue *vq) | 855 | static void handle_net_output(int fd, struct virtqueue *vq) |
852 | { | 856 | { |
853 | unsigned int head, out, in; | 857 | unsigned int head, out, in; |
@@ -921,7 +925,7 @@ static void enable_fd(int fd, struct virtqueue *vq) | |||
921 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 925 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); |
922 | } | 926 | } |
923 | 927 | ||
924 | /* Resetting a device is fairly easy. */ | 928 | /* When the Guest asks us to reset a device, it's is fairly easy. */ |
925 | static void reset_device(struct device *dev) | 929 | static void reset_device(struct device *dev) |
926 | { | 930 | { |
927 | struct virtqueue *vq; | 931 | struct virtqueue *vq; |
@@ -1000,8 +1004,8 @@ static void handle_input(int fd) | |||
1000 | if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) | 1004 | if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) |
1001 | break; | 1005 | break; |
1002 | 1006 | ||
1003 | /* Otherwise, call the device(s) which have readable | 1007 | /* Otherwise, call the device(s) which have readable file |
1004 | * file descriptors and a method of handling them. */ | 1008 | * descriptors and a method of handling them. */ |
1005 | for (i = devices.dev; i; i = i->next) { | 1009 | for (i = devices.dev; i; i = i->next) { |
1006 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { | 1010 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { |
1007 | int dev_fd; | 1011 | int dev_fd; |
@@ -1012,8 +1016,7 @@ static void handle_input(int fd) | |||
1012 | * should no longer service it. Networking and | 1016 | * should no longer service it. Networking and |
1013 | * console do this when there's no input | 1017 | * console do this when there's no input |
1014 | * buffers to deliver into. Console also uses | 1018 | * buffers to deliver into. Console also uses |
1015 | * it when it discovers that stdin is | 1019 | * it when it discovers that stdin is closed. */ |
1016 | * closed. */ | ||
1017 | FD_CLR(i->fd, &devices.infds); | 1020 | FD_CLR(i->fd, &devices.infds); |
1018 | /* Tell waker to ignore it too, by sending a | 1021 | /* Tell waker to ignore it too, by sending a |
1019 | * negative fd number (-1, since 0 is a valid | 1022 | * negative fd number (-1, since 0 is a valid |
@@ -1030,7 +1033,8 @@ static void handle_input(int fd) | |||
1030 | * | 1033 | * |
1031 | * All devices need a descriptor so the Guest knows it exists, and a "struct | 1034 | * All devices need a descriptor so the Guest knows it exists, and a "struct |
1032 | * device" so the Launcher can keep track of it. We have common helper | 1035 | * device" so the Launcher can keep track of it. We have common helper |
1033 | * routines to allocate and manage them. */ | 1036 | * routines to allocate and manage them. |
1037 | */ | ||
1034 | 1038 | ||
1035 | /* The layout of the device page is a "struct lguest_device_desc" followed by a | 1039 | /* The layout of the device page is a "struct lguest_device_desc" followed by a |
1036 | * number of virtqueue descriptors, then two sets of feature bits, then an | 1040 | * number of virtqueue descriptors, then two sets of feature bits, then an |
@@ -1075,7 +1079,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1075 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); | 1079 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); |
1076 | void *p; | 1080 | void *p; |
1077 | 1081 | ||
1078 | /* First we need some pages for this virtqueue. */ | 1082 | /* First we need some memory for this virtqueue. */ |
1079 | pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) | 1083 | pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) |
1080 | / getpagesize(); | 1084 | / getpagesize(); |
1081 | p = get_pages(pages); | 1085 | p = get_pages(pages); |
@@ -1119,7 +1123,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1119 | } | 1123 | } |
1120 | 1124 | ||
1121 | /* The first half of the feature bitmask is for us to advertise features. The | 1125 | /* The first half of the feature bitmask is for us to advertise features. The |
1122 | * second half if for the Guest to accept features. */ | 1126 | * second half is for the Guest to accept features. */ |
1123 | static void add_feature(struct device *dev, unsigned bit) | 1127 | static void add_feature(struct device *dev, unsigned bit) |
1124 | { | 1128 | { |
1125 | u8 *features = get_feature_bits(dev); | 1129 | u8 *features = get_feature_bits(dev); |
@@ -1148,7 +1152,9 @@ static void set_config(struct device *dev, unsigned len, const void *conf) | |||
1148 | } | 1152 | } |
1149 | 1153 | ||
1150 | /* This routine does all the creation and setup of a new device, including | 1154 | /* This routine does all the creation and setup of a new device, including |
1151 | * calling new_dev_desc() to allocate the descriptor and device memory. */ | 1155 | * calling new_dev_desc() to allocate the descriptor and device memory. |
1156 | * | ||
1157 | * See what I mean about userspace being boring? */ | ||
1152 | static struct device *new_device(const char *name, u16 type, int fd, | 1158 | static struct device *new_device(const char *name, u16 type, int fd, |
1153 | bool (*handle_input)(int, struct device *)) | 1159 | bool (*handle_input)(int, struct device *)) |
1154 | { | 1160 | { |
@@ -1380,7 +1386,6 @@ struct vblk_info | |||
1380 | * Launcher triggers interrupt to Guest. */ | 1386 | * Launcher triggers interrupt to Guest. */ |
1381 | int done_fd; | 1387 | int done_fd; |
1382 | }; | 1388 | }; |
1383 | /*:*/ | ||
1384 | 1389 | ||
1385 | /*L:210 | 1390 | /*L:210 |
1386 | * The Disk | 1391 | * The Disk |
@@ -1490,7 +1495,10 @@ static int io_thread(void *_dev) | |||
1490 | while (read(vblk->workpipe[0], &c, 1) == 1) { | 1495 | while (read(vblk->workpipe[0], &c, 1) == 1) { |
1491 | /* We acknowledge each request immediately to reduce latency, | 1496 | /* We acknowledge each request immediately to reduce latency, |
1492 | * rather than waiting until we've done them all. I haven't | 1497 | * rather than waiting until we've done them all. I haven't |
1493 | * measured to see if it makes any difference. */ | 1498 | * measured to see if it makes any difference. |
1499 | * | ||
1500 | * That would be an interesting test, wouldn't it? You could | ||
1501 | * also try having more than one I/O thread. */ | ||
1494 | while (service_io(dev)) | 1502 | while (service_io(dev)) |
1495 | write(vblk->done_fd, &c, 1); | 1503 | write(vblk->done_fd, &c, 1); |
1496 | } | 1504 | } |
@@ -1498,7 +1506,7 @@ static int io_thread(void *_dev) | |||
1498 | } | 1506 | } |
1499 | 1507 | ||
1500 | /* Now we've seen the I/O thread, we return to the Launcher to see what happens | 1508 | /* Now we've seen the I/O thread, we return to the Launcher to see what happens |
1501 | * when the thread tells us it's completed some I/O. */ | 1509 | * when that thread tells us it's completed some I/O. */ |
1502 | static bool handle_io_finish(int fd, struct device *dev) | 1510 | static bool handle_io_finish(int fd, struct device *dev) |
1503 | { | 1511 | { |
1504 | char c; | 1512 | char c; |
@@ -1570,11 +1578,12 @@ static void setup_block_file(const char *filename) | |||
1570 | * more work. */ | 1578 | * more work. */ |
1571 | pipe(vblk->workpipe); | 1579 | pipe(vblk->workpipe); |
1572 | 1580 | ||
1573 | /* Create stack for thread and run it */ | 1581 | /* Create stack for thread and run it. Since stack grows upwards, we |
1582 | * point the stack pointer to the end of this region. */ | ||
1574 | stack = malloc(32768); | 1583 | stack = malloc(32768); |
1575 | /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from | 1584 | /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from |
1576 | * becoming a zombie. */ | 1585 | * becoming a zombie. */ |
1577 | if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) | 1586 | if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) |
1578 | err(1, "Creating clone"); | 1587 | err(1, "Creating clone"); |
1579 | 1588 | ||
1580 | /* We don't need to keep the I/O thread's end of the pipes open. */ | 1589 | /* We don't need to keep the I/O thread's end of the pipes open. */ |
@@ -1584,14 +1593,14 @@ static void setup_block_file(const char *filename) | |||
1584 | verbose("device %u: virtblock %llu sectors\n", | 1593 | verbose("device %u: virtblock %llu sectors\n", |
1585 | devices.device_num, le64_to_cpu(conf.capacity)); | 1594 | devices.device_num, le64_to_cpu(conf.capacity)); |
1586 | } | 1595 | } |
1587 | /* That's the end of device setup. :*/ | 1596 | /* That's the end of device setup. */ |
1588 | 1597 | ||
1589 | /* Reboot */ | 1598 | /*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ |
1590 | static void __attribute__((noreturn)) restart_guest(void) | 1599 | static void __attribute__((noreturn)) restart_guest(void) |
1591 | { | 1600 | { |
1592 | unsigned int i; | 1601 | unsigned int i; |
1593 | 1602 | ||
1594 | /* Closing pipes causes the waker thread and io_threads to die, and | 1603 | /* Closing pipes causes the Waker thread and io_threads to die, and |
1595 | * closing /dev/lguest cleans up the Guest. Since we don't track all | 1604 | * closing /dev/lguest cleans up the Guest. Since we don't track all |
1596 | * open fds, we simply close everything beyond stderr. */ | 1605 | * open fds, we simply close everything beyond stderr. */ |
1597 | for (i = 3; i < FD_SETSIZE; i++) | 1606 | for (i = 3; i < FD_SETSIZE; i++) |
@@ -1600,7 +1609,7 @@ static void __attribute__((noreturn)) restart_guest(void) | |||
1600 | err(1, "Could not exec %s", main_args[0]); | 1609 | err(1, "Could not exec %s", main_args[0]); |
1601 | } | 1610 | } |
1602 | 1611 | ||
1603 | /*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves | 1612 | /*L:220 Finally we reach the core of the Launcher which runs the Guest, serves |
1604 | * its input and output, and finally, lays it to rest. */ | 1613 | * its input and output, and finally, lays it to rest. */ |
1605 | static void __attribute__((noreturn)) run_guest(int lguest_fd) | 1614 | static void __attribute__((noreturn)) run_guest(int lguest_fd) |
1606 | { | 1615 | { |
@@ -1641,7 +1650,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) | |||
1641 | err(1, "Resetting break"); | 1650 | err(1, "Resetting break"); |
1642 | } | 1651 | } |
1643 | } | 1652 | } |
1644 | /* | 1653 | /*L:240 |
1645 | * This is the end of the Launcher. The good news: we are over halfway | 1654 | * This is the end of the Launcher. The good news: we are over halfway |
1646 | * through! The bad news: the most fiendish part of the code still lies ahead | 1655 | * through! The bad news: the most fiendish part of the code still lies ahead |
1647 | * of us. | 1656 | * of us. |
@@ -1688,8 +1697,8 @@ int main(int argc, char *argv[]) | |||
1688 | * device receive input from a file descriptor, we keep an fdset | 1697 | * device receive input from a file descriptor, we keep an fdset |
1689 | * (infds) and the maximum fd number (max_infd) with the head of the | 1698 | * (infds) and the maximum fd number (max_infd) with the head of the |
1690 | * list. We also keep a pointer to the last device. Finally, we keep | 1699 | * list. We also keep a pointer to the last device. Finally, we keep |
1691 | * the next interrupt number to hand out (1: remember that 0 is used by | 1700 | * the next interrupt number to use for devices (1: remember that 0 is |
1692 | * the timer). */ | 1701 | * used by the timer). */ |
1693 | FD_ZERO(&devices.infds); | 1702 | FD_ZERO(&devices.infds); |
1694 | devices.max_infd = -1; | 1703 | devices.max_infd = -1; |
1695 | devices.lastdev = NULL; | 1704 | devices.lastdev = NULL; |
@@ -1790,8 +1799,8 @@ int main(int argc, char *argv[]) | |||
1790 | lguest_fd = tell_kernel(pgdir, start); | 1799 | lguest_fd = tell_kernel(pgdir, start); |
1791 | 1800 | ||
1792 | /* We fork off a child process, which wakes the Launcher whenever one | 1801 | /* We fork off a child process, which wakes the Launcher whenever one |
1793 | * of the input file descriptors needs attention. Otherwise we would | 1802 | * of the input file descriptors needs attention. We call this the |
1794 | * run the Guest until it tries to output something. */ | 1803 | * Waker, and we'll cover it in a moment. */ |
1795 | waker_fd = setup_waker(lguest_fd); | 1804 | waker_fd = setup_waker(lguest_fd); |
1796 | 1805 | ||
1797 | /* Finally, run the Guest. This doesn't return. */ | 1806 | /* Finally, run the Guest. This doesn't return. */ |