diff options
| author | Rusty Russell <rusty@rustcorp.com.au> | 2008-07-29 10:58:38 -0400 |
|---|---|---|
| committer | Rusty Russell <rusty@rustcorp.com.au> | 2008-07-28 19:58:39 -0400 |
| commit | 8c79873da0d2bedf4ad6b868c54e426bb0a2fe38 (patch) | |
| tree | 270efee346b70ae6615dd4796363479c94eca6d9 | |
| parent | 0f0c4fab8284f3b886b2e1e0e317e3bb8de176b3 (diff) | |
lguest: turn Waker into a thread, not a process
lguest uses a Waker process to break it out of the kernel (i.e.
actually running the guest) when a file descriptor needs attention.
Changing this from a process to a thread somewhat simplifies things:
it can directly access the fd_set of things to watch. More
importantly, it means that the Waker can see Guest memory correctly,
so /dev/vring file descriptors will work as anticipated (the
alternative is to actually mmap MAP_SHARED, but you can't do that with
/dev/zero).
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
| -rw-r--r-- | Documentation/lguest/lguest.c | 120 |
1 files changed, 57 insertions, 63 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index f9bba2d8fee1..b88b0ea54e90 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
| @@ -76,8 +76,12 @@ static bool verbose; | |||
| 76 | do { if (verbose) printf(args); } while(0) | 76 | do { if (verbose) printf(args); } while(0) |
| 77 | /*:*/ | 77 | /*:*/ |
| 78 | 78 | ||
| 79 | /* The pipe to send commands to the waker process */ | 79 | /* File descriptors for the Waker. */ |
| 80 | static int waker_fd; | 80 | struct { |
| 81 | int pipe[2]; | ||
| 82 | int lguest_fd; | ||
| 83 | } waker_fds; | ||
| 84 | |||
| 81 | /* The pointer to the start of guest memory. */ | 85 | /* The pointer to the start of guest memory. */ |
| 82 | static void *guest_base; | 86 | static void *guest_base; |
| 83 | /* The maximum guest physical address allowed, and maximum possible. */ | 87 | /* The maximum guest physical address allowed, and maximum possible. */ |
| @@ -579,69 +583,64 @@ static void add_device_fd(int fd) | |||
| 579 | * watch, but handing a file descriptor mask through to the kernel is fairly | 583 | * watch, but handing a file descriptor mask through to the kernel is fairly |
| 580 | * icky. | 584 | * icky. |
| 581 | * | 585 | * |
| 582 | * Instead, we fork off a process which watches the file descriptors and writes | 586 | * Instead, we clone off a thread which watches the file descriptors and writes |
| 583 | * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host | 587 | * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host |
| 584 | * stop running the Guest. This causes the Launcher to return from the | 588 | * stop running the Guest. This causes the Launcher to return from the |
| 585 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset | 589 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset |
| 586 | * the LHREQ_BREAK and wake us up again. | 590 | * the LHREQ_BREAK and wake us up again. |
| 587 | * | 591 | * |
| 588 | * This, of course, is merely a different *kind* of icky. | 592 | * This, of course, is merely a different *kind* of icky. |
| 593 | * | ||
| 594 | * Given my well-known antipathy to threads, I'd prefer to use processes. But | ||
| 595 | * it's easier to share Guest memory with threads, and trivial to share the | ||
| 596 | * devices.infds as the Launcher changes it. | ||
| 589 | */ | 597 | */ |
| 590 | static void wake_parent(int pipefd, int lguest_fd) | 598 | static int waker(void *unused) |
| 591 | { | 599 | { |
| 592 | /* Add the pipe from the Launcher to the fdset in the device_list, so | 600 | /* Close the write end of the pipe: only the Launcher has it open. */ |
| 593 | * we watch it, too. */ | 601 | close(waker_fds.pipe[1]); |
| 594 | add_device_fd(pipefd); | ||
| 595 | 602 | ||
| 596 | for (;;) { | 603 | for (;;) { |
| 597 | fd_set rfds = devices.infds; | 604 | fd_set rfds = devices.infds; |
| 598 | unsigned long args[] = { LHREQ_BREAK, 1 }; | 605 | unsigned long args[] = { LHREQ_BREAK, 1 }; |
| 606 | unsigned int maxfd = devices.max_infd; | ||
| 607 | |||
| 608 | /* We also listen to the pipe from the Launcher. */ | ||
| 609 | FD_SET(waker_fds.pipe[0], &rfds); | ||
| 610 | if (waker_fds.pipe[0] > maxfd) | ||
| 611 | maxfd = waker_fds.pipe[0]; | ||
| 599 | 612 | ||
| 600 | /* Wait until input is ready from one of the devices. */ | 613 | /* Wait until input is ready from one of the devices. */ |
| 601 | select(devices.max_infd+1, &rfds, NULL, NULL, NULL); | 614 | select(maxfd+1, &rfds, NULL, NULL, NULL); |
| 602 | /* Is it a message from the Launcher? */ | 615 | |
| 603 | if (FD_ISSET(pipefd, &rfds)) { | 616 | /* Message from Launcher? */ |
| 604 | int fd; | 617 | if (FD_ISSET(waker_fds.pipe[0], &rfds)) { |
| 605 | /* If read() returns 0, it means the Launcher has | 618 | char c; |
| 606 | * exited. We silently follow. */ | 619 | /* If this fails, then assume Launcher has exited. |
| 607 | if (read(pipefd, &fd, sizeof(fd)) == 0) | 620 | * Don't do anything on exit: we're just a thread! */ |
| 608 | exit(0); | 621 | if (read(waker_fds.pipe[0], &c, 1) != 1) |
| 609 | /* Otherwise it's telling us to change what file | 622 | _exit(0); |
| 610 | * descriptors we're to listen to. Positive means | 623 | continue; |
| 611 | * listen to a new one, negative means stop | 624 | } |
| 612 | * listening. */ | 625 | |
| 613 | if (fd >= 0) | 626 | /* Send LHREQ_BREAK command to snap the Launcher out of it. */ |
| 614 | FD_SET(fd, &devices.infds); | 627 | pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id); |
| 615 | else | ||
| 616 | FD_CLR(-fd - 1, &devices.infds); | ||
| 617 | } else /* Send LHREQ_BREAK command. */ | ||
| 618 | pwrite(lguest_fd, args, sizeof(args), cpu_id); | ||
| 619 | } | 628 | } |
| 629 | return 0; | ||
| 620 | } | 630 | } |
| 621 | 631 | ||
| 622 | /* This routine just sets up a pipe to the Waker process. */ | 632 | /* This routine just sets up a pipe to the Waker process. */ |
| 623 | static int setup_waker(int lguest_fd) | 633 | static void setup_waker(int lguest_fd) |
| 624 | { | 634 | { |
| 625 | int pipefd[2], child; | 635 | /* This pipe is closed when Launcher dies, telling Waker. */ |
| 626 | 636 | if (pipe(waker_fds.pipe) != 0) | |
| 627 | /* We create a pipe to talk to the Waker, and also so it knows when the | 637 | err(1, "Creating pipe for Waker"); |
| 628 | * Launcher dies (and closes pipe). */ | ||
| 629 | pipe(pipefd); | ||
| 630 | child = fork(); | ||
| 631 | if (child == -1) | ||
| 632 | err(1, "forking"); | ||
| 633 | |||
| 634 | if (child == 0) { | ||
| 635 | /* We are the Waker: close the "writing" end of our copy of the | ||
| 636 | * pipe and start waiting for input. */ | ||
| 637 | close(pipefd[1]); | ||
| 638 | wake_parent(pipefd[0], lguest_fd); | ||
| 639 | } | ||
| 640 | /* Close the reading end of our copy of the pipe. */ | ||
| 641 | close(pipefd[0]); | ||
| 642 | 638 | ||
| 643 | /* Here is the fd used to talk to the waker. */ | 639 | /* Waker also needs to know the lguest fd */ |
| 644 | return pipefd[1]; | 640 | waker_fds.lguest_fd = lguest_fd; |
| 641 | |||
| 642 | if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1) | ||
| 643 | err(1, "Creating Waker"); | ||
| 645 | } | 644 | } |
| 646 | 645 | ||
| 647 | /* | 646 | /* |
| @@ -863,8 +862,8 @@ static bool handle_console_input(int fd, struct device *dev) | |||
| 863 | unsigned long args[] = { LHREQ_BREAK, 0 }; | 862 | unsigned long args[] = { LHREQ_BREAK, 0 }; |
| 864 | /* Close the fd so Waker will know it has to | 863 | /* Close the fd so Waker will know it has to |
| 865 | * exit. */ | 864 | * exit. */ |
| 866 | close(waker_fd); | 865 | close(waker_fds.pipe[1]); |
| 867 | /* Just in case waker is blocked in BREAK, send | 866 | /* Just in case Waker is blocked in BREAK, send |
| 868 | * unbreak now. */ | 867 | * unbreak now. */ |
| 869 | write(fd, args, sizeof(args)); | 868 | write(fd, args, sizeof(args)); |
| 870 | exit(2); | 869 | exit(2); |
| @@ -996,8 +995,8 @@ static bool handle_tun_input(int fd, struct device *dev) | |||
| 996 | static void enable_fd(int fd, struct virtqueue *vq, bool timeout) | 995 | static void enable_fd(int fd, struct virtqueue *vq, bool timeout) |
| 997 | { | 996 | { |
| 998 | add_device_fd(vq->dev->fd); | 997 | add_device_fd(vq->dev->fd); |
| 999 | /* Tell waker to listen to it again */ | 998 | /* Snap the Waker out of its select loop. */ |
| 1000 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 999 | write(waker_fds.pipe[1], "", 1); |
| 1001 | } | 1000 | } |
| 1002 | 1001 | ||
| 1003 | static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) | 1002 | static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) |
| @@ -1134,7 +1133,6 @@ static void handle_input(int fd) | |||
| 1134 | * descriptors and a method of handling them. */ | 1133 | * descriptors and a method of handling them. */ |
| 1135 | for (i = devices.dev; i; i = i->next) { | 1134 | for (i = devices.dev; i; i = i->next) { |
| 1136 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { | 1135 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { |
| 1137 | int dev_fd; | ||
| 1138 | if (i->handle_input(fd, i)) | 1136 | if (i->handle_input(fd, i)) |
| 1139 | continue; | 1137 | continue; |
| 1140 | 1138 | ||
| @@ -1144,11 +1142,6 @@ static void handle_input(int fd) | |||
| 1144 | * buffers to deliver into. Console also uses | 1142 | * buffers to deliver into. Console also uses |
| 1145 | * it when it discovers that stdin is closed. */ | 1143 | * it when it discovers that stdin is closed. */ |
| 1146 | FD_CLR(i->fd, &devices.infds); | 1144 | FD_CLR(i->fd, &devices.infds); |
| 1147 | /* Tell waker to ignore it too, by sending a | ||
| 1148 | * negative fd number (-1, since 0 is a valid | ||
| 1149 | * FD number). */ | ||
| 1150 | dev_fd = -i->fd - 1; | ||
| 1151 | write(waker_fd, &dev_fd, sizeof(dev_fd)); | ||
| 1152 | } | 1145 | } |
| 1153 | } | 1146 | } |
| 1154 | 1147 | ||
| @@ -1880,11 +1873,12 @@ static void __attribute__((noreturn)) restart_guest(void) | |||
| 1880 | { | 1873 | { |
| 1881 | unsigned int i; | 1874 | unsigned int i; |
| 1882 | 1875 | ||
| 1883 | /* Closing pipes causes the Waker thread and io_threads to die, and | 1876 | /* Since we don't track all open fds, we simply close everything beyond |
| 1884 | * closing /dev/lguest cleans up the Guest. Since we don't track all | 1877 | * stderr. */ |
| 1885 | * open fds, we simply close everything beyond stderr. */ | ||
| 1886 | for (i = 3; i < FD_SETSIZE; i++) | 1878 | for (i = 3; i < FD_SETSIZE; i++) |
| 1887 | close(i); | 1879 | close(i); |
| 1880 | |||
| 1881 | /* The exec automatically gets rid of the I/O and Waker threads. */ | ||
| 1888 | execv(main_args[0], main_args); | 1882 | execv(main_args[0], main_args); |
| 1889 | err(1, "Could not exec %s", main_args[0]); | 1883 | err(1, "Could not exec %s", main_args[0]); |
| 1890 | } | 1884 | } |
| @@ -2085,10 +2079,10 @@ int main(int argc, char *argv[]) | |||
| 2085 | * /dev/lguest file descriptor. */ | 2079 | * /dev/lguest file descriptor. */ |
| 2086 | lguest_fd = tell_kernel(pgdir, start); | 2080 | lguest_fd = tell_kernel(pgdir, start); |
| 2087 | 2081 | ||
| 2088 | /* We fork off a child process, which wakes the Launcher whenever one | 2082 | /* We clone off a thread, which wakes the Launcher whenever one of the |
| 2089 | * of the input file descriptors needs attention. We call this the | 2083 | * input file descriptors needs attention. We call this the Waker, and |
| 2090 | * Waker, and we'll cover it in a moment. */ | 2084 | * we'll cover it in a moment. */ |
| 2091 | waker_fd = setup_waker(lguest_fd); | 2085 | setup_waker(lguest_fd); |
| 2092 | 2086 | ||
| 2093 | /* Finally, run the Guest. This doesn't return. */ | 2087 | /* Finally, run the Guest. This doesn't return. */ |
| 2094 | run_guest(lguest_fd); | 2088 | run_guest(lguest_fd); |
