diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2008-07-29 10:58:38 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2008-07-28 19:58:39 -0400 |
commit | 8c79873da0d2bedf4ad6b868c54e426bb0a2fe38 (patch) | |
tree | 270efee346b70ae6615dd4796363479c94eca6d9 | |
parent | 0f0c4fab8284f3b886b2e1e0e317e3bb8de176b3 (diff) |
lguest: turn Waker into a thread, not a process
lguest uses a Waker process to break it out of the kernel (i.e.
actually running the guest) when a file descriptor needs attention.
Changing this from a process to a thread somewhat simplifies things:
it can directly access the fd_set of things to watch. More
importantly, it means that the Waker can see Guest memory correctly,
so /dev/vring file descriptors will work as anticipated (the
alternative is to actually mmap MAP_SHARED, but you can't do that with
/dev/zero).
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r-- | Documentation/lguest/lguest.c | 120 |
1 files changed, 57 insertions, 63 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index f9bba2d8fee1..b88b0ea54e90 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -76,8 +76,12 @@ static bool verbose; | |||
76 | do { if (verbose) printf(args); } while(0) | 76 | do { if (verbose) printf(args); } while(0) |
77 | /*:*/ | 77 | /*:*/ |
78 | 78 | ||
79 | /* The pipe to send commands to the waker process */ | 79 | /* File descriptors for the Waker. */ |
80 | static int waker_fd; | 80 | struct { |
81 | int pipe[2]; | ||
82 | int lguest_fd; | ||
83 | } waker_fds; | ||
84 | |||
81 | /* The pointer to the start of guest memory. */ | 85 | /* The pointer to the start of guest memory. */ |
82 | static void *guest_base; | 86 | static void *guest_base; |
83 | /* The maximum guest physical address allowed, and maximum possible. */ | 87 | /* The maximum guest physical address allowed, and maximum possible. */ |
@@ -579,69 +583,64 @@ static void add_device_fd(int fd) | |||
579 | * watch, but handing a file descriptor mask through to the kernel is fairly | 583 | * watch, but handing a file descriptor mask through to the kernel is fairly |
580 | * icky. | 584 | * icky. |
581 | * | 585 | * |
582 | * Instead, we fork off a process which watches the file descriptors and writes | 586 | * Instead, we clone off a thread which watches the file descriptors and writes |
583 | * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host | 587 | * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host |
584 | * stop running the Guest. This causes the Launcher to return from the | 588 | * stop running the Guest. This causes the Launcher to return from the |
585 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset | 589 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset |
586 | * the LHREQ_BREAK and wake us up again. | 590 | * the LHREQ_BREAK and wake us up again. |
587 | * | 591 | * |
588 | * This, of course, is merely a different *kind* of icky. | 592 | * This, of course, is merely a different *kind* of icky. |
593 | * | ||
594 | * Given my well-known antipathy to threads, I'd prefer to use processes. But | ||
595 | * it's easier to share Guest memory with threads, and trivial to share the | ||
596 | * devices.infds as the Launcher changes it. | ||
589 | */ | 597 | */ |
590 | static void wake_parent(int pipefd, int lguest_fd) | 598 | static int waker(void *unused) |
591 | { | 599 | { |
592 | /* Add the pipe from the Launcher to the fdset in the device_list, so | 600 | /* Close the write end of the pipe: only the Launcher has it open. */ |
593 | * we watch it, too. */ | 601 | close(waker_fds.pipe[1]); |
594 | add_device_fd(pipefd); | ||
595 | 602 | ||
596 | for (;;) { | 603 | for (;;) { |
597 | fd_set rfds = devices.infds; | 604 | fd_set rfds = devices.infds; |
598 | unsigned long args[] = { LHREQ_BREAK, 1 }; | 605 | unsigned long args[] = { LHREQ_BREAK, 1 }; |
606 | unsigned int maxfd = devices.max_infd; | ||
607 | |||
608 | /* We also listen to the pipe from the Launcher. */ | ||
609 | FD_SET(waker_fds.pipe[0], &rfds); | ||
610 | if (waker_fds.pipe[0] > maxfd) | ||
611 | maxfd = waker_fds.pipe[0]; | ||
599 | 612 | ||
600 | /* Wait until input is ready from one of the devices. */ | 613 | /* Wait until input is ready from one of the devices. */ |
601 | select(devices.max_infd+1, &rfds, NULL, NULL, NULL); | 614 | select(maxfd+1, &rfds, NULL, NULL, NULL); |
602 | /* Is it a message from the Launcher? */ | 615 | |
603 | if (FD_ISSET(pipefd, &rfds)) { | 616 | /* Message from Launcher? */ |
604 | int fd; | 617 | if (FD_ISSET(waker_fds.pipe[0], &rfds)) { |
605 | /* If read() returns 0, it means the Launcher has | 618 | char c; |
606 | * exited. We silently follow. */ | 619 | /* If this fails, then assume Launcher has exited. |
607 | if (read(pipefd, &fd, sizeof(fd)) == 0) | 620 | * Don't do anything on exit: we're just a thread! */ |
608 | exit(0); | 621 | if (read(waker_fds.pipe[0], &c, 1) != 1) |
609 | /* Otherwise it's telling us to change what file | 622 | _exit(0); |
610 | * descriptors we're to listen to. Positive means | 623 | continue; |
611 | * listen to a new one, negative means stop | 624 | } |
612 | * listening. */ | 625 | |
613 | if (fd >= 0) | 626 | /* Send LHREQ_BREAK command to snap the Launcher out of it. */ |
614 | FD_SET(fd, &devices.infds); | 627 | pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id); |
615 | else | ||
616 | FD_CLR(-fd - 1, &devices.infds); | ||
617 | } else /* Send LHREQ_BREAK command. */ | ||
618 | pwrite(lguest_fd, args, sizeof(args), cpu_id); | ||
619 | } | 628 | } |
629 | return 0; | ||
620 | } | 630 | } |
621 | 631 | ||
622 | /* This routine just sets up a pipe to the Waker process. */ | 632 | /* This routine just sets up a pipe to the Waker process. */ |
623 | static int setup_waker(int lguest_fd) | 633 | static void setup_waker(int lguest_fd) |
624 | { | 634 | { |
625 | int pipefd[2], child; | 635 | /* This pipe is closed when Launcher dies, telling Waker. */ |
626 | 636 | if (pipe(waker_fds.pipe) != 0) | |
627 | /* We create a pipe to talk to the Waker, and also so it knows when the | 637 | err(1, "Creating pipe for Waker"); |
628 | * Launcher dies (and closes pipe). */ | ||
629 | pipe(pipefd); | ||
630 | child = fork(); | ||
631 | if (child == -1) | ||
632 | err(1, "forking"); | ||
633 | |||
634 | if (child == 0) { | ||
635 | /* We are the Waker: close the "writing" end of our copy of the | ||
636 | * pipe and start waiting for input. */ | ||
637 | close(pipefd[1]); | ||
638 | wake_parent(pipefd[0], lguest_fd); | ||
639 | } | ||
640 | /* Close the reading end of our copy of the pipe. */ | ||
641 | close(pipefd[0]); | ||
642 | 638 | ||
643 | /* Here is the fd used to talk to the waker. */ | 639 | /* Waker also needs to know the lguest fd */ |
644 | return pipefd[1]; | 640 | waker_fds.lguest_fd = lguest_fd; |
641 | |||
642 | if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1) | ||
643 | err(1, "Creating Waker"); | ||
645 | } | 644 | } |
646 | 645 | ||
647 | /* | 646 | /* |
@@ -863,8 +862,8 @@ static bool handle_console_input(int fd, struct device *dev) | |||
863 | unsigned long args[] = { LHREQ_BREAK, 0 }; | 862 | unsigned long args[] = { LHREQ_BREAK, 0 }; |
864 | /* Close the fd so Waker will know it has to | 863 | /* Close the fd so Waker will know it has to |
865 | * exit. */ | 864 | * exit. */ |
866 | close(waker_fd); | 865 | close(waker_fds.pipe[1]); |
867 | /* Just in case waker is blocked in BREAK, send | 866 | /* Just in case Waker is blocked in BREAK, send |
868 | * unbreak now. */ | 867 | * unbreak now. */ |
869 | write(fd, args, sizeof(args)); | 868 | write(fd, args, sizeof(args)); |
870 | exit(2); | 869 | exit(2); |
@@ -996,8 +995,8 @@ static bool handle_tun_input(int fd, struct device *dev) | |||
996 | static void enable_fd(int fd, struct virtqueue *vq, bool timeout) | 995 | static void enable_fd(int fd, struct virtqueue *vq, bool timeout) |
997 | { | 996 | { |
998 | add_device_fd(vq->dev->fd); | 997 | add_device_fd(vq->dev->fd); |
999 | /* Tell waker to listen to it again */ | 998 | /* Snap the Waker out of its select loop. */ |
1000 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 999 | write(waker_fds.pipe[1], "", 1); |
1001 | } | 1000 | } |
1002 | 1001 | ||
1003 | static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) | 1002 | static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) |
@@ -1134,7 +1133,6 @@ static void handle_input(int fd) | |||
1134 | * descriptors and a method of handling them. */ | 1133 | * descriptors and a method of handling them. */ |
1135 | for (i = devices.dev; i; i = i->next) { | 1134 | for (i = devices.dev; i; i = i->next) { |
1136 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { | 1135 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { |
1137 | int dev_fd; | ||
1138 | if (i->handle_input(fd, i)) | 1136 | if (i->handle_input(fd, i)) |
1139 | continue; | 1137 | continue; |
1140 | 1138 | ||
@@ -1144,11 +1142,6 @@ static void handle_input(int fd) | |||
1144 | * buffers to deliver into. Console also uses | 1142 | * buffers to deliver into. Console also uses |
1145 | * it when it discovers that stdin is closed. */ | 1143 | * it when it discovers that stdin is closed. */ |
1146 | FD_CLR(i->fd, &devices.infds); | 1144 | FD_CLR(i->fd, &devices.infds); |
1147 | /* Tell waker to ignore it too, by sending a | ||
1148 | * negative fd number (-1, since 0 is a valid | ||
1149 | * FD number). */ | ||
1150 | dev_fd = -i->fd - 1; | ||
1151 | write(waker_fd, &dev_fd, sizeof(dev_fd)); | ||
1152 | } | 1145 | } |
1153 | } | 1146 | } |
1154 | 1147 | ||
@@ -1880,11 +1873,12 @@ static void __attribute__((noreturn)) restart_guest(void) | |||
1880 | { | 1873 | { |
1881 | unsigned int i; | 1874 | unsigned int i; |
1882 | 1875 | ||
1883 | /* Closing pipes causes the Waker thread and io_threads to die, and | 1876 | /* Since we don't track all open fds, we simply close everything beyond |
1884 | * closing /dev/lguest cleans up the Guest. Since we don't track all | 1877 | * stderr. */ |
1885 | * open fds, we simply close everything beyond stderr. */ | ||
1886 | for (i = 3; i < FD_SETSIZE; i++) | 1878 | for (i = 3; i < FD_SETSIZE; i++) |
1887 | close(i); | 1879 | close(i); |
1880 | |||
1881 | /* The exec automatically gets rid of the I/O and Waker threads. */ | ||
1888 | execv(main_args[0], main_args); | 1882 | execv(main_args[0], main_args); |
1889 | err(1, "Could not exec %s", main_args[0]); | 1883 | err(1, "Could not exec %s", main_args[0]); |
1890 | } | 1884 | } |
@@ -2085,10 +2079,10 @@ int main(int argc, char *argv[]) | |||
2085 | * /dev/lguest file descriptor. */ | 2079 | * /dev/lguest file descriptor. */ |
2086 | lguest_fd = tell_kernel(pgdir, start); | 2080 | lguest_fd = tell_kernel(pgdir, start); |
2087 | 2081 | ||
2088 | /* We fork off a child process, which wakes the Launcher whenever one | 2082 | /* We clone off a thread, which wakes the Launcher whenever one of the |
2089 | * of the input file descriptors needs attention. We call this the | 2083 | * input file descriptors needs attention. We call this the Waker, and |
2090 | * Waker, and we'll cover it in a moment. */ | 2084 | * we'll cover it in a moment. */ |
2091 | waker_fd = setup_waker(lguest_fd); | 2085 | setup_waker(lguest_fd); |
2092 | 2086 | ||
2093 | /* Finally, run the Guest. This doesn't return. */ | 2087 | /* Finally, run the Guest. This doesn't return. */ |
2094 | run_guest(lguest_fd); | 2088 | run_guest(lguest_fd); |