aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/lguest
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2008-07-29 10:58:38 -0400
committerRusty Russell <rusty@rustcorp.com.au>2008-07-28 19:58:39 -0400
commit8c79873da0d2bedf4ad6b868c54e426bb0a2fe38 (patch)
tree270efee346b70ae6615dd4796363479c94eca6d9 /Documentation/lguest
parent0f0c4fab8284f3b886b2e1e0e317e3bb8de176b3 (diff)
lguest: turn Waker into a thread, not a process
lguest uses a Waker process to break it out of the kernel (i.e. actually running the guest) when a file descriptor needs attention. Changing this from a process to a thread somewhat simplifies things: it can directly access the fd_set of things to watch. More importantly, it means that the Waker can see Guest memory correctly, so /dev/vring file descriptors will work as anticipated (the alternative is to actually mmap MAP_SHARED, but you can't do that with /dev/zero). Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'Documentation/lguest')
-rw-r--r--Documentation/lguest/lguest.c120
1 files changed, 57 insertions, 63 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index f9bba2d8fee1..b88b0ea54e90 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -76,8 +76,12 @@ static bool verbose;
76 do { if (verbose) printf(args); } while(0) 76 do { if (verbose) printf(args); } while(0)
77/*:*/ 77/*:*/
78 78
79/* The pipe to send commands to the waker process */ 79/* File descriptors for the Waker. */
80static int waker_fd; 80struct {
81 int pipe[2];
82 int lguest_fd;
83} waker_fds;
84
81/* The pointer to the start of guest memory. */ 85/* The pointer to the start of guest memory. */
82static void *guest_base; 86static void *guest_base;
83/* The maximum guest physical address allowed, and maximum possible. */ 87/* The maximum guest physical address allowed, and maximum possible. */
@@ -579,69 +583,64 @@ static void add_device_fd(int fd)
579 * watch, but handing a file descriptor mask through to the kernel is fairly 583 * watch, but handing a file descriptor mask through to the kernel is fairly
580 * icky. 584 * icky.
581 * 585 *
582 * Instead, we fork off a process which watches the file descriptors and writes 586 * Instead, we clone off a thread which watches the file descriptors and writes
583 * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host 587 * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host
584 * stop running the Guest. This causes the Launcher to return from the 588 * stop running the Guest. This causes the Launcher to return from the
585 * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset 589 * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset
586 * the LHREQ_BREAK and wake us up again. 590 * the LHREQ_BREAK and wake us up again.
587 * 591 *
588 * This, of course, is merely a different *kind* of icky. 592 * This, of course, is merely a different *kind* of icky.
593 *
594 * Given my well-known antipathy to threads, I'd prefer to use processes. But
595 * it's easier to share Guest memory with threads, and trivial to share the
596 * devices.infds as the Launcher changes it.
589 */ 597 */
590static void wake_parent(int pipefd, int lguest_fd) 598static int waker(void *unused)
591{ 599{
592 /* Add the pipe from the Launcher to the fdset in the device_list, so 600 /* Close the write end of the pipe: only the Launcher has it open. */
593 * we watch it, too. */ 601 close(waker_fds.pipe[1]);
594 add_device_fd(pipefd);
595 602
596 for (;;) { 603 for (;;) {
597 fd_set rfds = devices.infds; 604 fd_set rfds = devices.infds;
598 unsigned long args[] = { LHREQ_BREAK, 1 }; 605 unsigned long args[] = { LHREQ_BREAK, 1 };
606 unsigned int maxfd = devices.max_infd;
607
608 /* We also listen to the pipe from the Launcher. */
609 FD_SET(waker_fds.pipe[0], &rfds);
610 if (waker_fds.pipe[0] > maxfd)
611 maxfd = waker_fds.pipe[0];
599 612
600 /* Wait until input is ready from one of the devices. */ 613 /* Wait until input is ready from one of the devices. */
601 select(devices.max_infd+1, &rfds, NULL, NULL, NULL); 614 select(maxfd+1, &rfds, NULL, NULL, NULL);
602 /* Is it a message from the Launcher? */ 615
603 if (FD_ISSET(pipefd, &rfds)) { 616 /* Message from Launcher? */
604 int fd; 617 if (FD_ISSET(waker_fds.pipe[0], &rfds)) {
605 /* If read() returns 0, it means the Launcher has 618 char c;
606 * exited. We silently follow. */ 619 /* If this fails, then assume Launcher has exited.
607 if (read(pipefd, &fd, sizeof(fd)) == 0) 620 * Don't do anything on exit: we're just a thread! */
608 exit(0); 621 if (read(waker_fds.pipe[0], &c, 1) != 1)
609 /* Otherwise it's telling us to change what file 622 _exit(0);
610 * descriptors we're to listen to. Positive means 623 continue;
611 * listen to a new one, negative means stop 624 }
612 * listening. */ 625
613 if (fd >= 0) 626 /* Send LHREQ_BREAK command to snap the Launcher out of it. */
614 FD_SET(fd, &devices.infds); 627 pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id);
615 else
616 FD_CLR(-fd - 1, &devices.infds);
617 } else /* Send LHREQ_BREAK command. */
618 pwrite(lguest_fd, args, sizeof(args), cpu_id);
619 } 628 }
629 return 0;
620} 630}
621 631
622/* This routine just sets up a pipe to the Waker process. */ 632/* This routine just sets up a pipe to the Waker process. */
623static int setup_waker(int lguest_fd) 633static void setup_waker(int lguest_fd)
624{ 634{
625 int pipefd[2], child; 635 /* This pipe is closed when Launcher dies, telling Waker. */
626 636 if (pipe(waker_fds.pipe) != 0)
627 /* We create a pipe to talk to the Waker, and also so it knows when the 637 err(1, "Creating pipe for Waker");
628 * Launcher dies (and closes pipe). */
629 pipe(pipefd);
630 child = fork();
631 if (child == -1)
632 err(1, "forking");
633
634 if (child == 0) {
635 /* We are the Waker: close the "writing" end of our copy of the
636 * pipe and start waiting for input. */
637 close(pipefd[1]);
638 wake_parent(pipefd[0], lguest_fd);
639 }
640 /* Close the reading end of our copy of the pipe. */
641 close(pipefd[0]);
642 638
643 /* Here is the fd used to talk to the waker. */ 639 /* Waker also needs to know the lguest fd */
644 return pipefd[1]; 640 waker_fds.lguest_fd = lguest_fd;
641
642 if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1)
643 err(1, "Creating Waker");
645} 644}
646 645
647/* 646/*
@@ -863,8 +862,8 @@ static bool handle_console_input(int fd, struct device *dev)
863 unsigned long args[] = { LHREQ_BREAK, 0 }; 862 unsigned long args[] = { LHREQ_BREAK, 0 };
864 /* Close the fd so Waker will know it has to 863 /* Close the fd so Waker will know it has to
865 * exit. */ 864 * exit. */
866 close(waker_fd); 865 close(waker_fds.pipe[1]);
867 /* Just in case waker is blocked in BREAK, send 866 /* Just in case Waker is blocked in BREAK, send
868 * unbreak now. */ 867 * unbreak now. */
869 write(fd, args, sizeof(args)); 868 write(fd, args, sizeof(args));
870 exit(2); 869 exit(2);
@@ -996,8 +995,8 @@ static bool handle_tun_input(int fd, struct device *dev)
996static void enable_fd(int fd, struct virtqueue *vq, bool timeout) 995static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
997{ 996{
998 add_device_fd(vq->dev->fd); 997 add_device_fd(vq->dev->fd);
999 /* Tell waker to listen to it again */ 998 /* Snap the Waker out of its select loop. */
1000 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); 999 write(waker_fds.pipe[1], "", 1);
1001} 1000}
1002 1001
1003static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) 1002static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
@@ -1134,7 +1133,6 @@ static void handle_input(int fd)
1134 * descriptors and a method of handling them. */ 1133 * descriptors and a method of handling them. */
1135 for (i = devices.dev; i; i = i->next) { 1134 for (i = devices.dev; i; i = i->next) {
1136 if (i->handle_input && FD_ISSET(i->fd, &fds)) { 1135 if (i->handle_input && FD_ISSET(i->fd, &fds)) {
1137 int dev_fd;
1138 if (i->handle_input(fd, i)) 1136 if (i->handle_input(fd, i))
1139 continue; 1137 continue;
1140 1138
@@ -1144,11 +1142,6 @@ static void handle_input(int fd)
1144 * buffers to deliver into. Console also uses 1142 * buffers to deliver into. Console also uses
1145 * it when it discovers that stdin is closed. */ 1143 * it when it discovers that stdin is closed. */
1146 FD_CLR(i->fd, &devices.infds); 1144 FD_CLR(i->fd, &devices.infds);
1147 /* Tell waker to ignore it too, by sending a
1148 * negative fd number (-1, since 0 is a valid
1149 * FD number). */
1150 dev_fd = -i->fd - 1;
1151 write(waker_fd, &dev_fd, sizeof(dev_fd));
1152 } 1145 }
1153 } 1146 }
1154 1147
@@ -1880,11 +1873,12 @@ static void __attribute__((noreturn)) restart_guest(void)
1880{ 1873{
1881 unsigned int i; 1874 unsigned int i;
1882 1875
1883 /* Closing pipes causes the Waker thread and io_threads to die, and 1876 /* Since we don't track all open fds, we simply close everything beyond
1884 * closing /dev/lguest cleans up the Guest. Since we don't track all 1877 * stderr. */
1885 * open fds, we simply close everything beyond stderr. */
1886 for (i = 3; i < FD_SETSIZE; i++) 1878 for (i = 3; i < FD_SETSIZE; i++)
1887 close(i); 1879 close(i);
1880
1881 /* The exec automatically gets rid of the I/O and Waker threads. */
1888 execv(main_args[0], main_args); 1882 execv(main_args[0], main_args);
1889 err(1, "Could not exec %s", main_args[0]); 1883 err(1, "Could not exec %s", main_args[0]);
1890} 1884}
@@ -2085,10 +2079,10 @@ int main(int argc, char *argv[])
2085 * /dev/lguest file descriptor. */ 2079 * /dev/lguest file descriptor. */
2086 lguest_fd = tell_kernel(pgdir, start); 2080 lguest_fd = tell_kernel(pgdir, start);
2087 2081
2088 /* We fork off a child process, which wakes the Launcher whenever one 2082 /* We clone off a thread, which wakes the Launcher whenever one of the
2089 * of the input file descriptors needs attention. We call this the 2083 * input file descriptors needs attention. We call this the Waker, and
2090 * Waker, and we'll cover it in a moment. */ 2084 * we'll cover it in a moment. */
2091 waker_fd = setup_waker(lguest_fd); 2085 setup_waker(lguest_fd);
2092 2086
2093 /* Finally, run the Guest. This doesn't return. */ 2087 /* Finally, run the Guest. This doesn't return. */
2094 run_guest(lguest_fd); 2088 run_guest(lguest_fd);