diff options
| -rw-r--r-- | Documentation/lguest/lguest.c | 106 |
1 files changed, 93 insertions, 13 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 46f4c5b09e9e..018472cee151 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <sched.h> | 36 | #include <sched.h> |
| 37 | #include <limits.h> | 37 | #include <limits.h> |
| 38 | #include <stddef.h> | 38 | #include <stddef.h> |
| 39 | #include <signal.h> | ||
| 39 | #include "linux/lguest_launcher.h" | 40 | #include "linux/lguest_launcher.h" |
| 40 | #include "linux/virtio_config.h" | 41 | #include "linux/virtio_config.h" |
| 41 | #include "linux/virtio_net.h" | 42 | #include "linux/virtio_net.h" |
| @@ -81,6 +82,8 @@ static int waker_fd; | |||
| 81 | static void *guest_base; | 82 | static void *guest_base; |
| 82 | /* The maximum guest physical address allowed, and maximum possible. */ | 83 | /* The maximum guest physical address allowed, and maximum possible. */ |
| 83 | static unsigned long guest_limit, guest_max; | 84 | static unsigned long guest_limit, guest_max; |
| 85 | /* The pipe for the signal handler to write to. */ | ||
| 86 | static int timeoutpipe[2]; | ||
| 84 | 87 | ||
| 85 | /* a per-cpu variable indicating whose vcpu is currently running */ | 88 | /* a per-cpu variable indicating whose vcpu is currently running */ |
| 86 | static unsigned int __thread cpu_id; | 89 | static unsigned int __thread cpu_id; |
| @@ -156,11 +159,14 @@ struct virtqueue | |||
| 156 | /* Last available index we saw. */ | 159 | /* Last available index we saw. */ |
| 157 | u16 last_avail_idx; | 160 | u16 last_avail_idx; |
| 158 | 161 | ||
| 159 | /* The routine to call when the Guest pings us. */ | 162 | /* The routine to call when the Guest pings us, or timeout. */ |
| 160 | void (*handle_output)(int fd, struct virtqueue *me); | 163 | void (*handle_output)(int fd, struct virtqueue *me, bool timeout); |
| 161 | 164 | ||
| 162 | /* Outstanding buffers */ | 165 | /* Outstanding buffers */ |
| 163 | unsigned int inflight; | 166 | unsigned int inflight; |
| 167 | |||
| 168 | /* Is this blocked awaiting a timer? */ | ||
| 169 | bool blocked; | ||
| 164 | }; | 170 | }; |
| 165 | 171 | ||
| 166 | /* Remember the arguments to the program so we can "reboot" */ | 172 | /* Remember the arguments to the program so we can "reboot" */ |
| @@ -874,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev) | |||
| 874 | 880 | ||
| 875 | /* Handling output for console is simple: we just get all the output buffers | 881 | /* Handling output for console is simple: we just get all the output buffers |
| 876 | * and write them to stdout. */ | 882 | * and write them to stdout. */ |
| 877 | static void handle_console_output(int fd, struct virtqueue *vq) | 883 | static void handle_console_output(int fd, struct virtqueue *vq, bool timeout) |
| 878 | { | 884 | { |
| 879 | unsigned int head, out, in; | 885 | unsigned int head, out, in; |
| 880 | int len; | 886 | int len; |
| @@ -889,6 +895,21 @@ static void handle_console_output(int fd, struct virtqueue *vq) | |||
| 889 | } | 895 | } |
| 890 | } | 896 | } |
| 891 | 897 | ||
| 898 | static void block_vq(struct virtqueue *vq) /* Suppress Guest kicks on vq, mark it blocked, and arm a one-shot 500 microsecond timer. */ | ||
| 899 | { | ||
| 900 | struct itimerval itm; | ||
| 901 | |||
| 902 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; /* ask the Guest not to notify us about this queue for now */ | ||
| 903 | vq->blocked = true; /* handle_timeout() looks for this flag to find queues to flush */ | ||
| 904 | |||
| 905 | itm.it_interval.tv_sec = 0; /* zero interval: the timer fires once and is not reloaded */ | ||
| 906 | itm.it_interval.tv_usec = 0; | ||
| 907 | itm.it_value.tv_sec = 0; | ||
| 908 | itm.it_value.tv_usec = 500; /* time until expiry, in microseconds */ | ||
| 909 | |||
| 910 | setitimer(ITIMER_REAL, &itm, NULL); /* expiry raises SIGALRM, which setup_timeout() routes to timeout_alarm(). NOTE(review): the return value is not checked, and ITIMER_REAL is a single per-process timer, so a second call presumably restarts the countdown - confirm. */ | ||
| 911 | } | ||
| 912 | |||
| 892 | /* | 913 | /* |
| 893 | * The Network | 914 | * The Network |
| 894 | * | 915 | * |
| @@ -896,9 +917,9 @@ static void handle_console_output(int fd, struct virtqueue *vq) | |||
| 896 | * and write them (ignoring the first element) to this device's file descriptor | 917 | * and write them (ignoring the first element) to this device's file descriptor |
| 897 | * (/dev/net/tun). | 918 | * (/dev/net/tun). |
| 898 | */ | 919 | */ |
| 899 | static void handle_net_output(int fd, struct virtqueue *vq) | 920 | static void handle_net_output(int fd, struct virtqueue *vq, bool timeout) |
| 900 | { | 921 | { |
| 901 | unsigned int head, out, in; | 922 | unsigned int head, out, in, num = 0; |
| 902 | int len; | 923 | int len; |
| 903 | struct iovec iov[vq->vring.num]; | 924 | struct iovec iov[vq->vring.num]; |
| 904 | 925 | ||
| @@ -912,7 +933,12 @@ static void handle_net_output(int fd, struct virtqueue *vq) | |||
| 912 | (void)convert(&iov[0], struct virtio_net_hdr); | 933 | (void)convert(&iov[0], struct virtio_net_hdr); |
| 913 | len = writev(vq->dev->fd, iov+1, out-1); | 934 | len = writev(vq->dev->fd, iov+1, out-1); |
| 914 | add_used_and_trigger(fd, vq, head, len); | 935 | add_used_and_trigger(fd, vq, head, len); |
| 936 | num++; | ||
| 915 | } | 937 | } |
| 938 | |||
| 939 | /* Block further kicks and set up a timer if we saw anything. */ | ||
| 940 | if (!timeout && num) | ||
| 941 | block_vq(vq); | ||
| 916 | } | 942 | } |
| 917 | 943 | ||
| 918 | /* This is where we handle a packet coming in from the tun device to our | 944 | /* This is where we handle a packet coming in from the tun device to our |
| @@ -967,18 +993,18 @@ static bool handle_tun_input(int fd, struct device *dev) | |||
| 967 | /*L:215 This is the callback attached to the network and console input | 993 | /*L:215 This is the callback attached to the network and console input |
| 968 | * virtqueues: it ensures we try again, in case we stopped console or net | 994 | * virtqueues: it ensures we try again, in case we stopped console or net |
| 969 | * delivery because Guest didn't have any buffers. */ | 995 | * delivery because Guest didn't have any buffers. */ |
| 970 | static void enable_fd(int fd, struct virtqueue *vq) | 996 | static void enable_fd(int fd, struct virtqueue *vq, bool timeout) /* timeout exists only to match the handle_output callback signature; neither it nor fd is used in the body */ |
| 971 | { | 997 | { |
| 972 | add_device_fd(vq->dev->fd); | 998 | add_device_fd(vq->dev->fd); |
| 973 | /* Tell waker to listen to it again */ | 999 | /* Tell waker to listen to it again */ |
| 974 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 1000 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); |
| 975 | } | 1001 | } |
| 976 | 1002 | ||
| 977 | static void net_enable_fd(int fd, struct virtqueue *vq) | 1003 | static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) /* sets NO_NOTIFY on the queue, then defers to enable_fd() */ |
| 978 | { | 1004 | { |
| 979 | /* We don't need to know again when Guest refills receive buffer. */ | 1005 | /* We don't need to know again when Guest refills receive buffer. */ |
| 980 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; | 1006 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; |
| 981 | enable_fd(fd, vq); | 1007 | enable_fd(fd, vq, timeout); /* timeout is only forwarded; enable_fd() does not use it */ |
| 982 | } | 1008 | } |
| 983 | 1009 | ||
| 984 | /* When the Guest tells us they updated the status field, we handle it. */ | 1010 | /* When the Guest tells us they updated the status field, we handle it. */ |
| @@ -1047,7 +1073,7 @@ static void handle_output(int fd, unsigned long addr) | |||
| 1047 | if (strcmp(vq->dev->name, "console") != 0) | 1073 | if (strcmp(vq->dev->name, "console") != 0) |
| 1048 | verbose("Output to %s\n", vq->dev->name); | 1074 | verbose("Output to %s\n", vq->dev->name); |
| 1049 | if (vq->handle_output) | 1075 | if (vq->handle_output) |
| 1050 | vq->handle_output(fd, vq); | 1076 | vq->handle_output(fd, vq, false); |
| 1051 | return; | 1077 | return; |
| 1052 | } | 1078 | } |
| 1053 | } | 1079 | } |
| @@ -1061,6 +1087,29 @@ static void handle_output(int fd, unsigned long addr) | |||
| 1061 | strnlen(from_guest_phys(addr), guest_limit - addr)); | 1087 | strnlen(from_guest_phys(addr), guest_limit - addr)); |
| 1062 | } | 1088 | } |
| 1063 | 1089 | ||
| 1090 | static void handle_timeout(int fd) /* Called by handle_input() when the timeout pipe is readable: unblock every blocked virtqueue and run its output handler with timeout=true. fd is passed through unchanged to each handler. */ | ||
| 1091 | { | ||
| 1092 | char buf[32]; /* scratch space; the bytes read are discarded */ | ||
| 1093 | struct device *i; | ||
| 1094 | struct virtqueue *vq; | ||
| 1095 | |||
| 1096 | /* Clear the pipe: drain the wakeup byte(s) written by timeout_alarm(), at most sizeof(buf) per call. NOTE(review): the read() result is ignored. */ | ||
| 1097 | read(timeoutpipe[0], buf, sizeof(buf)); | ||
| 1098 | |||
| 1099 | /* Check each device and virtqueue: flush blocked ones. */ | ||
| 1100 | for (i = devices.dev; i; i = i->next) { | ||
| 1101 | for (vq = i->vq; vq; vq = vq->next) { | ||
| 1102 | if (!vq->blocked) | ||
| 1103 | continue; | ||
| 1104 | |||
| 1105 | vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; /* undo block_vq(): let the Guest notify us again */ | ||
| 1106 | vq->blocked = false; | ||
| 1107 | if (vq->handle_output) | ||
| 1108 | vq->handle_output(fd, vq, true); /* true marks a timeout flush; handle_net_output() only calls block_vq() when timeout is false */ | ||
| 1109 | } | ||
| 1110 | } | ||
| 1111 | } | ||
| 1112 | |||
| 1064 | /* This is called when the Waker wakes us up: check for incoming file | 1113 | /* This is called when the Waker wakes us up: check for incoming file |
| 1065 | * descriptors. */ | 1114 | * descriptors. */ |
| 1066 | static void handle_input(int fd) | 1115 | static void handle_input(int fd) |
| @@ -1071,9 +1120,14 @@ static void handle_input(int fd) | |||
| 1071 | for (;;) { | 1120 | for (;;) { |
| 1072 | struct device *i; | 1121 | struct device *i; |
| 1073 | fd_set fds = devices.infds; | 1122 | fd_set fds = devices.infds; |
| 1123 | int num; | ||
| 1074 | 1124 | ||
| 1125 | num = select(devices.max_infd+1, &fds, NULL, NULL, &poll); | ||
| 1126 | /* Could get interrupted */ | ||
| 1127 | if (num < 0) | ||
| 1128 | continue; | ||
| 1075 | /* If nothing is ready, we're done. */ | 1129 | /* If nothing is ready, we're done. */ |
| 1076 | if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) | 1130 | if (num == 0) |
| 1077 | break; | 1131 | break; |
| 1078 | 1132 | ||
| 1079 | /* Otherwise, call the device(s) which have readable file | 1133 | /* Otherwise, call the device(s) which have readable file |
| @@ -1097,6 +1151,10 @@ static void handle_input(int fd) | |||
| 1097 | write(waker_fd, &dev_fd, sizeof(dev_fd)); | 1151 | write(waker_fd, &dev_fd, sizeof(dev_fd)); |
| 1098 | } | 1152 | } |
| 1099 | } | 1153 | } |
| 1154 | |||
| 1155 | /* Is this the timeout fd? */ | ||
| 1156 | if (FD_ISSET(timeoutpipe[0], &fds)) | ||
| 1157 | handle_timeout(fd); | ||
| 1100 | } | 1158 | } |
| 1101 | } | 1159 | } |
| 1102 | 1160 | ||
| @@ -1145,7 +1203,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type) | |||
| 1145 | /* Each device descriptor is followed by the description of its virtqueues. We | 1203 | /* Each device descriptor is followed by the description of its virtqueues. We |
| 1146 | * specify how many descriptors the virtqueue is to have. */ | 1204 | * specify how many descriptors the virtqueue is to have. */ |
| 1147 | static void add_virtqueue(struct device *dev, unsigned int num_descs, | 1205 | static void add_virtqueue(struct device *dev, unsigned int num_descs, |
| 1148 | void (*handle_output)(int fd, struct virtqueue *me)) | 1206 | void (*handle_output)(int, struct virtqueue *, bool)) |
| 1149 | { | 1207 | { |
| 1150 | unsigned int pages; | 1208 | unsigned int pages; |
| 1151 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); | 1209 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); |
| @@ -1161,6 +1219,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
| 1161 | vq->last_avail_idx = 0; | 1219 | vq->last_avail_idx = 0; |
| 1162 | vq->dev = dev; | 1220 | vq->dev = dev; |
| 1163 | vq->inflight = 0; | 1221 | vq->inflight = 0; |
| 1222 | vq->blocked = false; | ||
| 1164 | 1223 | ||
| 1165 | /* Initialize the configuration. */ | 1224 | /* Initialize the configuration. */ |
| 1166 | vq->config.num = num_descs; | 1225 | vq->config.num = num_descs; |
| @@ -1293,6 +1352,24 @@ static void setup_console(void) | |||
| 1293 | } | 1352 | } |
| 1294 | /*:*/ | 1353 | /*:*/ |
| 1295 | 1354 | ||
| 1355 | static void timeout_alarm(int sig) /* SIGALRM handler installed by setup_timeout(): pokes the timeout pipe so the timer expiry shows up as a readable fd. sig is unused. */ | ||
| 1356 | { | ||
| 1357 | write(timeoutpipe[1], "", 1); /* writes one NUL byte; setup_timeout() made this end O_NONBLOCK, so a full pipe fails instead of blocking. NOTE(review): the result is ignored and write() may change errno inside the handler. */ | ||
| 1358 | } | ||
| 1359 | |||
| 1360 | static void setup_timeout(void) /* Create the timeout pipe, make its write end non-blocking, register its read end as an input fd, and install the SIGALRM handler. Exits via err() on failure. */ | ||
| 1361 | { | ||
| 1362 | if (pipe(timeoutpipe) != 0) /* timeoutpipe[0] is the read end, timeoutpipe[1] the write end */ | ||
| 1363 | err(1, "Creating timeout pipe"); | ||
| 1364 | |||
| 1365 | if (fcntl(timeoutpipe[1], F_SETFL, /* only the write end, used by the signal handler, is made non-blocking */ | ||
| 1366 | fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0) /* NOTE(review): a failing F_GETFL (-1) is not detected before being OR-ed into the new flags */ | ||
| 1367 | err(1, "Making timeout pipe nonblocking"); | ||
| 1368 | |||
| 1369 | add_device_fd(timeoutpipe[0]); /* presumably adds the read end to devices.infds, which handle_input() passes to select() - confirm against add_device_fd() */ | ||
| 1370 | signal(SIGALRM, timeout_alarm); /* the timer armed in block_vq() is delivered here */ | ||
| 1371 | } | ||
| 1372 | |||
| 1296 | /*M:010 Inter-guest networking is an interesting area. Simplest is to have a | 1373 | /*M:010 Inter-guest networking is an interesting area. Simplest is to have a |
| 1297 | * --sharenet=<name> option which opens or creates a named pipe. This can be | 1374 | * --sharenet=<name> option which opens or creates a named pipe. This can be |
| 1298 | * used to send packets to another guest in a 1:1 manner. | 1375 | * used to send packets to another guest in a 1:1 manner. |
| @@ -1653,7 +1730,7 @@ static bool handle_io_finish(int fd, struct device *dev) | |||
| 1653 | } | 1730 | } |
| 1654 | 1731 | ||
| 1655 | /* When the Guest submits some I/O, we just need to wake the I/O thread. */ | 1732 | /* When the Guest submits some I/O, we just need to wake the I/O thread. */ |
| 1656 | static void handle_virtblk_output(int fd, struct virtqueue *vq) | 1733 | static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout) |
| 1657 | { | 1734 | { |
| 1658 | struct vblk_info *vblk = vq->dev->priv; | 1735 | struct vblk_info *vblk = vq->dev->priv; |
| 1659 | char c = 0; | 1736 | char c = 0; |
| @@ -1824,7 +1901,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) | |||
| 1824 | /* ERESTART means that we need to reboot the guest */ | 1901 | /* ERESTART means that we need to reboot the guest */ |
| 1825 | } else if (errno == ERESTART) { | 1902 | } else if (errno == ERESTART) { |
| 1826 | restart_guest(); | 1903 | restart_guest(); |
| 1827 | /* EAGAIN means the Waker wanted us to look at some input. | 1904 | /* EAGAIN means a signal (timeout). |
| 1828 | * Anything else means a bug or incompatible change. */ | 1905 | * Anything else means a bug or incompatible change. */ |
| 1829 | } else if (errno != EAGAIN) | 1906 | } else if (errno != EAGAIN) |
| 1830 | err(1, "Running guest failed"); | 1907 | err(1, "Running guest failed"); |
| @@ -1948,6 +2025,9 @@ int main(int argc, char *argv[]) | |||
| 1948 | /* We always have a console device */ | 2025 | /* We always have a console device */ |
| 1949 | setup_console(); | 2026 | setup_console(); |
| 1950 | 2027 | ||
| 2028 | /* We can timeout waiting for Guest network transmit. */ | ||
| 2029 | setup_timeout(); | ||
| 2030 | |||
| 1951 | /* Now we load the kernel */ | 2031 | /* Now we load the kernel */ |
| 1952 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); | 2032 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); |
| 1953 | 2033 | ||
