aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rusty Russell <rusty@rustcorp.com.au> 2008-07-29 10:58:35 -0400
committer Rusty Russell <rusty@rustcorp.com.au> 2008-07-28 19:58:36 -0400
commit a161883a29bf6100efe7b5346bec274e5023c29c (patch)
tree 5d5b65172a64789eada0e3b824564a793033d0ad
parent 5dae785a82c1a8c05b5b4f9709bd9ce658dcf1b6 (diff)
lguest: Tell Guest net not to notify us on every packet xmit
virtio_ring has the ability to suppress notifications. This prevents a guest exit for every packet, but we need to set a timer on packet receipt to re-check if there were any remaining packets.

Here are the times for 1G TCP Guest->Host with different timeout settings (it matters because the TCP window doesn't grow big enough to fill the entire buffer):

Timeout value   Seconds   Xmit/Recv/Timeout
None (before)   25.3784   xmit 7750233 recv 1
2500 usec       62.5119   xmit 207020 recv 2 timeout 207020
1000 usec       34.5379   xmit 207003 recv 2 timeout 207003
750 usec        29.2305   xmit 207002 recv 1 timeout 207002
500 usec        19.1887   xmit 561141 recv 1 timeout 559657
250 usec        20.0465   xmit 214128 recv 2 timeout 214110
100 usec        19.2583   xmit 561621 recv 1 timeout 560153

(Note that these values are sensitive to the GSO patches which come later, and probably other traffic-related variables, so take with a large grain of salt).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r-- Documentation/lguest/lguest.c 106
1 files changed, 93 insertions, 13 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 46f4c5b09e9e..018472cee151 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -36,6 +36,7 @@
36#include <sched.h> 36#include <sched.h>
37#include <limits.h> 37#include <limits.h>
38#include <stddef.h> 38#include <stddef.h>
39#include <signal.h>
39#include "linux/lguest_launcher.h" 40#include "linux/lguest_launcher.h"
40#include "linux/virtio_config.h" 41#include "linux/virtio_config.h"
41#include "linux/virtio_net.h" 42#include "linux/virtio_net.h"
@@ -81,6 +82,8 @@ static int waker_fd;
81static void *guest_base; 82static void *guest_base;
82/* The maximum guest physical address allowed, and maximum possible. */ 83/* The maximum guest physical address allowed, and maximum possible. */
83static unsigned long guest_limit, guest_max; 84static unsigned long guest_limit, guest_max;
85/* The pipe for signal handler to write to. */
86static int timeoutpipe[2];
84 87
85/* a per-cpu variable indicating whose vcpu is currently running */ 88/* a per-cpu variable indicating whose vcpu is currently running */
86static unsigned int __thread cpu_id; 89static unsigned int __thread cpu_id;
@@ -156,11 +159,14 @@ struct virtqueue
156 /* Last available index we saw. */ 159 /* Last available index we saw. */
157 u16 last_avail_idx; 160 u16 last_avail_idx;
158 161
159 /* The routine to call when the Guest pings us. */ 162 /* The routine to call when the Guest pings us, or timeout. */
160 void (*handle_output)(int fd, struct virtqueue *me); 163 void (*handle_output)(int fd, struct virtqueue *me, bool timeout);
161 164
162 /* Outstanding buffers */ 165 /* Outstanding buffers */
163 unsigned int inflight; 166 unsigned int inflight;
167
168 /* Is this blocked awaiting a timer? */
169 bool blocked;
164}; 170};
165 171
166/* Remember the arguments to the program so we can "reboot" */ 172/* Remember the arguments to the program so we can "reboot" */
@@ -874,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev)
874 880
875/* Handling output for console is simple: we just get all the output buffers 881/* Handling output for console is simple: we just get all the output buffers
876 * and write them to stdout. */ 882 * and write them to stdout. */
877static void handle_console_output(int fd, struct virtqueue *vq) 883static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
878{ 884{
879 unsigned int head, out, in; 885 unsigned int head, out, in;
880 int len; 886 int len;
@@ -889,6 +895,21 @@ static void handle_console_output(int fd, struct virtqueue *vq)
889 } 895 }
890} 896}
891 897
898static void block_vq(struct virtqueue *vq)
899{
900 struct itimerval itm;
901
902 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
903 vq->blocked = true;
904
905 itm.it_interval.tv_sec = 0;
906 itm.it_interval.tv_usec = 0;
907 itm.it_value.tv_sec = 0;
908 itm.it_value.tv_usec = 500;
909
910 setitimer(ITIMER_REAL, &itm, NULL);
911}
912
892/* 913/*
893 * The Network 914 * The Network
894 * 915 *
@@ -896,9 +917,9 @@ static void handle_console_output(int fd, struct virtqueue *vq)
896 * and write them (ignoring the first element) to this device's file descriptor 917 * and write them (ignoring the first element) to this device's file descriptor
897 * (/dev/net/tun). 918 * (/dev/net/tun).
898 */ 919 */
899static void handle_net_output(int fd, struct virtqueue *vq) 920static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
900{ 921{
901 unsigned int head, out, in; 922 unsigned int head, out, in, num = 0;
902 int len; 923 int len;
903 struct iovec iov[vq->vring.num]; 924 struct iovec iov[vq->vring.num];
904 925
@@ -912,7 +933,12 @@ static void handle_net_output(int fd, struct virtqueue *vq)
912 (void)convert(&iov[0], struct virtio_net_hdr); 933 (void)convert(&iov[0], struct virtio_net_hdr);
913 len = writev(vq->dev->fd, iov+1, out-1); 934 len = writev(vq->dev->fd, iov+1, out-1);
914 add_used_and_trigger(fd, vq, head, len); 935 add_used_and_trigger(fd, vq, head, len);
936 num++;
915 } 937 }
938
939 /* Block further kicks and set up a timer if we saw anything. */
940 if (!timeout && num)
941 block_vq(vq);
916} 942}
917 943
918/* This is where we handle a packet coming in from the tun device to our 944/* This is where we handle a packet coming in from the tun device to our
@@ -967,18 +993,18 @@ static bool handle_tun_input(int fd, struct device *dev)
967/*L:215 This is the callback attached to the network and console input 993/*L:215 This is the callback attached to the network and console input
968 * virtqueues: it ensures we try again, in case we stopped console or net 994 * virtqueues: it ensures we try again, in case we stopped console or net
969 * delivery because Guest didn't have any buffers. */ 995 * delivery because Guest didn't have any buffers. */
970static void enable_fd(int fd, struct virtqueue *vq) 996static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
971{ 997{
972 add_device_fd(vq->dev->fd); 998 add_device_fd(vq->dev->fd);
973 /* Tell waker to listen to it again */ 999 /* Tell waker to listen to it again */
974 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); 1000 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
975} 1001}
976 1002
977static void net_enable_fd(int fd, struct virtqueue *vq) 1003static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
978{ 1004{
979 /* We don't need to know again when Guest refills receive buffer. */ 1005 /* We don't need to know again when Guest refills receive buffer. */
980 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; 1006 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
981 enable_fd(fd, vq); 1007 enable_fd(fd, vq, timeout);
982} 1008}
983 1009
984/* When the Guest tells us they updated the status field, we handle it. */ 1010/* When the Guest tells us they updated the status field, we handle it. */
@@ -1047,7 +1073,7 @@ static void handle_output(int fd, unsigned long addr)
1047 if (strcmp(vq->dev->name, "console") != 0) 1073 if (strcmp(vq->dev->name, "console") != 0)
1048 verbose("Output to %s\n", vq->dev->name); 1074 verbose("Output to %s\n", vq->dev->name);
1049 if (vq->handle_output) 1075 if (vq->handle_output)
1050 vq->handle_output(fd, vq); 1076 vq->handle_output(fd, vq, false);
1051 return; 1077 return;
1052 } 1078 }
1053 } 1079 }
@@ -1061,6 +1087,29 @@ static void handle_output(int fd, unsigned long addr)
1061 strnlen(from_guest_phys(addr), guest_limit - addr)); 1087 strnlen(from_guest_phys(addr), guest_limit - addr));
1062} 1088}
1063 1089
1090static void handle_timeout(int fd)
1091{
1092 char buf[32];
1093 struct device *i;
1094 struct virtqueue *vq;
1095
1096 /* Clear the pipe */
1097 read(timeoutpipe[0], buf, sizeof(buf));
1098
1099 /* Check each device and virtqueue: flush blocked ones. */
1100 for (i = devices.dev; i; i = i->next) {
1101 for (vq = i->vq; vq; vq = vq->next) {
1102 if (!vq->blocked)
1103 continue;
1104
1105 vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
1106 vq->blocked = false;
1107 if (vq->handle_output)
1108 vq->handle_output(fd, vq, true);
1109 }
1110 }
1111}
1112
1064/* This is called when the Waker wakes us up: check for incoming file 1113/* This is called when the Waker wakes us up: check for incoming file
1065 * descriptors. */ 1114 * descriptors. */
1066static void handle_input(int fd) 1115static void handle_input(int fd)
@@ -1071,9 +1120,14 @@ static void handle_input(int fd)
1071 for (;;) { 1120 for (;;) {
1072 struct device *i; 1121 struct device *i;
1073 fd_set fds = devices.infds; 1122 fd_set fds = devices.infds;
1123 int num;
1074 1124
1125 num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
1126 /* Could get interrupted */
1127 if (num < 0)
1128 continue;
1075 /* If nothing is ready, we're done. */ 1129 /* If nothing is ready, we're done. */
1076 if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) 1130 if (num == 0)
1077 break; 1131 break;
1078 1132
1079 /* Otherwise, call the device(s) which have readable file 1133 /* Otherwise, call the device(s) which have readable file
@@ -1097,6 +1151,10 @@ static void handle_input(int fd)
1097 write(waker_fd, &dev_fd, sizeof(dev_fd)); 1151 write(waker_fd, &dev_fd, sizeof(dev_fd));
1098 } 1152 }
1099 } 1153 }
1154
1155 /* Is this the timeout fd? */
1156 if (FD_ISSET(timeoutpipe[0], &fds))
1157 handle_timeout(fd);
1100 } 1158 }
1101} 1159}
1102 1160
@@ -1145,7 +1203,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
1145/* Each device descriptor is followed by the description of its virtqueues. We 1203/* Each device descriptor is followed by the description of its virtqueues. We
1146 * specify how many descriptors the virtqueue is to have. */ 1204 * specify how many descriptors the virtqueue is to have. */
1147static void add_virtqueue(struct device *dev, unsigned int num_descs, 1205static void add_virtqueue(struct device *dev, unsigned int num_descs,
1148 void (*handle_output)(int fd, struct virtqueue *me)) 1206 void (*handle_output)(int, struct virtqueue *, bool))
1149{ 1207{
1150 unsigned int pages; 1208 unsigned int pages;
1151 struct virtqueue **i, *vq = malloc(sizeof(*vq)); 1209 struct virtqueue **i, *vq = malloc(sizeof(*vq));
@@ -1161,6 +1219,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1161 vq->last_avail_idx = 0; 1219 vq->last_avail_idx = 0;
1162 vq->dev = dev; 1220 vq->dev = dev;
1163 vq->inflight = 0; 1221 vq->inflight = 0;
1222 vq->blocked = false;
1164 1223
1165 /* Initialize the configuration. */ 1224 /* Initialize the configuration. */
1166 vq->config.num = num_descs; 1225 vq->config.num = num_descs;
@@ -1293,6 +1352,24 @@ static void setup_console(void)
1293} 1352}
1294/*:*/ 1353/*:*/
1295 1354
1355static void timeout_alarm(int sig)
1356{
1357 write(timeoutpipe[1], "", 1);
1358}
1359
1360static void setup_timeout(void)
1361{
1362 if (pipe(timeoutpipe) != 0)
1363 err(1, "Creating timeout pipe");
1364
1365 if (fcntl(timeoutpipe[1], F_SETFL,
1366 fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0)
1367 err(1, "Making timeout pipe nonblocking");
1368
1369 add_device_fd(timeoutpipe[0]);
1370 signal(SIGALRM, timeout_alarm);
1371}
1372
1296/*M:010 Inter-guest networking is an interesting area. Simplest is to have a 1373/*M:010 Inter-guest networking is an interesting area. Simplest is to have a
1297 * --sharenet=<name> option which opens or creates a named pipe. This can be 1374 * --sharenet=<name> option which opens or creates a named pipe. This can be
1298 * used to send packets to another guest in a 1:1 manner. 1375 * used to send packets to another guest in a 1:1 manner.
@@ -1653,7 +1730,7 @@ static bool handle_io_finish(int fd, struct device *dev)
1653} 1730}
1654 1731
1655/* When the Guest submits some I/O, we just need to wake the I/O thread. */ 1732/* When the Guest submits some I/O, we just need to wake the I/O thread. */
1656static void handle_virtblk_output(int fd, struct virtqueue *vq) 1733static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
1657{ 1734{
1658 struct vblk_info *vblk = vq->dev->priv; 1735 struct vblk_info *vblk = vq->dev->priv;
1659 char c = 0; 1736 char c = 0;
@@ -1824,7 +1901,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
1824 /* ERESTART means that we need to reboot the guest */ 1901 /* ERESTART means that we need to reboot the guest */
1825 } else if (errno == ERESTART) { 1902 } else if (errno == ERESTART) {
1826 restart_guest(); 1903 restart_guest();
1827 /* EAGAIN means the Waker wanted us to look at some input. 1904 /* EAGAIN means a signal (timeout).
1828 * Anything else means a bug or incompatible change. */ 1905 * Anything else means a bug or incompatible change. */
1829 } else if (errno != EAGAIN) 1906 } else if (errno != EAGAIN)
1830 err(1, "Running guest failed"); 1907 err(1, "Running guest failed");
@@ -1948,6 +2025,9 @@ int main(int argc, char *argv[])
1948 /* We always have a console device */ 2025 /* We always have a console device */
1949 setup_console(); 2026 setup_console();
1950 2027
2028 /* We can timeout waiting for Guest network transmit. */
2029 setup_timeout();
2030
1951 /* Now we load the kernel */ 2031 /* Now we load the kernel */
1952 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); 2032 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
1953 2033