diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/lguest/lguest.c | 231 |
1 files changed, 154 insertions, 77 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 6c8a2386cd50..0f23d67f958f 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -34,6 +34,8 @@ | |||
34 | #include <zlib.h> | 34 | #include <zlib.h> |
35 | #include <assert.h> | 35 | #include <assert.h> |
36 | #include <sched.h> | 36 | #include <sched.h> |
37 | #include <limits.h> | ||
38 | #include <stddef.h> | ||
37 | #include "linux/lguest_launcher.h" | 39 | #include "linux/lguest_launcher.h" |
38 | #include "linux/virtio_config.h" | 40 | #include "linux/virtio_config.h" |
39 | #include "linux/virtio_net.h" | 41 | #include "linux/virtio_net.h" |
@@ -99,13 +101,11 @@ struct device_list | |||
99 | /* The descriptor page for the devices. */ | 101 | /* The descriptor page for the devices. */ |
100 | u8 *descpage; | 102 | u8 *descpage; |
101 | 103 | ||
102 | /* The tail of the last descriptor. */ | ||
103 | unsigned int desc_used; | ||
104 | |||
105 | /* A single linked list of devices. */ | 104 | /* A single linked list of devices. */ |
106 | struct device *dev; | 105 | struct device *dev; |
107 | /* ... And an end pointer so we can easily append new devices */ | 106 | /* And a pointer to the last device for easy append and also for |
108 | struct device **lastdev; | 107 | * configuration appending. */ |
108 | struct device *lastdev; | ||
109 | }; | 109 | }; |
110 | 110 | ||
111 | /* The list of Guest devices, based on command line arguments. */ | 111 | /* The list of Guest devices, based on command line arguments. */ |
@@ -191,7 +191,14 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, | |||
191 | #define cpu_to_le64(v64) (v64) | 191 | #define cpu_to_le64(v64) (v64) |
192 | #define le16_to_cpu(v16) (v16) | 192 | #define le16_to_cpu(v16) (v16) |
193 | #define le32_to_cpu(v32) (v32) | 193 | #define le32_to_cpu(v32) (v32) |
194 | #define le64_to_cpu(v32) (v64) | 194 | #define le64_to_cpu(v64) (v64) |
195 | |||
196 | /* The device virtqueue descriptors are followed by feature bitmasks. */ | ||
197 | static u8 *get_feature_bits(struct device *dev) | ||
198 | { | ||
199 | return (u8 *)(dev->desc + 1) | ||
200 | + dev->desc->num_vq * sizeof(struct lguest_vqconfig); | ||
201 | } | ||
195 | 202 | ||
196 | /*L:100 The Launcher code itself takes us out into userspace, that scary place | 203 | /*L:100 The Launcher code itself takes us out into userspace, that scary place |
197 | * where pointers run wild and free! Unfortunately, like most userspace | 204 | * where pointers run wild and free! Unfortunately, like most userspace |
@@ -914,21 +921,58 @@ static void enable_fd(int fd, struct virtqueue *vq) | |||
914 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 921 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); |
915 | } | 922 | } |
916 | 923 | ||
924 | /* Resetting a device is fairly easy. */ | ||
925 | static void reset_device(struct device *dev) | ||
926 | { | ||
927 | struct virtqueue *vq; | ||
928 | |||
929 | verbose("Resetting device %s\n", dev->name); | ||
930 | /* Clear the status. */ | ||
931 | dev->desc->status = 0; | ||
932 | |||
933 | /* Clear any features they've acked. */ | ||
934 | memset(get_feature_bits(dev) + dev->desc->feature_len, 0, | ||
935 | dev->desc->feature_len); | ||
936 | |||
937 | /* Zero out the virtqueues. */ | ||
938 | for (vq = dev->vq; vq; vq = vq->next) { | ||
939 | memset(vq->vring.desc, 0, | ||
940 | vring_size(vq->config.num, getpagesize())); | ||
941 | vq->last_avail_idx = 0; | ||
942 | } | ||
943 | } | ||
944 | |||
917 | /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ | 945 | /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ |
918 | static void handle_output(int fd, unsigned long addr) | 946 | static void handle_output(int fd, unsigned long addr) |
919 | { | 947 | { |
920 | struct device *i; | 948 | struct device *i; |
921 | struct virtqueue *vq; | 949 | struct virtqueue *vq; |
922 | 950 | ||
923 | /* Check each virtqueue. */ | 951 | /* Check each device and virtqueue. */ |
924 | for (i = devices.dev; i; i = i->next) { | 952 | for (i = devices.dev; i; i = i->next) { |
953 | /* Notifications to device descriptors reset the device. */ | ||
954 | if (from_guest_phys(addr) == i->desc) { | ||
955 | reset_device(i); | ||
956 | return; | ||
957 | } | ||
958 | |||
959 | /* Notifications to virtqueues mean output has occurred. */ | ||
925 | for (vq = i->vq; vq; vq = vq->next) { | 960 | for (vq = i->vq; vq; vq = vq->next) { |
926 | if (vq->config.pfn == addr/getpagesize() | 961 | if (vq->config.pfn != addr/getpagesize()) |
927 | && vq->handle_output) { | 962 | continue; |
928 | verbose("Output to %s\n", vq->dev->name); | 963 | |
929 | vq->handle_output(fd, vq); | 964 | /* Guest should acknowledge (and set features!) before |
965 | * using the device. */ | ||
966 | if (i->desc->status == 0) { | ||
967 | warnx("%s gave early output", i->name); | ||
930 | return; | 968 | return; |
931 | } | 969 | } |
970 | |||
971 | if (strcmp(vq->dev->name, "console") != 0) | ||
972 | verbose("Output to %s\n", vq->dev->name); | ||
973 | if (vq->handle_output) | ||
974 | vq->handle_output(fd, vq); | ||
975 | return; | ||
932 | } | 976 | } |
933 | } | 977 | } |
934 | 978 | ||
@@ -986,54 +1030,44 @@ static void handle_input(int fd) | |||
986 | * | 1030 | * |
987 | * All devices need a descriptor so the Guest knows it exists, and a "struct | 1031 | * All devices need a descriptor so the Guest knows it exists, and a "struct |
988 | * device" so the Launcher can keep track of it. We have common helper | 1032 | * device" so the Launcher can keep track of it. We have common helper |
989 | * routines to allocate them. | 1033 | * routines to allocate and manage them. */ |
990 | * | ||
991 | * This routine allocates a new "struct lguest_device_desc" from descriptor | ||
992 | * table just above the Guest's normal memory. It returns a pointer to that | ||
993 | * descriptor. */ | ||
994 | static struct lguest_device_desc *new_dev_desc(u16 type) | ||
995 | { | ||
996 | struct lguest_device_desc *d; | ||
997 | 1034 | ||
998 | /* We only have one page for all the descriptors. */ | 1035 | /* The layout of the device page is a "struct lguest_device_desc" followed by a |
999 | if (devices.desc_used + sizeof(*d) > getpagesize()) | 1036 | * number of virtqueue descriptors, then two sets of feature bits, then an |
1000 | errx(1, "Too many devices"); | 1037 | * array of configuration bytes. This routine returns the configuration |
1001 | 1038 | * pointer. */ | |
1002 | /* We don't need to set config_len or status: page is 0 already. */ | 1039 | static u8 *device_config(const struct device *dev) |
1003 | d = (void *)devices.descpage + devices.desc_used; | 1040 | { |
1004 | d->type = type; | 1041 | return (void *)(dev->desc + 1) |
1005 | devices.desc_used += sizeof(*d); | 1042 | + dev->desc->num_vq * sizeof(struct lguest_vqconfig) |
1006 | 1043 | + dev->desc->feature_len * 2; | |
1007 | return d; | ||
1008 | } | 1044 | } |
1009 | 1045 | ||
1010 | /* Each device descriptor is followed by some configuration information. | 1046 | /* This routine allocates a new "struct lguest_device_desc" from descriptor |
1011 | * Each configuration field looks like: u8 type, u8 len, [... len bytes...]. | 1047 | * table page just above the Guest's normal memory. It returns a pointer to |
1012 | * | 1048 | * that descriptor. */ |
1013 | * This routine adds a new field to an existing device's descriptor. It only | 1049 | static struct lguest_device_desc *new_dev_desc(u16 type) |
1014 | * works for the last device, but that's OK because that's how we use it. */ | ||
1015 | static void add_desc_field(struct device *dev, u8 type, u8 len, const void *c) | ||
1016 | { | 1050 | { |
1017 | /* This is the last descriptor, right? */ | 1051 | struct lguest_device_desc d = { .type = type }; |
1018 | assert(devices.descpage + devices.desc_used | 1052 | void *p; |
1019 | == (u8 *)(dev->desc + 1) + dev->desc->config_len); | ||
1020 | 1053 | ||
1021 | /* We only have one page of device descriptions. */ | 1054 | /* Figure out where the next device config is, based on the last one. */ |
1022 | if (devices.desc_used + 2 + len > getpagesize()) | 1055 | if (devices.lastdev) |
1023 | errx(1, "Too many devices"); | 1056 | p = device_config(devices.lastdev) |
1057 | + devices.lastdev->desc->config_len; | ||
1058 | else | ||
1059 | p = devices.descpage; | ||
1024 | 1060 | ||
1025 | /* Copy in the new config header: type then length. */ | 1061 | /* We only have one page for all the descriptors. */ |
1026 | devices.descpage[devices.desc_used++] = type; | 1062 | if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) |
1027 | devices.descpage[devices.desc_used++] = len; | 1063 | errx(1, "Too many devices"); |
1028 | memcpy(devices.descpage + devices.desc_used, c, len); | ||
1029 | devices.desc_used += len; | ||
1030 | 1064 | ||
1031 | /* Update the device descriptor length: two byte head then data. */ | 1065 | /* p might not be aligned, so we memcpy in. */ |
1032 | dev->desc->config_len += 2 + len; | 1066 | return memcpy(p, &d, sizeof(d)); |
1033 | } | 1067 | } |
1034 | 1068 | ||
1035 | /* This routine adds a virtqueue to a device. We specify how many descriptors | 1069 | /* Each device descriptor is followed by the description of its virtqueues. We |
1036 | * the virtqueue is to have. */ | 1070 | * specify how many descriptors the virtqueue is to have. */ |
1037 | static void add_virtqueue(struct device *dev, unsigned int num_descs, | 1071 | static void add_virtqueue(struct device *dev, unsigned int num_descs, |
1038 | void (*handle_output)(int fd, struct virtqueue *me)) | 1072 | void (*handle_output)(int fd, struct virtqueue *me)) |
1039 | { | 1073 | { |
@@ -1059,9 +1093,15 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1059 | /* Initialize the vring. */ | 1093 | /* Initialize the vring. */ |
1060 | vring_init(&vq->vring, num_descs, p, getpagesize()); | 1094 | vring_init(&vq->vring, num_descs, p, getpagesize()); |
1061 | 1095 | ||
1062 | /* Add the configuration information to this device's descriptor. */ | 1096 | /* Append virtqueue to this device's descriptor. We use |
1063 | add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE, | 1097 | * device_config() to get the end of the device's current virtqueues; |
1064 | sizeof(vq->config), &vq->config); | 1098 | * we check that we haven't added any config or feature information |
1099 | * yet, otherwise we'd be overwriting them. */ | ||
1100 | assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); | ||
1101 | memcpy(device_config(dev), &vq->config, sizeof(vq->config)); | ||
1102 | dev->desc->num_vq++; | ||
1103 | |||
1104 | verbose("Virtqueue page %#lx\n", to_guest_phys(p)); | ||
1065 | 1105 | ||
1066 | /* Add to tail of list, so dev->vq is first vq, dev->vq->next is | 1106 | /* Add to tail of list, so dev->vq is first vq, dev->vq->next is |
1067 | * second. */ | 1107 | * second. */ |
@@ -1072,11 +1112,41 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1072 | * virtqueue. */ | 1112 | * virtqueue. */ |
1073 | vq->handle_output = handle_output; | 1113 | vq->handle_output = handle_output; |
1074 | 1114 | ||
1075 | /* Set the "Don't Notify Me" flag if we don't have a handler */ | 1115 | /* As an optimization, set the advisory "Don't Notify Me" flag if we |
1116 | * don't have a handler */ | ||
1076 | if (!handle_output) | 1117 | if (!handle_output) |
1077 | vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; | 1118 | vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; |
1078 | } | 1119 | } |
1079 | 1120 | ||
1121 | /* The first half of the feature bitmask is for us to advertise features. The | ||
1122 | * second half is for the Guest to accept features. */ | ||
1123 | static void add_feature(struct device *dev, unsigned bit) | ||
1124 | { | ||
1125 | u8 *features = get_feature_bits(dev); | ||
1126 | |||
1127 | /* We can't extend the feature bits once we've added config bytes */ | ||
1128 | if (dev->desc->feature_len <= bit / CHAR_BIT) { | ||
1129 | assert(dev->desc->config_len == 0); | ||
1130 | dev->desc->feature_len = (bit / CHAR_BIT) + 1; | ||
1131 | } | ||
1132 | |||
1133 | features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); | ||
1134 | } | ||
1135 | |||
1136 | /* This routine sets the configuration fields for an existing device's | ||
1137 | * descriptor. It only works for the last device, but that's OK because that's | ||
1138 | * how we use it. */ | ||
1139 | static void set_config(struct device *dev, unsigned len, const void *conf) | ||
1140 | { | ||
1141 | /* Check we haven't overflowed our single page. */ | ||
1142 | if (device_config(dev) + len > devices.descpage + getpagesize()) | ||
1143 | errx(1, "Too many devices"); | ||
1144 | |||
1145 | /* Copy in the config information, and store the length. */ | ||
1146 | memcpy(device_config(dev), conf, len); | ||
1147 | dev->desc->config_len = len; | ||
1148 | } | ||
1149 | |||
1080 | /* This routine does all the creation and setup of a new device, including | 1150 | /* This routine does all the creation and setup of a new device, including |
1081 | * calling new_dev_desc() to allocate the descriptor and device memory. */ | 1151 | * calling new_dev_desc() to allocate the descriptor and device memory. */ |
1082 | static struct device *new_device(const char *name, u16 type, int fd, | 1152 | static struct device *new_device(const char *name, u16 type, int fd, |
@@ -1084,14 +1154,6 @@ static struct device *new_device(const char *name, u16 type, int fd, | |||
1084 | { | 1154 | { |
1085 | struct device *dev = malloc(sizeof(*dev)); | 1155 | struct device *dev = malloc(sizeof(*dev)); |
1086 | 1156 | ||
1087 | /* Append to device list. Prepending to a single-linked list is | ||
1088 | * easier, but the user expects the devices to be arranged on the bus | ||
1089 | * in command-line order. The first network device on the command line | ||
1090 | * is eth0, the first block device /dev/vda, etc. */ | ||
1091 | *devices.lastdev = dev; | ||
1092 | dev->next = NULL; | ||
1093 | devices.lastdev = &dev->next; | ||
1094 | |||
1095 | /* Now we populate the fields one at a time. */ | 1157 | /* Now we populate the fields one at a time. */ |
1096 | dev->fd = fd; | 1158 | dev->fd = fd; |
1097 | /* If we have an input handler for this file descriptor, then we add it | 1159 | /* If we have an input handler for this file descriptor, then we add it |
@@ -1102,6 +1164,17 @@ static struct device *new_device(const char *name, u16 type, int fd, | |||
1102 | dev->handle_input = handle_input; | 1164 | dev->handle_input = handle_input; |
1103 | dev->name = name; | 1165 | dev->name = name; |
1104 | dev->vq = NULL; | 1166 | dev->vq = NULL; |
1167 | |||
1168 | /* Append to device list. Prepending to a single-linked list is | ||
1169 | * easier, but the user expects the devices to be arranged on the bus | ||
1170 | * in command-line order. The first network device on the command line | ||
1171 | * is eth0, the first block device /dev/vda, etc. */ | ||
1172 | if (devices.lastdev) | ||
1173 | devices.lastdev->next = dev; | ||
1174 | else | ||
1175 | devices.dev = dev; | ||
1176 | devices.lastdev = dev; | ||
1177 | |||
1105 | return dev; | 1178 | return dev; |
1106 | } | 1179 | } |
1107 | 1180 | ||
@@ -1226,7 +1299,7 @@ static void setup_tun_net(const char *arg) | |||
1226 | int netfd, ipfd; | 1299 | int netfd, ipfd; |
1227 | u32 ip; | 1300 | u32 ip; |
1228 | const char *br_name = NULL; | 1301 | const char *br_name = NULL; |
1229 | u8 hwaddr[6]; | 1302 | struct virtio_net_config conf; |
1230 | 1303 | ||
1231 | /* We open the /dev/net/tun device and tell it we want a tap device. A | 1304 | /* We open the /dev/net/tun device and tell it we want a tap device. A |
1232 | * tap device is like a tun device, only somehow different. To tell | 1305 | * tap device is like a tun device, only somehow different. To tell |
@@ -1265,12 +1338,13 @@ static void setup_tun_net(const char *arg) | |||
1265 | ip = str2ip(arg); | 1338 | ip = str2ip(arg); |
1266 | 1339 | ||
1267 | /* Set up the tun device, and get the mac address for the interface. */ | 1340 | /* Set up the tun device, and get the mac address for the interface. */ |
1268 | configure_device(ipfd, ifr.ifr_name, ip, hwaddr); | 1341 | configure_device(ipfd, ifr.ifr_name, ip, conf.mac); |
1269 | 1342 | ||
1270 | /* Tell Guest what MAC address to use. */ | 1343 | /* Tell Guest what MAC address to use. */ |
1271 | add_desc_field(dev, VIRTIO_CONFIG_NET_MAC_F, sizeof(hwaddr), hwaddr); | 1344 | add_feature(dev, VIRTIO_NET_F_MAC); |
1345 | set_config(dev, sizeof(conf), &conf); | ||
1272 | 1346 | ||
1273 | /* We don't seed the socket any more; setup is done. */ | 1347 | /* We don't need the socket any more; setup is done. */ |
1274 | close(ipfd); | 1348 | close(ipfd); |
1275 | 1349 | ||
1276 | verbose("device %u: tun net %u.%u.%u.%u\n", | 1350 | verbose("device %u: tun net %u.%u.%u.%u\n", |
@@ -1458,8 +1532,7 @@ static void setup_block_file(const char *filename) | |||
1458 | struct device *dev; | 1532 | struct device *dev; |
1459 | struct vblk_info *vblk; | 1533 | struct vblk_info *vblk; |
1460 | void *stack; | 1534 | void *stack; |
1461 | u64 cap; | 1535 | struct virtio_blk_config conf; |
1462 | unsigned int val; | ||
1463 | 1536 | ||
1464 | /* This is the pipe the I/O thread will use to tell us I/O is done. */ | 1537 | /* This is the pipe the I/O thread will use to tell us I/O is done. */ |
1465 | pipe(p); | 1538 | pipe(p); |
@@ -1477,14 +1550,18 @@ static void setup_block_file(const char *filename) | |||
1477 | vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); | 1550 | vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); |
1478 | vblk->len = lseek64(vblk->fd, 0, SEEK_END); | 1551 | vblk->len = lseek64(vblk->fd, 0, SEEK_END); |
1479 | 1552 | ||
1553 | /* We support barriers. */ | ||
1554 | add_feature(dev, VIRTIO_BLK_F_BARRIER); | ||
1555 | |||
1480 | /* Tell Guest how many sectors this device has. */ | 1556 | /* Tell Guest how many sectors this device has. */ |
1481 | cap = cpu_to_le64(vblk->len / 512); | 1557 | conf.capacity = cpu_to_le64(vblk->len / 512); |
1482 | add_desc_field(dev, VIRTIO_CONFIG_BLK_F_CAPACITY, sizeof(cap), &cap); | ||
1483 | 1558 | ||
1484 | /* Tell Guest not to put in too many descriptors at once: two are used | 1559 | /* Tell Guest not to put in too many descriptors at once: two are used |
1485 | * for the in and out elements. */ | 1560 | * for the in and out elements. */ |
1486 | val = cpu_to_le32(VIRTQUEUE_NUM - 2); | 1561 | add_feature(dev, VIRTIO_BLK_F_SEG_MAX); |
1487 | add_desc_field(dev, VIRTIO_CONFIG_BLK_F_SEG_MAX, sizeof(val), &val); | 1562 | conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); |
1563 | |||
1564 | set_config(dev, sizeof(conf), &conf); | ||
1488 | 1565 | ||
1489 | /* The I/O thread writes to this end of the pipe when done. */ | 1566 | /* The I/O thread writes to this end of the pipe when done. */ |
1490 | vblk->done_fd = p[1]; | 1567 | vblk->done_fd = p[1]; |
@@ -1505,7 +1582,7 @@ static void setup_block_file(const char *filename) | |||
1505 | close(vblk->workpipe[0]); | 1582 | close(vblk->workpipe[0]); |
1506 | 1583 | ||
1507 | verbose("device %u: virtblock %llu sectors\n", | 1584 | verbose("device %u: virtblock %llu sectors\n", |
1508 | devices.device_num, cap); | 1585 | devices.device_num, le64_to_cpu(conf.capacity)); |
1509 | } | 1586 | } |
1510 | /* That's the end of device setup. :*/ | 1587 | /* That's the end of device setup. :*/ |
1511 | 1588 | ||
@@ -1610,12 +1687,12 @@ int main(int argc, char *argv[]) | |||
1610 | /* First we initialize the device list. Since console and network | 1687 | /* First we initialize the device list. Since console and network |
1611 | * device receive input from a file descriptor, we keep an fdset | 1688 | * device receive input from a file descriptor, we keep an fdset |
1612 | * (infds) and the maximum fd number (max_infd) with the head of the | 1689 | * (infds) and the maximum fd number (max_infd) with the head of the |
1613 | * list. We also keep a pointer to the last device, for easy appending | 1690 | * list. We also keep a pointer to the last device. Finally, we keep |
1614 | * to the list. Finally, we keep the next interrupt number to hand out | 1691 | * the next interrupt number to hand out (1: remember that 0 is used by |
1615 | * (1: remember that 0 is used by the timer). */ | 1692 | * the timer). */ |
1616 | FD_ZERO(&devices.infds); | 1693 | FD_ZERO(&devices.infds); |
1617 | devices.max_infd = -1; | 1694 | devices.max_infd = -1; |
1618 | devices.lastdev = &devices.dev; | 1695 | devices.lastdev = NULL; |
1619 | devices.next_irq = 1; | 1696 | devices.next_irq = 1; |
1620 | 1697 | ||
1621 | cpu_id = 0; | 1698 | cpu_id = 0; |