diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2008-02-04 23:49:56 -0500 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2008-02-04 07:49:57 -0500 |
commit | a586d4f6016f7139d8c26df0e6927131168d3b5b (patch) | |
tree | 1c47e1a6b6b8fb18baa42f32980f29c4ae9cbbdc /Documentation/lguest | |
parent | f35d9d8aae08940b7fdd1bb8110619da2ece6b28 (diff) |
virtio: simplify config mechanism.
Previously we used a type/len pair within the config space, but this
seems overkill. We now simply define a structure which represents the
layout in the config space: the config space can now only be extended
at the end.
The main driver-visible changes:
1) We indicate what fields are present with an explicit feature bit.
2) Virtqueues are explicitly numbered, and not in the config space.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'Documentation/lguest')
-rw-r--r-- | Documentation/lguest/lguest.c | 176 |
1 file changed, 105 insertions, 71 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 6c8a2386cd50..4df1804169dc 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include <zlib.h> | 34 | #include <zlib.h> |
35 | #include <assert.h> | 35 | #include <assert.h> |
36 | #include <sched.h> | 36 | #include <sched.h> |
37 | #include <limits.h> | ||
38 | #include <stddef.h> | ||
37 | #include "linux/lguest_launcher.h" | 39 | #include "linux/lguest_launcher.h" |
38 | #include "linux/virtio_config.h" | 40 | #include "linux/virtio_config.h" |
39 | #include "linux/virtio_net.h" | 41 | #include "linux/virtio_net.h" |
@@ -99,13 +101,11 @@ struct device_list | |||
99 | /* The descriptor page for the devices. */ | 101 | /* The descriptor page for the devices. */ |
100 | u8 *descpage; | 102 | u8 *descpage; |
101 | 103 | ||
102 | /* The tail of the last descriptor. */ | ||
103 | unsigned int desc_used; | ||
104 | |||
105 | /* A single linked list of devices. */ | 104 | /* A single linked list of devices. */ |
106 | struct device *dev; | 105 | struct device *dev; |
107 | /* ... And an end pointer so we can easily append new devices */ | 106 | /* And a pointer to the last device for easy append and also for |
108 | struct device **lastdev; | 107 | * configuration appending. */ |
108 | struct device *lastdev; | ||
109 | }; | 109 | }; |
110 | 110 | ||
111 | /* The list of Guest devices, based on command line arguments. */ | 111 | /* The list of Guest devices, based on command line arguments. */ |
@@ -191,7 +191,7 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, | |||
191 | #define cpu_to_le64(v64) (v64) | 191 | #define cpu_to_le64(v64) (v64) |
192 | #define le16_to_cpu(v16) (v16) | 192 | #define le16_to_cpu(v16) (v16) |
193 | #define le32_to_cpu(v32) (v32) | 193 | #define le32_to_cpu(v32) (v32) |
194 | #define le64_to_cpu(v32) (v64) | 194 | #define le64_to_cpu(v64) (v64) |
195 | 195 | ||
196 | /*L:100 The Launcher code itself takes us out into userspace, that scary place | 196 | /*L:100 The Launcher code itself takes us out into userspace, that scary place |
197 | * where pointers run wild and free! Unfortunately, like most userspace | 197 | * where pointers run wild and free! Unfortunately, like most userspace |
@@ -986,54 +986,44 @@ static void handle_input(int fd) | |||
986 | * | 986 | * |
987 | * All devices need a descriptor so the Guest knows it exists, and a "struct | 987 | * All devices need a descriptor so the Guest knows it exists, and a "struct |
988 | * device" so the Launcher can keep track of it. We have common helper | 988 | * device" so the Launcher can keep track of it. We have common helper |
989 | * routines to allocate them. | 989 | * routines to allocate and manage them. */ |
990 | * | ||
991 | * This routine allocates a new "struct lguest_device_desc" from descriptor | ||
992 | * table just above the Guest's normal memory. It returns a pointer to that | ||
993 | * descriptor. */ | ||
994 | static struct lguest_device_desc *new_dev_desc(u16 type) | ||
995 | { | ||
996 | struct lguest_device_desc *d; | ||
997 | 990 | ||
998 | /* We only have one page for all the descriptors. */ | 991 | /* The layout of the device page is a "struct lguest_device_desc" followed by a |
999 | if (devices.desc_used + sizeof(*d) > getpagesize()) | 992 | * number of virtqueue descriptors, then two sets of feature bits, then an |
1000 | errx(1, "Too many devices"); | 993 | * array of configuration bytes. This routine returns the configuration |
1001 | 994 | * pointer. */ | |
1002 | /* We don't need to set config_len or status: page is 0 already. */ | 995 | static u8 *device_config(const struct device *dev) |
1003 | d = (void *)devices.descpage + devices.desc_used; | 996 | { |
1004 | d->type = type; | 997 | return (void *)(dev->desc + 1) |
1005 | devices.desc_used += sizeof(*d); | 998 | + dev->desc->num_vq * sizeof(struct lguest_vqconfig) |
1006 | 999 | + dev->desc->feature_len * 2; | |
1007 | return d; | ||
1008 | } | 1000 | } |
1009 | 1001 | ||
1010 | /* Each device descriptor is followed by some configuration information. | 1002 | /* This routine allocates a new "struct lguest_device_desc" from descriptor |
1011 | * Each configuration field looks like: u8 type, u8 len, [... len bytes...]. | 1003 | * table page just above the Guest's normal memory. It returns a pointer to |
1012 | * | 1004 | * that descriptor. */ |
1013 | * This routine adds a new field to an existing device's descriptor. It only | 1005 | static struct lguest_device_desc *new_dev_desc(u16 type) |
1014 | * works for the last device, but that's OK because that's how we use it. */ | ||
1015 | static void add_desc_field(struct device *dev, u8 type, u8 len, const void *c) | ||
1016 | { | 1006 | { |
1017 | /* This is the last descriptor, right? */ | 1007 | struct lguest_device_desc d = { .type = type }; |
1018 | assert(devices.descpage + devices.desc_used | 1008 | void *p; |
1019 | == (u8 *)(dev->desc + 1) + dev->desc->config_len); | ||
1020 | 1009 | ||
1021 | /* We only have one page of device descriptions. */ | 1010 | /* Figure out where the next device config is, based on the last one. */ |
1022 | if (devices.desc_used + 2 + len > getpagesize()) | 1011 | if (devices.lastdev) |
1023 | errx(1, "Too many devices"); | 1012 | p = device_config(devices.lastdev) |
1013 | + devices.lastdev->desc->config_len; | ||
1014 | else | ||
1015 | p = devices.descpage; | ||
1024 | 1016 | ||
1025 | /* Copy in the new config header: type then length. */ | 1017 | /* We only have one page for all the descriptors. */ |
1026 | devices.descpage[devices.desc_used++] = type; | 1018 | if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) |
1027 | devices.descpage[devices.desc_used++] = len; | 1019 | errx(1, "Too many devices"); |
1028 | memcpy(devices.descpage + devices.desc_used, c, len); | ||
1029 | devices.desc_used += len; | ||
1030 | 1020 | ||
1031 | /* Update the device descriptor length: two byte head then data. */ | 1021 | /* p might not be aligned, so we memcpy in. */ |
1032 | dev->desc->config_len += 2 + len; | 1022 | return memcpy(p, &d, sizeof(d)); |
1033 | } | 1023 | } |
1034 | 1024 | ||
1035 | /* This routine adds a virtqueue to a device. We specify how many descriptors | 1025 | /* Each device descriptor is followed by the description of its virtqueues. We |
1036 | * the virtqueue is to have. */ | 1026 | * specify how many descriptors the virtqueue is to have. */ |
1037 | static void add_virtqueue(struct device *dev, unsigned int num_descs, | 1027 | static void add_virtqueue(struct device *dev, unsigned int num_descs, |
1038 | void (*handle_output)(int fd, struct virtqueue *me)) | 1028 | void (*handle_output)(int fd, struct virtqueue *me)) |
1039 | { | 1029 | { |
@@ -1059,9 +1049,15 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1059 | /* Initialize the vring. */ | 1049 | /* Initialize the vring. */ |
1060 | vring_init(&vq->vring, num_descs, p, getpagesize()); | 1050 | vring_init(&vq->vring, num_descs, p, getpagesize()); |
1061 | 1051 | ||
1062 | /* Add the configuration information to this device's descriptor. */ | 1052 | /* Append virtqueue to this device's descriptor. We use |
1063 | add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE, | 1053 | * device_config() to get the end of the device's current virtqueues; |
1064 | sizeof(vq->config), &vq->config); | 1054 | * we check that we haven't added any config or feature information |
1055 | * yet, otherwise we'd be overwriting them. */ | ||
1056 | assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); | ||
1057 | memcpy(device_config(dev), &vq->config, sizeof(vq->config)); | ||
1058 | dev->desc->num_vq++; | ||
1059 | |||
1060 | verbose("Virtqueue page %#lx\n", to_guest_phys(p)); | ||
1065 | 1061 | ||
1066 | /* Add to tail of list, so dev->vq is first vq, dev->vq->next is | 1062 | /* Add to tail of list, so dev->vq is first vq, dev->vq->next is |
1067 | * second. */ | 1063 | * second. */ |
@@ -1077,6 +1073,37 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1077 | vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; | 1073 | vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; |
1078 | } | 1074 | } |
1079 | 1075 | ||
1076 | /* The virtqueue descriptors are followed by feature bytes. */ | ||
1077 | static void add_feature(struct device *dev, unsigned bit) | ||
1078 | { | ||
1079 | u8 *features; | ||
1080 | |||
1081 | /* We can't extend the feature bits once we've added config bytes */ | ||
1082 | if (dev->desc->feature_len <= bit / CHAR_BIT) { | ||
1083 | assert(dev->desc->config_len == 0); | ||
1084 | dev->desc->feature_len = (bit / CHAR_BIT) + 1; | ||
1085 | } | ||
1086 | |||
1087 | features = (u8 *)(dev->desc + 1) | ||
1088 | + dev->desc->num_vq * sizeof(struct lguest_vqconfig); | ||
1089 | |||
1090 | features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); | ||
1091 | } | ||
1092 | |||
1093 | /* This routine sets the configuration fields for an existing device's | ||
1094 | * descriptor. It only works for the last device, but that's OK because that's | ||
1095 | * how we use it. */ | ||
1096 | static void set_config(struct device *dev, unsigned len, const void *conf) | ||
1097 | { | ||
1098 | /* Check we haven't overflowed our single page. */ | ||
1099 | if (device_config(dev) + len > devices.descpage + getpagesize()) | ||
1100 | errx(1, "Too many devices"); | ||
1101 | |||
1102 | /* Copy in the config information, and store the length. */ | ||
1103 | memcpy(device_config(dev), conf, len); | ||
1104 | dev->desc->config_len = len; | ||
1105 | } | ||
1106 | |||
1080 | /* This routine does all the creation and setup of a new device, including | 1107 | /* This routine does all the creation and setup of a new device, including |
1081 | * calling new_dev_desc() to allocate the descriptor and device memory. */ | 1108 | * calling new_dev_desc() to allocate the descriptor and device memory. */ |
1082 | static struct device *new_device(const char *name, u16 type, int fd, | 1109 | static struct device *new_device(const char *name, u16 type, int fd, |
@@ -1084,14 +1111,6 @@ static struct device *new_device(const char *name, u16 type, int fd, | |||
1084 | { | 1111 | { |
1085 | struct device *dev = malloc(sizeof(*dev)); | 1112 | struct device *dev = malloc(sizeof(*dev)); |
1086 | 1113 | ||
1087 | /* Append to device list. Prepending to a single-linked list is | ||
1088 | * easier, but the user expects the devices to be arranged on the bus | ||
1089 | * in command-line order. The first network device on the command line | ||
1090 | * is eth0, the first block device /dev/vda, etc. */ | ||
1091 | *devices.lastdev = dev; | ||
1092 | dev->next = NULL; | ||
1093 | devices.lastdev = &dev->next; | ||
1094 | |||
1095 | /* Now we populate the fields one at a time. */ | 1114 | /* Now we populate the fields one at a time. */ |
1096 | dev->fd = fd; | 1115 | dev->fd = fd; |
1097 | /* If we have an input handler for this file descriptor, then we add it | 1116 | /* If we have an input handler for this file descriptor, then we add it |
@@ -1102,6 +1121,17 @@ static struct device *new_device(const char *name, u16 type, int fd, | |||
1102 | dev->handle_input = handle_input; | 1121 | dev->handle_input = handle_input; |
1103 | dev->name = name; | 1122 | dev->name = name; |
1104 | dev->vq = NULL; | 1123 | dev->vq = NULL; |
1124 | |||
1125 | /* Append to device list. Prepending to a single-linked list is | ||
1126 | * easier, but the user expects the devices to be arranged on the bus | ||
1127 | * in command-line order. The first network device on the command line | ||
1128 | * is eth0, the first block device /dev/vda, etc. */ | ||
1129 | if (devices.lastdev) | ||
1130 | devices.lastdev->next = dev; | ||
1131 | else | ||
1132 | devices.dev = dev; | ||
1133 | devices.lastdev = dev; | ||
1134 | |||
1105 | return dev; | 1135 | return dev; |
1106 | } | 1136 | } |
1107 | 1137 | ||
@@ -1226,7 +1256,7 @@ static void setup_tun_net(const char *arg) | |||
1226 | int netfd, ipfd; | 1256 | int netfd, ipfd; |
1227 | u32 ip; | 1257 | u32 ip; |
1228 | const char *br_name = NULL; | 1258 | const char *br_name = NULL; |
1229 | u8 hwaddr[6]; | 1259 | struct virtio_net_config conf; |
1230 | 1260 | ||
1231 | /* We open the /dev/net/tun device and tell it we want a tap device. A | 1261 | /* We open the /dev/net/tun device and tell it we want a tap device. A |
1232 | * tap device is like a tun device, only somehow different. To tell | 1262 | * tap device is like a tun device, only somehow different. To tell |
@@ -1265,12 +1295,13 @@ static void setup_tun_net(const char *arg) | |||
1265 | ip = str2ip(arg); | 1295 | ip = str2ip(arg); |
1266 | 1296 | ||
1267 | /* Set up the tun device, and get the mac address for the interface. */ | 1297 | /* Set up the tun device, and get the mac address for the interface. */ |
1268 | configure_device(ipfd, ifr.ifr_name, ip, hwaddr); | 1298 | configure_device(ipfd, ifr.ifr_name, ip, conf.mac); |
1269 | 1299 | ||
1270 | /* Tell Guest what MAC address to use. */ | 1300 | /* Tell Guest what MAC address to use. */ |
1271 | add_desc_field(dev, VIRTIO_CONFIG_NET_MAC_F, sizeof(hwaddr), hwaddr); | 1301 | add_feature(dev, VIRTIO_NET_F_MAC); |
1302 | set_config(dev, sizeof(conf), &conf); | ||
1272 | 1303 | ||
1273 | /* We don't seed the socket any more; setup is done. */ | 1304 | /* We don't need the socket any more; setup is done. */ |
1274 | close(ipfd); | 1305 | close(ipfd); |
1275 | 1306 | ||
1276 | verbose("device %u: tun net %u.%u.%u.%u\n", | 1307 | verbose("device %u: tun net %u.%u.%u.%u\n", |
@@ -1458,8 +1489,7 @@ static void setup_block_file(const char *filename) | |||
1458 | struct device *dev; | 1489 | struct device *dev; |
1459 | struct vblk_info *vblk; | 1490 | struct vblk_info *vblk; |
1460 | void *stack; | 1491 | void *stack; |
1461 | u64 cap; | 1492 | struct virtio_blk_config conf; |
1462 | unsigned int val; | ||
1463 | 1493 | ||
1464 | /* This is the pipe the I/O thread will use to tell us I/O is done. */ | 1494 | /* This is the pipe the I/O thread will use to tell us I/O is done. */ |
1465 | pipe(p); | 1495 | pipe(p); |
@@ -1477,14 +1507,18 @@ static void setup_block_file(const char *filename) | |||
1477 | vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); | 1507 | vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); |
1478 | vblk->len = lseek64(vblk->fd, 0, SEEK_END); | 1508 | vblk->len = lseek64(vblk->fd, 0, SEEK_END); |
1479 | 1509 | ||
1510 | /* We support barriers. */ | ||
1511 | add_feature(dev, VIRTIO_BLK_F_BARRIER); | ||
1512 | |||
1480 | /* Tell Guest how many sectors this device has. */ | 1513 | /* Tell Guest how many sectors this device has. */ |
1481 | cap = cpu_to_le64(vblk->len / 512); | 1514 | conf.capacity = cpu_to_le64(vblk->len / 512); |
1482 | add_desc_field(dev, VIRTIO_CONFIG_BLK_F_CAPACITY, sizeof(cap), &cap); | ||
1483 | 1515 | ||
1484 | /* Tell Guest not to put in too many descriptors at once: two are used | 1516 | /* Tell Guest not to put in too many descriptors at once: two are used |
1485 | * for the in and out elements. */ | 1517 | * for the in and out elements. */ |
1486 | val = cpu_to_le32(VIRTQUEUE_NUM - 2); | 1518 | add_feature(dev, VIRTIO_BLK_F_SEG_MAX); |
1487 | add_desc_field(dev, VIRTIO_CONFIG_BLK_F_SEG_MAX, sizeof(val), &val); | 1519 | conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); |
1520 | |||
1521 | set_config(dev, sizeof(conf), &conf); | ||
1488 | 1522 | ||
1489 | /* The I/O thread writes to this end of the pipe when done. */ | 1523 | /* The I/O thread writes to this end of the pipe when done. */ |
1490 | vblk->done_fd = p[1]; | 1524 | vblk->done_fd = p[1]; |
@@ -1505,7 +1539,7 @@ static void setup_block_file(const char *filename) | |||
1505 | close(vblk->workpipe[0]); | 1539 | close(vblk->workpipe[0]); |
1506 | 1540 | ||
1507 | verbose("device %u: virtblock %llu sectors\n", | 1541 | verbose("device %u: virtblock %llu sectors\n", |
1508 | devices.device_num, cap); | 1542 | devices.device_num, le64_to_cpu(conf.capacity)); |
1509 | } | 1543 | } |
1510 | /* That's the end of device setup. :*/ | 1544 | /* That's the end of device setup. :*/ |
1511 | 1545 | ||
@@ -1610,12 +1644,12 @@ int main(int argc, char *argv[]) | |||
1610 | /* First we initialize the device list. Since console and network | 1644 | /* First we initialize the device list. Since console and network |
1611 | * device receive input from a file descriptor, we keep an fdset | 1645 | * device receive input from a file descriptor, we keep an fdset |
1612 | * (infds) and the maximum fd number (max_infd) with the head of the | 1646 | * (infds) and the maximum fd number (max_infd) with the head of the |
1613 | * list. We also keep a pointer to the last device, for easy appending | 1647 | * list. We also keep a pointer to the last device. Finally, we keep |
1614 | * to the list. Finally, we keep the next interrupt number to hand out | 1648 | * the next interrupt number to hand out (1: remember that 0 is used by |
1615 | * (1: remember that 0 is used by the timer). */ | 1649 | * the timer). */ |
1616 | FD_ZERO(&devices.infds); | 1650 | FD_ZERO(&devices.infds); |
1617 | devices.max_infd = -1; | 1651 | devices.max_infd = -1; |
1618 | devices.lastdev = &devices.dev; | 1652 | devices.lastdev = NULL; |
1619 | devices.next_irq = 1; | 1653 | devices.next_irq = 1; |
1620 | 1654 | ||
1621 | cpu_id = 0; | 1655 | cpu_id = 0; |