diff options
Diffstat (limited to 'Documentation/lguest/lguest.c')
-rw-r--r-- | Documentation/lguest/lguest.c | 142 |
1 files changed, 88 insertions, 54 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index bec5a32e4095..82fafe0429fe 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /*P:100 This is the Launcher code, a simple program which lays out the | 1 | /*P:100 This is the Launcher code, a simple program which lays out the |
2 | * "physical" memory for the new Guest by mapping the kernel image and the | 2 | * "physical" memory for the new Guest by mapping the kernel image and |
3 | * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. | 3 | * the virtual devices, then opens /dev/lguest to tell the kernel |
4 | :*/ | 4 | * about the Guest and control it. :*/ |
5 | #define _LARGEFILE64_SOURCE | 5 | #define _LARGEFILE64_SOURCE |
6 | #define _GNU_SOURCE | 6 | #define _GNU_SOURCE |
7 | #include <stdio.h> | 7 | #include <stdio.h> |
@@ -43,7 +43,7 @@ | |||
43 | #include "linux/virtio_console.h" | 43 | #include "linux/virtio_console.h" |
44 | #include "linux/virtio_ring.h" | 44 | #include "linux/virtio_ring.h" |
45 | #include "asm-x86/bootparam.h" | 45 | #include "asm-x86/bootparam.h" |
46 | /*L:110 We can ignore the 38 include files we need for this program, but I do | 46 | /*L:110 We can ignore the 39 include files we need for this program, but I do |
47 | * want to draw attention to the use of kernel-style types. | 47 | * want to draw attention to the use of kernel-style types. |
48 | * | 48 | * |
49 | * As Linus said, "C is a Spartan language, and so should your naming be." I | 49 | * As Linus said, "C is a Spartan language, and so should your naming be." I |
@@ -131,6 +131,9 @@ struct device | |||
131 | /* Any queues attached to this device */ | 131 | /* Any queues attached to this device */ |
132 | struct virtqueue *vq; | 132 | struct virtqueue *vq; |
133 | 133 | ||
134 | /* Handle status being finalized (ie. feature bits stable). */ | ||
135 | void (*ready)(struct device *me); | ||
136 | |||
134 | /* Device-specific data. */ | 137 | /* Device-specific data. */ |
135 | void *priv; | 138 | void *priv; |
136 | }; | 139 | }; |
@@ -154,6 +157,9 @@ struct virtqueue | |||
154 | 157 | ||
155 | /* The routine to call when the Guest pings us. */ | 158 | /* The routine to call when the Guest pings us. */ |
156 | void (*handle_output)(int fd, struct virtqueue *me); | 159 | void (*handle_output)(int fd, struct virtqueue *me); |
160 | |||
161 | /* Outstanding buffers */ | ||
162 | unsigned int inflight; | ||
157 | }; | 163 | }; |
158 | 164 | ||
159 | /* Remember the arguments to the program so we can "reboot" */ | 165 | /* Remember the arguments to the program so we can "reboot" */ |
@@ -320,7 +326,7 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) | |||
320 | err(1, "Reading program headers"); | 326 | err(1, "Reading program headers"); |
321 | 327 | ||
322 | /* Try all the headers: there are usually only three. A read-only one, | 328 | /* Try all the headers: there are usually only three. A read-only one, |
323 | * a read-write one, and a "note" section which isn't loadable. */ | 329 | * a read-write one, and a "note" section which we don't load. */ |
324 | for (i = 0; i < ehdr->e_phnum; i++) { | 330 | for (i = 0; i < ehdr->e_phnum; i++) { |
325 | /* If this isn't a loadable segment, we ignore it */ | 331 | /* If this isn't a loadable segment, we ignore it */ |
326 | if (phdr[i].p_type != PT_LOAD) | 332 | if (phdr[i].p_type != PT_LOAD) |
@@ -387,7 +393,7 @@ static unsigned long load_kernel(int fd) | |||
387 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) | 393 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) |
388 | return map_elf(fd, &hdr); | 394 | return map_elf(fd, &hdr); |
389 | 395 | ||
390 | /* Otherwise we assume it's a bzImage, and try to unpack it */ | 396 | /* Otherwise we assume it's a bzImage, and try to load it. */ |
391 | return load_bzimage(fd); | 397 | return load_bzimage(fd); |
392 | } | 398 | } |
393 | 399 | ||
@@ -433,12 +439,12 @@ static unsigned long load_initrd(const char *name, unsigned long mem) | |||
433 | return len; | 439 | return len; |
434 | } | 440 | } |
435 | 441 | ||
436 | /* Once we know how much memory we have, we can construct simple linear page | 442 | /* Once we know how much memory we have we can construct simple linear page |
437 | * tables which set virtual == physical which will get the Guest far enough | 443 | * tables which set virtual == physical which will get the Guest far enough |
438 | * into the boot to create its own. | 444 | * into the boot to create its own. |
439 | * | 445 | * |
440 | * We lay them out of the way, just below the initrd (which is why we need to | 446 | * We lay them out of the way, just below the initrd (which is why we need to |
441 | * know its size). */ | 447 | * know its size here). */ |
442 | static unsigned long setup_pagetables(unsigned long mem, | 448 | static unsigned long setup_pagetables(unsigned long mem, |
443 | unsigned long initrd_size) | 449 | unsigned long initrd_size) |
444 | { | 450 | { |
@@ -699,6 +705,7 @@ static unsigned get_vq_desc(struct virtqueue *vq, | |||
699 | errx(1, "Looped descriptor"); | 705 | errx(1, "Looped descriptor"); |
700 | } while ((i = next_desc(vq, i)) != vq->vring.num); | 706 | } while ((i = next_desc(vq, i)) != vq->vring.num); |
701 | 707 | ||
708 | vq->inflight++; | ||
702 | return head; | 709 | return head; |
703 | } | 710 | } |
704 | 711 | ||
@@ -716,6 +723,7 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len) | |||
716 | /* Make sure buffer is written before we update index. */ | 723 | /* Make sure buffer is written before we update index. */ |
717 | wmb(); | 724 | wmb(); |
718 | vq->vring.used->idx++; | 725 | vq->vring.used->idx++; |
726 | vq->inflight--; | ||
719 | } | 727 | } |
720 | 728 | ||
721 | /* This actually sends the interrupt for this virtqueue */ | 729 | /* This actually sends the interrupt for this virtqueue */ |
@@ -723,8 +731,9 @@ static void trigger_irq(int fd, struct virtqueue *vq) | |||
723 | { | 731 | { |
724 | unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; | 732 | unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; |
725 | 733 | ||
726 | /* If they don't want an interrupt, don't send one. */ | 734 | /* If they don't want an interrupt, don't send one, unless empty. */ |
727 | if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) | 735 | if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) |
736 | && vq->inflight) | ||
728 | return; | 737 | return; |
729 | 738 | ||
730 | /* Send the Guest an interrupt to tell them we used something up. */ | 739 | /* Send the Guest an interrupt to tell them we used something up. */ |
@@ -850,7 +859,8 @@ static void handle_console_output(int fd, struct virtqueue *vq) | |||
850 | * | 859 | * |
851 | * Handling output for network is also simple: we get all the output buffers | 860 | * Handling output for network is also simple: we get all the output buffers |
852 | * and write them (ignoring the first element) to this device's file descriptor | 861 | * and write them (ignoring the first element) to this device's file descriptor |
853 | * (stdout). */ | 862 | * (/dev/net/tun). |
863 | */ | ||
854 | static void handle_net_output(int fd, struct virtqueue *vq) | 864 | static void handle_net_output(int fd, struct virtqueue *vq) |
855 | { | 865 | { |
856 | unsigned int head, out, in; | 866 | unsigned int head, out, in; |
@@ -924,24 +934,40 @@ static void enable_fd(int fd, struct virtqueue *vq) | |||
924 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 934 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); |
925 | } | 935 | } |
926 | 936 | ||
927 | /* Resetting a device is fairly easy. */ | 937 | /* When the Guest tells us they updated the status field, we handle it. */ |
928 | static void reset_device(struct device *dev) | 938 | static void update_device_status(struct device *dev) |
929 | { | 939 | { |
930 | struct virtqueue *vq; | 940 | struct virtqueue *vq; |
931 | 941 | ||
932 | verbose("Resetting device %s\n", dev->name); | 942 | /* This is a reset. */ |
933 | /* Clear the status. */ | 943 | if (dev->desc->status == 0) { |
934 | dev->desc->status = 0; | 944 | verbose("Resetting device %s\n", dev->name); |
935 | 945 | ||
936 | /* Clear any features they've acked. */ | 946 | /* Clear any features they've acked. */ |
937 | memset(get_feature_bits(dev) + dev->desc->feature_len, 0, | 947 | memset(get_feature_bits(dev) + dev->desc->feature_len, 0, |
938 | dev->desc->feature_len); | 948 | dev->desc->feature_len); |
939 | 949 | ||
940 | /* Zero out the virtqueues. */ | 950 | /* Zero out the virtqueues. */ |
941 | for (vq = dev->vq; vq; vq = vq->next) { | 951 | for (vq = dev->vq; vq; vq = vq->next) { |
942 | memset(vq->vring.desc, 0, | 952 | memset(vq->vring.desc, 0, |
943 | vring_size(vq->config.num, getpagesize())); | 953 | vring_size(vq->config.num, getpagesize())); |
944 | vq->last_avail_idx = 0; | 954 | vq->last_avail_idx = 0; |
955 | } | ||
956 | } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { | ||
957 | warnx("Device %s configuration FAILED", dev->name); | ||
958 | } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { | ||
959 | unsigned int i; | ||
960 | |||
961 | verbose("Device %s OK: offered", dev->name); | ||
962 | for (i = 0; i < dev->desc->feature_len; i++) | ||
963 | verbose(" %08x", get_feature_bits(dev)[i]); | ||
964 | verbose(", accepted"); | ||
965 | for (i = 0; i < dev->desc->feature_len; i++) | ||
966 | verbose(" %08x", get_feature_bits(dev) | ||
967 | [dev->desc->feature_len+i]); | ||
968 | |||
969 | if (dev->ready) | ||
970 | dev->ready(dev); | ||
945 | } | 971 | } |
946 | } | 972 | } |
947 | 973 | ||
@@ -953,9 +979,9 @@ static void handle_output(int fd, unsigned long addr) | |||
953 | 979 | ||
954 | /* Check each device and virtqueue. */ | 980 | /* Check each device and virtqueue. */ |
955 | for (i = devices.dev; i; i = i->next) { | 981 | for (i = devices.dev; i; i = i->next) { |
956 | /* Notifications to device descriptors reset the device. */ | 982 | /* Notifications to device descriptors update device status. */ |
957 | if (from_guest_phys(addr) == i->desc) { | 983 | if (from_guest_phys(addr) == i->desc) { |
958 | reset_device(i); | 984 | update_device_status(i); |
959 | return; | 985 | return; |
960 | } | 986 | } |
961 | 987 | ||
@@ -1003,8 +1029,8 @@ static void handle_input(int fd) | |||
1003 | if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) | 1029 | if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) |
1004 | break; | 1030 | break; |
1005 | 1031 | ||
1006 | /* Otherwise, call the device(s) which have readable | 1032 | /* Otherwise, call the device(s) which have readable file |
1007 | * file descriptors and a method of handling them. */ | 1033 | * descriptors and a method of handling them. */ |
1008 | for (i = devices.dev; i; i = i->next) { | 1034 | for (i = devices.dev; i; i = i->next) { |
1009 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { | 1035 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { |
1010 | int dev_fd; | 1036 | int dev_fd; |
@@ -1015,8 +1041,7 @@ static void handle_input(int fd) | |||
1015 | * should no longer service it. Networking and | 1041 | * should no longer service it. Networking and |
1016 | * console do this when there's no input | 1042 | * console do this when there's no input |
1017 | * buffers to deliver into. Console also uses | 1043 | * buffers to deliver into. Console also uses |
1018 | * it when it discovers that stdin is | 1044 | * it when it discovers that stdin is closed. */ |
1019 | * closed. */ | ||
1020 | FD_CLR(i->fd, &devices.infds); | 1045 | FD_CLR(i->fd, &devices.infds); |
1021 | /* Tell waker to ignore it too, by sending a | 1046 | /* Tell waker to ignore it too, by sending a |
1022 | * negative fd number (-1, since 0 is a valid | 1047 | * negative fd number (-1, since 0 is a valid |
@@ -1033,7 +1058,8 @@ static void handle_input(int fd) | |||
1033 | * | 1058 | * |
1034 | * All devices need a descriptor so the Guest knows it exists, and a "struct | 1059 | * All devices need a descriptor so the Guest knows it exists, and a "struct |
1035 | * device" so the Launcher can keep track of it. We have common helper | 1060 | * device" so the Launcher can keep track of it. We have common helper |
1036 | * routines to allocate and manage them. */ | 1061 | * routines to allocate and manage them. |
1062 | */ | ||
1037 | 1063 | ||
1038 | /* The layout of the device page is a "struct lguest_device_desc" followed by a | 1064 | /* The layout of the device page is a "struct lguest_device_desc" followed by a |
1039 | * number of virtqueue descriptors, then two sets of feature bits, then an | 1065 | * number of virtqueue descriptors, then two sets of feature bits, then an |
@@ -1078,7 +1104,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1078 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); | 1104 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); |
1079 | void *p; | 1105 | void *p; |
1080 | 1106 | ||
1081 | /* First we need some pages for this virtqueue. */ | 1107 | /* First we need some memory for this virtqueue. */ |
1082 | pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) | 1108 | pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) |
1083 | / getpagesize(); | 1109 | / getpagesize(); |
1084 | p = get_pages(pages); | 1110 | p = get_pages(pages); |
@@ -1087,6 +1113,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1087 | vq->next = NULL; | 1113 | vq->next = NULL; |
1088 | vq->last_avail_idx = 0; | 1114 | vq->last_avail_idx = 0; |
1089 | vq->dev = dev; | 1115 | vq->dev = dev; |
1116 | vq->inflight = 0; | ||
1090 | 1117 | ||
1091 | /* Initialize the configuration. */ | 1118 | /* Initialize the configuration. */ |
1092 | vq->config.num = num_descs; | 1119 | vq->config.num = num_descs; |
@@ -1122,7 +1149,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1122 | } | 1149 | } |
1123 | 1150 | ||
1124 | /* The first half of the feature bitmask is for us to advertise features. The | 1151 | /* The first half of the feature bitmask is for us to advertise features. The |
1125 | * second half if for the Guest to accept features. */ | 1152 | * second half is for the Guest to accept features. */ |
1126 | static void add_feature(struct device *dev, unsigned bit) | 1153 | static void add_feature(struct device *dev, unsigned bit) |
1127 | { | 1154 | { |
1128 | u8 *features = get_feature_bits(dev); | 1155 | u8 *features = get_feature_bits(dev); |
@@ -1151,7 +1178,9 @@ static void set_config(struct device *dev, unsigned len, const void *conf) | |||
1151 | } | 1178 | } |
1152 | 1179 | ||
1153 | /* This routine does all the creation and setup of a new device, including | 1180 | /* This routine does all the creation and setup of a new device, including |
1154 | * calling new_dev_desc() to allocate the descriptor and device memory. */ | 1181 | * calling new_dev_desc() to allocate the descriptor and device memory. |
1182 | * | ||
1183 | * See what I mean about userspace being boring? */ | ||
1155 | static struct device *new_device(const char *name, u16 type, int fd, | 1184 | static struct device *new_device(const char *name, u16 type, int fd, |
1156 | bool (*handle_input)(int, struct device *)) | 1185 | bool (*handle_input)(int, struct device *)) |
1157 | { | 1186 | { |
@@ -1167,6 +1196,7 @@ static struct device *new_device(const char *name, u16 type, int fd, | |||
1167 | dev->handle_input = handle_input; | 1196 | dev->handle_input = handle_input; |
1168 | dev->name = name; | 1197 | dev->name = name; |
1169 | dev->vq = NULL; | 1198 | dev->vq = NULL; |
1199 | dev->ready = NULL; | ||
1170 | 1200 | ||
1171 | /* Append to device list. Prepending to a single-linked list is | 1201 | /* Append to device list. Prepending to a single-linked list is |
1172 | * easier, but the user expects the devices to be arranged on the bus | 1202 | * easier, but the user expects the devices to be arranged on the bus |
@@ -1345,6 +1375,7 @@ static void setup_tun_net(const char *arg) | |||
1345 | 1375 | ||
1346 | /* Tell Guest what MAC address to use. */ | 1376 | /* Tell Guest what MAC address to use. */ |
1347 | add_feature(dev, VIRTIO_NET_F_MAC); | 1377 | add_feature(dev, VIRTIO_NET_F_MAC); |
1378 | add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); | ||
1348 | set_config(dev, sizeof(conf), &conf); | 1379 | set_config(dev, sizeof(conf), &conf); |
1349 | 1380 | ||
1350 | /* We don't need the socket any more; setup is done. */ | 1381 | /* We don't need the socket any more; setup is done. */ |
@@ -1383,7 +1414,6 @@ struct vblk_info | |||
1383 | * Launcher triggers interrupt to Guest. */ | 1414 | * Launcher triggers interrupt to Guest. */ |
1384 | int done_fd; | 1415 | int done_fd; |
1385 | }; | 1416 | }; |
1386 | /*:*/ | ||
1387 | 1417 | ||
1388 | /*L:210 | 1418 | /*L:210 |
1389 | * The Disk | 1419 | * The Disk |
@@ -1396,7 +1426,7 @@ static bool service_io(struct device *dev) | |||
1396 | struct vblk_info *vblk = dev->priv; | 1426 | struct vblk_info *vblk = dev->priv; |
1397 | unsigned int head, out_num, in_num, wlen; | 1427 | unsigned int head, out_num, in_num, wlen; |
1398 | int ret; | 1428 | int ret; |
1399 | struct virtio_blk_inhdr *in; | 1429 | u8 *in; |
1400 | struct virtio_blk_outhdr *out; | 1430 | struct virtio_blk_outhdr *out; |
1401 | struct iovec iov[dev->vq->vring.num]; | 1431 | struct iovec iov[dev->vq->vring.num]; |
1402 | off64_t off; | 1432 | off64_t off; |
@@ -1414,7 +1444,7 @@ static bool service_io(struct device *dev) | |||
1414 | head, out_num, in_num); | 1444 | head, out_num, in_num); |
1415 | 1445 | ||
1416 | out = convert(&iov[0], struct virtio_blk_outhdr); | 1446 | out = convert(&iov[0], struct virtio_blk_outhdr); |
1417 | in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr); | 1447 | in = convert(&iov[out_num+in_num-1], u8); |
1418 | off = out->sector * 512; | 1448 | off = out->sector * 512; |
1419 | 1449 | ||
1420 | /* The block device implements "barriers", where the Guest indicates | 1450 | /* The block device implements "barriers", where the Guest indicates |
@@ -1428,7 +1458,7 @@ static bool service_io(struct device *dev) | |||
1428 | * It'd be nice if we supported eject, for example, but we don't. */ | 1458 | * It'd be nice if we supported eject, for example, but we don't. */ |
1429 | if (out->type & VIRTIO_BLK_T_SCSI_CMD) { | 1459 | if (out->type & VIRTIO_BLK_T_SCSI_CMD) { |
1430 | fprintf(stderr, "Scsi commands unsupported\n"); | 1460 | fprintf(stderr, "Scsi commands unsupported\n"); |
1431 | in->status = VIRTIO_BLK_S_UNSUPP; | 1461 | *in = VIRTIO_BLK_S_UNSUPP; |
1432 | wlen = sizeof(*in); | 1462 | wlen = sizeof(*in); |
1433 | } else if (out->type & VIRTIO_BLK_T_OUT) { | 1463 | } else if (out->type & VIRTIO_BLK_T_OUT) { |
1434 | /* Write */ | 1464 | /* Write */ |
@@ -1451,7 +1481,7 @@ static bool service_io(struct device *dev) | |||
1451 | errx(1, "Write past end %llu+%u", off, ret); | 1481 | errx(1, "Write past end %llu+%u", off, ret); |
1452 | } | 1482 | } |
1453 | wlen = sizeof(*in); | 1483 | wlen = sizeof(*in); |
1454 | in->status = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); | 1484 | *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); |
1455 | } else { | 1485 | } else { |
1456 | /* Read */ | 1486 | /* Read */ |
1457 | 1487 | ||
@@ -1464,10 +1494,10 @@ static bool service_io(struct device *dev) | |||
1464 | verbose("READ from sector %llu: %i\n", out->sector, ret); | 1494 | verbose("READ from sector %llu: %i\n", out->sector, ret); |
1465 | if (ret >= 0) { | 1495 | if (ret >= 0) { |
1466 | wlen = sizeof(*in) + ret; | 1496 | wlen = sizeof(*in) + ret; |
1467 | in->status = VIRTIO_BLK_S_OK; | 1497 | *in = VIRTIO_BLK_S_OK; |
1468 | } else { | 1498 | } else { |
1469 | wlen = sizeof(*in); | 1499 | wlen = sizeof(*in); |
1470 | in->status = VIRTIO_BLK_S_IOERR; | 1500 | *in = VIRTIO_BLK_S_IOERR; |
1471 | } | 1501 | } |
1472 | } | 1502 | } |
1473 | 1503 | ||
@@ -1493,7 +1523,10 @@ static int io_thread(void *_dev) | |||
1493 | while (read(vblk->workpipe[0], &c, 1) == 1) { | 1523 | while (read(vblk->workpipe[0], &c, 1) == 1) { |
1494 | /* We acknowledge each request immediately to reduce latency, | 1524 | /* We acknowledge each request immediately to reduce latency, |
1495 | * rather than waiting until we've done them all. I haven't | 1525 | * rather than waiting until we've done them all. I haven't |
1496 | * measured to see if it makes any difference. */ | 1526 | * measured to see if it makes any difference. |
1527 | * | ||
1528 | * That would be an interesting test, wouldn't it? You could | ||
1529 | * also try having more than one I/O thread. */ | ||
1497 | while (service_io(dev)) | 1530 | while (service_io(dev)) |
1498 | write(vblk->done_fd, &c, 1); | 1531 | write(vblk->done_fd, &c, 1); |
1499 | } | 1532 | } |
@@ -1501,7 +1534,7 @@ static int io_thread(void *_dev) | |||
1501 | } | 1534 | } |
1502 | 1535 | ||
1503 | /* Now we've seen the I/O thread, we return to the Launcher to see what happens | 1536 | /* Now we've seen the I/O thread, we return to the Launcher to see what happens |
1504 | * when the thread tells us it's completed some I/O. */ | 1537 | * when that thread tells us it's completed some I/O. */ |
1505 | static bool handle_io_finish(int fd, struct device *dev) | 1538 | static bool handle_io_finish(int fd, struct device *dev) |
1506 | { | 1539 | { |
1507 | char c; | 1540 | char c; |
@@ -1573,11 +1606,12 @@ static void setup_block_file(const char *filename) | |||
1573 | * more work. */ | 1606 | * more work. */ |
1574 | pipe(vblk->workpipe); | 1607 | pipe(vblk->workpipe); |
1575 | 1608 | ||
1576 | /* Create stack for thread and run it */ | 1609 | /* Create stack for thread and run it. Since the stack grows downwards, we |
| 1610 | * point the stack pointer to the end of this region. */ | |
1577 | stack = malloc(32768); | 1611 | stack = malloc(32768); |
1578 | /* SIGCHLD - We don't "wait" for our cloned thread, so prevent it from | 1612 | /* SIGCHLD - We don't "wait" for our cloned thread, so prevent it from |
1579 | * becoming a zombie. */ | 1613 | * becoming a zombie. */ |
1580 | if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) | 1614 | if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) |
1581 | err(1, "Creating clone"); | 1615 | err(1, "Creating clone"); |
1582 | 1616 | ||
1583 | /* We don't need to keep the I/O thread's end of the pipes open. */ | 1617 | /* We don't need to keep the I/O thread's end of the pipes open. */ |
@@ -1587,14 +1621,14 @@ static void setup_block_file(const char *filename) | |||
1587 | verbose("device %u: virtblock %llu sectors\n", | 1621 | verbose("device %u: virtblock %llu sectors\n", |
1588 | devices.device_num, le64_to_cpu(conf.capacity)); | 1622 | devices.device_num, le64_to_cpu(conf.capacity)); |
1589 | } | 1623 | } |
1590 | /* That's the end of device setup. :*/ | 1624 | /* That's the end of device setup. */ |
1591 | 1625 | ||
1592 | /* Reboot */ | 1626 | /*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ |
1593 | static void __attribute__((noreturn)) restart_guest(void) | 1627 | static void __attribute__((noreturn)) restart_guest(void) |
1594 | { | 1628 | { |
1595 | unsigned int i; | 1629 | unsigned int i; |
1596 | 1630 | ||
1597 | /* Closing pipes causes the waker thread and io_threads to die, and | 1631 | /* Closing pipes causes the Waker thread and io_threads to die, and |
1598 | * closing /dev/lguest cleans up the Guest. Since we don't track all | 1632 | * closing /dev/lguest cleans up the Guest. Since we don't track all |
1599 | * open fds, we simply close everything beyond stderr. */ | 1633 | * open fds, we simply close everything beyond stderr. */ |
1600 | for (i = 3; i < FD_SETSIZE; i++) | 1634 | for (i = 3; i < FD_SETSIZE; i++) |
@@ -1603,7 +1637,7 @@ static void __attribute__((noreturn)) restart_guest(void) | |||
1603 | err(1, "Could not exec %s", main_args[0]); | 1637 | err(1, "Could not exec %s", main_args[0]); |
1604 | } | 1638 | } |
1605 | 1639 | ||
1606 | /*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves | 1640 | /*L:220 Finally we reach the core of the Launcher which runs the Guest, serves |
1607 | * its input and output, and finally, lays it to rest. */ | 1641 | * its input and output, and finally, lays it to rest. */ |
1608 | static void __attribute__((noreturn)) run_guest(int lguest_fd) | 1642 | static void __attribute__((noreturn)) run_guest(int lguest_fd) |
1609 | { | 1643 | { |
@@ -1644,7 +1678,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) | |||
1644 | err(1, "Resetting break"); | 1678 | err(1, "Resetting break"); |
1645 | } | 1679 | } |
1646 | } | 1680 | } |
1647 | /* | 1681 | /*L:240 |
1648 | * This is the end of the Launcher. The good news: we are over halfway | 1682 | * This is the end of the Launcher. The good news: we are over halfway |
1649 | * through! The bad news: the most fiendish part of the code still lies ahead | 1683 | * through! The bad news: the most fiendish part of the code still lies ahead |
1650 | * of us. | 1684 | * of us. |
@@ -1691,8 +1725,8 @@ int main(int argc, char *argv[]) | |||
1691 | * device receive input from a file descriptor, we keep an fdset | 1725 | * device receive input from a file descriptor, we keep an fdset |
1692 | * (infds) and the maximum fd number (max_infd) with the head of the | 1726 | * (infds) and the maximum fd number (max_infd) with the head of the |
1693 | * list. We also keep a pointer to the last device. Finally, we keep | 1727 | * list. We also keep a pointer to the last device. Finally, we keep |
1694 | * the next interrupt number to hand out (1: remember that 0 is used by | 1728 | * the next interrupt number to use for devices (1: remember that 0 is |
1695 | * the timer). */ | 1729 | * used by the timer). */ |
1696 | FD_ZERO(&devices.infds); | 1730 | FD_ZERO(&devices.infds); |
1697 | devices.max_infd = -1; | 1731 | devices.max_infd = -1; |
1698 | devices.lastdev = NULL; | 1732 | devices.lastdev = NULL; |
@@ -1793,8 +1827,8 @@ int main(int argc, char *argv[]) | |||
1793 | lguest_fd = tell_kernel(pgdir, start); | 1827 | lguest_fd = tell_kernel(pgdir, start); |
1794 | 1828 | ||
1795 | /* We fork off a child process, which wakes the Launcher whenever one | 1829 | /* We fork off a child process, which wakes the Launcher whenever one |
1796 | * of the input file descriptors needs attention. Otherwise we would | 1830 | * of the input file descriptors needs attention. We call this the |
1797 | * run the Guest until it tries to output something. */ | 1831 | * Waker, and we'll cover it in a moment. */ |
1798 | waker_fd = setup_waker(lguest_fd); | 1832 | waker_fd = setup_waker(lguest_fd); |
1799 | 1833 | ||
1800 | /* Finally, run the Guest. This doesn't return. */ | 1834 | /* Finally, run the Guest. This doesn't return. */ |