diff options
Diffstat (limited to 'Documentation/lguest')
-rw-r--r-- | Documentation/lguest/lguest.c | 155 |
1 files changed, 95 insertions, 60 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index c91c28ae8290..f2668390e8f7 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -360,8 +360,8 @@ static unsigned long load_bzimage(int fd) | |||
360 | } | 360 | } |
361 | 361 | ||
362 | /*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels | 362 | /*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels |
363 | * come wrapped up in the self-decompressing "bzImage" format. With some funky | 363 | * come wrapped up in the self-decompressing "bzImage" format. With a little |
364 | * coding, we can load those, too. */ | 364 | * work, we can load those, too. */ |
365 | static unsigned long load_kernel(int fd) | 365 | static unsigned long load_kernel(int fd) |
366 | { | 366 | { |
367 | Elf32_Ehdr hdr; | 367 | Elf32_Ehdr hdr; |
@@ -464,6 +464,7 @@ static unsigned long setup_pagetables(unsigned long mem, | |||
464 | * to know where it is. */ | 464 | * to know where it is. */ |
465 | return to_guest_phys(pgdir); | 465 | return to_guest_phys(pgdir); |
466 | } | 466 | } |
467 | /*:*/ | ||
467 | 468 | ||
468 | /* Simple routine to roll all the commandline arguments together with spaces | 469 | /* Simple routine to roll all the commandline arguments together with spaces |
469 | * between them. */ | 470 | * between them. */ |
@@ -480,9 +481,9 @@ static void concat(char *dst, char *args[]) | |||
480 | dst[len] = '\0'; | 481 | dst[len] = '\0'; |
481 | } | 482 | } |
482 | 483 | ||
483 | /* This is where we actually tell the kernel to initialize the Guest. We saw | 484 | /*L:185 This is where we actually tell the kernel to initialize the Guest. We |
484 | * the arguments it expects when we looked at initialize() in lguest_user.c: | 485 | * saw the arguments it expects when we looked at initialize() in lguest_user.c: |
485 | * the base of guest "physical" memory, the top physical page to allow, the | 486 | * the base of Guest "physical" memory, the top physical page to allow, the |
486 | * top level pagetable and the entry point for the Guest. */ | 487 | * top level pagetable and the entry point for the Guest. */ |
487 | static int tell_kernel(unsigned long pgdir, unsigned long start) | 488 | static int tell_kernel(unsigned long pgdir, unsigned long start) |
488 | { | 489 | { |
@@ -512,13 +513,14 @@ static void add_device_fd(int fd) | |||
512 | /*L:200 | 513 | /*L:200 |
513 | * The Waker. | 514 | * The Waker. |
514 | * | 515 | * |
515 | * With a console and network devices, we can have lots of input which we need | 516 | * With console, block and network devices, we can have lots of input which we |
516 | * to process. We could try to tell the kernel what file descriptors to watch, | 517 | * need to process. We could try to tell the kernel what file descriptors to |
517 | * but handing a file descriptor mask through to the kernel is fairly icky. | 518 | * watch, but handing a file descriptor mask through to the kernel is fairly |
519 | * icky. | ||
518 | * | 520 | * |
519 | * Instead, we fork off a process which watches the file descriptors and writes | 521 | * Instead, we fork off a process which watches the file descriptors and writes |
520 | * the LHREQ_BREAK command to the /dev/lguest filedescriptor to tell the Host | 522 | * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host |
521 | * loop to stop running the Guest. This causes it to return from the | 523 | * to stop running the Guest. This causes the Launcher to return from the |
522 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset | 524 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset |
523 | * the LHREQ_BREAK and wake us up again. | 525 | * the LHREQ_BREAK and wake us up again. |
524 | * | 526 | * |
@@ -544,7 +546,9 @@ static void wake_parent(int pipefd, int lguest_fd) | |||
544 | if (read(pipefd, &fd, sizeof(fd)) == 0) | 546 | if (read(pipefd, &fd, sizeof(fd)) == 0) |
545 | exit(0); | 547 | exit(0); |
546 | /* Otherwise it's telling us to change what file | 548 | /* Otherwise it's telling us to change what file |
547 | * descriptors we're to listen to. */ | 549 | * descriptors we're to listen to. Positive means |
550 | * listen to a new one, negative means stop | ||
551 | * listening. */ | ||
548 | if (fd >= 0) | 552 | if (fd >= 0) |
549 | FD_SET(fd, &devices.infds); | 553 | FD_SET(fd, &devices.infds); |
550 | else | 554 | else |
@@ -559,7 +563,7 @@ static int setup_waker(int lguest_fd) | |||
559 | { | 563 | { |
560 | int pipefd[2], child; | 564 | int pipefd[2], child; |
561 | 565 | ||
562 | /* We create a pipe to talk to the waker, and also so it knows when the | 566 | /* We create a pipe to talk to the Waker, and also so it knows when the |
563 | * Launcher dies (and closes pipe). */ | 567 | * Launcher dies (and closes pipe). */ |
564 | pipe(pipefd); | 568 | pipe(pipefd); |
565 | child = fork(); | 569 | child = fork(); |
@@ -567,7 +571,8 @@ static int setup_waker(int lguest_fd) | |||
567 | err(1, "forking"); | 571 | err(1, "forking"); |
568 | 572 | ||
569 | if (child == 0) { | 573 | if (child == 0) { |
570 | /* Close the "writing" end of our copy of the pipe */ | 574 | /* We are the Waker: close the "writing" end of our copy of the |
575 | * pipe and start waiting for input. */ | ||
571 | close(pipefd[1]); | 576 | close(pipefd[1]); |
572 | wake_parent(pipefd[0], lguest_fd); | 577 | wake_parent(pipefd[0], lguest_fd); |
573 | } | 578 | } |
@@ -578,12 +583,12 @@ static int setup_waker(int lguest_fd) | |||
578 | return pipefd[1]; | 583 | return pipefd[1]; |
579 | } | 584 | } |
580 | 585 | ||
581 | /*L:210 | 586 | /* |
582 | * Device Handling. | 587 | * Device Handling. |
583 | * | 588 | * |
584 | * When the Guest sends DMA to us, it sends us an array of addresses and sizes. | 589 | * When the Guest gives us a buffer, it sends an array of addresses and sizes. |
585 | * We need to make sure it's not trying to reach into the Launcher itself, so | 590 | * We need to make sure it's not trying to reach into the Launcher itself, so |
586 | * we have a convenient routine which check it and exits with an error message | 591 | * we have a convenient routine which checks it and exits with an error message |
587 | * if something funny is going on: | 592 | * if something funny is going on: |
588 | */ | 593 | */ |
589 | static void *_check_pointer(unsigned long addr, unsigned int size, | 594 | static void *_check_pointer(unsigned long addr, unsigned int size, |
@@ -600,7 +605,9 @@ static void *_check_pointer(unsigned long addr, unsigned int size, | |||
600 | /* A macro which transparently hands the line number to the real function. */ | 605 | /* A macro which transparently hands the line number to the real function. */ |
601 | #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) | 606 | #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) |
602 | 607 | ||
603 | /* This function returns the next descriptor in the chain, or vq->vring.num. */ | 608 | /* Each buffer in the virtqueues is actually a chain of descriptors. This |
609 | * function returns the next descriptor in the chain, or vq->vring.num if we're | ||
610 | * at the end. */ | ||
604 | static unsigned next_desc(struct virtqueue *vq, unsigned int i) | 611 | static unsigned next_desc(struct virtqueue *vq, unsigned int i) |
605 | { | 612 | { |
606 | unsigned int next; | 613 | unsigned int next; |
@@ -679,13 +686,14 @@ static unsigned get_vq_desc(struct virtqueue *vq, | |||
679 | return head; | 686 | return head; |
680 | } | 687 | } |
681 | 688 | ||
682 | /* Once we've used one of their buffers, we tell them about it. We'll then | 689 | /* After we've used one of their buffers, we tell them about it. We'll then |
683 | * want to send them an interrupt, using trigger_irq(). */ | 690 | * want to send them an interrupt, using trigger_irq(). */ |
684 | static void add_used(struct virtqueue *vq, unsigned int head, int len) | 691 | static void add_used(struct virtqueue *vq, unsigned int head, int len) |
685 | { | 692 | { |
686 | struct vring_used_elem *used; | 693 | struct vring_used_elem *used; |
687 | 694 | ||
688 | /* Get a pointer to the next entry in the used ring. */ | 695 | /* The virtqueue contains a ring of used buffers. Get a pointer to the |
696 | * next entry in that used ring. */ | ||
689 | used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; | 697 | used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; |
690 | used->id = head; | 698 | used->id = head; |
691 | used->len = len; | 699 | used->len = len; |
@@ -699,6 +707,7 @@ static void trigger_irq(int fd, struct virtqueue *vq) | |||
699 | { | 707 | { |
700 | unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; | 708 | unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; |
701 | 709 | ||
710 | /* If they don't want an interrupt, don't send one. */ | ||
702 | if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) | 711 | if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) |
703 | return; | 712 | return; |
704 | 713 | ||
@@ -715,8 +724,11 @@ static void add_used_and_trigger(int fd, struct virtqueue *vq, | |||
715 | trigger_irq(fd, vq); | 724 | trigger_irq(fd, vq); |
716 | } | 725 | } |
717 | 726 | ||
718 | /* Here is the input terminal setting we save, and the routine to restore them | 727 | /* |
719 | * on exit so the user can see what they type next. */ | 728 | * The Console |
729 | * | ||
730 | * Here are the input terminal settings we save, and the routine to restore them | ||
731 | * on exit so the user gets their terminal back. */ | ||
720 | static struct termios orig_term; | 732 | static struct termios orig_term; |
721 | static void restore_term(void) | 733 | static void restore_term(void) |
722 | { | 734 | { |
@@ -817,7 +829,10 @@ static void handle_console_output(int fd, struct virtqueue *vq) | |||
817 | } | 829 | } |
818 | } | 830 | } |
819 | 831 | ||
820 | /* Handling output for network is also simple: we get all the output buffers | 832 | /* |
833 | * The Network | ||
834 | * | ||
835 | * Handling output for network is also simple: we get all the output buffers | ||
821 | * and write them (ignoring the first element) to this device's file descriptor | 836 | * and write them (ignoring the first element) to this device's file descriptor |
822 | * (stdout). */ | 837 | * (stdout). */ |
823 | static void handle_net_output(int fd, struct virtqueue *vq) | 838 | static void handle_net_output(int fd, struct virtqueue *vq) |
@@ -830,8 +845,9 @@ static void handle_net_output(int fd, struct virtqueue *vq) | |||
830 | while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { | 845 | while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { |
831 | if (in) | 846 | if (in) |
832 | errx(1, "Input buffers in output queue?"); | 847 | errx(1, "Input buffers in output queue?"); |
833 | /* Check header, but otherwise ignore it (we said we supported | 848 | /* Check header, but otherwise ignore it (we told the Guest we |
834 | * no features). */ | 849 | * supported no features, so it shouldn't have anything |
850 | * interesting). */ | ||
835 | (void)convert(&iov[0], struct virtio_net_hdr); | 851 | (void)convert(&iov[0], struct virtio_net_hdr); |
836 | len = writev(vq->dev->fd, iov+1, out-1); | 852 | len = writev(vq->dev->fd, iov+1, out-1); |
837 | add_used_and_trigger(fd, vq, head, len); | 853 | add_used_and_trigger(fd, vq, head, len); |
@@ -882,7 +898,8 @@ static bool handle_tun_input(int fd, struct device *dev) | |||
882 | return true; | 898 | return true; |
883 | } | 899 | } |
884 | 900 | ||
885 | /* This callback ensures we try again, in case we stopped console or net | 901 | /*L:215 This is the callback attached to the network and console input |
902 | * virtqueues: it ensures we try again, in case we stopped console or net | ||
886 | * delivery because Guest didn't have any buffers. */ | 903 | * delivery because Guest didn't have any buffers. */ |
887 | static void enable_fd(int fd, struct virtqueue *vq) | 904 | static void enable_fd(int fd, struct virtqueue *vq) |
888 | { | 905 | { |
@@ -918,7 +935,7 @@ static void handle_output(int fd, unsigned long addr) | |||
918 | strnlen(from_guest_phys(addr), guest_limit - addr)); | 935 | strnlen(from_guest_phys(addr), guest_limit - addr)); |
919 | } | 936 | } |
920 | 937 | ||
921 | /* This is called when the waker wakes us up: check for incoming file | 938 | /* This is called when the Waker wakes us up: check for incoming file |
922 | * descriptors. */ | 939 | * descriptors. */ |
923 | static void handle_input(int fd) | 940 | static void handle_input(int fd) |
924 | { | 941 | { |
@@ -985,8 +1002,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type) | |||
985 | } | 1002 | } |
986 | 1003 | ||
987 | /* Each device descriptor is followed by some configuration information. | 1004 | /* Each device descriptor is followed by some configuration information. |
988 | * The first byte is a "status" byte for the Guest to report what's happening. | 1005 | * Each configuration field looks like: u8 type, u8 len, [... len bytes...]. |
989 | * After that are fields: u8 type, u8 len, [... len bytes...]. | ||
990 | * | 1006 | * |
991 | * This routine adds a new field to an existing device's descriptor. It only | 1007 | * This routine adds a new field to an existing device's descriptor. It only |
992 | * works for the last device, but that's OK because that's how we use it. */ | 1008 | * works for the last device, but that's OK because that's how we use it. */ |
@@ -1043,14 +1059,17 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1043 | /* Link virtqueue back to device. */ | 1059 | /* Link virtqueue back to device. */ |
1044 | vq->dev = dev; | 1060 | vq->dev = dev; |
1045 | 1061 | ||
1046 | /* Set up handler. */ | 1062 | /* Set the routine to call when the Guest does something to this |
1063 | * virtqueue. */ | ||
1047 | vq->handle_output = handle_output; | 1064 | vq->handle_output = handle_output; |
1065 | |||
1066 | /* Set the "Don't Notify Me" flag if we don't have a handler */ | ||
1048 | if (!handle_output) | 1067 | if (!handle_output) |
1049 | vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; | 1068 | vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; |
1050 | } | 1069 | } |
1051 | 1070 | ||
1052 | /* This routine does all the creation and setup of a new device, including | 1071 | /* This routine does all the creation and setup of a new device, including |
1053 | * caling new_dev_desc() to allocate the descriptor and device memory. */ | 1072 | * calling new_dev_desc() to allocate the descriptor and device memory. */ |
1054 | static struct device *new_device(const char *name, u16 type, int fd, | 1073 | static struct device *new_device(const char *name, u16 type, int fd, |
1055 | bool (*handle_input)(int, struct device *)) | 1074 | bool (*handle_input)(int, struct device *)) |
1056 | { | 1075 | { |
@@ -1059,7 +1078,7 @@ static struct device *new_device(const char *name, u16 type, int fd, | |||
1059 | /* Append to device list. Prepending to a single-linked list is | 1078 | /* Append to device list. Prepending to a single-linked list is |
1060 | * easier, but the user expects the devices to be arranged on the bus | 1079 | * easier, but the user expects the devices to be arranged on the bus |
1061 | * in command-line order. The first network device on the command line | 1080 | * in command-line order. The first network device on the command line |
1062 | * is eth0, the first block device /dev/lgba, etc. */ | 1081 | * is eth0, the first block device /dev/vda, etc. */ |
1063 | *devices.lastdev = dev; | 1082 | *devices.lastdev = dev; |
1064 | dev->next = NULL; | 1083 | dev->next = NULL; |
1065 | devices.lastdev = &dev->next; | 1084 | devices.lastdev = &dev->next; |
@@ -1103,7 +1122,7 @@ static void setup_console(void) | |||
1103 | /* The console needs two virtqueues: the input then the output. When | 1122 | /* The console needs two virtqueues: the input then the output. When |
1104 | * they put something in the input queue, we make sure we're listening to | 1123 | * they put something in the input queue, we make sure we're listening to |
1105 | * stdin. When they put something in the output queue, we write it to | 1124 | * stdin. When they put something in the output queue, we write it to |
1106 | * stdout. */ | 1125 | * stdout. */ |
1107 | add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); | 1126 | add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); |
1108 | add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output); | 1127 | add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output); |
1109 | 1128 | ||
@@ -1251,21 +1270,17 @@ static void setup_tun_net(const char *arg) | |||
1251 | verbose("attached to bridge: %s\n", br_name); | 1270 | verbose("attached to bridge: %s\n", br_name); |
1252 | } | 1271 | } |
1253 | 1272 | ||
1254 | 1273 | /* Our block (disk) device should be really simple: the Guest asks for a block | |
1255 | /* | 1274 | * number and we read or write that position in the file. Unfortunately, that |
1256 | * Block device. | 1275 | * was amazingly slow: the Guest waits until the read is finished before |
1276 | * running anything else, even if it could have been doing useful work. | ||
1257 | * | 1277 | * |
1258 | * Serving a block device is really easy: the Guest asks for a block number and | 1278 | * We could use async I/O, except it's reputed to suck so hard that characters |
1259 | * we read or write that position in the file. | 1279 | * actually go missing from your code when you try to use it. |
1260 | * | ||
1261 | * Unfortunately, this is amazingly slow: the Guest waits until the read is | ||
1262 | * finished before running anything else, even if it could be doing useful | ||
1263 | * work. We could use async I/O, except it's reputed to suck so hard that | ||
1264 | * characters actually go missing from your code when you try to use it. | ||
1265 | * | 1280 | * |
1266 | * So we farm the I/O out to a thread, and communicate with it via a pipe. */ | 1281 | * So we farm the I/O out to a thread, and communicate with it via a pipe. */ |
1267 | 1282 | ||
1268 | /* This hangs off device->priv, with the data. */ | 1283 | /* This hangs off device->priv. */ |
1269 | struct vblk_info | 1284 | struct vblk_info |
1270 | { | 1285 | { |
1271 | /* The size of the file. */ | 1286 | /* The size of the file. */ |
@@ -1281,8 +1296,14 @@ struct vblk_info | |||
1281 | * Launcher triggers interrupt to Guest. */ | 1296 | * Launcher triggers interrupt to Guest. */ |
1282 | int done_fd; | 1297 | int done_fd; |
1283 | }; | 1298 | }; |
1299 | /*:*/ | ||
1284 | 1300 | ||
1285 | /* This is the core of the I/O thread. It returns true if it did something. */ | 1301 | /*L:210 |
1302 | * The Disk | ||
1303 | * | ||
1304 | * Remember that the block device is handled by a separate I/O thread. We head | ||
1305 | * straight into the core of that thread here: | ||
1306 | */ | ||
1286 | static bool service_io(struct device *dev) | 1307 | static bool service_io(struct device *dev) |
1287 | { | 1308 | { |
1288 | struct vblk_info *vblk = dev->priv; | 1309 | struct vblk_info *vblk = dev->priv; |
@@ -1293,10 +1314,14 @@ static bool service_io(struct device *dev) | |||
1293 | struct iovec iov[dev->vq->vring.num]; | 1314 | struct iovec iov[dev->vq->vring.num]; |
1294 | off64_t off; | 1315 | off64_t off; |
1295 | 1316 | ||
1317 | /* See if there's a request waiting. If not, nothing to do. */ | ||
1296 | head = get_vq_desc(dev->vq, iov, &out_num, &in_num); | 1318 | head = get_vq_desc(dev->vq, iov, &out_num, &in_num); |
1297 | if (head == dev->vq->vring.num) | 1319 | if (head == dev->vq->vring.num) |
1298 | return false; | 1320 | return false; |
1299 | 1321 | ||
1322 | /* Every block request should contain at least one output buffer | ||
1323 | * (detailing the location on disk and the type of request) and one | ||
1324 | * input buffer (to hold the result). */ | ||
1300 | if (out_num == 0 || in_num == 0) | 1325 | if (out_num == 0 || in_num == 0) |
1301 | errx(1, "Bad virtblk cmd %u out=%u in=%u", | 1326 | errx(1, "Bad virtblk cmd %u out=%u in=%u", |
1302 | head, out_num, in_num); | 1327 | head, out_num, in_num); |
@@ -1305,10 +1330,15 @@ static bool service_io(struct device *dev) | |||
1305 | in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr); | 1330 | in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr); |
1306 | off = out->sector * 512; | 1331 | off = out->sector * 512; |
1307 | 1332 | ||
1308 | /* This is how we implement barriers. Pretty poor, no? */ | 1333 | /* The block device implements "barriers", where the Guest indicates |
1334 | * that it wants all previous writes to occur before this write. We | ||
1335 | * don't have a way of asking our kernel to do a barrier, so we just | ||
1336 | * synchronize all the data in the file. Pretty poor, no? */ | ||
1309 | if (out->type & VIRTIO_BLK_T_BARRIER) | 1337 | if (out->type & VIRTIO_BLK_T_BARRIER) |
1310 | fdatasync(vblk->fd); | 1338 | fdatasync(vblk->fd); |
1311 | 1339 | ||
1340 | /* In general the virtio block driver is allowed to try SCSI commands. | ||
1341 | * It'd be nice if we supported eject, for example, but we don't. */ | ||
1312 | if (out->type & VIRTIO_BLK_T_SCSI_CMD) { | 1342 | if (out->type & VIRTIO_BLK_T_SCSI_CMD) { |
1313 | fprintf(stderr, "Scsi commands unsupported\n"); | 1343 | fprintf(stderr, "Scsi commands unsupported\n"); |
1314 | in->status = VIRTIO_BLK_S_UNSUPP; | 1344 | in->status = VIRTIO_BLK_S_UNSUPP; |
@@ -1374,7 +1404,7 @@ static int io_thread(void *_dev) | |||
1374 | 1404 | ||
1375 | /* When this read fails, it means Launcher died, so we follow. */ | 1405 | /* When this read fails, it means Launcher died, so we follow. */ |
1376 | while (read(vblk->workpipe[0], &c, 1) == 1) { | 1406 | while (read(vblk->workpipe[0], &c, 1) == 1) { |
1377 | /* We acknowledge each request immediately, to reduce latency, | 1407 | /* We acknowledge each request immediately to reduce latency, |
1378 | * rather than waiting until we've done them all. I haven't | 1408 | * rather than waiting until we've done them all. I haven't |
1379 | * measured to see if it makes any difference. */ | 1409 | * measured to see if it makes any difference. */ |
1380 | while (service_io(dev)) | 1410 | while (service_io(dev)) |
@@ -1383,12 +1413,14 @@ static int io_thread(void *_dev) | |||
1383 | return 0; | 1413 | return 0; |
1384 | } | 1414 | } |
1385 | 1415 | ||
1386 | /* When the thread says some I/O is done, we interrupt the Guest. */ | 1416 | /* Now we've seen the I/O thread, we return to the Launcher to see what happens |
1417 | * when the thread tells us it's completed some I/O. */ | ||
1387 | static bool handle_io_finish(int fd, struct device *dev) | 1418 | static bool handle_io_finish(int fd, struct device *dev) |
1388 | { | 1419 | { |
1389 | char c; | 1420 | char c; |
1390 | 1421 | ||
1391 | /* If child died, presumably it printed message. */ | 1422 | /* If the I/O thread died, presumably it printed the error, so we |
1423 | * simply exit. */ | ||
1392 | if (read(dev->fd, &c, 1) != 1) | 1424 | if (read(dev->fd, &c, 1) != 1) |
1393 | exit(1); | 1425 | exit(1); |
1394 | 1426 | ||
@@ -1397,7 +1429,7 @@ static bool handle_io_finish(int fd, struct device *dev) | |||
1397 | return true; | 1429 | return true; |
1398 | } | 1430 | } |
1399 | 1431 | ||
1400 | /* When the Guest submits some I/O, we wake the I/O thread. */ | 1432 | /* When the Guest submits some I/O, we just need to wake the I/O thread. */ |
1401 | static void handle_virtblk_output(int fd, struct virtqueue *vq) | 1433 | static void handle_virtblk_output(int fd, struct virtqueue *vq) |
1402 | { | 1434 | { |
1403 | struct vblk_info *vblk = vq->dev->priv; | 1435 | struct vblk_info *vblk = vq->dev->priv; |
@@ -1409,7 +1441,7 @@ static void handle_virtblk_output(int fd, struct virtqueue *vq) | |||
1409 | exit(1); | 1441 | exit(1); |
1410 | } | 1442 | } |
1411 | 1443 | ||
1412 | /* This creates a virtual block device. */ | 1444 | /*L:198 This actually sets up a virtual block device. */ |
1413 | static void setup_block_file(const char *filename) | 1445 | static void setup_block_file(const char *filename) |
1414 | { | 1446 | { |
1415 | int p[2]; | 1447 | int p[2]; |
@@ -1425,7 +1457,7 @@ static void setup_block_file(const char *filename) | |||
1425 | /* The device responds to return from I/O thread. */ | 1457 | /* The device responds to return from I/O thread. */ |
1426 | dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish); | 1458 | dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish); |
1427 | 1459 | ||
1428 | /* The device has a virtqueue. */ | 1460 | /* The device has one virtqueue, where the Guest places requests. */ |
1429 | add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output); | 1461 | add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output); |
1430 | 1462 | ||
1431 | /* Allocate the room for our own bookkeeping */ | 1463 | /* Allocate the room for our own bookkeeping */ |
@@ -1447,7 +1479,8 @@ static void setup_block_file(const char *filename) | |||
1447 | /* The I/O thread writes to this end of the pipe when done. */ | 1479 | /* The I/O thread writes to this end of the pipe when done. */ |
1448 | vblk->done_fd = p[1]; | 1480 | vblk->done_fd = p[1]; |
1449 | 1481 | ||
1450 | /* This is how we tell the I/O thread about more work. */ | 1482 | /* This is the second pipe, which is how we tell the I/O thread about |
1483 | * more work. */ | ||
1451 | pipe(vblk->workpipe); | 1484 | pipe(vblk->workpipe); |
1452 | 1485 | ||
1453 | /* Create stack for thread and run it */ | 1486 | /* Create stack for thread and run it */ |
@@ -1486,24 +1519,25 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) | |||
1486 | char reason[1024] = { 0 }; | 1519 | char reason[1024] = { 0 }; |
1487 | read(lguest_fd, reason, sizeof(reason)-1); | 1520 | read(lguest_fd, reason, sizeof(reason)-1); |
1488 | errx(1, "%s", reason); | 1521 | errx(1, "%s", reason); |
1489 | /* EAGAIN means the waker wanted us to look at some input. | 1522 | /* EAGAIN means the Waker wanted us to look at some input. |
1490 | * Anything else means a bug or incompatible change. */ | 1523 | * Anything else means a bug or incompatible change. */ |
1491 | } else if (errno != EAGAIN) | 1524 | } else if (errno != EAGAIN) |
1492 | err(1, "Running guest failed"); | 1525 | err(1, "Running guest failed"); |
1493 | 1526 | ||
1494 | /* Service input, then unset the BREAK which releases | 1527 | /* Service input, then unset the BREAK to release the Waker. */ |
1495 | * the Waker. */ | ||
1496 | handle_input(lguest_fd); | 1528 | handle_input(lguest_fd); |
1497 | if (write(lguest_fd, args, sizeof(args)) < 0) | 1529 | if (write(lguest_fd, args, sizeof(args)) < 0) |
1498 | err(1, "Resetting break"); | 1530 | err(1, "Resetting break"); |
1499 | } | 1531 | } |
1500 | } | 1532 | } |
1501 | /* | 1533 | /* |
1502 | * This is the end of the Launcher. | 1534 | * This is the end of the Launcher. The good news: we are over halfway |
1535 | * through! The bad news: the most fiendish part of the code still lies ahead | ||
1536 | * of us. | ||
1503 | * | 1537 | * |
1504 | * But wait! We've seen I/O from the Launcher, and we've seen I/O from the | 1538 | * Are you ready? Take a deep breath and join me in the core of the Host, in |
1505 | * Drivers. If we were to see the Host kernel I/O code, our understanding | 1539 | * "make Host". |
1506 | * would be complete... :*/ | 1540 | :*/ |
1507 | 1541 | ||
1508 | static struct option opts[] = { | 1542 | static struct option opts[] = { |
1509 | { "verbose", 0, NULL, 'v' }, | 1543 | { "verbose", 0, NULL, 'v' }, |
@@ -1526,7 +1560,7 @@ int main(int argc, char *argv[]) | |||
1526 | /* Memory, top-level pagetable, code startpoint and size of the | 1560 | /* Memory, top-level pagetable, code startpoint and size of the |
1527 | * (optional) initrd. */ | 1561 | * (optional) initrd. */ |
1528 | unsigned long mem = 0, pgdir, start, initrd_size = 0; | 1562 | unsigned long mem = 0, pgdir, start, initrd_size = 0; |
1529 | /* A temporary and the /dev/lguest file descriptor. */ | 1563 | /* Two temporaries and the /dev/lguest file descriptor. */ |
1530 | int i, c, lguest_fd; | 1564 | int i, c, lguest_fd; |
1531 | /* The boot information for the Guest. */ | 1565 | /* The boot information for the Guest. */ |
1532 | struct boot_params *boot; | 1566 | struct boot_params *boot; |
@@ -1621,6 +1655,7 @@ int main(int argc, char *argv[]) | |||
1621 | /* The boot header contains a command line pointer: we put the command | 1655 | /* The boot header contains a command line pointer: we put the command |
1622 | * line after the boot header. */ | 1656 | * line after the boot header. */ |
1623 | boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1); | 1657 | boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1); |
1658 | /* We use a simple helper to copy the arguments separated by spaces. */ | ||
1624 | concat((char *)(boot + 1), argv+optind+2); | 1659 | concat((char *)(boot + 1), argv+optind+2); |
1625 | 1660 | ||
1626 | /* Boot protocol version: 2.07 supports the fields for lguest. */ | 1661 | /* Boot protocol version: 2.07 supports the fields for lguest. */ |