diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2009-07-30 18:03:45 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2009-07-30 02:33:46 -0400 |
commit | a91d74a3c4de8115295ee87350c13a329164aaaf (patch) | |
tree | 02c862fccc9abedf7fc354061e69c4b5fbcce06d /Documentation/lguest/lguest.c | |
parent | 2e04ef76916d1e29a077ea9d0f2003c8fd86724d (diff) |
lguest: update commentry
Every so often, after code shuffles, I need to go through and unbitrot
the Lguest Journey (see drivers/lguest/README). Since we now use RCU in
a simple form in one place I took the opportunity to expand that explanation.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'Documentation/lguest/lguest.c')
-rw-r--r-- | Documentation/lguest/lguest.c | 184 |
1 files changed, 139 insertions, 45 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index aa66a52b73e9..45163651b519 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -49,7 +49,7 @@ | |||
49 | #include "linux/virtio_ring.h" | 49 | #include "linux/virtio_ring.h" |
50 | #include "asm/bootparam.h" | 50 | #include "asm/bootparam.h" |
51 | /*L:110 | 51 | /*L:110 |
52 | * We can ignore the 39 include files we need for this program, but I do want | 52 | * We can ignore the 42 include files we need for this program, but I do want |
53 | * to draw attention to the use of kernel-style types. | 53 | * to draw attention to the use of kernel-style types. |
54 | * | 54 | * |
55 | * As Linus said, "C is a Spartan language, and so should your naming be." I | 55 | * As Linus said, "C is a Spartan language, and so should your naming be." I |
@@ -305,6 +305,11 @@ static void *map_zeroed_pages(unsigned int num) | |||
305 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); | 305 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); |
306 | if (addr == MAP_FAILED) | 306 | if (addr == MAP_FAILED) |
307 | err(1, "Mmaping %u pages of /dev/zero", num); | 307 | err(1, "Mmaping %u pages of /dev/zero", num); |
308 | |||
309 | /* | ||
310 | * One neat mmap feature is that you can close the fd, and it | ||
311 | * stays mapped. | ||
312 | */ | ||
308 | close(fd); | 313 | close(fd); |
309 | 314 | ||
310 | return addr; | 315 | return addr; |
@@ -557,7 +562,7 @@ static void tell_kernel(unsigned long start) | |||
557 | } | 562 | } |
558 | /*:*/ | 563 | /*:*/ |
559 | 564 | ||
560 | /* | 565 | /*L:200 |
561 | * Device Handling. | 566 | * Device Handling. |
562 | * | 567 | * |
563 | * When the Guest gives us a buffer, it sends an array of addresses and sizes. | 568 | * When the Guest gives us a buffer, it sends an array of addresses and sizes. |
@@ -608,7 +613,10 @@ static unsigned next_desc(struct vring_desc *desc, | |||
608 | return next; | 613 | return next; |
609 | } | 614 | } |
610 | 615 | ||
611 | /* This actually sends the interrupt for this virtqueue */ | 616 | /* |
617 | * This actually sends the interrupt for this virtqueue, if we've used a | ||
618 | * buffer. | ||
619 | */ | ||
612 | static void trigger_irq(struct virtqueue *vq) | 620 | static void trigger_irq(struct virtqueue *vq) |
613 | { | 621 | { |
614 | unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; | 622 | unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; |
@@ -629,12 +637,12 @@ static void trigger_irq(struct virtqueue *vq) | |||
629 | } | 637 | } |
630 | 638 | ||
631 | /* | 639 | /* |
632 | * This looks in the virtqueue and for the first available buffer, and converts | 640 | * This looks in the virtqueue for the first available buffer, and converts |
633 | * it to an iovec for convenient access. Since descriptors consist of some | 641 | * it to an iovec for convenient access. Since descriptors consist of some |
634 | * number of output then some number of input descriptors, it's actually two | 642 | * number of output then some number of input descriptors, it's actually two |
635 | * iovecs, but we pack them into one and note how many of each there were. | 643 | * iovecs, but we pack them into one and note how many of each there were. |
636 | * | 644 | * |
637 | * This function returns the descriptor number found. | 645 | * This function waits if necessary, and returns the descriptor number found. |
638 | */ | 646 | */ |
639 | static unsigned wait_for_vq_desc(struct virtqueue *vq, | 647 | static unsigned wait_for_vq_desc(struct virtqueue *vq, |
640 | struct iovec iov[], | 648 | struct iovec iov[], |
@@ -644,10 +652,14 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, | |||
644 | struct vring_desc *desc; | 652 | struct vring_desc *desc; |
645 | u16 last_avail = lg_last_avail(vq); | 653 | u16 last_avail = lg_last_avail(vq); |
646 | 654 | ||
655 | /* There's nothing available? */ | ||
647 | while (last_avail == vq->vring.avail->idx) { | 656 | while (last_avail == vq->vring.avail->idx) { |
648 | u64 event; | 657 | u64 event; |
649 | 658 | ||
650 | /* OK, tell Guest about progress up to now. */ | 659 | /* |
660 | * Since we're about to sleep, now is a good time to tell the | ||
661 | * Guest about what we've used up to now. | ||
662 | */ | ||
651 | trigger_irq(vq); | 663 | trigger_irq(vq); |
652 | 664 | ||
653 | /* OK, now we need to know about added descriptors. */ | 665 | /* OK, now we need to know about added descriptors. */ |
@@ -734,8 +746,9 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, | |||
734 | } | 746 | } |
735 | 747 | ||
736 | /* | 748 | /* |
737 | * After we've used one of their buffers, we tell them about it. We'll then | 749 | * After we've used one of their buffers, we tell the Guest about it. Sometime |
738 | * want to send them an interrupt, using trigger_irq(). | 750 | * later we'll want to send them an interrupt using trigger_irq(); note that |
751 | * wait_for_vq_desc() does that for us if it has to wait. | ||
739 | */ | 752 | */ |
740 | static void add_used(struct virtqueue *vq, unsigned int head, int len) | 753 | static void add_used(struct virtqueue *vq, unsigned int head, int len) |
741 | { | 754 | { |
@@ -782,12 +795,12 @@ static void console_input(struct virtqueue *vq) | |||
782 | struct console_abort *abort = vq->dev->priv; | 795 | struct console_abort *abort = vq->dev->priv; |
783 | struct iovec iov[vq->vring.num]; | 796 | struct iovec iov[vq->vring.num]; |
784 | 797 | ||
785 | /* Make sure there's a descriptor waiting. */ | 798 | /* Make sure there's a descriptor available. */ |
786 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); | 799 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); |
787 | if (out_num) | 800 | if (out_num) |
788 | errx(1, "Output buffers in console in queue?"); | 801 | errx(1, "Output buffers in console in queue?"); |
789 | 802 | ||
790 | /* Read it in. */ | 803 | /* Read into it. This is where we usually wait. */ |
791 | len = readv(STDIN_FILENO, iov, in_num); | 804 | len = readv(STDIN_FILENO, iov, in_num); |
792 | if (len <= 0) { | 805 | if (len <= 0) { |
793 | /* Ran out of input? */ | 806 | /* Ran out of input? */ |
@@ -800,6 +813,7 @@ static void console_input(struct virtqueue *vq) | |||
800 | pause(); | 813 | pause(); |
801 | } | 814 | } |
802 | 815 | ||
816 | /* Tell the Guest we used a buffer. */ | ||
803 | add_used_and_trigger(vq, head, len); | 817 | add_used_and_trigger(vq, head, len); |
804 | 818 | ||
805 | /* | 819 | /* |
@@ -834,15 +848,23 @@ static void console_output(struct virtqueue *vq) | |||
834 | unsigned int head, out, in; | 848 | unsigned int head, out, in; |
835 | struct iovec iov[vq->vring.num]; | 849 | struct iovec iov[vq->vring.num]; |
836 | 850 | ||
851 | /* We usually wait in here, for the Guest to give us something. */ | ||
837 | head = wait_for_vq_desc(vq, iov, &out, &in); | 852 | head = wait_for_vq_desc(vq, iov, &out, &in); |
838 | if (in) | 853 | if (in) |
839 | errx(1, "Input buffers in console output queue?"); | 854 | errx(1, "Input buffers in console output queue?"); |
855 | |||
856 | /* writev can return a partial write, so we loop here. */ | ||
840 | while (!iov_empty(iov, out)) { | 857 | while (!iov_empty(iov, out)) { |
841 | int len = writev(STDOUT_FILENO, iov, out); | 858 | int len = writev(STDOUT_FILENO, iov, out); |
842 | if (len <= 0) | 859 | if (len <= 0) |
843 | err(1, "Write to stdout gave %i", len); | 860 | err(1, "Write to stdout gave %i", len); |
844 | iov_consume(iov, out, len); | 861 | iov_consume(iov, out, len); |
845 | } | 862 | } |
863 | |||
864 | /* | ||
865 | * We're finished with that buffer: if we're going to sleep, | ||
866 | * wait_for_vq_desc() will prod the Guest with an interrupt. | ||
867 | */ | ||
846 | add_used(vq, head, 0); | 868 | add_used(vq, head, 0); |
847 | } | 869 | } |
848 | 870 | ||
@@ -862,15 +884,30 @@ static void net_output(struct virtqueue *vq) | |||
862 | unsigned int head, out, in; | 884 | unsigned int head, out, in; |
863 | struct iovec iov[vq->vring.num]; | 885 | struct iovec iov[vq->vring.num]; |
864 | 886 | ||
887 | /* We usually wait in here for the Guest to give us a packet. */ | ||
865 | head = wait_for_vq_desc(vq, iov, &out, &in); | 888 | head = wait_for_vq_desc(vq, iov, &out, &in); |
866 | if (in) | 889 | if (in) |
867 | errx(1, "Input buffers in net output queue?"); | 890 | errx(1, "Input buffers in net output queue?"); |
891 | /* | ||
892 | * Send the whole thing through to /dev/net/tun. It expects the exact | ||
893 | * same format: what a coincidence! | ||
894 | */ | ||
868 | if (writev(net_info->tunfd, iov, out) < 0) | 895 | if (writev(net_info->tunfd, iov, out) < 0) |
869 | errx(1, "Write to tun failed?"); | 896 | errx(1, "Write to tun failed?"); |
897 | |||
898 | /* | ||
899 | * Done with that one; wait_for_vq_desc() will send the interrupt if | ||
900 | * all packets are processed. | ||
901 | */ | ||
870 | add_used(vq, head, 0); | 902 | add_used(vq, head, 0); |
871 | } | 903 | } |
872 | 904 | ||
873 | /* Will reading from this file descriptor block? */ | 905 | /* |
906 | * Handling network input is a bit trickier, because I've tried to optimize it. | ||
907 | * | ||
908 | * First we have a helper routine which tells us if reading from this file descriptor | ||
909 | * (ie. the /dev/net/tun device) will block: | ||
910 | */ | ||
874 | static bool will_block(int fd) | 911 | static bool will_block(int fd) |
875 | { | 912 | { |
876 | fd_set fdset; | 913 | fd_set fdset; |
@@ -880,7 +917,11 @@ static bool will_block(int fd) | |||
880 | return select(fd+1, &fdset, NULL, NULL, &zero) != 1; | 917 | return select(fd+1, &fdset, NULL, NULL, &zero) != 1; |
881 | } | 918 | } |
882 | 919 | ||
883 | /* This handles packets coming in from the tun device to our Guest. */ | 920 | /* |
921 | * This handles packets coming in from the tun device to our Guest. Like all | ||
922 | * service routines, it gets called again as soon as it returns, so you don't | ||
923 | * see a while(1) loop here. | ||
924 | */ | ||
884 | static void net_input(struct virtqueue *vq) | 925 | static void net_input(struct virtqueue *vq) |
885 | { | 926 | { |
886 | int len; | 927 | int len; |
@@ -888,21 +929,38 @@ static void net_input(struct virtqueue *vq) | |||
888 | struct iovec iov[vq->vring.num]; | 929 | struct iovec iov[vq->vring.num]; |
889 | struct net_info *net_info = vq->dev->priv; | 930 | struct net_info *net_info = vq->dev->priv; |
890 | 931 | ||
932 | /* | ||
933 | * Get a descriptor to write an incoming packet into. This will also | ||
934 | * send an interrupt if they're out of descriptors. | ||
935 | */ | ||
891 | head = wait_for_vq_desc(vq, iov, &out, &in); | 936 | head = wait_for_vq_desc(vq, iov, &out, &in); |
892 | if (out) | 937 | if (out) |
893 | errx(1, "Output buffers in net input queue?"); | 938 | errx(1, "Output buffers in net input queue?"); |
894 | 939 | ||
895 | /* Deliver interrupt now, since we're about to sleep. */ | 940 | /* |
941 | * If it looks like we'll block reading from the tun device, send them | ||
942 | * an interrupt. | ||
943 | */ | ||
896 | if (vq->pending_used && will_block(net_info->tunfd)) | 944 | if (vq->pending_used && will_block(net_info->tunfd)) |
897 | trigger_irq(vq); | 945 | trigger_irq(vq); |
898 | 946 | ||
947 | /* | ||
948 | * Read in the packet. This is where we normally wait (when there's no | ||
949 | * incoming network traffic). | ||
950 | */ | ||
899 | len = readv(net_info->tunfd, iov, in); | 951 | len = readv(net_info->tunfd, iov, in); |
900 | if (len <= 0) | 952 | if (len <= 0) |
901 | err(1, "Failed to read from tun."); | 953 | err(1, "Failed to read from tun."); |
954 | |||
955 | /* | ||
956 | * Mark that packet buffer as used, but don't interrupt here. We want | ||
957 | * to wait until we've done as much work as we can. | ||
958 | */ | ||
902 | add_used(vq, head, len); | 959 | add_used(vq, head, len); |
903 | } | 960 | } |
961 | /*:*/ | ||
904 | 962 | ||
905 | /* This is the helper to create threads. */ | 963 | /* This is the helper to create threads: run the service routine in a loop. */ |
906 | static int do_thread(void *_vq) | 964 | static int do_thread(void *_vq) |
907 | { | 965 | { |
908 | struct virtqueue *vq = _vq; | 966 | struct virtqueue *vq = _vq; |
@@ -950,11 +1008,14 @@ static void reset_device(struct device *dev) | |||
950 | signal(SIGCHLD, (void *)kill_launcher); | 1008 | signal(SIGCHLD, (void *)kill_launcher); |
951 | } | 1009 | } |
952 | 1010 | ||
1011 | /*L:216 | ||
1012 | * This actually creates the thread which services the virtqueue for a device. | ||
1013 | */ | ||
953 | static void create_thread(struct virtqueue *vq) | 1014 | static void create_thread(struct virtqueue *vq) |
954 | { | 1015 | { |
955 | /* | 1016 | /* |
956 | * Create stack for thread and run it. Since the stack grows upwards, | 1017 | * Create stack for thread. Since the stack grows downwards, we point |
957 | * we point the stack pointer to the end of this region. | 1018 | * the stack pointer to the end of this region. |
958 | */ | 1019 | */ |
959 | char *stack = malloc(32768); | 1020 | char *stack = malloc(32768); |
960 | unsigned long args[] = { LHREQ_EVENTFD, | 1021 | unsigned long args[] = { LHREQ_EVENTFD, |
@@ -966,17 +1027,22 @@ static void create_thread(struct virtqueue *vq) | |||
966 | err(1, "Creating eventfd"); | 1027 | err(1, "Creating eventfd"); |
967 | args[2] = vq->eventfd; | 1028 | args[2] = vq->eventfd; |
968 | 1029 | ||
969 | /* Attach an eventfd to this virtqueue: it will go off | 1030 | /* |
970 | * when the Guest does an LHCALL_NOTIFY for this vq. */ | 1031 | * Attach an eventfd to this virtqueue: it will go off when the Guest |
1032 | * does an LHCALL_NOTIFY for this vq. | ||
1033 | */ | ||
971 | if (write(lguest_fd, &args, sizeof(args)) != 0) | 1034 | if (write(lguest_fd, &args, sizeof(args)) != 0) |
972 | err(1, "Attaching eventfd"); | 1035 | err(1, "Attaching eventfd"); |
973 | 1036 | ||
974 | /* CLONE_VM: because it has to access the Guest memory, and | 1037 | /* |
975 | * SIGCHLD so we get a signal if it dies. */ | 1038 | * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so |
1039 | * we get a signal if it dies. | ||
1040 | */ | ||
976 | vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); | 1041 | vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); |
977 | if (vq->thread == (pid_t)-1) | 1042 | if (vq->thread == (pid_t)-1) |
978 | err(1, "Creating clone"); | 1043 | err(1, "Creating clone"); |
979 | /* We close our local copy, now the child has it. */ | 1044 | |
1045 | /* We close our local copy now the child has it. */ | ||
980 | close(vq->eventfd); | 1046 | close(vq->eventfd); |
981 | } | 1047 | } |
982 | 1048 | ||
@@ -1028,7 +1094,10 @@ static void update_device_status(struct device *dev) | |||
1028 | } | 1094 | } |
1029 | } | 1095 | } |
1030 | 1096 | ||
1031 | /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ | 1097 | /*L:215 |
1098 | * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In | ||
1099 | * particular, it's used to notify us of device status changes during boot. | ||
1100 | */ | ||
1032 | static void handle_output(unsigned long addr) | 1101 | static void handle_output(unsigned long addr) |
1033 | { | 1102 | { |
1034 | struct device *i; | 1103 | struct device *i; |
@@ -1037,18 +1106,32 @@ static void handle_output(unsigned long addr) | |||
1037 | for (i = devices.dev; i; i = i->next) { | 1106 | for (i = devices.dev; i; i = i->next) { |
1038 | struct virtqueue *vq; | 1107 | struct virtqueue *vq; |
1039 | 1108 | ||
1040 | /* Notifications to device descriptors update device status. */ | 1109 | /* |
1110 | * Notifications to device descriptors mean they updated the | ||
1111 | * device status. | ||
1112 | */ | ||
1041 | if (from_guest_phys(addr) == i->desc) { | 1113 | if (from_guest_phys(addr) == i->desc) { |
1042 | update_device_status(i); | 1114 | update_device_status(i); |
1043 | return; | 1115 | return; |
1044 | } | 1116 | } |
1045 | 1117 | ||
1046 | /* Devices *can* be used before status is set to DRIVER_OK. */ | 1118 | /* |
1119 | * Devices *can* be used before status is set to DRIVER_OK. | ||
1120 | * The original plan was that they would never do this: they | ||
1121 | * would always finish setting up their status bits before | ||
1122 | * actually touching the virtqueues. In practice, we allowed | ||
1123 | * them to, and they do (eg. the disk probes for partition | ||
1124 | * tables as part of initialization). | ||
1125 | * | ||
1126 | * If we see this, we start the device: once it's running, we | ||
1127 | * expect the device to catch all the notifications. | ||
1128 | */ | ||
1047 | for (vq = i->vq; vq; vq = vq->next) { | 1129 | for (vq = i->vq; vq; vq = vq->next) { |
1048 | if (addr != vq->config.pfn*getpagesize()) | 1130 | if (addr != vq->config.pfn*getpagesize()) |
1049 | continue; | 1131 | continue; |
1050 | if (i->running) | 1132 | if (i->running) |
1051 | errx(1, "Notification on running %s", i->name); | 1133 | errx(1, "Notification on running %s", i->name); |
1134 | /* This just calls create_thread() for each virtqueue */ | ||
1052 | start_device(i); | 1135 | start_device(i); |
1053 | return; | 1136 | return; |
1054 | } | 1137 | } |
@@ -1132,6 +1215,11 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1132 | vq->next = NULL; | 1215 | vq->next = NULL; |
1133 | vq->last_avail_idx = 0; | 1216 | vq->last_avail_idx = 0; |
1134 | vq->dev = dev; | 1217 | vq->dev = dev; |
1218 | |||
1219 | /* | ||
1220 | * This is the routine the service thread will run, and its Process ID | ||
1221 | * once it's running. | ||
1222 | */ | ||
1135 | vq->service = service; | 1223 | vq->service = service; |
1136 | vq->thread = (pid_t)-1; | 1224 | vq->thread = (pid_t)-1; |
1137 | 1225 | ||
@@ -1202,7 +1290,8 @@ static void set_config(struct device *dev, unsigned len, const void *conf) | |||
1202 | 1290 | ||
1203 | /* | 1291 | /* |
1204 | * This routine does all the creation and setup of a new device, including | 1292 | * This routine does all the creation and setup of a new device, including |
1205 | * calling new_dev_desc() to allocate the descriptor and device memory. | 1293 | * calling new_dev_desc() to allocate the descriptor and device memory. We |
1294 | * don't actually start the service threads until later. | ||
1206 | * | 1295 | * |
1207 | * See what I mean about userspace being boring? | 1296 | * See what I mean about userspace being boring? |
1208 | */ | 1297 | */ |
@@ -1478,19 +1567,7 @@ static void setup_tun_net(char *arg) | |||
1478 | verbose("device %u: tun %s: %s\n", | 1567 | verbose("device %u: tun %s: %s\n", |
1479 | devices.device_num, tapif, arg); | 1568 | devices.device_num, tapif, arg); |
1480 | } | 1569 | } |
1481 | 1570 | /*:*/ | |
1482 | /* | ||
1483 | * Our block (disk) device should be really simple: the Guest asks for a block | ||
1484 | * number and we read or write that position in the file. Unfortunately, that | ||
1485 | * was amazingly slow: the Guest waits until the read is finished before | ||
1486 | * running anything else, even if it could have been doing useful work. | ||
1487 | * | ||
1488 | * We could use async I/O, except it's reputed to suck so hard that characters | ||
1489 | * actually go missing from your code when you try to use it. | ||
1490 | * | ||
1491 | * So this was one reason why lguest now does all virtqueue servicing in | ||
1492 | * separate threads: it's more efficient and more like a real device. | ||
1493 | */ | ||
1494 | 1571 | ||
1495 | /* This hangs off device->priv. */ | 1572 | /* This hangs off device->priv. */ |
1496 | struct vblk_info | 1573 | struct vblk_info |
@@ -1512,8 +1589,16 @@ struct vblk_info | |||
1512 | /*L:210 | 1589 | /*L:210 |
1513 | * The Disk | 1590 | * The Disk |
1514 | * | 1591 | * |
1515 | * Remember that the block device is handled by a separate I/O thread. We head | 1592 | * The disk only has one virtqueue, so it only has one thread. It is really |
1516 | * straight into the core of that thread here: | 1593 | * simple: the Guest asks for a block number and we read or write that position |
1594 | * in the file. | ||
1595 | * | ||
1596 | * Before we serviced each virtqueue in a separate thread, that was unacceptably | ||
1597 | * slow: the Guest waits until the read is finished before running anything | ||
1598 | * else, even if it could have been doing useful work. | ||
1599 | * | ||
1600 | * We could have used async I/O, except it's reputed to suck so hard that | ||
1601 | * characters actually go missing from your code when you try to use it. | ||
1517 | */ | 1602 | */ |
1518 | static void blk_request(struct virtqueue *vq) | 1603 | static void blk_request(struct virtqueue *vq) |
1519 | { | 1604 | { |
@@ -1525,7 +1610,10 @@ static void blk_request(struct virtqueue *vq) | |||
1525 | struct iovec iov[vq->vring.num]; | 1610 | struct iovec iov[vq->vring.num]; |
1526 | off64_t off; | 1611 | off64_t off; |
1527 | 1612 | ||
1528 | /* Get the next request. */ | 1613 | /* |
1614 | * Get the next request, where we normally wait. It triggers the | ||
1615 | * interrupt to acknowledge previously serviced requests (if any). | ||
1616 | */ | ||
1529 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); | 1617 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); |
1530 | 1618 | ||
1531 | /* | 1619 | /* |
@@ -1539,6 +1627,10 @@ static void blk_request(struct virtqueue *vq) | |||
1539 | 1627 | ||
1540 | out = convert(&iov[0], struct virtio_blk_outhdr); | 1628 | out = convert(&iov[0], struct virtio_blk_outhdr); |
1541 | in = convert(&iov[out_num+in_num-1], u8); | 1629 | in = convert(&iov[out_num+in_num-1], u8); |
1630 | /* | ||
1631 | * For historical reasons, block operations are expressed in 512 byte | ||
1632 | * "sectors". | ||
1633 | */ | ||
1542 | off = out->sector * 512; | 1634 | off = out->sector * 512; |
1543 | 1635 | ||
1544 | /* | 1636 | /* |
@@ -1614,6 +1706,7 @@ static void blk_request(struct virtqueue *vq) | |||
1614 | if (out->type & VIRTIO_BLK_T_BARRIER) | 1706 | if (out->type & VIRTIO_BLK_T_BARRIER) |
1615 | fdatasync(vblk->fd); | 1707 | fdatasync(vblk->fd); |
1616 | 1708 | ||
1709 | /* Finished that request. */ | ||
1617 | add_used(vq, head, wlen); | 1710 | add_used(vq, head, wlen); |
1618 | } | 1711 | } |
1619 | 1712 | ||
@@ -1682,9 +1775,8 @@ static void rng_input(struct virtqueue *vq) | |||
1682 | errx(1, "Output buffers in rng?"); | 1775 | errx(1, "Output buffers in rng?"); |
1683 | 1776 | ||
1684 | /* | 1777 | /* |
1685 | * This is why we convert to iovecs: the readv() call uses them, and so | 1778 | * Just like the console write, we loop to cover the whole iovec. |
1686 | * it reads straight into the Guest's buffer. We loop to make sure we | 1779 | * In this case, short reads actually happen quite a bit. |
1687 | * fill it. | ||
1688 | */ | 1780 | */ |
1689 | while (!iov_empty(iov, in_num)) { | 1781 | while (!iov_empty(iov, in_num)) { |
1690 | len = readv(rng_info->rfd, iov, in_num); | 1782 | len = readv(rng_info->rfd, iov, in_num); |
@@ -1818,7 +1910,9 @@ int main(int argc, char *argv[]) | |||
1818 | devices.lastdev = NULL; | 1910 | devices.lastdev = NULL; |
1819 | devices.next_irq = 1; | 1911 | devices.next_irq = 1; |
1820 | 1912 | ||
1913 | /* We're CPU 0. In fact, that's the only CPU possible right now. */ | ||
1821 | cpu_id = 0; | 1914 | cpu_id = 0; |
1915 | |||
1822 | /* | 1916 | /* |
1823 | * We need to know how much memory so we can set up the device | 1917 | * We need to know how much memory so we can set up the device |
1824 | * descriptor and memory pages for the devices as we parse the command | 1918 | * descriptor and memory pages for the devices as we parse the command |
@@ -1926,7 +2020,7 @@ int main(int argc, char *argv[]) | |||
1926 | */ | 2020 | */ |
1927 | tell_kernel(start); | 2021 | tell_kernel(start); |
1928 | 2022 | ||
1929 | /* Ensure that we terminate if a child dies. */ | 2023 | /* Ensure that we terminate if a device-servicing child dies. */ |
1930 | signal(SIGCHLD, kill_launcher); | 2024 | signal(SIGCHLD, kill_launcher); |
1931 | 2025 | ||
1932 | /* If we exit via err(), this kills all the threads, restores tty. */ | 2026 | /* If we exit via err(), this kills all the threads, restores tty. */ |