diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-28 21:16:26 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-28 21:16:26 -0400 |
| commit | 7874d35173d549c1a2b2f77c4b1f94379fa65698 (patch) | |
| tree | 995aa7212619dbdebb43b124cae2378562dd3065 | |
| parent | 5dfb66ba8c4a96eb732942c9f78629e4db1a51d4 (diff) | |
| parent | 8c79873da0d2bedf4ad6b868c54e426bb0a2fe38 (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus:
lguest: turn Waker into a thread, not a process
lguest: Enlarge virtio rings
lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap
lguest: Remove 'network: no dma buffer!' warning
lguest: Adaptive timeout
lguest: Tell Guest net not to notify us on every packet xmit
lguest: net block unneeded receive queue update notifications
lguest: wrap last_avail accesses.
lguest: use cpu capability accessors
lguest: virtio-rng support
lguest: Support assigning a MAC address
lguest: Don't leak /dev/zero fd
lguest: fix verbose printing of device features.
lguest: fix switcher_page leak on unload
lguest: Guest int3 fix
lguest: set max_pfn_mapped, growl loudly at Yinghai Lu
| -rw-r--r-- | Documentation/lguest/lguest.c | 519 | ||||
| -rw-r--r-- | arch/x86/lguest/boot.c | 3 | ||||
| -rw-r--r-- | drivers/lguest/core.c | 1 | ||||
| -rw-r--r-- | drivers/lguest/interrupts_and_traps.c | 24 | ||||
| -rw-r--r-- | drivers/lguest/x86/core.c | 4 |
5 files changed, 409 insertions, 142 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 82fafe0429fe..b88b0ea54e90 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
| @@ -36,11 +36,13 @@ | |||
| 36 | #include <sched.h> | 36 | #include <sched.h> |
| 37 | #include <limits.h> | 37 | #include <limits.h> |
| 38 | #include <stddef.h> | 38 | #include <stddef.h> |
| 39 | #include <signal.h> | ||
| 39 | #include "linux/lguest_launcher.h" | 40 | #include "linux/lguest_launcher.h" |
| 40 | #include "linux/virtio_config.h" | 41 | #include "linux/virtio_config.h" |
| 41 | #include "linux/virtio_net.h" | 42 | #include "linux/virtio_net.h" |
| 42 | #include "linux/virtio_blk.h" | 43 | #include "linux/virtio_blk.h" |
| 43 | #include "linux/virtio_console.h" | 44 | #include "linux/virtio_console.h" |
| 45 | #include "linux/virtio_rng.h" | ||
| 44 | #include "linux/virtio_ring.h" | 46 | #include "linux/virtio_ring.h" |
| 45 | #include "asm-x86/bootparam.h" | 47 | #include "asm-x86/bootparam.h" |
| 46 | /*L:110 We can ignore the 39 include files we need for this program, but I do | 48 | /*L:110 We can ignore the 39 include files we need for this program, but I do |
| @@ -64,8 +66,8 @@ typedef uint8_t u8; | |||
| 64 | #endif | 66 | #endif |
| 65 | /* We can have up to 256 pages for devices. */ | 67 | /* We can have up to 256 pages for devices. */ |
| 66 | #define DEVICE_PAGES 256 | 68 | #define DEVICE_PAGES 256 |
| 67 | /* This will occupy 2 pages: it must be a power of 2. */ | 69 | /* This will occupy 3 pages: it must be a power of 2. */ |
| 68 | #define VIRTQUEUE_NUM 128 | 70 | #define VIRTQUEUE_NUM 256 |
| 69 | 71 | ||
| 70 | /*L:120 verbose is both a global flag and a macro. The C preprocessor allows | 72 | /*L:120 verbose is both a global flag and a macro. The C preprocessor allows |
| 71 | * this, and although I wouldn't recommend it, it works quite nicely here. */ | 73 | * this, and although I wouldn't recommend it, it works quite nicely here. */ |
| @@ -74,12 +76,19 @@ static bool verbose; | |||
| 74 | do { if (verbose) printf(args); } while(0) | 76 | do { if (verbose) printf(args); } while(0) |
| 75 | /*:*/ | 77 | /*:*/ |
| 76 | 78 | ||
| 77 | /* The pipe to send commands to the waker process */ | 79 | /* File descriptors for the Waker. */ |
| 78 | static int waker_fd; | 80 | struct { |
| 81 | int pipe[2]; | ||
| 82 | int lguest_fd; | ||
| 83 | } waker_fds; | ||
| 84 | |||
| 79 | /* The pointer to the start of guest memory. */ | 85 | /* The pointer to the start of guest memory. */ |
| 80 | static void *guest_base; | 86 | static void *guest_base; |
| 81 | /* The maximum guest physical address allowed, and maximum possible. */ | 87 | /* The maximum guest physical address allowed, and maximum possible. */ |
| 82 | static unsigned long guest_limit, guest_max; | 88 | static unsigned long guest_limit, guest_max; |
| | | 89 | /* The pipe for signal handler to write to. */ | ||
| 90 | static int timeoutpipe[2]; | ||
| 91 | static unsigned int timeout_usec = 500; | ||
| 83 | 92 | ||
| 84 | /* a per-cpu variable indicating whose vcpu is currently running */ | 93 | /* a per-cpu variable indicating whose vcpu is currently running */ |
| 85 | static unsigned int __thread cpu_id; | 94 | static unsigned int __thread cpu_id; |
| @@ -155,11 +164,14 @@ struct virtqueue | |||
| 155 | /* Last available index we saw. */ | 164 | /* Last available index we saw. */ |
| 156 | u16 last_avail_idx; | 165 | u16 last_avail_idx; |
| 157 | 166 | ||
| 158 | /* The routine to call when the Guest pings us. */ | 167 | /* The routine to call when the Guest pings us, or timeout. */ |
| 159 | void (*handle_output)(int fd, struct virtqueue *me); | 168 | void (*handle_output)(int fd, struct virtqueue *me, bool timeout); |
| 160 | 169 | ||
| 161 | /* Outstanding buffers */ | 170 | /* Outstanding buffers */ |
| 162 | unsigned int inflight; | 171 | unsigned int inflight; |
| 172 | |||
| 173 | /* Is this blocked awaiting a timer? */ | ||
| 174 | bool blocked; | ||
| 163 | }; | 175 | }; |
| 164 | 176 | ||
| 165 | /* Remember the arguments to the program so we can "reboot" */ | 177 | /* Remember the arguments to the program so we can "reboot" */ |
| @@ -190,6 +202,9 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, | |||
| 190 | return iov->iov_base; | 202 | return iov->iov_base; |
| 191 | } | 203 | } |
| 192 | 204 | ||
| 205 | /* Wrapper for the last available index. Makes it easier to change. */ | ||
| 206 | #define lg_last_avail(vq) ((vq)->last_avail_idx) | ||
| 207 | |||
| 193 | /* The virtio configuration space is defined to be little-endian. x86 is | 208 | /* The virtio configuration space is defined to be little-endian. x86 is |
| 194 | * little-endian too, but it's nice to be explicit so we have these helpers. */ | 209 | * little-endian too, but it's nice to be explicit so we have these helpers. */ |
| 195 | #define cpu_to_le16(v16) (v16) | 210 | #define cpu_to_le16(v16) (v16) |
| @@ -199,6 +214,33 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, | |||
| 199 | #define le32_to_cpu(v32) (v32) | 214 | #define le32_to_cpu(v32) (v32) |
| 200 | #define le64_to_cpu(v64) (v64) | 215 | #define le64_to_cpu(v64) (v64) |
| 201 | 216 | ||
| 217 | /* Is this iovec empty? */ | ||
| 218 | static bool iov_empty(const struct iovec iov[], unsigned int num_iov) | ||
| 219 | { | ||
| 220 | unsigned int i; | ||
| 221 | |||
| 222 | for (i = 0; i < num_iov; i++) | ||
| 223 | if (iov[i].iov_len) | ||
| 224 | return false; | ||
| 225 | return true; | ||
| 226 | } | ||
| 227 | |||
| 228 | /* Take len bytes from the front of this iovec. */ | ||
| 229 | static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) | ||
| 230 | { | ||
| 231 | unsigned int i; | ||
| 232 | |||
| 233 | for (i = 0; i < num_iov; i++) { | ||
| 234 | unsigned int used; | ||
| 235 | |||
| 236 | used = iov[i].iov_len < len ? iov[i].iov_len : len; | ||
| 237 | iov[i].iov_base += used; | ||
| 238 | iov[i].iov_len -= used; | ||
| 239 | len -= used; | ||
| 240 | } | ||
| 241 | assert(len == 0); | ||
| 242 | } | ||
| 243 | |||
| 202 | /* The device virtqueue descriptors are followed by feature bitmasks. */ | 244 | /* The device virtqueue descriptors are followed by feature bitmasks. */ |
| 203 | static u8 *get_feature_bits(struct device *dev) | 245 | static u8 *get_feature_bits(struct device *dev) |
| 204 | { | 246 | { |
| @@ -254,6 +296,7 @@ static void *map_zeroed_pages(unsigned int num) | |||
| 254 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); | 296 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); |
| 255 | if (addr == MAP_FAILED) | 297 | if (addr == MAP_FAILED) |
| 256 | err(1, "Mmaping %u pages of /dev/zero", num); | 298 | err(1, "Mmaping %u pages of /dev/zero", num); |
| 299 | close(fd); | ||
| 257 | 300 | ||
| 258 | return addr; | 301 | return addr; |
| 259 | } | 302 | } |
| @@ -540,69 +583,64 @@ static void add_device_fd(int fd) | |||
| 540 | * watch, but handing a file descriptor mask through to the kernel is fairly | 583 | * watch, but handing a file descriptor mask through to the kernel is fairly |
| 541 | * icky. | 584 | * icky. |
| 542 | * | 585 | * |
| 543 | * Instead, we fork off a process which watches the file descriptors and writes | 586 | * Instead, we clone off a thread which watches the file descriptors and writes |
| 544 | * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host | 587 | * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host |
| 545 | * stop running the Guest. This causes the Launcher to return from the | 588 | * stop running the Guest. This causes the Launcher to return from the |
| 546 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset | 589 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset |
| 547 | * the LHREQ_BREAK and wake us up again. | 590 | * the LHREQ_BREAK and wake us up again. |
| 548 | * | 591 | * |
| 549 | * This, of course, is merely a different *kind* of icky. | 592 | * This, of course, is merely a different *kind* of icky. |
| 593 | * | ||
| 594 | * Given my well-known antipathy to threads, I'd prefer to use processes. But | ||
| 595 | * it's easier to share Guest memory with threads, and trivial to share the | ||
| 596 | * devices.infds as the Launcher changes it. | ||
| 550 | */ | 597 | */ |
| 551 | static void wake_parent(int pipefd, int lguest_fd) | 598 | static int waker(void *unused) |
| 552 | { | 599 | { |
| 553 | /* Add the pipe from the Launcher to the fdset in the device_list, so | 600 | /* Close the write end of the pipe: only the Launcher has it open. */ |
| 554 | * we watch it, too. */ | 601 | close(waker_fds.pipe[1]); |
| 555 | add_device_fd(pipefd); | ||
| 556 | 602 | ||
| 557 | for (;;) { | 603 | for (;;) { |
| 558 | fd_set rfds = devices.infds; | 604 | fd_set rfds = devices.infds; |
| 559 | unsigned long args[] = { LHREQ_BREAK, 1 }; | 605 | unsigned long args[] = { LHREQ_BREAK, 1 }; |
| 606 | unsigned int maxfd = devices.max_infd; | ||
| 607 | |||
| 608 | /* We also listen to the pipe from the Launcher. */ | ||
| 609 | FD_SET(waker_fds.pipe[0], &rfds); | ||
| 610 | if (waker_fds.pipe[0] > maxfd) | ||
| 611 | maxfd = waker_fds.pipe[0]; | ||
| 560 | 612 | ||
| 561 | /* Wait until input is ready from one of the devices. */ | 613 | /* Wait until input is ready from one of the devices. */ |
| 562 | select(devices.max_infd+1, &rfds, NULL, NULL, NULL); | 614 | select(maxfd+1, &rfds, NULL, NULL, NULL); |
| 563 | /* Is it a message from the Launcher? */ | 615 | |
| 564 | if (FD_ISSET(pipefd, &rfds)) { | 616 | /* Message from Launcher? */ |
| 565 | int fd; | 617 | if (FD_ISSET(waker_fds.pipe[0], &rfds)) { |
| 566 | /* If read() returns 0, it means the Launcher has | 618 | char c; |
| 567 | * exited. We silently follow. */ | 619 | /* If this fails, then assume Launcher has exited. |
| 568 | if (read(pipefd, &fd, sizeof(fd)) == 0) | 620 | * Don't do anything on exit: we're just a thread! */ |
| 569 | exit(0); | 621 | if (read(waker_fds.pipe[0], &c, 1) != 1) |
| 570 | /* Otherwise it's telling us to change what file | 622 | _exit(0); |
| 571 | * descriptors we're to listen to. Positive means | 623 | continue; |
| 572 | * listen to a new one, negative means stop | 624 | } |
| 573 | * listening. */ | 625 | |
| 574 | if (fd >= 0) | 626 | /* Send LHREQ_BREAK command to snap the Launcher out of it. */ |
| 575 | FD_SET(fd, &devices.infds); | 627 | pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id); |
| 576 | else | ||
| 577 | FD_CLR(-fd - 1, &devices.infds); | ||
| 578 | } else /* Send LHREQ_BREAK command. */ | ||
| 579 | pwrite(lguest_fd, args, sizeof(args), cpu_id); | ||
| 580 | } | 628 | } |
| 629 | return 0; | ||
| 581 | } | 630 | } |
| 582 | 631 | ||
| 583 | /* This routine just sets up a pipe to the Waker process. */ | 632 | /* This routine just sets up a pipe to the Waker process. */ |
| 584 | static int setup_waker(int lguest_fd) | 633 | static void setup_waker(int lguest_fd) |
| 585 | { | 634 | { |
| 586 | int pipefd[2], child; | 635 | /* This pipe is closed when Launcher dies, telling Waker. */ |
| 587 | 636 | if (pipe(waker_fds.pipe) != 0) | |
| 588 | /* We create a pipe to talk to the Waker, and also so it knows when the | 637 | err(1, "Creating pipe for Waker"); |
| 589 | * Launcher dies (and closes pipe). */ | ||
| 590 | pipe(pipefd); | ||
| 591 | child = fork(); | ||
| 592 | if (child == -1) | ||
| 593 | err(1, "forking"); | ||
| 594 | |||
| 595 | if (child == 0) { | ||
| 596 | /* We are the Waker: close the "writing" end of our copy of the | ||
| 597 | * pipe and start waiting for input. */ | ||
| 598 | close(pipefd[1]); | ||
| 599 | wake_parent(pipefd[0], lguest_fd); | ||
| 600 | } | ||
| 601 | /* Close the reading end of our copy of the pipe. */ | ||
| 602 | close(pipefd[0]); | ||
| 603 | 638 | ||
| 604 | /* Here is the fd used to talk to the waker. */ | 639 | /* Waker also needs to know the lguest fd */ |
| 605 | return pipefd[1]; | 640 | waker_fds.lguest_fd = lguest_fd; |
| 641 | |||
| 642 | if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1) | ||
| 643 | err(1, "Creating Waker"); | ||
| 606 | } | 644 | } |
| 607 | 645 | ||
| 608 | /* | 646 | /* |
| @@ -661,19 +699,22 @@ static unsigned get_vq_desc(struct virtqueue *vq, | |||
| 661 | unsigned int *out_num, unsigned int *in_num) | 699 | unsigned int *out_num, unsigned int *in_num) |
| 662 | { | 700 | { |
| 663 | unsigned int i, head; | 701 | unsigned int i, head; |
| 702 | u16 last_avail; | ||
| 664 | 703 | ||
| 665 | /* Check it isn't doing very strange things with descriptor numbers. */ | 704 | /* Check it isn't doing very strange things with descriptor numbers. */ |
| 666 | if ((u16)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num) | 705 | last_avail = lg_last_avail(vq); |
| 706 | if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) | ||
| 667 | errx(1, "Guest moved used index from %u to %u", | 707 | errx(1, "Guest moved used index from %u to %u", |
| 668 | vq->last_avail_idx, vq->vring.avail->idx); | 708 | last_avail, vq->vring.avail->idx); |
| 669 | 709 | ||
| 670 | /* If there's nothing new since last we looked, return invalid. */ | 710 | /* If there's nothing new since last we looked, return invalid. */ |
| 671 | if (vq->vring.avail->idx == vq->last_avail_idx) | 711 | if (vq->vring.avail->idx == last_avail) |
| 672 | return vq->vring.num; | 712 | return vq->vring.num; |
| 673 | 713 | ||
| 674 | /* Grab the next descriptor number they're advertising, and increment | 714 | /* Grab the next descriptor number they're advertising, and increment |
| 675 | * the index we've seen. */ | 715 | * the index we've seen. */ |
| 676 | head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num]; | 716 | head = vq->vring.avail->ring[last_avail % vq->vring.num]; |
| 717 | lg_last_avail(vq)++; | ||
| 677 | 718 | ||
| 678 | /* If their number is silly, that's a fatal mistake. */ | 719 | /* If their number is silly, that's a fatal mistake. */ |
| 679 | if (head >= vq->vring.num) | 720 | if (head >= vq->vring.num) |
| @@ -821,8 +862,8 @@ static bool handle_console_input(int fd, struct device *dev) | |||
| 821 | unsigned long args[] = { LHREQ_BREAK, 0 }; | 862 | unsigned long args[] = { LHREQ_BREAK, 0 }; |
| 822 | /* Close the fd so Waker will know it has to | 863 | /* Close the fd so Waker will know it has to |
| 823 | * exit. */ | 864 | * exit. */ |
| 824 | close(waker_fd); | 865 | close(waker_fds.pipe[1]); |
| 825 | /* Just in case waker is blocked in BREAK, send | 866 | /* Just in case Waker is blocked in BREAK, send |
| 826 | * unbreak now. */ | 867 | * unbreak now. */ |
| 827 | write(fd, args, sizeof(args)); | 868 | write(fd, args, sizeof(args)); |
| 828 | exit(2); | 869 | exit(2); |
| @@ -839,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev) | |||
| 839 | 880 | ||
| 840 | /* Handling output for console is simple: we just get all the output buffers | 881 | /* Handling output for console is simple: we just get all the output buffers |
| 841 | * and write them to stdout. */ | 882 | * and write them to stdout. */ |
| 842 | static void handle_console_output(int fd, struct virtqueue *vq) | 883 | static void handle_console_output(int fd, struct virtqueue *vq, bool timeout) |
| 843 | { | 884 | { |
| 844 | unsigned int head, out, in; | 885 | unsigned int head, out, in; |
| 845 | int len; | 886 | int len; |
| @@ -854,6 +895,21 @@ static void handle_console_output(int fd, struct virtqueue *vq) | |||
| 854 | } | 895 | } |
| 855 | } | 896 | } |
| 856 | 897 | ||
| 898 | static void block_vq(struct virtqueue *vq) | ||
| 899 | { | ||
| 900 | struct itimerval itm; | ||
| 901 | |||
| 902 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; | ||
| 903 | vq->blocked = true; | ||
| 904 | |||
| 905 | itm.it_interval.tv_sec = 0; | ||
| 906 | itm.it_interval.tv_usec = 0; | ||
| 907 | itm.it_value.tv_sec = 0; | ||
| 908 | itm.it_value.tv_usec = timeout_usec; | ||
| 909 | |||
| 910 | setitimer(ITIMER_REAL, &itm, NULL); | ||
| 911 | } | ||
| 912 | |||
| 857 | /* | 913 | /* |
| 858 | * The Network | 914 | * The Network |
| 859 | * | 915 | * |
| @@ -861,22 +917,34 @@ static void handle_console_output(int fd, struct virtqueue *vq) | |||
| 861 | * and write them (ignoring the first element) to this device's file descriptor | 917 | * and write them (ignoring the first element) to this device's file descriptor |
| 862 | * (/dev/net/tun). | 918 | * (/dev/net/tun). |
| 863 | */ | 919 | */ |
| 864 | static void handle_net_output(int fd, struct virtqueue *vq) | 920 | static void handle_net_output(int fd, struct virtqueue *vq, bool timeout) |
| 865 | { | 921 | { |
| 866 | unsigned int head, out, in; | 922 | unsigned int head, out, in, num = 0; |
| 867 | int len; | 923 | int len; |
| 868 | struct iovec iov[vq->vring.num]; | 924 | struct iovec iov[vq->vring.num]; |
| 925 | static int last_timeout_num; | ||
| 869 | 926 | ||
| 870 | /* Keep getting output buffers from the Guest until we run out. */ | 927 | /* Keep getting output buffers from the Guest until we run out. */ |
| 871 | while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { | 928 | while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { |
| 872 | if (in) | 929 | if (in) |
| 873 | errx(1, "Input buffers in output queue?"); | 930 | errx(1, "Input buffers in output queue?"); |
| 874 | /* Check header, but otherwise ignore it (we told the Guest we | 931 | len = writev(vq->dev->fd, iov, out); |
| 875 | * supported no features, so it shouldn't have anything | 932 | if (len < 0) |
| 876 | * interesting). */ | 933 | err(1, "Writing network packet to tun"); |
| 877 | (void)convert(&iov[0], struct virtio_net_hdr); | ||
| 878 | len = writev(vq->dev->fd, iov+1, out-1); | ||
| 879 | add_used_and_trigger(fd, vq, head, len); | 934 | add_used_and_trigger(fd, vq, head, len); |
| 935 | num++; | ||
| 936 | } | ||
| 937 | |||
| 938 | /* Block further kicks and set up a timer if we saw anything. */ | ||
| 939 | if (!timeout && num) | ||
| 940 | block_vq(vq); | ||
| 941 | |||
| 942 | if (timeout) { | ||
| 943 | if (num < last_timeout_num) | ||
| 944 | timeout_usec += 10; | ||
| 945 | else if (timeout_usec > 1) | ||
| 946 | timeout_usec--; | ||
| 947 | last_timeout_num = num; | ||
| 880 | } | 948 | } |
| 881 | } | 949 | } |
| 882 | 950 | ||
| @@ -887,7 +955,6 @@ static bool handle_tun_input(int fd, struct device *dev) | |||
| 887 | unsigned int head, in_num, out_num; | 955 | unsigned int head, in_num, out_num; |
| 888 | int len; | 956 | int len; |
| 889 | struct iovec iov[dev->vq->vring.num]; | 957 | struct iovec iov[dev->vq->vring.num]; |
| 890 | struct virtio_net_hdr *hdr; | ||
| 891 | 958 | ||
| 892 | /* First we need a network buffer from the Guest's recv virtqueue. */ | 959 | /* First we need a network buffer from the Guest's recv virtqueue. */ |
| 893 | head = get_vq_desc(dev->vq, iov, &out_num, &in_num); | 960 | head = get_vq_desc(dev->vq, iov, &out_num, &in_num); |
| @@ -896,25 +963,23 @@ static bool handle_tun_input(int fd, struct device *dev) | |||
| 896 | * early, the Guest won't be ready yet. Wait until the device | 963 | * early, the Guest won't be ready yet. Wait until the device |
| 897 | * status says it's ready. */ | 964 | * status says it's ready. */ |
| 898 | /* FIXME: Actually want DRIVER_ACTIVE here. */ | 965 | /* FIXME: Actually want DRIVER_ACTIVE here. */ |
| 899 | if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) | 966 | |
| 900 | warn("network: no dma buffer!"); | 967 | /* Now tell it we want to know if new things appear. */ |
| 968 | dev->vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; | ||
| 969 | wmb(); | ||
| 970 | |||
| 901 | /* We'll turn this back on if input buffers are registered. */ | 971 | /* We'll turn this back on if input buffers are registered. */ |
| 902 | return false; | 972 | return false; |
| 903 | } else if (out_num) | 973 | } else if (out_num) |
| 904 | errx(1, "Output buffers in network recv queue?"); | 974 | errx(1, "Output buffers in network recv queue?"); |
| 905 | 975 | ||
| 906 | /* First element is the header: we set it to 0 (no features). */ | ||
| 907 | hdr = convert(&iov[0], struct virtio_net_hdr); | ||
| 908 | hdr->flags = 0; | ||
| 909 | hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; | ||
| 910 | |||
| 911 | /* Read the packet from the device directly into the Guest's buffer. */ | 976 | /* Read the packet from the device directly into the Guest's buffer. */ |
| 912 | len = readv(dev->fd, iov+1, in_num-1); | 977 | len = readv(dev->fd, iov, in_num); |
| 913 | if (len <= 0) | 978 | if (len <= 0) |
| 914 | err(1, "reading network"); | 979 | err(1, "reading network"); |
| 915 | 980 | ||
| 916 | /* Tell the Guest about the new packet. */ | 981 | /* Tell the Guest about the new packet. */ |
| 917 | add_used_and_trigger(fd, dev->vq, head, sizeof(*hdr) + len); | 982 | add_used_and_trigger(fd, dev->vq, head, len); |
| 918 | 983 | ||
| 919 | verbose("tun input packet len %i [%02x %02x] (%s)\n", len, | 984 | verbose("tun input packet len %i [%02x %02x] (%s)\n", len, |
| 920 | ((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1], | 985 | ((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1], |
| @@ -927,11 +992,18 @@ static bool handle_tun_input(int fd, struct device *dev) | |||
| 927 | /*L:215 This is the callback attached to the network and console input | 992 | /*L:215 This is the callback attached to the network and console input |
| 928 | * virtqueues: it ensures we try again, in case we stopped console or net | 993 | * virtqueues: it ensures we try again, in case we stopped console or net |
| 929 | * delivery because Guest didn't have any buffers. */ | 994 | * delivery because Guest didn't have any buffers. */ |
| 930 | static void enable_fd(int fd, struct virtqueue *vq) | 995 | static void enable_fd(int fd, struct virtqueue *vq, bool timeout) |
| 931 | { | 996 | { |
| 932 | add_device_fd(vq->dev->fd); | 997 | add_device_fd(vq->dev->fd); |
| 933 | /* Tell waker to listen to it again */ | 998 | /* Snap the Waker out of its select loop. */ |
| 934 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 999 | write(waker_fds.pipe[1], "", 1); |
| 1000 | } | ||
| 1001 | |||
| 1002 | static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) | ||
| 1003 | { | ||
| 1004 | /* We don't need to know again when Guest refills receive buffer. */ | ||
| 1005 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; | ||
| 1006 | enable_fd(fd, vq, timeout); | ||
| 935 | } | 1007 | } |
| 936 | 1008 | ||
| 937 | /* When the Guest tells us they updated the status field, we handle it. */ | 1009 | /* When the Guest tells us they updated the status field, we handle it. */ |
| @@ -951,7 +1023,7 @@ static void update_device_status(struct device *dev) | |||
| 951 | for (vq = dev->vq; vq; vq = vq->next) { | 1023 | for (vq = dev->vq; vq; vq = vq->next) { |
| 952 | memset(vq->vring.desc, 0, | 1024 | memset(vq->vring.desc, 0, |
| 953 | vring_size(vq->config.num, getpagesize())); | 1025 | vring_size(vq->config.num, getpagesize())); |
| 954 | vq->last_avail_idx = 0; | 1026 | lg_last_avail(vq) = 0; |
| 955 | } | 1027 | } |
| 956 | } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { | 1028 | } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { |
| 957 | warnx("Device %s configuration FAILED", dev->name); | 1029 | warnx("Device %s configuration FAILED", dev->name); |
| @@ -960,10 +1032,10 @@ static void update_device_status(struct device *dev) | |||
| 960 | 1032 | ||
| 961 | verbose("Device %s OK: offered", dev->name); | 1033 | verbose("Device %s OK: offered", dev->name); |
| 962 | for (i = 0; i < dev->desc->feature_len; i++) | 1034 | for (i = 0; i < dev->desc->feature_len; i++) |
| 963 | verbose(" %08x", get_feature_bits(dev)[i]); | 1035 | verbose(" %02x", get_feature_bits(dev)[i]); |
| 964 | verbose(", accepted"); | 1036 | verbose(", accepted"); |
| 965 | for (i = 0; i < dev->desc->feature_len; i++) | 1037 | for (i = 0; i < dev->desc->feature_len; i++) |
| 966 | verbose(" %08x", get_feature_bits(dev) | 1038 | verbose(" %02x", get_feature_bits(dev) |
| 967 | [dev->desc->feature_len+i]); | 1039 | [dev->desc->feature_len+i]); |
| 968 | 1040 | ||
| 969 | if (dev->ready) | 1041 | if (dev->ready) |
| @@ -1000,7 +1072,7 @@ static void handle_output(int fd, unsigned long addr) | |||
| 1000 | if (strcmp(vq->dev->name, "console") != 0) | 1072 | if (strcmp(vq->dev->name, "console") != 0) |
| 1001 | verbose("Output to %s\n", vq->dev->name); | 1073 | verbose("Output to %s\n", vq->dev->name); |
| 1002 | if (vq->handle_output) | 1074 | if (vq->handle_output) |
| 1003 | vq->handle_output(fd, vq); | 1075 | vq->handle_output(fd, vq, false); |
| 1004 | return; | 1076 | return; |
| 1005 | } | 1077 | } |
| 1006 | } | 1078 | } |
| @@ -1014,6 +1086,29 @@ static void handle_output(int fd, unsigned long addr) | |||
| 1014 | strnlen(from_guest_phys(addr), guest_limit - addr)); | 1086 | strnlen(from_guest_phys(addr), guest_limit - addr)); |
| 1015 | } | 1087 | } |
| 1016 | 1088 | ||
| 1089 | static void handle_timeout(int fd) | ||
| 1090 | { | ||
| 1091 | char buf[32]; | ||
| 1092 | struct device *i; | ||
| 1093 | struct virtqueue *vq; | ||
| 1094 | |||
| 1095 | /* Clear the pipe */ | ||
| 1096 | read(timeoutpipe[0], buf, sizeof(buf)); | ||
| 1097 | |||
| 1098 | /* Check each device and virtqueue: flush blocked ones. */ | ||
| 1099 | for (i = devices.dev; i; i = i->next) { | ||
| 1100 | for (vq = i->vq; vq; vq = vq->next) { | ||
| 1101 | if (!vq->blocked) | ||
| 1102 | continue; | ||
| 1103 | |||
| 1104 | vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; | ||
| 1105 | vq->blocked = false; | ||
| 1106 | if (vq->handle_output) | ||
| 1107 | vq->handle_output(fd, vq, true); | ||
| 1108 | } | ||
| 1109 | } | ||
| 1110 | } | ||
| 1111 | |||
| 1017 | /* This is called when the Waker wakes us up: check for incoming file | 1112 | /* This is called when the Waker wakes us up: check for incoming file |
| 1018 | * descriptors. */ | 1113 | * descriptors. */ |
| 1019 | static void handle_input(int fd) | 1114 | static void handle_input(int fd) |
| @@ -1024,16 +1119,20 @@ static void handle_input(int fd) | |||
| 1024 | for (;;) { | 1119 | for (;;) { |
| 1025 | struct device *i; | 1120 | struct device *i; |
| 1026 | fd_set fds = devices.infds; | 1121 | fd_set fds = devices.infds; |
| 1122 | int num; | ||
| 1027 | 1123 | ||
| 1124 | num = select(devices.max_infd+1, &fds, NULL, NULL, &poll); | ||
| 1125 | /* Could get interrupted */ | ||
| 1126 | if (num < 0) | ||
| 1127 | continue; | ||
| 1028 | /* If nothing is ready, we're done. */ | 1128 | /* If nothing is ready, we're done. */ |
| 1029 | if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) | 1129 | if (num == 0) |
| 1030 | break; | 1130 | break; |
| 1031 | 1131 | ||
| 1032 | /* Otherwise, call the device(s) which have readable file | 1132 | /* Otherwise, call the device(s) which have readable file |
| 1033 | * descriptors and a method of handling them. */ | 1133 | * descriptors and a method of handling them. */ |
| 1034 | for (i = devices.dev; i; i = i->next) { | 1134 | for (i = devices.dev; i; i = i->next) { |
| 1035 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { | 1135 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { |
| 1036 | int dev_fd; | ||
| 1037 | if (i->handle_input(fd, i)) | 1136 | if (i->handle_input(fd, i)) |
| 1038 | continue; | 1137 | continue; |
| 1039 | 1138 | ||
| @@ -1043,13 +1142,12 @@ static void handle_input(int fd) | |||
| 1043 | * buffers to deliver into. Console also uses | 1142 | * buffers to deliver into. Console also uses |
| 1044 | * it when it discovers that stdin is closed. */ | 1143 | * it when it discovers that stdin is closed. */ |
| 1045 | FD_CLR(i->fd, &devices.infds); | 1144 | FD_CLR(i->fd, &devices.infds); |
| 1046 | /* Tell waker to ignore it too, by sending a | ||
| 1047 | * negative fd number (-1, since 0 is a valid | ||
| 1048 | * FD number). */ | ||
| 1049 | dev_fd = -i->fd - 1; | ||
| 1050 | write(waker_fd, &dev_fd, sizeof(dev_fd)); | ||
| 1051 | } | 1145 | } |
| 1052 | } | 1146 | } |
| 1147 | |||
| 1148 | /* Is this the timeout fd? */ | ||
| 1149 | if (FD_ISSET(timeoutpipe[0], &fds)) | ||
| 1150 | handle_timeout(fd); | ||
| 1053 | } | 1151 | } |
| 1054 | } | 1152 | } |
| 1055 | 1153 | ||
| @@ -1098,7 +1196,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type) | |||
| 1098 | /* Each device descriptor is followed by the description of its virtqueues. We | 1196 | /* Each device descriptor is followed by the description of its virtqueues. We |
| 1099 | * specify how many descriptors the virtqueue is to have. */ | 1197 | * specify how many descriptors the virtqueue is to have. */ |
| 1100 | static void add_virtqueue(struct device *dev, unsigned int num_descs, | 1198 | static void add_virtqueue(struct device *dev, unsigned int num_descs, |
| 1101 | void (*handle_output)(int fd, struct virtqueue *me)) | 1199 | void (*handle_output)(int, struct virtqueue *, bool)) |
| 1102 | { | 1200 | { |
| 1103 | unsigned int pages; | 1201 | unsigned int pages; |
| 1104 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); | 1202 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); |
| @@ -1114,6 +1212,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
| 1114 | vq->last_avail_idx = 0; | 1212 | vq->last_avail_idx = 0; |
| 1115 | vq->dev = dev; | 1213 | vq->dev = dev; |
| 1116 | vq->inflight = 0; | 1214 | vq->inflight = 0; |
| 1215 | vq->blocked = false; | ||
| 1117 | 1216 | ||
| 1118 | /* Initialize the configuration. */ | 1217 | /* Initialize the configuration. */ |
| 1119 | vq->config.num = num_descs; | 1218 | vq->config.num = num_descs; |
| @@ -1246,6 +1345,24 @@ static void setup_console(void) | |||
| 1246 | } | 1345 | } |
| 1247 | /*:*/ | 1346 | /*:*/ |
| 1248 | 1347 | ||
| 1348 | static void timeout_alarm(int sig) | ||
| 1349 | { | ||
| 1350 | write(timeoutpipe[1], "", 1); | ||
| 1351 | } | ||
| 1352 | |||
| 1353 | static void setup_timeout(void) | ||
| 1354 | { | ||
| 1355 | if (pipe(timeoutpipe) != 0) | ||
| 1356 | err(1, "Creating timeout pipe"); | ||
| 1357 | |||
| 1358 | if (fcntl(timeoutpipe[1], F_SETFL, | ||
| 1359 | fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0) | ||
| 1360 | err(1, "Making timeout pipe nonblocking"); | ||
| 1361 | |||
| 1362 | add_device_fd(timeoutpipe[0]); | ||
| 1363 | signal(SIGALRM, timeout_alarm); | ||
| 1364 | } | ||
| 1365 | |||
| 1249 | /*M:010 Inter-guest networking is an interesting area. Simplest is to have a | 1366 | /*M:010 Inter-guest networking is an interesting area. Simplest is to have a |
| 1250 | * --sharenet=<name> option which opens or creates a named pipe. This can be | 1367 | * --sharenet=<name> option which opens or creates a named pipe. This can be |
| 1251 | * used to send packets to another guest in a 1:1 manner. | 1368 | * used to send packets to another guest in a 1:1 manner. |
| @@ -1264,10 +1381,25 @@ static void setup_console(void) | |||
| 1264 | 1381 | ||
| 1265 | static u32 str2ip(const char *ipaddr) | 1382 | static u32 str2ip(const char *ipaddr) |
| 1266 | { | 1383 | { |
| 1267 | unsigned int byte[4]; | 1384 | unsigned int b[4]; |
| 1268 | 1385 | ||
| 1269 | sscanf(ipaddr, "%u.%u.%u.%u", &byte[0], &byte[1], &byte[2], &byte[3]); | 1386 | if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4) |
| 1270 | return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3]; | 1387 | errx(1, "Failed to parse IP address '%s'", ipaddr); |
| 1388 | return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]; | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | static void str2mac(const char *macaddr, unsigned char mac[6]) | ||
| 1392 | { | ||
| 1393 | unsigned int m[6]; | ||
| 1394 | if (sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x", | ||
| 1395 | &m[0], &m[1], &m[2], &m[3], &m[4], &m[5]) != 6) | ||
| 1396 | errx(1, "Failed to parse mac address '%s'", macaddr); | ||
| 1397 | mac[0] = m[0]; | ||
| 1398 | mac[1] = m[1]; | ||
| 1399 | mac[2] = m[2]; | ||
| 1400 | mac[3] = m[3]; | ||
| 1401 | mac[4] = m[4]; | ||
| 1402 | mac[5] = m[5]; | ||
| 1271 | } | 1403 | } |
| 1272 | 1404 | ||
| 1273 | /* This code is "adapted" from libbridge: it attaches the Host end of the | 1405 | /* This code is "adapted" from libbridge: it attaches the Host end of the |
| @@ -1288,6 +1420,7 @@ static void add_to_bridge(int fd, const char *if_name, const char *br_name) | |||
| 1288 | errx(1, "interface %s does not exist!", if_name); | 1420 | errx(1, "interface %s does not exist!", if_name); |
| 1289 | 1421 | ||
| 1290 | strncpy(ifr.ifr_name, br_name, IFNAMSIZ); | 1422 | strncpy(ifr.ifr_name, br_name, IFNAMSIZ); |
| 1423 | ifr.ifr_name[IFNAMSIZ-1] = '\0'; | ||
| 1291 | ifr.ifr_ifindex = ifidx; | 1424 | ifr.ifr_ifindex = ifidx; |
| 1292 | if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) | 1425 | if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) |
| 1293 | err(1, "can't add %s to bridge %s", if_name, br_name); | 1426 | err(1, "can't add %s to bridge %s", if_name, br_name); |
| @@ -1296,64 +1429,90 @@ static void add_to_bridge(int fd, const char *if_name, const char *br_name) | |||
| 1296 | /* This sets up the Host end of the network device with an IP address, brings | 1429 | /* This sets up the Host end of the network device with an IP address, brings |
| 1297 | * it up so packets will flow, then copies the MAC address into the hwaddr | 1430 | * it up so packets will flow, then copies the MAC address into the hwaddr |
| 1298 | * pointer. */ | 1431 | * pointer. */ |
| 1299 | static void configure_device(int fd, const char *devname, u32 ipaddr, | 1432 | static void configure_device(int fd, const char *tapif, u32 ipaddr) |
| 1300 | unsigned char hwaddr[6]) | ||
| 1301 | { | 1433 | { |
| 1302 | struct ifreq ifr; | 1434 | struct ifreq ifr; |
| 1303 | struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; | 1435 | struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; |
| 1304 | 1436 | ||
| 1305 | /* Don't read these incantations. Just cut & paste them like I did! */ | ||
| 1306 | memset(&ifr, 0, sizeof(ifr)); | 1437 | memset(&ifr, 0, sizeof(ifr)); |
| 1307 | strcpy(ifr.ifr_name, devname); | 1438 | strcpy(ifr.ifr_name, tapif); |
| 1439 | |||
| 1440 | /* Don't read these incantations. Just cut & paste them like I did! */ | ||
| 1308 | sin->sin_family = AF_INET; | 1441 | sin->sin_family = AF_INET; |
| 1309 | sin->sin_addr.s_addr = htonl(ipaddr); | 1442 | sin->sin_addr.s_addr = htonl(ipaddr); |
| 1310 | if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) | 1443 | if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) |
| 1311 | err(1, "Setting %s interface address", devname); | 1444 | err(1, "Setting %s interface address", tapif); |
| 1312 | ifr.ifr_flags = IFF_UP; | 1445 | ifr.ifr_flags = IFF_UP; |
| 1313 | if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) | 1446 | if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) |
| 1314 | err(1, "Bringing interface %s up", devname); | 1447 | err(1, "Bringing interface %s up", tapif); |
| 1448 | } | ||
| 1449 | |||
| 1450 | static void get_mac(int fd, const char *tapif, unsigned char hwaddr[6]) | ||
| 1451 | { | ||
| 1452 | struct ifreq ifr; | ||
| 1453 | |||
| 1454 | memset(&ifr, 0, sizeof(ifr)); | ||
| 1455 | strcpy(ifr.ifr_name, tapif); | ||
| 1315 | 1456 | ||
| 1316 | /* SIOC stands for Socket I/O Control. G means Get (vs S for Set | 1457 | /* SIOC stands for Socket I/O Control. G means Get (vs S for Set |
| 1317 | * above). IF means Interface, and HWADDR is hardware address. | 1458 | * above). IF means Interface, and HWADDR is hardware address. |
| 1318 | * Simple! */ | 1459 | * Simple! */ |
| 1319 | if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) | 1460 | if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) |
| 1320 | err(1, "getting hw address for %s", devname); | 1461 | err(1, "getting hw address for %s", tapif); |
| 1321 | memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); | 1462 | memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); |
| 1322 | } | 1463 | } |
| 1323 | 1464 | ||
| 1324 | /*L:195 Our network is a Host<->Guest network. This can either use bridging or | 1465 | static int get_tun_device(char tapif[IFNAMSIZ]) |
| 1325 | * routing, but the principle is the same: it uses the "tun" device to inject | ||
| 1326 | * packets into the Host as if they came in from a normal network card. We | ||
| 1327 | * just shunt packets between the Guest and the tun device. */ | ||
| 1328 | static void setup_tun_net(const char *arg) | ||
| 1329 | { | 1466 | { |
| 1330 | struct device *dev; | ||
| 1331 | struct ifreq ifr; | 1467 | struct ifreq ifr; |
| 1332 | int netfd, ipfd; | 1468 | int netfd; |
| 1333 | u32 ip; | 1469 | |
| 1334 | const char *br_name = NULL; | 1470 | /* Start with this zeroed. Messy but sure. */ |
| 1335 | struct virtio_net_config conf; | 1471 | memset(&ifr, 0, sizeof(ifr)); |
| 1336 | 1472 | ||
| 1337 | /* We open the /dev/net/tun device and tell it we want a tap device. A | 1473 | /* We open the /dev/net/tun device and tell it we want a tap device. A |
| 1338 | * tap device is like a tun device, only somehow different. To tell | 1474 | * tap device is like a tun device, only somehow different. To tell |
| 1339 | * the truth, I completely blundered my way through this code, but it | 1475 | * the truth, I completely blundered my way through this code, but it |
| 1340 | * works now! */ | 1476 | * works now! */ |
| 1341 | netfd = open_or_die("/dev/net/tun", O_RDWR); | 1477 | netfd = open_or_die("/dev/net/tun", O_RDWR); |
| 1342 | memset(&ifr, 0, sizeof(ifr)); | 1478 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; |
| 1343 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI; | ||
| 1344 | strcpy(ifr.ifr_name, "tap%d"); | 1479 | strcpy(ifr.ifr_name, "tap%d"); |
| 1345 | if (ioctl(netfd, TUNSETIFF, &ifr) != 0) | 1480 | if (ioctl(netfd, TUNSETIFF, &ifr) != 0) |
| 1346 | err(1, "configuring /dev/net/tun"); | 1481 | err(1, "configuring /dev/net/tun"); |
| 1482 | |||
| 1483 | if (ioctl(netfd, TUNSETOFFLOAD, | ||
| 1484 | TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0) | ||
| 1485 | err(1, "Could not set features for tun device"); | ||
| 1486 | |||
| 1347 | /* We don't need checksums calculated for packets coming in this | 1487 | /* We don't need checksums calculated for packets coming in this |
| 1348 | * device: trust us! */ | 1488 | * device: trust us! */ |
| 1349 | ioctl(netfd, TUNSETNOCSUM, 1); | 1489 | ioctl(netfd, TUNSETNOCSUM, 1); |
| 1350 | 1490 | ||
| 1491 | memcpy(tapif, ifr.ifr_name, IFNAMSIZ); | ||
| 1492 | return netfd; | ||
| 1493 | } | ||
| 1494 | |||
| 1495 | /*L:195 Our network is a Host<->Guest network. This can either use bridging or | ||
| 1496 | * routing, but the principle is the same: it uses the "tun" device to inject | ||
| 1497 | * packets into the Host as if they came in from a normal network card. We | ||
| 1498 | * just shunt packets between the Guest and the tun device. */ | ||
| 1499 | static void setup_tun_net(char *arg) | ||
| 1500 | { | ||
| 1501 | struct device *dev; | ||
| 1502 | int netfd, ipfd; | ||
| 1503 | u32 ip = INADDR_ANY; | ||
| 1504 | bool bridging = false; | ||
| 1505 | char tapif[IFNAMSIZ], *p; | ||
| 1506 | struct virtio_net_config conf; | ||
| 1507 | |||
| 1508 | netfd = get_tun_device(tapif); | ||
| 1509 | |||
| 1351 | /* First we create a new network device. */ | 1510 | /* First we create a new network device. */ |
| 1352 | dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input); | 1511 | dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input); |
| 1353 | 1512 | ||
| 1354 | /* Network devices need a receive and a send queue, just like | 1513 | /* Network devices need a receive and a send queue, just like |
| 1355 | * console. */ | 1514 | * console. */ |
| 1356 | add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); | 1515 | add_virtqueue(dev, VIRTQUEUE_NUM, net_enable_fd); |
| 1357 | add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output); | 1516 | add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output); |
| 1358 | 1517 | ||
| 1359 | /* We need a socket to perform the magic network ioctls to bring up the | 1518 | /* We need a socket to perform the magic network ioctls to bring up the |
| @@ -1364,28 +1523,56 @@ static void setup_tun_net(const char *arg) | |||
| 1364 | 1523 | ||
| 1365 | /* If the command line was --tunnet=bridge:<name> do bridging. */ | 1524 | /* If the command line was --tunnet=bridge:<name> do bridging. */ |
| 1366 | if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { | 1525 | if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { |
| 1367 | ip = INADDR_ANY; | 1526 | arg += strlen(BRIDGE_PFX); |
| 1368 | br_name = arg + strlen(BRIDGE_PFX); | 1527 | bridging = true; |
| 1369 | add_to_bridge(ipfd, ifr.ifr_name, br_name); | 1528 | } |
| 1370 | } else /* It is an IP address to set up the device with */ | 1529 | |
| 1530 | /* A mac address may follow the bridge name or IP address */ | ||
| 1531 | p = strchr(arg, ':'); | ||
| 1532 | if (p) { | ||
| 1533 | str2mac(p+1, conf.mac); | ||
| 1534 | *p = '\0'; | ||
| 1535 | } else { | ||
| 1536 | p = arg + strlen(arg); | ||
| 1537 | /* None supplied; query the randomly assigned mac. */ | ||
| 1538 | get_mac(ipfd, tapif, conf.mac); | ||
| 1539 | } | ||
| 1540 | |||
| 1541 | /* arg is now either an IP address or a bridge name */ | ||
| 1542 | if (bridging) | ||
| 1543 | add_to_bridge(ipfd, tapif, arg); | ||
| 1544 | else | ||
| 1371 | ip = str2ip(arg); | 1545 | ip = str2ip(arg); |
| 1372 | 1546 | ||
| 1373 | /* Set up the tun device, and get the mac address for the interface. */ | 1547 | /* Set up the tun device. */ |
| 1374 | configure_device(ipfd, ifr.ifr_name, ip, conf.mac); | 1548 | configure_device(ipfd, tapif, ip); |
| 1375 | 1549 | ||
| 1376 | /* Tell Guest what MAC address to use. */ | 1550 | /* Tell Guest what MAC address to use. */ |
| 1377 | add_feature(dev, VIRTIO_NET_F_MAC); | 1551 | add_feature(dev, VIRTIO_NET_F_MAC); |
| 1378 | add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); | 1552 | add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); |
| 1553 | /* Expect Guest to handle everything except UFO */ | ||
| 1554 | add_feature(dev, VIRTIO_NET_F_CSUM); | ||
| 1555 | add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); | ||
| 1556 | add_feature(dev, VIRTIO_NET_F_MAC); | ||
| 1557 | add_feature(dev, VIRTIO_NET_F_GUEST_TSO4); | ||
| 1558 | add_feature(dev, VIRTIO_NET_F_GUEST_TSO6); | ||
| 1559 | add_feature(dev, VIRTIO_NET_F_GUEST_ECN); | ||
| 1560 | add_feature(dev, VIRTIO_NET_F_HOST_TSO4); | ||
| 1561 | add_feature(dev, VIRTIO_NET_F_HOST_TSO6); | ||
| 1562 | add_feature(dev, VIRTIO_NET_F_HOST_ECN); | ||
| 1379 | set_config(dev, sizeof(conf), &conf); | 1563 | set_config(dev, sizeof(conf), &conf); |
| 1380 | 1564 | ||
| 1381 | /* We don't need the socket any more; setup is done. */ | 1565 | /* We don't need the socket any more; setup is done. */ |
| 1382 | close(ipfd); | 1566 | close(ipfd); |
| 1383 | 1567 | ||
| 1384 | verbose("device %u: tun net %u.%u.%u.%u\n", | 1568 | devices.device_num++; |
| 1385 | devices.device_num++, | 1569 | |
| 1386 | (u8)(ip>>24),(u8)(ip>>16),(u8)(ip>>8),(u8)ip); | 1570 | if (bridging) |
| 1387 | if (br_name) | 1571 | verbose("device %u: tun %s attached to bridge: %s\n", |
| 1388 | verbose("attached to bridge: %s\n", br_name); | 1572 | devices.device_num, tapif, arg); |
| 1573 | else | ||
| 1574 | verbose("device %u: tun %s: %s\n", | ||
| 1575 | devices.device_num, tapif, arg); | ||
| 1389 | } | 1576 | } |
| 1390 | 1577 | ||
| 1391 | /* Our block (disk) device should be really simple: the Guest asks for a block | 1578 | /* Our block (disk) device should be really simple: the Guest asks for a block |
| @@ -1550,7 +1737,7 @@ static bool handle_io_finish(int fd, struct device *dev) | |||
| 1550 | } | 1737 | } |
| 1551 | 1738 | ||
| 1552 | /* When the Guest submits some I/O, we just need to wake the I/O thread. */ | 1739 | /* When the Guest submits some I/O, we just need to wake the I/O thread. */ |
| 1553 | static void handle_virtblk_output(int fd, struct virtqueue *vq) | 1740 | static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout) |
| 1554 | { | 1741 | { |
| 1555 | struct vblk_info *vblk = vq->dev->priv; | 1742 | struct vblk_info *vblk = vq->dev->priv; |
| 1556 | char c = 0; | 1743 | char c = 0; |
| @@ -1621,6 +1808,64 @@ static void setup_block_file(const char *filename) | |||
| 1621 | verbose("device %u: virtblock %llu sectors\n", | 1808 | verbose("device %u: virtblock %llu sectors\n", |
| 1622 | devices.device_num, le64_to_cpu(conf.capacity)); | 1809 | devices.device_num, le64_to_cpu(conf.capacity)); |
| 1623 | } | 1810 | } |
| 1811 | |||
| 1812 | /* Our random number generator device reads from /dev/random into the Guest's | ||
| 1813 | * input buffers. The usual case is that the Guest doesn't want random numbers | ||
| 1814 | * and so has no buffers although /dev/random is still readable, whereas | ||
| 1815 | * console is the reverse. | ||
| 1816 | * | ||
| 1817 | * The same logic applies, however. */ | ||
| 1818 | static bool handle_rng_input(int fd, struct device *dev) | ||
| 1819 | { | ||
| 1820 | int len; | ||
| 1821 | unsigned int head, in_num, out_num, totlen = 0; | ||
| 1822 | struct iovec iov[dev->vq->vring.num]; | ||
| 1823 | |||
| 1824 | /* First we need a buffer from the Guest's virtqueue. */ | ||
| 1825 | head = get_vq_desc(dev->vq, iov, &out_num, &in_num); | ||
| 1826 | |||
| 1827 | /* If they're not ready for input, stop listening to this file | ||
| 1828 | * descriptor. We'll start again once they add an input buffer. */ | ||
| 1829 | if (head == dev->vq->vring.num) | ||
| 1830 | return false; | ||
| 1831 | |||
| 1832 | if (out_num) | ||
| 1833 | errx(1, "Output buffers in rng?"); | ||
| 1834 | |||
| 1835 | /* This is why we convert to iovecs: the readv() call uses them, and so | ||
| 1836 | * it reads straight into the Guest's buffer. We loop to make sure we | ||
| 1837 | * fill it. */ | ||
| 1838 | while (!iov_empty(iov, in_num)) { | ||
| 1839 | len = readv(dev->fd, iov, in_num); | ||
| 1840 | if (len <= 0) | ||
| 1841 | err(1, "Read from /dev/random gave %i", len); | ||
| 1842 | iov_consume(iov, in_num, len); | ||
| 1843 | totlen += len; | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | /* Tell the Guest about the new input. */ | ||
| 1847 | add_used_and_trigger(fd, dev->vq, head, totlen); | ||
| 1848 | |||
| 1849 | /* Everything went OK! */ | ||
| 1850 | return true; | ||
| 1851 | } | ||
| 1852 | |||
| 1853 | /* And this creates a "hardware" random number device for the Guest. */ | ||
| 1854 | static void setup_rng(void) | ||
| 1855 | { | ||
| 1856 | struct device *dev; | ||
| 1857 | int fd; | ||
| 1858 | |||
| 1859 | fd = open_or_die("/dev/random", O_RDONLY); | ||
| 1860 | |||
| 1861 | /* The device responds to return from I/O thread. */ | ||
| 1862 | dev = new_device("rng", VIRTIO_ID_RNG, fd, handle_rng_input); | ||
| 1863 | |||
| 1864 | /* The device has one virtqueue, where the Guest places inbufs. */ | ||
| 1865 | add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); | ||
| 1866 | |||
| 1867 | verbose("device %u: rng\n", devices.device_num++); | ||
| 1868 | } | ||
| 1624 | /* That's the end of device setup. */ | 1869 | /* That's the end of device setup. */ |
| 1625 | 1870 | ||
| 1626 | /*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ | 1871 | /*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ |
| @@ -1628,11 +1873,12 @@ static void __attribute__((noreturn)) restart_guest(void) | |||
| 1628 | { | 1873 | { |
| 1629 | unsigned int i; | 1874 | unsigned int i; |
| 1630 | 1875 | ||
| 1631 | /* Closing pipes causes the Waker thread and io_threads to die, and | 1876 | /* Since we don't track all open fds, we simply close everything beyond |
| 1632 | * closing /dev/lguest cleans up the Guest. Since we don't track all | 1877 | * stderr. */ |
| 1633 | * open fds, we simply close everything beyond stderr. */ | ||
| 1634 | for (i = 3; i < FD_SETSIZE; i++) | 1878 | for (i = 3; i < FD_SETSIZE; i++) |
| 1635 | close(i); | 1879 | close(i); |
| 1880 | |||
| 1881 | /* The exec automatically gets rid of the I/O and Waker threads. */ | ||
| 1636 | execv(main_args[0], main_args); | 1882 | execv(main_args[0], main_args); |
| 1637 | err(1, "Could not exec %s", main_args[0]); | 1883 | err(1, "Could not exec %s", main_args[0]); |
| 1638 | } | 1884 | } |
| @@ -1663,7 +1909,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) | |||
| 1663 | /* ERESTART means that we need to reboot the guest */ | 1909 | /* ERESTART means that we need to reboot the guest */ |
| 1664 | } else if (errno == ERESTART) { | 1910 | } else if (errno == ERESTART) { |
| 1665 | restart_guest(); | 1911 | restart_guest(); |
| 1666 | /* EAGAIN means the Waker wanted us to look at some input. | 1912 | /* EAGAIN means a signal (timeout). |
| 1667 | * Anything else means a bug or incompatible change. */ | 1913 | * Anything else means a bug or incompatible change. */ |
| 1668 | } else if (errno != EAGAIN) | 1914 | } else if (errno != EAGAIN) |
| 1669 | err(1, "Running guest failed"); | 1915 | err(1, "Running guest failed"); |
| @@ -1691,13 +1937,14 @@ static struct option opts[] = { | |||
| 1691 | { "verbose", 0, NULL, 'v' }, | 1937 | { "verbose", 0, NULL, 'v' }, |
| 1692 | { "tunnet", 1, NULL, 't' }, | 1938 | { "tunnet", 1, NULL, 't' }, |
| 1693 | { "block", 1, NULL, 'b' }, | 1939 | { "block", 1, NULL, 'b' }, |
| 1940 | { "rng", 0, NULL, 'r' }, | ||
| 1694 | { "initrd", 1, NULL, 'i' }, | 1941 | { "initrd", 1, NULL, 'i' }, |
| 1695 | { NULL }, | 1942 | { NULL }, |
| 1696 | }; | 1943 | }; |
| 1697 | static void usage(void) | 1944 | static void usage(void) |
| 1698 | { | 1945 | { |
| 1699 | errx(1, "Usage: lguest [--verbose] " | 1946 | errx(1, "Usage: lguest [--verbose] " |
| 1700 | "[--tunnet=(<ipaddr>|bridge:<bridgename>)\n" | 1947 | "[--tunnet=(<ipaddr>:<macaddr>|bridge:<bridgename>:<macaddr>)\n" |
| 1701 | "|--block=<filename>|--initrd=<filename>]...\n" | 1948 | "|--block=<filename>|--initrd=<filename>]...\n" |
| 1702 | "<mem-in-mb> vmlinux [args...]"); | 1949 | "<mem-in-mb> vmlinux [args...]"); |
| 1703 | } | 1950 | } |
| @@ -1765,6 +2012,9 @@ int main(int argc, char *argv[]) | |||
| 1765 | case 'b': | 2012 | case 'b': |
| 1766 | setup_block_file(optarg); | 2013 | setup_block_file(optarg); |
| 1767 | break; | 2014 | break; |
| 2015 | case 'r': | ||
| 2016 | setup_rng(); | ||
| 2017 | break; | ||
| 1768 | case 'i': | 2018 | case 'i': |
| 1769 | initrd_name = optarg; | 2019 | initrd_name = optarg; |
| 1770 | break; | 2020 | break; |
| @@ -1783,6 +2033,9 @@ int main(int argc, char *argv[]) | |||
| 1783 | /* We always have a console device */ | 2033 | /* We always have a console device */ |
| 1784 | setup_console(); | 2034 | setup_console(); |
| 1785 | 2035 | ||
| 2036 | /* We can timeout waiting for Guest network transmit. */ | ||
| 2037 | setup_timeout(); | ||
| 2038 | |||
| 1786 | /* Now we load the kernel */ | 2039 | /* Now we load the kernel */ |
| 1787 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); | 2040 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); |
| 1788 | 2041 | ||
| @@ -1826,10 +2079,10 @@ int main(int argc, char *argv[]) | |||
| 1826 | * /dev/lguest file descriptor. */ | 2079 | * /dev/lguest file descriptor. */ |
| 1827 | lguest_fd = tell_kernel(pgdir, start); | 2080 | lguest_fd = tell_kernel(pgdir, start); |
| 1828 | 2081 | ||
| 1829 | /* We fork off a child process, which wakes the Launcher whenever one | 2082 | /* We clone off a thread, which wakes the Launcher whenever one of the |
| 1830 | * of the input file descriptors needs attention. We call this the | 2083 | * input file descriptors needs attention. We call this the Waker, and |
| 1831 | * Waker, and we'll cover it in a moment. */ | 2084 | * we'll cover it in a moment. */ |
| 1832 | waker_fd = setup_waker(lguest_fd); | 2085 | setup_waker(lguest_fd); |
| 1833 | 2086 | ||
| 1834 | /* Finally, run the Guest. This doesn't return. */ | 2087 | /* Finally, run the Guest. This doesn't return. */ |
| 1835 | run_guest(lguest_fd); | 2088 | run_guest(lguest_fd); |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 0313a5eec412..d9249a882aa5 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
| @@ -1014,6 +1014,9 @@ __init void lguest_init(void) | |||
| 1014 | init_pg_tables_start = __pa(pg0); | 1014 | init_pg_tables_start = __pa(pg0); |
| 1015 | init_pg_tables_end = __pa(pg0); | 1015 | init_pg_tables_end = __pa(pg0); |
| 1016 | 1016 | ||
| 1017 | /* As described in head_32.S, we map the first 128M of memory. */ | ||
| 1018 | max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; | ||
| 1019 | |||
| 1017 | /* Load the %fs segment register (the per-cpu segment register) with | 1020 | /* Load the %fs segment register (the per-cpu segment register) with |
| 1018 | * the normal data segment to get through booting. */ | 1021 | * the normal data segment to get through booting. */ |
| 1019 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); | 1022 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); |
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 5eea4356d703..90663e01a56e 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
| @@ -135,6 +135,7 @@ static void unmap_switcher(void) | |||
| 135 | /* Now we just need to free the pages we copied the switcher into */ | 135 | /* Now we just need to free the pages we copied the switcher into */ |
| 136 | for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) | 136 | for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) |
| 137 | __free_pages(switcher_page[i], 0); | 137 | __free_pages(switcher_page[i], 0); |
| 138 | kfree(switcher_page); | ||
| 138 | } | 139 | } |
| 139 | 140 | ||
| 140 | /*H:032 | 141 | /*H:032 |
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 0414ddf87587..a1039068f95c 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
| @@ -406,7 +406,8 @@ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) | |||
| 406 | * deliver_trap() to bounce it back into the Guest. */ | 406 | * deliver_trap() to bounce it back into the Guest. */ |
| 407 | static void default_idt_entry(struct desc_struct *idt, | 407 | static void default_idt_entry(struct desc_struct *idt, |
| 408 | int trap, | 408 | int trap, |
| 409 | const unsigned long handler) | 409 | const unsigned long handler, |
| 410 | const struct desc_struct *base) | ||
| 410 | { | 411 | { |
| 411 | /* A present interrupt gate. */ | 412 | /* A present interrupt gate. */ |
| 412 | u32 flags = 0x8e00; | 413 | u32 flags = 0x8e00; |
| @@ -415,6 +416,10 @@ static void default_idt_entry(struct desc_struct *idt, | |||
| 415 | * the Guest to use the "int" instruction to trigger it. */ | 416 | * the Guest to use the "int" instruction to trigger it. */ |
| 416 | if (trap == LGUEST_TRAP_ENTRY) | 417 | if (trap == LGUEST_TRAP_ENTRY) |
| 417 | flags |= (GUEST_PL << 13); | 418 | flags |= (GUEST_PL << 13); |
| 419 | else if (base) | ||
| 420 | /* Copy priv. level from what Guest asked for. This allows | ||
| 421 | * debug (int 3) traps from Guest userspace, for example. */ | ||
| 422 | flags |= (base->b & 0x6000); | ||
| 418 | 423 | ||
| 419 | /* Now pack it into the IDT entry in its weird format. */ | 424 | /* Now pack it into the IDT entry in its weird format. */ |
| 420 | idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF); | 425 | idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF); |
| @@ -428,7 +433,7 @@ void setup_default_idt_entries(struct lguest_ro_state *state, | |||
| 428 | unsigned int i; | 433 | unsigned int i; |
| 429 | 434 | ||
| 430 | for (i = 0; i < ARRAY_SIZE(state->guest_idt); i++) | 435 | for (i = 0; i < ARRAY_SIZE(state->guest_idt); i++) |
| 431 | default_idt_entry(&state->guest_idt[i], i, def[i]); | 436 | default_idt_entry(&state->guest_idt[i], i, def[i], NULL); |
| 432 | } | 437 | } |
| 433 | 438 | ||
| 434 | /*H:240 We don't use the IDT entries in the "struct lguest" directly, instead | 439 | /*H:240 We don't use the IDT entries in the "struct lguest" directly, instead |
| @@ -442,6 +447,8 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, | |||
| 442 | /* We can simply copy the direct traps, otherwise we use the default | 447 | /* We can simply copy the direct traps, otherwise we use the default |
| 443 | * ones in the Switcher: they will return to the Host. */ | 448 | * ones in the Switcher: they will return to the Host. */ |
| 444 | for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { | 449 | for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { |
| 450 | const struct desc_struct *gidt = &cpu->arch.idt[i]; | ||
| 451 | |||
| 445 | /* If no Guest can ever override this trap, leave it alone. */ | 452 | /* If no Guest can ever override this trap, leave it alone. */ |
| 446 | if (!direct_trap(i)) | 453 | if (!direct_trap(i)) |
| 447 | continue; | 454 | continue; |
| @@ -449,12 +456,15 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, | |||
| 449 | /* Only trap gates (type 15) can go direct to the Guest. | 456 | /* Only trap gates (type 15) can go direct to the Guest. |
| 450 | * Interrupt gates (type 14) disable interrupts as they are | 457 | * Interrupt gates (type 14) disable interrupts as they are |
| 451 | * entered, which we never let the Guest do. Not present | 458 | * entered, which we never let the Guest do. Not present |
| 452 | * entries (type 0x0) also can't go direct, of course. */ | 459 | * entries (type 0x0) also can't go direct, of course. |
| 453 | if (idt_type(cpu->arch.idt[i].a, cpu->arch.idt[i].b) == 0xF) | 460 | * |
| 454 | idt[i] = cpu->arch.idt[i]; | 461 | * If it can't go direct, we still need to copy the priv. level: |
| 462 | * they might want to give userspace access to a software | ||
| 463 | * interrupt. */ | ||
| 464 | if (idt_type(gidt->a, gidt->b) == 0xF) | ||
| 465 | idt[i] = *gidt; | ||
| 455 | else | 466 | else |
| 456 | /* Reset it to the default. */ | 467 | default_idt_entry(&idt[i], i, def[i], gidt); |
| 457 | default_idt_entry(&idt[i], i, def[i]); | ||
| 458 | } | 468 | } |
| 459 | } | 469 | } |
| 460 | 470 | ||
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 95dfda52b4f9..bf7942327bda 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
| @@ -480,7 +480,7 @@ void __init lguest_arch_host_init(void) | |||
| 480 | * bit on its CPU, depending on the argument (0 == unset). */ | 480 | * bit on its CPU, depending on the argument (0 == unset). */ |
| 481 | on_each_cpu(adjust_pge, (void *)0, 1); | 481 | on_each_cpu(adjust_pge, (void *)0, 1); |
| 482 | /* Turn off the feature in the global feature set. */ | 482 | /* Turn off the feature in the global feature set. */ |
| 483 | clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); | 483 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); |
| 484 | } | 484 | } |
| 485 | put_online_cpus(); | 485 | put_online_cpus(); |
| 486 | }; | 486 | }; |
| @@ -491,7 +491,7 @@ void __exit lguest_arch_host_fini(void) | |||
| 491 | /* If we had PGE before we started, turn it back on now. */ | 491 | /* If we had PGE before we started, turn it back on now. */ |
| 492 | get_online_cpus(); | 492 | get_online_cpus(); |
| 493 | if (cpu_had_pge) { | 493 | if (cpu_had_pge) { |
| 494 | set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); | 494 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); |
| 495 | /* adjust_pge's argument "1" means set PGE. */ | 495 | /* adjust_pge's argument "1" means set PGE. */ |
| 496 | on_each_cpu(adjust_pge, (void *)1, 1); | 496 | on_each_cpu(adjust_pge, (void *)1, 1); |
| 497 | } | 497 | } |
