diff options
| -rw-r--r-- | arch/x86/include/asm/lguest_hcall.h | 1 | ||||
| -rw-r--r-- | drivers/lguest/core.c | 20 | ||||
| -rw-r--r-- | drivers/lguest/hypercalls.c | 4 | ||||
| -rw-r--r-- | drivers/lguest/lg.h | 12 | ||||
| -rw-r--r-- | drivers/lguest/lguest_user.c | 186 | ||||
| -rw-r--r-- | include/linux/lguest_launcher.h | 2 |
6 files changed, 10 insertions, 215 deletions
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index 879fd7d33877..ef01fef3eebc 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h | |||
| @@ -16,7 +16,6 @@ | |||
| 16 | #define LHCALL_SET_PTE 14 | 16 | #define LHCALL_SET_PTE 14 |
| 17 | #define LHCALL_SET_PGD 15 | 17 | #define LHCALL_SET_PGD 15 |
| 18 | #define LHCALL_LOAD_TLS 16 | 18 | #define LHCALL_LOAD_TLS 16 |
| 19 | #define LHCALL_NOTIFY 17 | ||
| 20 | #define LHCALL_LOAD_GDT_ENTRY 18 | 19 | #define LHCALL_LOAD_GDT_ENTRY 18 |
| 21 | #define LHCALL_SEND_INTERRUPTS 19 | 20 | #define LHCALL_SEND_INTERRUPTS 19 |
| 22 | 21 | ||
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 9159dbc583f6..7dc93aa004c8 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
| @@ -225,22 +225,12 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
| 225 | if (cpu->hcall) | 225 | if (cpu->hcall) |
| 226 | do_hypercalls(cpu); | 226 | do_hypercalls(cpu); |
| 227 | 227 | ||
| 228 | /* | 228 | /* Do we have to tell the Launcher about a trap? */ |
| 229 | * It's possible the Guest did a NOTIFY hypercall to the | ||
| 230 | * Launcher. | ||
| 231 | */ | ||
| 232 | if (cpu->pending.trap) { | 229 | if (cpu->pending.trap) { |
| 233 | /* | 230 | if (copy_to_user(user, &cpu->pending, |
| 234 | * Does it just needs to write to a registered | 231 | sizeof(cpu->pending))) |
| 235 | * eventfd (ie. the appropriate virtqueue thread)? | 232 | return -EFAULT; |
| 236 | */ | 233 | return sizeof(cpu->pending); |
| 237 | if (!send_notify_to_eventfd(cpu)) { | ||
| 238 | /* OK, we tell the main Launcher. */ | ||
| 239 | if (copy_to_user(user, &cpu->pending, | ||
| 240 | sizeof(cpu->pending))) | ||
| 241 | return -EFAULT; | ||
| 242 | return sizeof(cpu->pending); | ||
| 243 | } | ||
| 244 | } | 234 | } |
| 245 | 235 | ||
| 246 | /* | 236 | /* |
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 5dd1fb8a6610..1219af493c0f 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
| @@ -117,10 +117,6 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | |||
| 117 | /* Similarly, this sets the halted flag for run_guest(). */ | 117 | /* Similarly, this sets the halted flag for run_guest(). */ |
| 118 | cpu->halted = 1; | 118 | cpu->halted = 1; |
| 119 | break; | 119 | break; |
| 120 | case LHCALL_NOTIFY: | ||
| 121 | cpu->pending.trap = LGUEST_TRAP_ENTRY; | ||
| 122 | cpu->pending.addr = args->arg1; | ||
| 123 | break; | ||
| 124 | default: | 120 | default: |
| 125 | /* It should be an architecture-specific hypercall. */ | 121 | /* It should be an architecture-specific hypercall. */ |
| 126 | if (lguest_arch_do_hcall(cpu, args)) | 122 | if (lguest_arch_do_hcall(cpu, args)) |
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index eb81abc05995..307e8b39e7d1 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
| @@ -81,16 +81,6 @@ struct lg_cpu { | |||
| 81 | struct lg_cpu_arch arch; | 81 | struct lg_cpu_arch arch; |
| 82 | }; | 82 | }; |
| 83 | 83 | ||
| 84 | struct lg_eventfd { | ||
| 85 | unsigned long addr; | ||
| 86 | struct eventfd_ctx *event; | ||
| 87 | }; | ||
| 88 | |||
| 89 | struct lg_eventfd_map { | ||
| 90 | unsigned int num; | ||
| 91 | struct lg_eventfd map[]; | ||
| 92 | }; | ||
| 93 | |||
| 94 | /* The private info the thread maintains about the guest. */ | 84 | /* The private info the thread maintains about the guest. */ |
| 95 | struct lguest { | 85 | struct lguest { |
| 96 | struct lguest_data __user *lguest_data; | 86 | struct lguest_data __user *lguest_data; |
| @@ -117,8 +107,6 @@ struct lguest { | |||
| 117 | unsigned int stack_pages; | 107 | unsigned int stack_pages; |
| 118 | u32 tsc_khz; | 108 | u32 tsc_khz; |
| 119 | 109 | ||
| 120 | struct lg_eventfd_map *eventfds; | ||
| 121 | |||
| 122 | /* Dead? */ | 110 | /* Dead? */ |
| 123 | const char *dead; | 111 | const char *dead; |
| 124 | }; | 112 | }; |
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index c8b0e8575b44..c4c6113eb9a6 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
| @@ -2,182 +2,20 @@ | |||
| 2 | * launcher controls and communicates with the Guest. For example, | 2 | * launcher controls and communicates with the Guest. For example, |
| 3 | * the first write will tell us the Guest's memory layout and entry | 3 | * the first write will tell us the Guest's memory layout and entry |
| 4 | * point. A read will run the Guest until something happens, such as | 4 | * point. A read will run the Guest until something happens, such as |
| 5 | * a signal or the Guest doing a NOTIFY out to the Launcher. There is | 5 | * a signal or the Guest accessing a device. |
| 6 | * also a way for the Launcher to attach eventfds to particular NOTIFY | ||
| 7 | * values instead of returning from the read() call. | ||
| 8 | :*/ | 6 | :*/ |
| 9 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
| 10 | #include <linux/miscdevice.h> | 8 | #include <linux/miscdevice.h> |
| 11 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
| 12 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
| 13 | #include <linux/eventfd.h> | ||
| 14 | #include <linux/file.h> | 11 | #include <linux/file.h> |
| 15 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 16 | #include <linux/export.h> | 13 | #include <linux/export.h> |
| 17 | #include "lg.h" | 14 | #include "lg.h" |
| 18 | 15 | ||
| 19 | /*L:056 | ||
| 20 | * Before we move on, let's jump ahead and look at what the kernel does when | ||
| 21 | * it needs to look up the eventfds. That will complete our picture of how we | ||
| 22 | * use RCU. | ||
| 23 | * | ||
| 24 | * The notification value is in cpu->pending_notify: we return true if it went | ||
| 25 | * to an eventfd. | ||
| 26 | */ | ||
| 27 | bool send_notify_to_eventfd(struct lg_cpu *cpu) | ||
| 28 | { | ||
| 29 | unsigned int i; | ||
| 30 | struct lg_eventfd_map *map; | ||
| 31 | |||
| 32 | /* We only connect LHCALL_NOTIFY to event fds, not other traps. */ | ||
| 33 | if (cpu->pending.trap != LGUEST_TRAP_ENTRY) | ||
| 34 | return false; | ||
| 35 | |||
| 36 | /* | ||
| 37 | * This "rcu_read_lock()" helps track when someone is still looking at | ||
| 38 | * the (RCU-using) eventfds array. It's not actually a lock at all; | ||
| 39 | * indeed it's a noop in many configurations. (You didn't expect me to | ||
| 40 | * explain all the RCU secrets here, did you?) | ||
| 41 | */ | ||
| 42 | rcu_read_lock(); | ||
| 43 | /* | ||
| 44 | * rcu_dereference is the counter-side of rcu_assign_pointer(); it | ||
| 45 | * makes sure we don't access the memory pointed to by | ||
| 46 | * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy, | ||
| 47 | * but Alpha allows this! Paul McKenney points out that a really | ||
| 48 | * aggressive compiler could have the same effect: | ||
| 49 | * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html | ||
| 50 | * | ||
| 51 | * So play safe, use rcu_dereference to get the rcu-protected pointer: | ||
| 52 | */ | ||
| 53 | map = rcu_dereference(cpu->lg->eventfds); | ||
| 54 | /* | ||
| 55 | * Simple array search: even if they add an eventfd while we do this, | ||
| 56 | * we'll continue to use the old array and just won't see the new one. | ||
| 57 | */ | ||
| 58 | for (i = 0; i < map->num; i++) { | ||
| 59 | if (map->map[i].addr == cpu->pending.addr) { | ||
| 60 | eventfd_signal(map->map[i].event, 1); | ||
| 61 | cpu->pending.trap = 0; | ||
| 62 | break; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | /* We're done with the rcu-protected variable cpu->lg->eventfds. */ | ||
| 66 | rcu_read_unlock(); | ||
| 67 | |||
| 68 | /* If we cleared the notification, it's because we found a match. */ | ||
| 69 | return cpu->pending.trap == 0; | ||
| 70 | } | ||
| 71 | |||
| 72 | /*L:055 | ||
| 73 | * One of the more tricksy tricks in the Linux Kernel is a technique called | ||
| 74 | * Read Copy Update. Since one point of lguest is to teach lguest journeyers | ||
| 75 | * about kernel coding, I use it here. (In case you're curious, other purposes | ||
| 76 | * include learning about virtualization and instilling a deep appreciation for | ||
| 77 | * simplicity and puppies). | ||
| 78 | * | ||
| 79 | * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we | ||
| 80 | * add new eventfds without ever blocking readers from accessing the array. | ||
| 81 | * The current Launcher only does this during boot, so that never happens. But | ||
| 82 | * Read Copy Update is cool, and adding a lock risks damaging even more puppies | ||
| 83 | * than this code does. | ||
| 84 | * | ||
| 85 | * We allocate a brand new one-larger array, copy the old one and add our new | ||
| 86 | * element. Then we make the lg eventfd pointer point to the new array. | ||
| 87 | * That's the easy part: now we need to free the old one, but we need to make | ||
| 88 | * sure no slow CPU somewhere is still looking at it. That's what | ||
| 89 | * synchronize_rcu does for us: waits until every CPU has indicated that it has | ||
| 90 | * moved on to know it's no longer using the old one. | ||
| 91 | * | ||
| 92 | * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update. | ||
| 93 | */ | ||
| 94 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | ||
| 95 | { | ||
| 96 | struct lg_eventfd_map *new, *old = lg->eventfds; | ||
| 97 | |||
| 98 | /* | ||
| 99 | * We don't allow notifications on value 0 anyway (pending_notify of | ||
| 100 | * 0 means "nothing pending"). | ||
| 101 | */ | ||
| 102 | if (!addr) | ||
| 103 | return -EINVAL; | ||
| 104 | |||
| 105 | /* | ||
| 106 | * Replace the old array with the new one, carefully: others can | ||
| 107 | * be accessing it at the same time. | ||
| 108 | */ | ||
| 109 | new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), | ||
| 110 | GFP_KERNEL); | ||
| 111 | if (!new) | ||
| 112 | return -ENOMEM; | ||
| 113 | |||
| 114 | /* First make identical copy. */ | ||
| 115 | memcpy(new->map, old->map, sizeof(old->map[0]) * old->num); | ||
| 116 | new->num = old->num; | ||
| 117 | |||
| 118 | /* Now append new entry. */ | ||
| 119 | new->map[new->num].addr = addr; | ||
| 120 | new->map[new->num].event = eventfd_ctx_fdget(fd); | ||
| 121 | if (IS_ERR(new->map[new->num].event)) { | ||
| 122 | int err = PTR_ERR(new->map[new->num].event); | ||
| 123 | kfree(new); | ||
| 124 | return err; | ||
| 125 | } | ||
| 126 | new->num++; | ||
| 127 | |||
| 128 | /* | ||
| 129 | * Now put new one in place: rcu_assign_pointer() is a fancy way of | ||
| 130 | * doing "lg->eventfds = new", but it uses memory barriers to make | ||
| 131 | * absolutely sure that the contents of "new" written above is nailed | ||
| 132 | * down before we actually do the assignment. | ||
| 133 | * | ||
| 134 | * We have to think about these kinds of things when we're operating on | ||
| 135 | * live data without locks. | ||
| 136 | */ | ||
| 137 | rcu_assign_pointer(lg->eventfds, new); | ||
| 138 | |||
| 139 | /* | ||
| 140 | * We're not in a big hurry. Wait until no one's looking at old | ||
| 141 | * version, then free it. | ||
| 142 | */ | ||
| 143 | synchronize_rcu(); | ||
| 144 | kfree(old); | ||
| 145 | |||
| 146 | return 0; | ||
| 147 | } | ||
| 148 | |||
| 149 | /*L:052 | 16 | /*L:052 |
| 150 | * Receiving notifications from the Guest is usually done by attaching a | 17 | The Launcher can get the registers, and also set some of them. |
| 151 | * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will | 18 | */ |
| 152 | * become readable when the Guest does an LHCALL_NOTIFY with that value. | ||
| 153 | * | ||
| 154 | * This is really convenient for processing each virtqueue in a separate | ||
| 155 | * thread. | ||
| 156 | */ | ||
| 157 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | ||
| 158 | { | ||
| 159 | unsigned long addr, fd; | ||
| 160 | int err; | ||
| 161 | |||
| 162 | if (get_user(addr, input) != 0) | ||
| 163 | return -EFAULT; | ||
| 164 | input++; | ||
| 165 | if (get_user(fd, input) != 0) | ||
| 166 | return -EFAULT; | ||
| 167 | |||
| 168 | /* | ||
| 169 | * Just make sure two callers don't add eventfds at once. We really | ||
| 170 | * only need to lock against callers adding to the same Guest, so using | ||
| 171 | * the Big Lguest Lock is overkill. But this is setup, not a fast path. | ||
| 172 | */ | ||
| 173 | mutex_lock(&lguest_lock); | ||
| 174 | err = add_eventfd(lg, addr, fd); | ||
| 175 | mutex_unlock(&lguest_lock); | ||
| 176 | |||
| 177 | return err; | ||
| 178 | } | ||
| 179 | |||
| 180 | /* The Launcher can get the registers, and also set some of them. */ | ||
| 181 | static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input) | 19 | static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input) |
| 182 | { | 20 | { |
| 183 | unsigned long which; | 21 | unsigned long which; |
| @@ -409,13 +247,6 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
| 409 | goto unlock; | 247 | goto unlock; |
| 410 | } | 248 | } |
| 411 | 249 | ||
| 412 | lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL); | ||
| 413 | if (!lg->eventfds) { | ||
| 414 | err = -ENOMEM; | ||
| 415 | goto free_lg; | ||
| 416 | } | ||
| 417 | lg->eventfds->num = 0; | ||
| 418 | |||
| 419 | /* Populate the easy fields of our "struct lguest" */ | 250 | /* Populate the easy fields of our "struct lguest" */ |
| 420 | lg->mem_base = (void __user *)args[0]; | 251 | lg->mem_base = (void __user *)args[0]; |
| 421 | lg->pfn_limit = args[1]; | 252 | lg->pfn_limit = args[1]; |
| @@ -424,7 +255,7 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
| 424 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ | 255 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ |
| 425 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); | 256 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); |
| 426 | if (err) | 257 | if (err) |
| 427 | goto free_eventfds; | 258 | goto free_lg; |
| 428 | 259 | ||
| 429 | /* | 260 | /* |
| 430 | * Initialize the Guest's shadow page tables. This allocates | 261 | * Initialize the Guest's shadow page tables. This allocates |
| @@ -445,8 +276,6 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
| 445 | free_regs: | 276 | free_regs: |
| 446 | /* FIXME: This should be in free_vcpu */ | 277 | /* FIXME: This should be in free_vcpu */ |
| 447 | free_page(lg->cpus[0].regs_page); | 278 | free_page(lg->cpus[0].regs_page); |
| 448 | free_eventfds: | ||
| 449 | kfree(lg->eventfds); | ||
| 450 | free_lg: | 279 | free_lg: |
| 451 | kfree(lg); | 280 | kfree(lg); |
| 452 | unlock: | 281 | unlock: |
| @@ -499,8 +328,6 @@ static ssize_t write(struct file *file, const char __user *in, | |||
| 499 | return initialize(file, input); | 328 | return initialize(file, input); |
| 500 | case LHREQ_IRQ: | 329 | case LHREQ_IRQ: |
| 501 | return user_send_irq(cpu, input); | 330 | return user_send_irq(cpu, input); |
| 502 | case LHREQ_EVENTFD: | ||
| 503 | return attach_eventfd(lg, input); | ||
| 504 | case LHREQ_GETREG: | 331 | case LHREQ_GETREG: |
| 505 | return getreg_setup(cpu, input); | 332 | return getreg_setup(cpu, input); |
| 506 | case LHREQ_SETREG: | 333 | case LHREQ_SETREG: |
| @@ -551,11 +378,6 @@ static int close(struct inode *inode, struct file *file) | |||
| 551 | mmput(lg->cpus[i].mm); | 378 | mmput(lg->cpus[i].mm); |
| 552 | } | 379 | } |
| 553 | 380 | ||
| 554 | /* Release any eventfds they registered. */ | ||
| 555 | for (i = 0; i < lg->eventfds->num; i++) | ||
| 556 | eventfd_ctx_put(lg->eventfds->map[i].event); | ||
| 557 | kfree(lg->eventfds); | ||
| 558 | |||
| 559 | /* | 381 | /* |
| 560 | * If lg->dead doesn't contain an error code it will be NULL or a | 382 | * If lg->dead doesn't contain an error code it will be NULL or a |
| 561 | * kmalloc()ed string, either of which is ok to hand to kfree(). | 383 | * kmalloc()ed string, either of which is ok to hand to kfree(). |
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index 677cde735d4b..acd5b12565cc 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h | |||
| @@ -23,7 +23,7 @@ enum lguest_req | |||
| 23 | LHREQ_GETDMA, /* No longer used */ | 23 | LHREQ_GETDMA, /* No longer used */ |
| 24 | LHREQ_IRQ, /* + irq */ | 24 | LHREQ_IRQ, /* + irq */ |
| 25 | LHREQ_BREAK, /* No longer used */ | 25 | LHREQ_BREAK, /* No longer used */ |
| 26 | LHREQ_EVENTFD, /* + address, fd. */ | 26 | LHREQ_EVENTFD, /* No longer used. */ |
| 27 | LHREQ_GETREG, /* + offset within struct pt_regs (then read value). */ | 27 | LHREQ_GETREG, /* + offset within struct pt_regs (then read value). */ |
| 28 | LHREQ_SETREG, /* + offset within struct pt_regs, value. */ | 28 | LHREQ_SETREG, /* + offset within struct pt_regs, value. */ |
| 29 | LHREQ_TRAP, /* + trap number to deliver to guest. */ | 29 | LHREQ_TRAP, /* + trap number to deliver to guest. */ |
