diff options
-rw-r--r-- | arch/x86/include/asm/lguest_hcall.h | 1 | ||||
-rw-r--r-- | drivers/lguest/core.c | 20 | ||||
-rw-r--r-- | drivers/lguest/hypercalls.c | 4 | ||||
-rw-r--r-- | drivers/lguest/lg.h | 12 | ||||
-rw-r--r-- | drivers/lguest/lguest_user.c | 186 | ||||
-rw-r--r-- | include/linux/lguest_launcher.h | 2 |
6 files changed, 10 insertions, 215 deletions
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index 879fd7d33877..ef01fef3eebc 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h | |||
@@ -16,7 +16,6 @@ | |||
16 | #define LHCALL_SET_PTE 14 | 16 | #define LHCALL_SET_PTE 14 |
17 | #define LHCALL_SET_PGD 15 | 17 | #define LHCALL_SET_PGD 15 |
18 | #define LHCALL_LOAD_TLS 16 | 18 | #define LHCALL_LOAD_TLS 16 |
19 | #define LHCALL_NOTIFY 17 | ||
20 | #define LHCALL_LOAD_GDT_ENTRY 18 | 19 | #define LHCALL_LOAD_GDT_ENTRY 18 |
21 | #define LHCALL_SEND_INTERRUPTS 19 | 20 | #define LHCALL_SEND_INTERRUPTS 19 |
22 | 21 | ||
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 9159dbc583f6..7dc93aa004c8 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
@@ -225,22 +225,12 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
225 | if (cpu->hcall) | 225 | if (cpu->hcall) |
226 | do_hypercalls(cpu); | 226 | do_hypercalls(cpu); |
227 | 227 | ||
228 | /* | 228 | /* Do we have to tell the Launcher about a trap? */ |
229 | * It's possible the Guest did a NOTIFY hypercall to the | ||
230 | * Launcher. | ||
231 | */ | ||
232 | if (cpu->pending.trap) { | 229 | if (cpu->pending.trap) { |
233 | /* | 230 | if (copy_to_user(user, &cpu->pending, |
234 | * Does it just needs to write to a registered | 231 | sizeof(cpu->pending))) |
235 | * eventfd (ie. the appropriate virtqueue thread)? | 232 | return -EFAULT; |
236 | */ | 233 | return sizeof(cpu->pending); |
237 | if (!send_notify_to_eventfd(cpu)) { | ||
238 | /* OK, we tell the main Launcher. */ | ||
239 | if (copy_to_user(user, &cpu->pending, | ||
240 | sizeof(cpu->pending))) | ||
241 | return -EFAULT; | ||
242 | return sizeof(cpu->pending); | ||
243 | } | ||
244 | } | 234 | } |
245 | 235 | ||
246 | /* | 236 | /* |
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 5dd1fb8a6610..1219af493c0f 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
@@ -117,10 +117,6 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | |||
117 | /* Similarly, this sets the halted flag for run_guest(). */ | 117 | /* Similarly, this sets the halted flag for run_guest(). */ |
118 | cpu->halted = 1; | 118 | cpu->halted = 1; |
119 | break; | 119 | break; |
120 | case LHCALL_NOTIFY: | ||
121 | cpu->pending.trap = LGUEST_TRAP_ENTRY; | ||
122 | cpu->pending.addr = args->arg1; | ||
123 | break; | ||
124 | default: | 120 | default: |
125 | /* It should be an architecture-specific hypercall. */ | 121 | /* It should be an architecture-specific hypercall. */ |
126 | if (lguest_arch_do_hcall(cpu, args)) | 122 | if (lguest_arch_do_hcall(cpu, args)) |
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index eb81abc05995..307e8b39e7d1 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
@@ -81,16 +81,6 @@ struct lg_cpu { | |||
81 | struct lg_cpu_arch arch; | 81 | struct lg_cpu_arch arch; |
82 | }; | 82 | }; |
83 | 83 | ||
84 | struct lg_eventfd { | ||
85 | unsigned long addr; | ||
86 | struct eventfd_ctx *event; | ||
87 | }; | ||
88 | |||
89 | struct lg_eventfd_map { | ||
90 | unsigned int num; | ||
91 | struct lg_eventfd map[]; | ||
92 | }; | ||
93 | |||
94 | /* The private info the thread maintains about the guest. */ | 84 | /* The private info the thread maintains about the guest. */ |
95 | struct lguest { | 85 | struct lguest { |
96 | struct lguest_data __user *lguest_data; | 86 | struct lguest_data __user *lguest_data; |
@@ -117,8 +107,6 @@ struct lguest { | |||
117 | unsigned int stack_pages; | 107 | unsigned int stack_pages; |
118 | u32 tsc_khz; | 108 | u32 tsc_khz; |
119 | 109 | ||
120 | struct lg_eventfd_map *eventfds; | ||
121 | |||
122 | /* Dead? */ | 110 | /* Dead? */ |
123 | const char *dead; | 111 | const char *dead; |
124 | }; | 112 | }; |
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index c8b0e8575b44..c4c6113eb9a6 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -2,182 +2,20 @@ | |||
2 | * launcher controls and communicates with the Guest. For example, | 2 | * launcher controls and communicates with the Guest. For example, |
3 | * the first write will tell us the Guest's memory layout and entry | 3 | * the first write will tell us the Guest's memory layout and entry |
4 | * point. A read will run the Guest until something happens, such as | 4 | * point. A read will run the Guest until something happens, such as |
5 | * a signal or the Guest doing a NOTIFY out to the Launcher. There is | 5 | * a signal or the Guest accessing a device. |
6 | * also a way for the Launcher to attach eventfds to particular NOTIFY | ||
7 | * values instead of returning from the read() call. | ||
8 | :*/ | 6 | :*/ |
9 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
10 | #include <linux/miscdevice.h> | 8 | #include <linux/miscdevice.h> |
11 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
12 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
13 | #include <linux/eventfd.h> | ||
14 | #include <linux/file.h> | 11 | #include <linux/file.h> |
15 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
16 | #include <linux/export.h> | 13 | #include <linux/export.h> |
17 | #include "lg.h" | 14 | #include "lg.h" |
18 | 15 | ||
19 | /*L:056 | ||
20 | * Before we move on, let's jump ahead and look at what the kernel does when | ||
21 | * it needs to look up the eventfds. That will complete our picture of how we | ||
22 | * use RCU. | ||
23 | * | ||
24 | * The notification value is in cpu->pending_notify: we return true if it went | ||
25 | * to an eventfd. | ||
26 | */ | ||
27 | bool send_notify_to_eventfd(struct lg_cpu *cpu) | ||
28 | { | ||
29 | unsigned int i; | ||
30 | struct lg_eventfd_map *map; | ||
31 | |||
32 | /* We only connect LHCALL_NOTIFY to event fds, not other traps. */ | ||
33 | if (cpu->pending.trap != LGUEST_TRAP_ENTRY) | ||
34 | return false; | ||
35 | |||
36 | /* | ||
37 | * This "rcu_read_lock()" helps track when someone is still looking at | ||
38 | * the (RCU-using) eventfds array. It's not actually a lock at all; | ||
39 | * indeed it's a noop in many configurations. (You didn't expect me to | ||
40 | * explain all the RCU secrets here, did you?) | ||
41 | */ | ||
42 | rcu_read_lock(); | ||
43 | /* | ||
44 | * rcu_dereference is the counter-side of rcu_assign_pointer(); it | ||
45 | * makes sure we don't access the memory pointed to by | ||
46 | * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy, | ||
47 | * but Alpha allows this! Paul McKenney points out that a really | ||
48 | * aggressive compiler could have the same effect: | ||
49 | * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html | ||
50 | * | ||
51 | * So play safe, use rcu_dereference to get the rcu-protected pointer: | ||
52 | */ | ||
53 | map = rcu_dereference(cpu->lg->eventfds); | ||
54 | /* | ||
55 | * Simple array search: even if they add an eventfd while we do this, | ||
56 | * we'll continue to use the old array and just won't see the new one. | ||
57 | */ | ||
58 | for (i = 0; i < map->num; i++) { | ||
59 | if (map->map[i].addr == cpu->pending.addr) { | ||
60 | eventfd_signal(map->map[i].event, 1); | ||
61 | cpu->pending.trap = 0; | ||
62 | break; | ||
63 | } | ||
64 | } | ||
65 | /* We're done with the rcu-protected variable cpu->lg->eventfds. */ | ||
66 | rcu_read_unlock(); | ||
67 | |||
68 | /* If we cleared the notification, it's because we found a match. */ | ||
69 | return cpu->pending.trap == 0; | ||
70 | } | ||
71 | |||
72 | /*L:055 | ||
73 | * One of the more tricksy tricks in the Linux Kernel is a technique called | ||
74 | * Read Copy Update. Since one point of lguest is to teach lguest journeyers | ||
75 | * about kernel coding, I use it here. (In case you're curious, other purposes | ||
76 | * include learning about virtualization and instilling a deep appreciation for | ||
77 | * simplicity and puppies). | ||
78 | * | ||
79 | * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we | ||
80 | * add new eventfds without ever blocking readers from accessing the array. | ||
81 | * The current Launcher only does this during boot, so that never happens. But | ||
82 | * Read Copy Update is cool, and adding a lock risks damaging even more puppies | ||
83 | * than this code does. | ||
84 | * | ||
85 | * We allocate a brand new one-larger array, copy the old one and add our new | ||
86 | * element. Then we make the lg eventfd pointer point to the new array. | ||
87 | * That's the easy part: now we need to free the old one, but we need to make | ||
88 | * sure no slow CPU somewhere is still looking at it. That's what | ||
89 | * synchronize_rcu does for us: waits until every CPU has indicated that it has | ||
90 | * moved on to know it's no longer using the old one. | ||
91 | * | ||
92 | * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update. | ||
93 | */ | ||
94 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | ||
95 | { | ||
96 | struct lg_eventfd_map *new, *old = lg->eventfds; | ||
97 | |||
98 | /* | ||
99 | * We don't allow notifications on value 0 anyway (pending_notify of | ||
100 | * 0 means "nothing pending"). | ||
101 | */ | ||
102 | if (!addr) | ||
103 | return -EINVAL; | ||
104 | |||
105 | /* | ||
106 | * Replace the old array with the new one, carefully: others can | ||
107 | * be accessing it at the same time. | ||
108 | */ | ||
109 | new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), | ||
110 | GFP_KERNEL); | ||
111 | if (!new) | ||
112 | return -ENOMEM; | ||
113 | |||
114 | /* First make identical copy. */ | ||
115 | memcpy(new->map, old->map, sizeof(old->map[0]) * old->num); | ||
116 | new->num = old->num; | ||
117 | |||
118 | /* Now append new entry. */ | ||
119 | new->map[new->num].addr = addr; | ||
120 | new->map[new->num].event = eventfd_ctx_fdget(fd); | ||
121 | if (IS_ERR(new->map[new->num].event)) { | ||
122 | int err = PTR_ERR(new->map[new->num].event); | ||
123 | kfree(new); | ||
124 | return err; | ||
125 | } | ||
126 | new->num++; | ||
127 | |||
128 | /* | ||
129 | * Now put new one in place: rcu_assign_pointer() is a fancy way of | ||
130 | * doing "lg->eventfds = new", but it uses memory barriers to make | ||
131 | * absolutely sure that the contents of "new" written above is nailed | ||
132 | * down before we actually do the assignment. | ||
133 | * | ||
134 | * We have to think about these kinds of things when we're operating on | ||
135 | * live data without locks. | ||
136 | */ | ||
137 | rcu_assign_pointer(lg->eventfds, new); | ||
138 | |||
139 | /* | ||
140 | * We're not in a big hurry. Wait until no one's looking at old | ||
141 | * version, then free it. | ||
142 | */ | ||
143 | synchronize_rcu(); | ||
144 | kfree(old); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | /*L:052 | 16 | /*L:052 |
150 | * Receiving notifications from the Guest is usually done by attaching a | 17 | The Launcher can get the registers, and also set some of them. |
151 | * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will | 18 | */ |
152 | * become readable when the Guest does an LHCALL_NOTIFY with that value. | ||
153 | * | ||
154 | * This is really convenient for processing each virtqueue in a separate | ||
155 | * thread. | ||
156 | */ | ||
157 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | ||
158 | { | ||
159 | unsigned long addr, fd; | ||
160 | int err; | ||
161 | |||
162 | if (get_user(addr, input) != 0) | ||
163 | return -EFAULT; | ||
164 | input++; | ||
165 | if (get_user(fd, input) != 0) | ||
166 | return -EFAULT; | ||
167 | |||
168 | /* | ||
169 | * Just make sure two callers don't add eventfds at once. We really | ||
170 | * only need to lock against callers adding to the same Guest, so using | ||
171 | * the Big Lguest Lock is overkill. But this is setup, not a fast path. | ||
172 | */ | ||
173 | mutex_lock(&lguest_lock); | ||
174 | err = add_eventfd(lg, addr, fd); | ||
175 | mutex_unlock(&lguest_lock); | ||
176 | |||
177 | return err; | ||
178 | } | ||
179 | |||
180 | /* The Launcher can get the registers, and also set some of them. */ | ||
181 | static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input) | 19 | static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input) |
182 | { | 20 | { |
183 | unsigned long which; | 21 | unsigned long which; |
@@ -409,13 +247,6 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
409 | goto unlock; | 247 | goto unlock; |
410 | } | 248 | } |
411 | 249 | ||
412 | lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL); | ||
413 | if (!lg->eventfds) { | ||
414 | err = -ENOMEM; | ||
415 | goto free_lg; | ||
416 | } | ||
417 | lg->eventfds->num = 0; | ||
418 | |||
419 | /* Populate the easy fields of our "struct lguest" */ | 250 | /* Populate the easy fields of our "struct lguest" */ |
420 | lg->mem_base = (void __user *)args[0]; | 251 | lg->mem_base = (void __user *)args[0]; |
421 | lg->pfn_limit = args[1]; | 252 | lg->pfn_limit = args[1]; |
@@ -424,7 +255,7 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
424 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ | 255 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ |
425 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); | 256 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); |
426 | if (err) | 257 | if (err) |
427 | goto free_eventfds; | 258 | goto free_lg; |
428 | 259 | ||
429 | /* | 260 | /* |
430 | * Initialize the Guest's shadow page tables. This allocates | 261 | * Initialize the Guest's shadow page tables. This allocates |
@@ -445,8 +276,6 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
445 | free_regs: | 276 | free_regs: |
446 | /* FIXME: This should be in free_vcpu */ | 277 | /* FIXME: This should be in free_vcpu */ |
447 | free_page(lg->cpus[0].regs_page); | 278 | free_page(lg->cpus[0].regs_page); |
448 | free_eventfds: | ||
449 | kfree(lg->eventfds); | ||
450 | free_lg: | 279 | free_lg: |
451 | kfree(lg); | 280 | kfree(lg); |
452 | unlock: | 281 | unlock: |
@@ -499,8 +328,6 @@ static ssize_t write(struct file *file, const char __user *in, | |||
499 | return initialize(file, input); | 328 | return initialize(file, input); |
500 | case LHREQ_IRQ: | 329 | case LHREQ_IRQ: |
501 | return user_send_irq(cpu, input); | 330 | return user_send_irq(cpu, input); |
502 | case LHREQ_EVENTFD: | ||
503 | return attach_eventfd(lg, input); | ||
504 | case LHREQ_GETREG: | 331 | case LHREQ_GETREG: |
505 | return getreg_setup(cpu, input); | 332 | return getreg_setup(cpu, input); |
506 | case LHREQ_SETREG: | 333 | case LHREQ_SETREG: |
@@ -551,11 +378,6 @@ static int close(struct inode *inode, struct file *file) | |||
551 | mmput(lg->cpus[i].mm); | 378 | mmput(lg->cpus[i].mm); |
552 | } | 379 | } |
553 | 380 | ||
554 | /* Release any eventfds they registered. */ | ||
555 | for (i = 0; i < lg->eventfds->num; i++) | ||
556 | eventfd_ctx_put(lg->eventfds->map[i].event); | ||
557 | kfree(lg->eventfds); | ||
558 | |||
559 | /* | 381 | /* |
560 | * If lg->dead doesn't contain an error code it will be NULL or a | 382 | * If lg->dead doesn't contain an error code it will be NULL or a |
561 | * kmalloc()ed string, either of which is ok to hand to kfree(). | 383 | * kmalloc()ed string, either of which is ok to hand to kfree(). |
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index 677cde735d4b..acd5b12565cc 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h | |||
@@ -23,7 +23,7 @@ enum lguest_req | |||
23 | LHREQ_GETDMA, /* No longer used */ | 23 | LHREQ_GETDMA, /* No longer used */ |
24 | LHREQ_IRQ, /* + irq */ | 24 | LHREQ_IRQ, /* + irq */ |
25 | LHREQ_BREAK, /* No longer used */ | 25 | LHREQ_BREAK, /* No longer used */ |
26 | LHREQ_EVENTFD, /* + address, fd. */ | 26 | LHREQ_EVENTFD, /* No longer used. */ |
27 | LHREQ_GETREG, /* + offset within struct pt_regs (then read value). */ | 27 | LHREQ_GETREG, /* + offset within struct pt_regs (then read value). */ |
28 | LHREQ_SETREG, /* + offset within struct pt_regs, value. */ | 28 | LHREQ_SETREG, /* + offset within struct pt_regs, value. */ |
29 | LHREQ_TRAP, /* + trap number to deliver to guest. */ | 29 | LHREQ_TRAP, /* + trap number to deliver to guest. */ |