diff options
Diffstat (limited to 'drivers/lguest/lguest_user.c')
-rw-r--r-- | drivers/lguest/lguest_user.c | 221 |
1 files changed, 58 insertions, 163 deletions
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 4263f4cc8c55..c4c6113eb9a6 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -2,175 +2,62 @@ | |||
2 | * launcher controls and communicates with the Guest. For example, | 2 | * launcher controls and communicates with the Guest. For example, |
3 | * the first write will tell us the Guest's memory layout and entry | 3 | * the first write will tell us the Guest's memory layout and entry |
4 | * point. A read will run the Guest until something happens, such as | 4 | * point. A read will run the Guest until something happens, such as |
5 | * a signal or the Guest doing a NOTIFY out to the Launcher. There is | 5 | * a signal or the Guest accessing a device. |
6 | * also a way for the Launcher to attach eventfds to particular NOTIFY | ||
7 | * values instead of returning from the read() call. | ||
8 | :*/ | 6 | :*/ |
9 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
10 | #include <linux/miscdevice.h> | 8 | #include <linux/miscdevice.h> |
11 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
12 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
13 | #include <linux/eventfd.h> | ||
14 | #include <linux/file.h> | 11 | #include <linux/file.h> |
15 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
16 | #include <linux/export.h> | 13 | #include <linux/export.h> |
17 | #include "lg.h" | 14 | #include "lg.h" |
18 | 15 | ||
19 | /*L:056 | 16 | /*L:052 |
20 | * Before we move on, let's jump ahead and look at what the kernel does when | 17 | The Launcher can get the registers, and also set some of them. |
21 | * it needs to look up the eventfds. That will complete our picture of how we | 18 | */ |
22 | * use RCU. | 19 | static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input) |
23 | * | ||
24 | * The notification value is in cpu->pending_notify: we return true if it went | ||
25 | * to an eventfd. | ||
26 | */ | ||
27 | bool send_notify_to_eventfd(struct lg_cpu *cpu) | ||
28 | { | ||
29 | unsigned int i; | ||
30 | struct lg_eventfd_map *map; | ||
31 | |||
32 | /* | ||
33 | * This "rcu_read_lock()" helps track when someone is still looking at | ||
34 | * the (RCU-using) eventfds array. It's not actually a lock at all; | ||
35 | * indeed it's a noop in many configurations. (You didn't expect me to | ||
36 | * explain all the RCU secrets here, did you?) | ||
37 | */ | ||
38 | rcu_read_lock(); | ||
39 | /* | ||
40 | * rcu_dereference is the counter-side of rcu_assign_pointer(); it | ||
41 | * makes sure we don't access the memory pointed to by | ||
42 | * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy, | ||
43 | * but Alpha allows this! Paul McKenney points out that a really | ||
44 | * aggressive compiler could have the same effect: | ||
45 | * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html | ||
46 | * | ||
47 | * So play safe, use rcu_dereference to get the rcu-protected pointer: | ||
48 | */ | ||
49 | map = rcu_dereference(cpu->lg->eventfds); | ||
50 | /* | ||
51 | * Simple array search: even if they add an eventfd while we do this, | ||
52 | * we'll continue to use the old array and just won't see the new one. | ||
53 | */ | ||
54 | for (i = 0; i < map->num; i++) { | ||
55 | if (map->map[i].addr == cpu->pending_notify) { | ||
56 | eventfd_signal(map->map[i].event, 1); | ||
57 | cpu->pending_notify = 0; | ||
58 | break; | ||
59 | } | ||
60 | } | ||
61 | /* We're done with the rcu-protected variable cpu->lg->eventfds. */ | ||
62 | rcu_read_unlock(); | ||
63 | |||
64 | /* If we cleared the notification, it's because we found a match. */ | ||
65 | return cpu->pending_notify == 0; | ||
66 | } | ||
67 | |||
68 | /*L:055 | ||
69 | * One of the more tricksy tricks in the Linux Kernel is a technique called | ||
70 | * Read Copy Update. Since one point of lguest is to teach lguest journeyers | ||
71 | * about kernel coding, I use it here. (In case you're curious, other purposes | ||
72 | * include learning about virtualization and instilling a deep appreciation for | ||
73 | * simplicity and puppies). | ||
74 | * | ||
75 | * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we | ||
76 | * add new eventfds without ever blocking readers from accessing the array. | ||
77 | * The current Launcher only does this during boot, so that never happens. But | ||
78 | * Read Copy Update is cool, and adding a lock risks damaging even more puppies | ||
79 | * than this code does. | ||
80 | * | ||
81 | * We allocate a brand new one-larger array, copy the old one and add our new | ||
82 | * element. Then we make the lg eventfd pointer point to the new array. | ||
83 | * That's the easy part: now we need to free the old one, but we need to make | ||
84 | * sure no slow CPU somewhere is still looking at it. That's what | ||
85 | * synchronize_rcu does for us: waits until every CPU has indicated that it has | ||
86 | * moved on to know it's no longer using the old one. | ||
87 | * | ||
88 | * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update. | ||
89 | */ | ||
90 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | ||
91 | { | 20 | { |
92 | struct lg_eventfd_map *new, *old = lg->eventfds; | 21 | unsigned long which; |
93 | |||
94 | /* | ||
95 | * We don't allow notifications on value 0 anyway (pending_notify of | ||
96 | * 0 means "nothing pending"). | ||
97 | */ | ||
98 | if (!addr) | ||
99 | return -EINVAL; | ||
100 | |||
101 | /* | ||
102 | * Replace the old array with the new one, carefully: others can | ||
103 | * be accessing it at the same time. | ||
104 | */ | ||
105 | new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), | ||
106 | GFP_KERNEL); | ||
107 | if (!new) | ||
108 | return -ENOMEM; | ||
109 | 22 | ||
110 | /* First make identical copy. */ | 23 | /* We re-use the ptrace structure to specify which register to read. */ |
111 | memcpy(new->map, old->map, sizeof(old->map[0]) * old->num); | 24 | if (get_user(which, input) != 0) |
112 | new->num = old->num; | 25 | return -EFAULT; |
113 | |||
114 | /* Now append new entry. */ | ||
115 | new->map[new->num].addr = addr; | ||
116 | new->map[new->num].event = eventfd_ctx_fdget(fd); | ||
117 | if (IS_ERR(new->map[new->num].event)) { | ||
118 | int err = PTR_ERR(new->map[new->num].event); | ||
119 | kfree(new); | ||
120 | return err; | ||
121 | } | ||
122 | new->num++; | ||
123 | 26 | ||
124 | /* | 27 | /* |
125 | * Now put new one in place: rcu_assign_pointer() is a fancy way of | 28 | * We set up the cpu register pointer, and their next read will |
126 | * doing "lg->eventfds = new", but it uses memory barriers to make | 29 | * actually get the value (instead of running the guest). |
127 | * absolutely sure that the contents of "new" written above is nailed | ||
128 | * down before we actually do the assignment. | ||
129 | * | 30 | * |
130 | * We have to think about these kinds of things when we're operating on | 31 | * The last argument 'true' says we can access any register. |
131 | * live data without locks. | ||
132 | */ | 32 | */ |
133 | rcu_assign_pointer(lg->eventfds, new); | 33 | cpu->reg_read = lguest_arch_regptr(cpu, which, true); |
34 | if (!cpu->reg_read) | ||
35 | return -ENOENT; | ||
134 | 36 | ||
135 | /* | 37 | /* And because this is a write() call, we return the length used. */ |
136 | * We're not in a big hurry. Wait until no one's looking at old | 38 | return sizeof(unsigned long) * 2; |
137 | * version, then free it. | ||
138 | */ | ||
139 | synchronize_rcu(); | ||
140 | kfree(old); | ||
141 | |||
142 | return 0; | ||
143 | } | 39 | } |
144 | 40 | ||
145 | /*L:052 | 41 | static int setreg(struct lg_cpu *cpu, const unsigned long __user *input) |
146 | * Receiving notifications from the Guest is usually done by attaching a | ||
147 | * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will | ||
148 | * become readable when the Guest does an LHCALL_NOTIFY with that value. | ||
149 | * | ||
150 | * This is really convenient for processing each virtqueue in a separate | ||
151 | * thread. | ||
152 | */ | ||
153 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | ||
154 | { | 42 | { |
155 | unsigned long addr, fd; | 43 | unsigned long which, value, *reg; |
156 | int err; | ||
157 | 44 | ||
158 | if (get_user(addr, input) != 0) | 45 | /* We re-use the ptrace structure to specify which register to set. */ |
46 | if (get_user(which, input) != 0) | ||
159 | return -EFAULT; | 47 | return -EFAULT; |
160 | input++; | 48 | input++; |
161 | if (get_user(fd, input) != 0) | 49 | if (get_user(value, input) != 0) |
162 | return -EFAULT; | 50 | return -EFAULT; |
163 | 51 | ||
164 | /* | 52 | /* The last argument 'false' means we can't access all registers. */ |
165 | * Just make sure two callers don't add eventfds at once. We really | 53 | reg = lguest_arch_regptr(cpu, which, false); |
166 | * only need to lock against callers adding to the same Guest, so using | 54 | if (!reg) |
167 | * the Big Lguest Lock is overkill. But this is setup, not a fast path. | 55 | return -ENOENT; |
168 | */ | ||
169 | mutex_lock(&lguest_lock); | ||
170 | err = add_eventfd(lg, addr, fd); | ||
171 | mutex_unlock(&lguest_lock); | ||
172 | 56 | ||
173 | return err; | 57 | *reg = value; |
58 | |||
59 | /* And because this is a write() call, we return the length used. */ | ||
60 | return sizeof(unsigned long) * 3; | ||
174 | } | 61 | } |
175 | 62 | ||
176 | /*L:050 | 63 | /*L:050 |
@@ -194,6 +81,23 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) | |||
194 | return 0; | 81 | return 0; |
195 | } | 82 | } |
196 | 83 | ||
84 | /*L:053 | ||
85 | * Deliver a trap: this is used by the Launcher if it can't emulate | ||
86 | * an instruction. | ||
87 | */ | ||
88 | static int trap(struct lg_cpu *cpu, const unsigned long __user *input) | ||
89 | { | ||
90 | unsigned long trapnum; | ||
91 | |||
92 | if (get_user(trapnum, input) != 0) | ||
93 | return -EFAULT; | ||
94 | |||
95 | if (!deliver_trap(cpu, trapnum)) | ||
96 | return -EINVAL; | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
197 | /*L:040 | 101 | /*L:040 |
198 | * Once our Guest is initialized, the Launcher makes it run by reading | 102 | * Once our Guest is initialized, the Launcher makes it run by reading |
199 | * from /dev/lguest. | 103 | * from /dev/lguest. |
@@ -237,8 +141,8 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
237 | * If we returned from read() last time because the Guest sent I/O, | 141 | * If we returned from read() last time because the Guest sent I/O, |
238 | * clear the flag. | 142 | * clear the flag. |
239 | */ | 143 | */ |
240 | if (cpu->pending_notify) | 144 | if (cpu->pending.trap) |
241 | cpu->pending_notify = 0; | 145 | cpu->pending.trap = 0; |
242 | 146 | ||
243 | /* Run the Guest until something interesting happens. */ | 147 | /* Run the Guest until something interesting happens. */ |
244 | return run_guest(cpu, (unsigned long __user *)user); | 148 | return run_guest(cpu, (unsigned long __user *)user); |
@@ -319,7 +223,7 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
319 | /* "struct lguest" contains all we (the Host) know about a Guest. */ | 223 | /* "struct lguest" contains all we (the Host) know about a Guest. */ |
320 | struct lguest *lg; | 224 | struct lguest *lg; |
321 | int err; | 225 | int err; |
322 | unsigned long args[3]; | 226 | unsigned long args[4]; |
323 | 227 | ||
324 | /* | 228 | /* |
325 | * We grab the Big Lguest lock, which protects against multiple | 229 | * We grab the Big Lguest lock, which protects against multiple |
@@ -343,21 +247,15 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
343 | goto unlock; | 247 | goto unlock; |
344 | } | 248 | } |
345 | 249 | ||
346 | lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL); | ||
347 | if (!lg->eventfds) { | ||
348 | err = -ENOMEM; | ||
349 | goto free_lg; | ||
350 | } | ||
351 | lg->eventfds->num = 0; | ||
352 | |||
353 | /* Populate the easy fields of our "struct lguest" */ | 250 | /* Populate the easy fields of our "struct lguest" */ |
354 | lg->mem_base = (void __user *)args[0]; | 251 | lg->mem_base = (void __user *)args[0]; |
355 | lg->pfn_limit = args[1]; | 252 | lg->pfn_limit = args[1]; |
253 | lg->device_limit = args[3]; | ||
356 | 254 | ||
357 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ | 255 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ |
358 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); | 256 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); |
359 | if (err) | 257 | if (err) |
360 | goto free_eventfds; | 258 | goto free_lg; |
361 | 259 | ||
362 | /* | 260 | /* |
363 | * Initialize the Guest's shadow page tables. This allocates | 261 | * Initialize the Guest's shadow page tables. This allocates |
@@ -378,8 +276,6 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
378 | free_regs: | 276 | free_regs: |
379 | /* FIXME: This should be in free_vcpu */ | 277 | /* FIXME: This should be in free_vcpu */ |
380 | free_page(lg->cpus[0].regs_page); | 278 | free_page(lg->cpus[0].regs_page); |
381 | free_eventfds: | ||
382 | kfree(lg->eventfds); | ||
383 | free_lg: | 279 | free_lg: |
384 | kfree(lg); | 280 | kfree(lg); |
385 | unlock: | 281 | unlock: |
@@ -432,8 +328,12 @@ static ssize_t write(struct file *file, const char __user *in, | |||
432 | return initialize(file, input); | 328 | return initialize(file, input); |
433 | case LHREQ_IRQ: | 329 | case LHREQ_IRQ: |
434 | return user_send_irq(cpu, input); | 330 | return user_send_irq(cpu, input); |
435 | case LHREQ_EVENTFD: | 331 | case LHREQ_GETREG: |
436 | return attach_eventfd(lg, input); | 332 | return getreg_setup(cpu, input); |
333 | case LHREQ_SETREG: | ||
334 | return setreg(cpu, input); | ||
335 | case LHREQ_TRAP: | ||
336 | return trap(cpu, input); | ||
437 | default: | 337 | default: |
438 | return -EINVAL; | 338 | return -EINVAL; |
439 | } | 339 | } |
@@ -478,11 +378,6 @@ static int close(struct inode *inode, struct file *file) | |||
478 | mmput(lg->cpus[i].mm); | 378 | mmput(lg->cpus[i].mm); |
479 | } | 379 | } |
480 | 380 | ||
481 | /* Release any eventfds they registered. */ | ||
482 | for (i = 0; i < lg->eventfds->num; i++) | ||
483 | eventfd_ctx_put(lg->eventfds->map[i].event); | ||
484 | kfree(lg->eventfds); | ||
485 | |||
486 | /* | 381 | /* |
487 | * If lg->dead doesn't contain an error code it will be NULL or a | 382 | * If lg->dead doesn't contain an error code it will be NULL or a |
488 | * kmalloc()ed string, either of which is ok to hand to kfree(). | 383 | * kmalloc()ed string, either of which is ok to hand to kfree(). |