diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2015-02-10 23:58:01 -0500 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2015-02-11 01:17:46 -0500 |
commit | d9bab50aa46ce46dd4537d455eb13b200cdac516 (patch) | |
tree | efa139a078f1842b1388e54daa67896734e64a0b /drivers/lguest/lguest_user.c | |
parent | 00f8d546512a7661d43600625f87a42a98cae26a (diff) |
lguest: remove NOTIFY call and eventfd facility.
Disappointing, as this was kind of neat (especially getting to use RCU
to manage the address -> eventfd mapping). But now that the devices are PCI
devices handled in userspace, we can get rid of both the NOTIFY hypercall and
the interface to connect an eventfd.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'drivers/lguest/lguest_user.c')
-rw-r--r-- | drivers/lguest/lguest_user.c | 186 |
1 file changed, 4 insertions, 182 deletions
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index c8b0e8575b44..c4c6113eb9a6 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -2,182 +2,20 @@ | |||
2 | * launcher controls and communicates with the Guest. For example, | 2 | * launcher controls and communicates with the Guest. For example, |
3 | * the first write will tell us the Guest's memory layout and entry | 3 | * the first write will tell us the Guest's memory layout and entry |
4 | * point. A read will run the Guest until something happens, such as | 4 | * point. A read will run the Guest until something happens, such as |
5 | * a signal or the Guest doing a NOTIFY out to the Launcher. There is | 5 | * a signal or the Guest accessing a device. |
6 | * also a way for the Launcher to attach eventfds to particular NOTIFY | ||
7 | * values instead of returning from the read() call. | ||
8 | :*/ | 6 | :*/ |
9 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
10 | #include <linux/miscdevice.h> | 8 | #include <linux/miscdevice.h> |
11 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
12 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
13 | #include <linux/eventfd.h> | ||
14 | #include <linux/file.h> | 11 | #include <linux/file.h> |
15 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
16 | #include <linux/export.h> | 13 | #include <linux/export.h> |
17 | #include "lg.h" | 14 | #include "lg.h" |
18 | 15 | ||
19 | /*L:056 | ||
20 | * Before we move on, let's jump ahead and look at what the kernel does when | ||
21 | * it needs to look up the eventfds. That will complete our picture of how we | ||
22 | * use RCU. | ||
23 | * | ||
24 | * The notification value is in cpu->pending_notify: we return true if it went | ||
25 | * to an eventfd. | ||
26 | */ | ||
27 | bool send_notify_to_eventfd(struct lg_cpu *cpu) | ||
28 | { | ||
29 | unsigned int i; | ||
30 | struct lg_eventfd_map *map; | ||
31 | |||
32 | /* We only connect LHCALL_NOTIFY to event fds, not other traps. */ | ||
33 | if (cpu->pending.trap != LGUEST_TRAP_ENTRY) | ||
34 | return false; | ||
35 | |||
36 | /* | ||
37 | * This "rcu_read_lock()" helps track when someone is still looking at | ||
38 | * the (RCU-using) eventfds array. It's not actually a lock at all; | ||
39 | * indeed it's a noop in many configurations. (You didn't expect me to | ||
40 | * explain all the RCU secrets here, did you?) | ||
41 | */ | ||
42 | rcu_read_lock(); | ||
43 | /* | ||
44 | * rcu_dereference is the counter-side of rcu_assign_pointer(); it | ||
45 | * makes sure we don't access the memory pointed to by | ||
46 | * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy, | ||
47 | * but Alpha allows this! Paul McKenney points out that a really | ||
48 | * aggressive compiler could have the same effect: | ||
49 | * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html | ||
50 | * | ||
51 | * So play safe, use rcu_dereference to get the rcu-protected pointer: | ||
52 | */ | ||
53 | map = rcu_dereference(cpu->lg->eventfds); | ||
54 | /* | ||
55 | * Simple array search: even if they add an eventfd while we do this, | ||
56 | * we'll continue to use the old array and just won't see the new one. | ||
57 | */ | ||
58 | for (i = 0; i < map->num; i++) { | ||
59 | if (map->map[i].addr == cpu->pending.addr) { | ||
60 | eventfd_signal(map->map[i].event, 1); | ||
61 | cpu->pending.trap = 0; | ||
62 | break; | ||
63 | } | ||
64 | } | ||
65 | /* We're done with the rcu-protected variable cpu->lg->eventfds. */ | ||
66 | rcu_read_unlock(); | ||
67 | |||
68 | /* If we cleared the notification, it's because we found a match. */ | ||
69 | return cpu->pending.trap == 0; | ||
70 | } | ||
71 | |||
72 | /*L:055 | ||
73 | * One of the more tricksy tricks in the Linux Kernel is a technique called | ||
74 | * Read Copy Update. Since one point of lguest is to teach lguest journeyers | ||
75 | * about kernel coding, I use it here. (In case you're curious, other purposes | ||
76 | * include learning about virtualization and instilling a deep appreciation for | ||
77 | * simplicity and puppies). | ||
78 | * | ||
79 | * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we | ||
80 | * add new eventfds without ever blocking readers from accessing the array. | ||
81 | * The current Launcher only does this during boot, so that never happens. But | ||
82 | * Read Copy Update is cool, and adding a lock risks damaging even more puppies | ||
83 | * than this code does. | ||
84 | * | ||
85 | * We allocate a brand new one-larger array, copy the old one and add our new | ||
86 | * element. Then we make the lg eventfd pointer point to the new array. | ||
87 | * That's the easy part: now we need to free the old one, but we need to make | ||
88 | * sure no slow CPU somewhere is still looking at it. That's what | ||
89 | * synchronize_rcu does for us: waits until every CPU has indicated that it has | ||
90 | * moved on to know it's no longer using the old one. | ||
91 | * | ||
92 | * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update. | ||
93 | */ | ||
94 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | ||
95 | { | ||
96 | struct lg_eventfd_map *new, *old = lg->eventfds; | ||
97 | |||
98 | /* | ||
99 | * We don't allow notifications on value 0 anyway (pending_notify of | ||
100 | * 0 means "nothing pending"). | ||
101 | */ | ||
102 | if (!addr) | ||
103 | return -EINVAL; | ||
104 | |||
105 | /* | ||
106 | * Replace the old array with the new one, carefully: others can | ||
107 | * be accessing it at the same time. | ||
108 | */ | ||
109 | new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), | ||
110 | GFP_KERNEL); | ||
111 | if (!new) | ||
112 | return -ENOMEM; | ||
113 | |||
114 | /* First make identical copy. */ | ||
115 | memcpy(new->map, old->map, sizeof(old->map[0]) * old->num); | ||
116 | new->num = old->num; | ||
117 | |||
118 | /* Now append new entry. */ | ||
119 | new->map[new->num].addr = addr; | ||
120 | new->map[new->num].event = eventfd_ctx_fdget(fd); | ||
121 | if (IS_ERR(new->map[new->num].event)) { | ||
122 | int err = PTR_ERR(new->map[new->num].event); | ||
123 | kfree(new); | ||
124 | return err; | ||
125 | } | ||
126 | new->num++; | ||
127 | |||
128 | /* | ||
129 | * Now put new one in place: rcu_assign_pointer() is a fancy way of | ||
130 | * doing "lg->eventfds = new", but it uses memory barriers to make | ||
131 | * absolutely sure that the contents of "new" written above is nailed | ||
132 | * down before we actually do the assignment. | ||
133 | * | ||
134 | * We have to think about these kinds of things when we're operating on | ||
135 | * live data without locks. | ||
136 | */ | ||
137 | rcu_assign_pointer(lg->eventfds, new); | ||
138 | |||
139 | /* | ||
140 | * We're not in a big hurry. Wait until no one's looking at old | ||
141 | * version, then free it. | ||
142 | */ | ||
143 | synchronize_rcu(); | ||
144 | kfree(old); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | /*L:052 | 16 | /*L:052 |
150 | * Receiving notifications from the Guest is usually done by attaching a | 17 | The Launcher can get the registers, and also set some of them. |
151 | * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will | 18 | */ |
152 | * become readable when the Guest does an LHCALL_NOTIFY with that value. | ||
153 | * | ||
154 | * This is really convenient for processing each virtqueue in a separate | ||
155 | * thread. | ||
156 | */ | ||
157 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | ||
158 | { | ||
159 | unsigned long addr, fd; | ||
160 | int err; | ||
161 | |||
162 | if (get_user(addr, input) != 0) | ||
163 | return -EFAULT; | ||
164 | input++; | ||
165 | if (get_user(fd, input) != 0) | ||
166 | return -EFAULT; | ||
167 | |||
168 | /* | ||
169 | * Just make sure two callers don't add eventfds at once. We really | ||
170 | * only need to lock against callers adding to the same Guest, so using | ||
171 | * the Big Lguest Lock is overkill. But this is setup, not a fast path. | ||
172 | */ | ||
173 | mutex_lock(&lguest_lock); | ||
174 | err = add_eventfd(lg, addr, fd); | ||
175 | mutex_unlock(&lguest_lock); | ||
176 | |||
177 | return err; | ||
178 | } | ||
179 | |||
180 | /* The Launcher can get the registers, and also set some of them. */ | ||
181 | static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input) | 19 | static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input) |
182 | { | 20 | { |
183 | unsigned long which; | 21 | unsigned long which; |
@@ -409,13 +247,6 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
409 | goto unlock; | 247 | goto unlock; |
410 | } | 248 | } |
411 | 249 | ||
412 | lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL); | ||
413 | if (!lg->eventfds) { | ||
414 | err = -ENOMEM; | ||
415 | goto free_lg; | ||
416 | } | ||
417 | lg->eventfds->num = 0; | ||
418 | |||
419 | /* Populate the easy fields of our "struct lguest" */ | 250 | /* Populate the easy fields of our "struct lguest" */ |
420 | lg->mem_base = (void __user *)args[0]; | 251 | lg->mem_base = (void __user *)args[0]; |
421 | lg->pfn_limit = args[1]; | 252 | lg->pfn_limit = args[1]; |
@@ -424,7 +255,7 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
424 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ | 255 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ |
425 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); | 256 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); |
426 | if (err) | 257 | if (err) |
427 | goto free_eventfds; | 258 | goto free_lg; |
428 | 259 | ||
429 | /* | 260 | /* |
430 | * Initialize the Guest's shadow page tables. This allocates | 261 | * Initialize the Guest's shadow page tables. This allocates |
@@ -445,8 +276,6 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
445 | free_regs: | 276 | free_regs: |
446 | /* FIXME: This should be in free_vcpu */ | 277 | /* FIXME: This should be in free_vcpu */ |
447 | free_page(lg->cpus[0].regs_page); | 278 | free_page(lg->cpus[0].regs_page); |
448 | free_eventfds: | ||
449 | kfree(lg->eventfds); | ||
450 | free_lg: | 279 | free_lg: |
451 | kfree(lg); | 280 | kfree(lg); |
452 | unlock: | 281 | unlock: |
@@ -499,8 +328,6 @@ static ssize_t write(struct file *file, const char __user *in, | |||
499 | return initialize(file, input); | 328 | return initialize(file, input); |
500 | case LHREQ_IRQ: | 329 | case LHREQ_IRQ: |
501 | return user_send_irq(cpu, input); | 330 | return user_send_irq(cpu, input); |
502 | case LHREQ_EVENTFD: | ||
503 | return attach_eventfd(lg, input); | ||
504 | case LHREQ_GETREG: | 331 | case LHREQ_GETREG: |
505 | return getreg_setup(cpu, input); | 332 | return getreg_setup(cpu, input); |
506 | case LHREQ_SETREG: | 333 | case LHREQ_SETREG: |
@@ -551,11 +378,6 @@ static int close(struct inode *inode, struct file *file) | |||
551 | mmput(lg->cpus[i].mm); | 378 | mmput(lg->cpus[i].mm); |
552 | } | 379 | } |
553 | 380 | ||
554 | /* Release any eventfds they registered. */ | ||
555 | for (i = 0; i < lg->eventfds->num; i++) | ||
556 | eventfd_ctx_put(lg->eventfds->map[i].event); | ||
557 | kfree(lg->eventfds); | ||
558 | |||
559 | /* | 381 | /* |
560 | * If lg->dead doesn't contain an error code it will be NULL or a | 382 | * If lg->dead doesn't contain an error code it will be NULL or a |
561 | * kmalloc()ed string, either of which is ok to hand to kfree(). | 383 | * kmalloc()ed string, either of which is ok to hand to kfree(). |