diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2009-07-30 18:03:45 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2009-07-30 02:33:46 -0400 |
commit | a91d74a3c4de8115295ee87350c13a329164aaaf (patch) | |
tree | 02c862fccc9abedf7fc354061e69c4b5fbcce06d /drivers/lguest/lguest_user.c | |
parent | 2e04ef76916d1e29a077ea9d0f2003c8fd86724d (diff) |
lguest: update commentry
Every so often, after code shuffles, I need to go through and unbitrot
the Lguest Journey (see drivers/lguest/README). Since we now use RCU in
a simple form in one place I took the opportunity to expand that explanation.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'drivers/lguest/lguest_user.c')
-rw-r--r-- | drivers/lguest/lguest_user.c | 100 |
1 files changed, 90 insertions, 10 deletions
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 7e92017103dc..b4d3f7ca554f 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -1,9 +1,8 @@ | |||
1 | /*P:200 | 1 | /*P:200 This contains all the /dev/lguest code, whereby the userspace launcher |
2 | * This contains all the /dev/lguest code, whereby the userspace launcher | ||
3 | * controls and communicates with the Guest. For example, the first write will | 2 | * controls and communicates with the Guest. For example, the first write will |
4 | * tell us the Guest's memory layout, pagetable, entry point and kernel address | 3 | * tell us the Guest's memory layout and entry point. A read will run the |
5 | * offset. A read will run the Guest until something happens, such as a signal | 4 | * Guest until something happens, such as a signal or the Guest doing a NOTIFY |
6 | * or the Guest doing a NOTIFY out to the Launcher. | 5 | * out to the Launcher. |
7 | :*/ | 6 | :*/ |
8 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
9 | #include <linux/miscdevice.h> | 8 | #include <linux/miscdevice.h> |
@@ -13,14 +12,41 @@ | |||
13 | #include <linux/file.h> | 12 | #include <linux/file.h> |
14 | #include "lg.h" | 13 | #include "lg.h" |
15 | 14 | ||
15 | /*L:056 | ||
16 | * Before we move on, let's jump ahead and look at what the kernel does when | ||
17 | * it needs to look up the eventfds. That will complete our picture of how we | ||
18 | * use RCU. | ||
19 | * | ||
20 | * The notification value is in cpu->pending_notify: we return true if it went | ||
21 | * to an eventfd. | ||
22 | */ | ||
16 | bool send_notify_to_eventfd(struct lg_cpu *cpu) | 23 | bool send_notify_to_eventfd(struct lg_cpu *cpu) |
17 | { | 24 | { |
18 | unsigned int i; | 25 | unsigned int i; |
19 | struct lg_eventfd_map *map; | 26 | struct lg_eventfd_map *map; |
20 | 27 | ||
21 | /* lg->eventfds is RCU-protected */ | 28 | /* |
29 | * This "rcu_read_lock()" helps track when someone is still looking at | ||
30 | * the (RCU-using) eventfds array. It's not actually a lock at all; | ||
31 | * indeed it's a noop in many configurations. (You didn't expect me to | ||
32 | * explain all the RCU secrets here, did you?) | ||
33 | */ | ||
22 | rcu_read_lock(); | 34 | rcu_read_lock(); |
35 | /* | ||
36 | * rcu_dereference is the counter-side of rcu_assign_pointer(); it | ||
37 | * makes sure we don't access the memory pointed to by | ||
38 | * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy, | ||
39 | * but Alpha allows this! Paul McKenney points out that a really | ||
40 | * aggressive compiler could have the same effect: | ||
41 | * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html | ||
42 | * | ||
43 | * So play safe, use rcu_dereference to get the rcu-protected pointer: | ||
44 | */ | ||
23 | map = rcu_dereference(cpu->lg->eventfds); | 45 | map = rcu_dereference(cpu->lg->eventfds); |
46 | /* | ||
47 | * Simple array search: even if they add an eventfd while we do this, | ||
48 | * we'll continue to use the old array and just won't see the new one. | ||
49 | */ | ||
24 | for (i = 0; i < map->num; i++) { | 50 | for (i = 0; i < map->num; i++) { |
25 | if (map->map[i].addr == cpu->pending_notify) { | 51 | if (map->map[i].addr == cpu->pending_notify) { |
26 | eventfd_signal(map->map[i].event, 1); | 52 | eventfd_signal(map->map[i].event, 1); |
@@ -28,14 +54,43 @@ bool send_notify_to_eventfd(struct lg_cpu *cpu) | |||
28 | break; | 54 | break; |
29 | } | 55 | } |
30 | } | 56 | } |
57 | /* We're done with the rcu-protected variable cpu->lg->eventfds. */ | ||
31 | rcu_read_unlock(); | 58 | rcu_read_unlock(); |
59 | |||
60 | /* If we cleared the notification, it's because we found a match. */ | ||
32 | return cpu->pending_notify == 0; | 61 | return cpu->pending_notify == 0; |
33 | } | 62 | } |
34 | 63 | ||
64 | /*L:055 | ||
65 | * One of the more tricksy tricks in the Linux Kernel is a technique called | ||
66 | * Read Copy Update. Since one point of lguest is to teach lguest journeyers | ||
67 | * about kernel coding, I use it here. (In case you're curious, other purposes | ||
68 | * include learning about virtualization and instilling a deep appreciation for | ||
69 | * simplicity and puppies). | ||
70 | * | ||
71 | * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we | ||
72 | * add new eventfds without ever blocking readers from accessing the array. | ||
73 | * The current Launcher only does this during boot, so that never happens. But | ||
74 | * Read Copy Update is cool, and adding a lock risks damaging even more puppies | ||
75 | * than this code does. | ||
76 | * | ||
77 | * We allocate a brand new one-larger array, copy the old one and add our new | ||
78 | * element. Then we make the lg eventfd pointer point to the new array. | ||
79 | * That's the easy part: now we need to free the old one, but we need to make | ||
80 | * sure no slow CPU somewhere is still looking at it. That's what | ||
81 | * synchronize_rcu does for us: it waits until every CPU has indicated that it | ||
82 | * has moved on, so we know none of them is still using the old one. | ||
83 | * | ||
84 | * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update. | ||
85 | */ | ||
35 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | 86 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) |
36 | { | 87 | { |
37 | struct lg_eventfd_map *new, *old = lg->eventfds; | 88 | struct lg_eventfd_map *new, *old = lg->eventfds; |
38 | 89 | ||
90 | /* | ||
91 | * We don't allow notifications on value 0 anyway (pending_notify of | ||
92 | * 0 means "nothing pending"). | ||
93 | */ | ||
39 | if (!addr) | 94 | if (!addr) |
40 | return -EINVAL; | 95 | return -EINVAL; |
41 | 96 | ||
@@ -62,12 +117,20 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | |||
62 | } | 117 | } |
63 | new->num++; | 118 | new->num++; |
64 | 119 | ||
65 | /* Now put new one in place. */ | 120 | /* |
121 | * Now put new one in place: rcu_assign_pointer() is a fancy way of | ||
122 | * doing "lg->eventfds = new", but it uses memory barriers to make | ||
123 | * absolutely sure that the contents of "new" written above is nailed | ||
124 | * down before we actually do the assignment. | ||
125 | * | ||
126 | * We have to think about these kinds of things when we're operating on | ||
127 | * live data without locks. | ||
128 | */ | ||
66 | rcu_assign_pointer(lg->eventfds, new); | 129 | rcu_assign_pointer(lg->eventfds, new); |
67 | 130 | ||
68 | /* | 131 | /* |
69 | * We're not in a big hurry. Wait until no one's looking at old | 132 | * We're not in a big hurry. Wait until no one's looking at old |
70 | * version, then delete it. | 133 | * version, then free it. |
71 | */ | 134 | */ |
72 | synchronize_rcu(); | 135 | synchronize_rcu(); |
73 | kfree(old); | 136 | kfree(old); |
@@ -75,6 +138,14 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | |||
75 | return 0; | 138 | return 0; |
76 | } | 139 | } |
77 | 140 | ||
141 | /*L:052 | ||
142 | * Receiving notifications from the Guest is usually done by attaching a | ||
143 | * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will | ||
144 | * become readable when the Guest does an LHCALL_NOTIFY with that value. | ||
145 | * | ||
146 | * This is really convenient for processing each virtqueue in a separate | ||
147 | * thread. | ||
148 | */ | ||
78 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | 149 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) |
79 | { | 150 | { |
80 | unsigned long addr, fd; | 151 | unsigned long addr, fd; |
@@ -86,6 +157,11 @@ static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | |||
86 | if (get_user(fd, input) != 0) | 157 | if (get_user(fd, input) != 0) |
87 | return -EFAULT; | 158 | return -EFAULT; |
88 | 159 | ||
160 | /* | ||
161 | * Just make sure two callers don't add eventfds at once. We really | ||
162 | * only need to lock against callers adding to the same Guest, so using | ||
163 | * the Big Lguest Lock is overkill. But this is setup, not a fast path. | ||
164 | */ | ||
89 | mutex_lock(&lguest_lock); | 165 | mutex_lock(&lguest_lock); |
90 | err = add_eventfd(lg, addr, fd); | 166 | err = add_eventfd(lg, addr, fd); |
91 | mutex_unlock(&lguest_lock); | 167 | mutex_unlock(&lguest_lock); |
@@ -106,6 +182,10 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) | |||
106 | if (irq >= LGUEST_IRQS) | 182 | if (irq >= LGUEST_IRQS) |
107 | return -EINVAL; | 183 | return -EINVAL; |
108 | 184 | ||
185 | /* | ||
186 | * Next time the Guest runs, the core code will see if it can deliver | ||
187 | * this interrupt. | ||
188 | */ | ||
109 | set_interrupt(cpu, irq); | 189 | set_interrupt(cpu, irq); |
110 | return 0; | 190 | return 0; |
111 | } | 191 | } |
@@ -307,10 +387,10 @@ unlock: | |||
307 | * The first operation the Launcher does must be a write. All writes | 387 | * The first operation the Launcher does must be a write. All writes |
308 | * start with an unsigned long number: for the first write this must be | 388 | * start with an unsigned long number: for the first write this must be |
309 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use | 389 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use |
310 | * writes of other values to send interrupts. | 390 | * writes of other values to send interrupts or set up receipt of notifications. |
311 | * | 391 | * |
312 | * Note that we overload the "offset" in the /dev/lguest file to indicate what | 392 | * Note that we overload the "offset" in the /dev/lguest file to indicate what |
313 | * CPU number we're dealing with. Currently this is always 0, since we only | 393 | * CPU number we're dealing with. Currently this is always 0 since we only |
314 | * support uniprocessor Guests, but you can see the beginnings of SMP support | 394 | * support uniprocessor Guests, but you can see the beginnings of SMP support |
315 | * here. | 395 | * here. |
316 | */ | 396 | */ |