diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2009-06-13 00:27:09 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2009-06-12 08:57:10 -0400 |
commit | df60aeef4f4fe0645d9a195a7689005520422de5 (patch) | |
tree | 3cfa3c4a986436c8accd5f0a57d5a6f70f1b7965 | |
parent | 5718607bb670c721f45f0dbb1cc7d6c64969aab1 (diff) |
lguest: use eventfds for device notification
Currently, when a Guest wants to perform I/O, it calls LHCALL_NOTIFY with
an address; the main Launcher process returns with this address and figures
out which device to run.
A far nicer model is to let processes bind an eventfd to an address: if we
find an eventfd bound to the notified address, we simply signal it instead
of returning to the Launcher.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Davide Libenzi <davidel@xmailserver.org>
-rw-r--r-- | drivers/lguest/Kconfig | 2 | ||||
-rw-r--r-- | drivers/lguest/core.c | 8 | ||||
-rw-r--r-- | drivers/lguest/lg.h | 13 | ||||
-rw-r--r-- | drivers/lguest/lguest_user.c | 98 | ||||
-rw-r--r-- | include/linux/lguest_launcher.h | 1 |
5 files changed, 116 insertions, 6 deletions
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig index 8f63845db830..0aaa0597a622 100644 --- a/drivers/lguest/Kconfig +++ b/drivers/lguest/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config LGUEST | 1 | config LGUEST |
2 | tristate "Linux hypervisor example code" | 2 | tristate "Linux hypervisor example code" |
3 | depends on X86_32 && EXPERIMENTAL && FUTEX | 3 | depends on X86_32 && EXPERIMENTAL && EVENTFD |
4 | select HVC_DRIVER | 4 | select HVC_DRIVER |
5 | ---help--- | 5 | ---help--- |
6 | This is a very simple module which allows you to run | 6 | This is a very simple module which allows you to run |
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index d0298dc45d97..508569c9571a 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
@@ -198,9 +198,11 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
198 | /* It's possible the Guest did a NOTIFY hypercall to the | 198 | /* It's possible the Guest did a NOTIFY hypercall to the |
199 | * Launcher, in which case we return from the read() now. */ | 199 | * Launcher, in which case we return from the read() now. */ |
200 | if (cpu->pending_notify) { | 200 | if (cpu->pending_notify) { |
201 | if (put_user(cpu->pending_notify, user)) | 201 | if (!send_notify_to_eventfd(cpu)) { |
202 | return -EFAULT; | 202 | if (put_user(cpu->pending_notify, user)) |
203 | return sizeof(cpu->pending_notify); | 203 | return -EFAULT; |
204 | return sizeof(cpu->pending_notify); | ||
205 | } | ||
204 | } | 206 | } |
205 | 207 | ||
206 | /* Check for signals */ | 208 | /* Check for signals */ |
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 040cb70780e7..32fefdc6ad3e 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
@@ -82,6 +82,16 @@ struct lg_cpu { | |||
82 | struct lg_cpu_arch arch; | 82 | struct lg_cpu_arch arch; |
83 | }; | 83 | }; |
84 | 84 | ||
85 | struct lg_eventfd { | ||
86 | unsigned long addr; | ||
87 | struct file *event; | ||
88 | }; | ||
89 | |||
90 | struct lg_eventfd_map { | ||
91 | unsigned int num; | ||
92 | struct lg_eventfd map[]; | ||
93 | }; | ||
94 | |||
85 | /* The private info the thread maintains about the guest. */ | 95 | /* The private info the thread maintains about the guest. */ |
86 | struct lguest | 96 | struct lguest |
87 | { | 97 | { |
@@ -102,6 +112,8 @@ struct lguest | |||
102 | unsigned int stack_pages; | 112 | unsigned int stack_pages; |
103 | u32 tsc_khz; | 113 | u32 tsc_khz; |
104 | 114 | ||
115 | struct lg_eventfd_map *eventfds; | ||
116 | |||
105 | /* Dead? */ | 117 | /* Dead? */ |
106 | const char *dead; | 118 | const char *dead; |
107 | }; | 119 | }; |
@@ -154,6 +166,7 @@ void setup_default_idt_entries(struct lguest_ro_state *state, | |||
154 | void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, | 166 | void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, |
155 | const unsigned long *def); | 167 | const unsigned long *def); |
156 | void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta); | 168 | void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta); |
169 | bool send_notify_to_eventfd(struct lg_cpu *cpu); | ||
157 | void init_clockdev(struct lg_cpu *cpu); | 170 | void init_clockdev(struct lg_cpu *cpu); |
158 | bool check_syscall_vector(struct lguest *lg); | 171 | bool check_syscall_vector(struct lguest *lg); |
159 | int init_interrupts(void); | 172 | int init_interrupts(void); |
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 1982b45bd935..f6bf255f1837 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/miscdevice.h> | 7 | #include <linux/miscdevice.h> |
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/eventfd.h> | ||
11 | #include <linux/file.h> | ||
10 | #include "lg.h" | 12 | #include "lg.h" |
11 | 13 | ||
12 | /*L:055 When something happens, the Waker process needs a way to stop the | 14 | /*L:055 When something happens, the Waker process needs a way to stop the |
@@ -35,6 +37,81 @@ static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input) | |||
35 | } | 37 | } |
36 | } | 38 | } |
37 | 39 | ||
40 | bool send_notify_to_eventfd(struct lg_cpu *cpu) | ||
41 | { | ||
42 | unsigned int i; | ||
43 | struct lg_eventfd_map *map; | ||
44 | |||
45 | /* lg->eventfds is RCU-protected */ | ||
46 | rcu_read_lock(); | ||
47 | map = rcu_dereference(cpu->lg->eventfds); | ||
48 | for (i = 0; i < map->num; i++) { | ||
49 | if (map->map[i].addr == cpu->pending_notify) { | ||
50 | eventfd_signal(map->map[i].event, 1); | ||
51 | cpu->pending_notify = 0; | ||
52 | break; | ||
53 | } | ||
54 | } | ||
55 | rcu_read_unlock(); | ||
56 | return cpu->pending_notify == 0; | ||
57 | } | ||
58 | |||
59 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | ||
60 | { | ||
61 | struct lg_eventfd_map *new, *old = lg->eventfds; | ||
62 | |||
63 | if (!addr) | ||
64 | return -EINVAL; | ||
65 | |||
66 | /* Replace the old array with the new one, carefully: others can | ||
67 | * be accessing it at the same time */ | ||
68 | new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), | ||
69 | GFP_KERNEL); | ||
70 | if (!new) | ||
71 | return -ENOMEM; | ||
72 | |||
73 | /* First make identical copy. */ | ||
74 | memcpy(new->map, old->map, sizeof(old->map[0]) * old->num); | ||
75 | new->num = old->num; | ||
76 | |||
77 | /* Now append new entry. */ | ||
78 | new->map[new->num].addr = addr; | ||
79 | new->map[new->num].event = eventfd_fget(fd); | ||
80 | if (IS_ERR(new->map[new->num].event)) { | ||
81 | kfree(new); | ||
82 | return PTR_ERR(new->map[new->num].event); | ||
83 | } | ||
84 | new->num++; | ||
85 | |||
86 | /* Now put new one in place. */ | ||
87 | rcu_assign_pointer(lg->eventfds, new); | ||
88 | |||
89 | /* We're not in a big hurry. Wait until noone's looking at old | ||
90 | * version, then delete it. */ | ||
91 | synchronize_rcu(); | ||
92 | kfree(old); | ||
93 | |||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | ||
98 | { | ||
99 | unsigned long addr, fd; | ||
100 | int err; | ||
101 | |||
102 | if (get_user(addr, input) != 0) | ||
103 | return -EFAULT; | ||
104 | input++; | ||
105 | if (get_user(fd, input) != 0) | ||
106 | return -EFAULT; | ||
107 | |||
108 | mutex_lock(&lguest_lock); | ||
109 | err = add_eventfd(lg, addr, fd); | ||
110 | mutex_unlock(&lguest_lock); | ||
111 | |||
112 | return 0; | ||
113 | } | ||
114 | |||
38 | /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt | 115 | /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt |
39 | * number to /dev/lguest. */ | 116 | * number to /dev/lguest. */ |
40 | static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) | 117 | static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) |
@@ -184,6 +261,13 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
184 | goto unlock; | 261 | goto unlock; |
185 | } | 262 | } |
186 | 263 | ||
264 | lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL); | ||
265 | if (!lg->eventfds) { | ||
266 | err = -ENOMEM; | ||
267 | goto free_lg; | ||
268 | } | ||
269 | lg->eventfds->num = 0; | ||
270 | |||
187 | /* Populate the easy fields of our "struct lguest" */ | 271 | /* Populate the easy fields of our "struct lguest" */ |
188 | lg->mem_base = (void __user *)args[0]; | 272 | lg->mem_base = (void __user *)args[0]; |
189 | lg->pfn_limit = args[1]; | 273 | lg->pfn_limit = args[1]; |
@@ -191,7 +275,7 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
191 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ | 275 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ |
192 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); | 276 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); |
193 | if (err) | 277 | if (err) |
194 | goto release_guest; | 278 | goto free_eventfds; |
195 | 279 | ||
196 | /* Initialize the Guest's shadow page tables, using the toplevel | 280 | /* Initialize the Guest's shadow page tables, using the toplevel |
197 | * address the Launcher gave us. This allocates memory, so can fail. */ | 281 | * address the Launcher gave us. This allocates memory, so can fail. */ |
@@ -210,7 +294,9 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
210 | free_regs: | 294 | free_regs: |
211 | /* FIXME: This should be in free_vcpu */ | 295 | /* FIXME: This should be in free_vcpu */ |
212 | free_page(lg->cpus[0].regs_page); | 296 | free_page(lg->cpus[0].regs_page); |
213 | release_guest: | 297 | free_eventfds: |
298 | kfree(lg->eventfds); | ||
299 | free_lg: | ||
214 | kfree(lg); | 300 | kfree(lg); |
215 | unlock: | 301 | unlock: |
216 | mutex_unlock(&lguest_lock); | 302 | mutex_unlock(&lguest_lock); |
@@ -260,6 +346,8 @@ static ssize_t write(struct file *file, const char __user *in, | |||
260 | return user_send_irq(cpu, input); | 346 | return user_send_irq(cpu, input); |
261 | case LHREQ_BREAK: | 347 | case LHREQ_BREAK: |
262 | return break_guest_out(cpu, input); | 348 | return break_guest_out(cpu, input); |
349 | case LHREQ_EVENTFD: | ||
350 | return attach_eventfd(lg, input); | ||
263 | default: | 351 | default: |
264 | return -EINVAL; | 352 | return -EINVAL; |
265 | } | 353 | } |
@@ -297,6 +385,12 @@ static int close(struct inode *inode, struct file *file) | |||
297 | * the Launcher's memory management structure. */ | 385 | * the Launcher's memory management structure. */ |
298 | mmput(lg->cpus[i].mm); | 386 | mmput(lg->cpus[i].mm); |
299 | } | 387 | } |
388 | |||
389 | /* Release any eventfds they registered. */ | ||
390 | for (i = 0; i < lg->eventfds->num; i++) | ||
391 | fput(lg->eventfds->map[i].event); | ||
392 | kfree(lg->eventfds); | ||
393 | |||
300 | /* If lg->dead doesn't contain an error code it will be NULL or a | 394 | /* If lg->dead doesn't contain an error code it will be NULL or a |
301 | * kmalloc()ed string, either of which is ok to hand to kfree(). */ | 395 | * kmalloc()ed string, either of which is ok to hand to kfree(). */ |
302 | if (!IS_ERR(lg->dead)) | 396 | if (!IS_ERR(lg->dead)) |
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index a53407a4165c..9de964b90586 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h | |||
@@ -58,6 +58,7 @@ enum lguest_req | |||
58 | LHREQ_GETDMA, /* No longer used */ | 58 | LHREQ_GETDMA, /* No longer used */ |
59 | LHREQ_IRQ, /* + irq */ | 59 | LHREQ_IRQ, /* + irq */ |
60 | LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ | 60 | LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ |
61 | LHREQ_EVENTFD, /* + address, fd. */ | ||
61 | }; | 62 | }; |
62 | 63 | ||
63 | /* The alignment to use between consumer and producer parts of vring. | 64 | /* The alignment to use between consumer and producer parts of vring. |