author | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-21 21:24:10 -0400
committer | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-23 01:49:55 -0400
commit | 15045275c32bf6d15d32c2eca8157be9c0ba6e45 (patch)
tree | 32ef90c875b22cb1bbb94e38f557a690f1c0c6f8 /drivers
parent | 0ca49ca946409f87a8cd0b14d5acb6dea58de6f3 (diff)
Remove old lguest I/O infrastructure.
This patch gets rid of the old lguest host I/O infrastructure and
replaces it with a single hypercall "LHCALL_NOTIFY" which takes an
address.
The main change is the removal of io.c: that mainly did inter-guest
I/O, which virtio doesn't yet support.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
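A rough sketch of what the Guest-side caller looks like under the new scheme (the Guest half is not part of this patch, and the hcall() wrapper is assumed from lguest's guest-side paravirt code):

```c
/* Illustrative only -- not from this patch.  The Guest hands the Host a
 * single address via LHCALL_NOTIFY; do_hcall() below stores it in
 * lg->pending_notify, and the Launcher's read() on /dev/lguest returns it. */
static void notify_launcher(unsigned long addr)
{
	hcall(LHCALL_NOTIFY, addr, 0, 0);	/* assumed guest-side wrapper */
}
```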
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/lguest/Makefile | 2
-rw-r--r-- | drivers/lguest/core.c | 12
-rw-r--r-- | drivers/lguest/hypercalls.c | 26
-rw-r--r-- | drivers/lguest/io.c | 628
-rw-r--r-- | drivers/lguest/lg.h | 27
-rw-r--r-- | drivers/lguest/lguest_user.c | 39
6 files changed, 19 insertions, 715 deletions
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile
index 8c28236ee1a..a63f75dc41a 100644
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile
@@ -1,7 +1,7 @@
1 | # Host requires the other files, which can be a module. | 1 | # Host requires the other files, which can be a module. |
2 | obj-$(CONFIG_LGUEST) += lg.o | 2 | obj-$(CONFIG_LGUEST) += lg.o |
3 | lg-y = core.o hypercalls.o page_tables.o interrupts_and_traps.o \ | 3 | lg-y = core.o hypercalls.o page_tables.o interrupts_and_traps.o \ |
4 | segments.o io.o lguest_user.o | 4 | segments.o lguest_user.o |
5 | 5 | ||
6 | lg-$(CONFIG_X86_32) += x86/switcher_32.o x86/core.o | 6 | lg-$(CONFIG_X86_32) += x86/switcher_32.o x86/core.o |
7 | 7 | ||
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 41b26e592d3..3aec29ec771 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -202,13 +202,12 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
202 | if (lg->hcall) | 202 | if (lg->hcall) |
203 | do_hypercalls(lg); | 203 | do_hypercalls(lg); |
204 | 204 | ||
205 | /* It's possible the Guest did a SEND_DMA hypercall to the | 205 | /* It's possible the Guest did a NOTIFY hypercall to the |
206 | * Launcher, in which case we return from the read() now. */ | 206 | * Launcher, in which case we return from the read() now. */ |
207 | if (lg->dma_is_pending) { | 207 | if (lg->pending_notify) { |
208 | if (put_user(lg->pending_dma, user) || | 208 | if (put_user(lg->pending_notify, user)) |
209 | put_user(lg->pending_key, user+1)) | ||
210 | return -EFAULT; | 209 | return -EFAULT; |
211 | return sizeof(unsigned long)*2; | 210 | return sizeof(lg->pending_notify); |
212 | } | 211 | } |
213 | 212 | ||
214 | /* Check for signals */ | 213 | /* Check for signals */ |
@@ -288,9 +287,6 @@ static int __init init(void)
288 | if (err) | 287 | if (err) |
289 | goto unmap; | 288 | goto unmap; |
290 | 289 | ||
291 | /* The I/O subsystem needs some things initialized. */ | ||
292 | lguest_io_init(); | ||
293 | |||
294 | /* We might need to reserve an interrupt vector. */ | 290 | /* We might need to reserve an interrupt vector. */ |
295 | err = init_interrupts(); | 291 | err = init_interrupts(); |
296 | if (err) | 292 | if (err) |
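For context, the Launcher-side contract implied by the run_guest() change above would look roughly like this (hypothetical userspace sketch; lguest_fd and the handling of other return values are placeholders, not code from this patch):

```c
#include <stdio.h>
#include <unistd.h>

/* Hypothetical Launcher loop: lguest_fd is an already-open fd on
 * /dev/lguest.  With this patch a successful read() returns a single
 * unsigned long -- the address the Guest passed to LHCALL_NOTIFY --
 * instead of the old { pending_dma, pending_key } pair. */
static void run_and_handle_notify(int lguest_fd)
{
	unsigned long notify_addr;

	for (;;) {
		ssize_t r = read(lguest_fd, &notify_addr, sizeof(notify_addr));

		if (r != sizeof(notify_addr))
			break;	/* signal, Guest death, etc.: not handled here */

		printf("Guest notified address %#lx\n", notify_addr);
		/* A real Launcher would match this address to a device. */
	}
}
```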
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 13b5f2f813d..3a53788ba45 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -60,22 +60,9 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
60 | else | 60 | else |
61 | guest_pagetable_flush_user(lg); | 61 | guest_pagetable_flush_user(lg); |
62 | break; | 62 | break; |
63 | case LHCALL_BIND_DMA: | ||
64 | /* BIND_DMA really wants four arguments, but it's the only call | ||
65 | * which does. So the Guest packs the number of buffers and | ||
66 | * the interrupt number into the final argument, and we decode | ||
67 | * it here. This can legitimately fail, since we currently | ||
68 | * place a limit on the number of DMA pools a Guest can have. | ||
69 | * So we return true or false from this call. */ | ||
70 | args->arg0 = bind_dma(lg, args->arg1, args->arg2, | ||
71 | args->arg3 >> 8, args->arg3 & 0xFF); | ||
72 | break; | ||
73 | 63 | ||
74 | /* All these calls simply pass the arguments through to the right | 64 | /* All these calls simply pass the arguments through to the right |
75 | * routines. */ | 65 | * routines. */ |
76 | case LHCALL_SEND_DMA: | ||
77 | send_dma(lg, args->arg1, args->arg2); | ||
78 | break; | ||
79 | case LHCALL_NEW_PGTABLE: | 66 | case LHCALL_NEW_PGTABLE: |
80 | guest_new_pagetable(lg, args->arg1); | 67 | guest_new_pagetable(lg, args->arg1); |
81 | break; | 68 | break; |
@@ -99,6 +86,9 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
99 | /* Similarly, this sets the halted flag for run_guest(). */ | 86 | /* Similarly, this sets the halted flag for run_guest(). */ |
100 | lg->halted = 1; | 87 | lg->halted = 1; |
101 | break; | 88 | break; |
89 | case LHCALL_NOTIFY: | ||
90 | lg->pending_notify = args->arg1; | ||
91 | break; | ||
102 | default: | 92 | default: |
103 | if (lguest_arch_do_hcall(lg, args)) | 93 | if (lguest_arch_do_hcall(lg, args)) |
104 | kill_guest(lg, "Bad hypercall %li\n", args->arg0); | 94 | kill_guest(lg, "Bad hypercall %li\n", args->arg0); |
@@ -156,9 +146,9 @@ static void do_async_hcalls(struct lguest *lg)
156 | break; | 146 | break; |
157 | } | 147 | } |
158 | 148 | ||
159 | /* Stop doing hypercalls if we've just done a DMA to the | 149 | /* Stop doing hypercalls if they want to notify the Launcher: |
160 | * Launcher: it needs to service this first. */ | 150 | * it needs to service this first. */ |
161 | if (lg->dma_is_pending) | 151 | if (lg->pending_notify) |
162 | break; | 152 | break; |
163 | } | 153 | } |
164 | } | 154 | } |
@@ -220,9 +210,9 @@ void do_hypercalls(struct lguest *lg)
220 | do_async_hcalls(lg); | 210 | do_async_hcalls(lg); |
221 | 211 | ||
222 | /* If we stopped reading the hypercall ring because the Guest did a | 212 | /* If we stopped reading the hypercall ring because the Guest did a |
223 | * SEND_DMA to the Launcher, we want to return now. Otherwise we do | 213 | * NOTIFY to the Launcher, we want to return now. Otherwise we do |
224 | * the hypercall. */ | 214 | * the hypercall. */ |
225 | if (!lg->dma_is_pending) { | 215 | if (!lg->pending_notify) { |
226 | do_hcall(lg, lg->hcall); | 216 | do_hcall(lg, lg->hcall); |
227 | /* Tricky point: we reset the hcall pointer to mark the | 217 | /* Tricky point: we reset the hcall pointer to mark the |
228 | * hypercall as "done". We use the hcall pointer rather than | 218 | * hypercall as "done". We use the hcall pointer rather than |
diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c
deleted file mode 100644
index 0e842e9caf6..00000000000
--- a/drivers/lguest/io.c
+++ /dev/null
@@ -1,628 +0,0 @@
1 | /*P:300 The I/O mechanism in lguest is simple yet flexible, allowing the Guest | ||
2 | * to talk to the Launcher or directly to another Guest. It uses familiar | ||
3 | * concepts of DMA and interrupts, plus some neat code stolen from | ||
4 | * futexes... :*/ | ||
5 | |||
6 | /* Copyright (C) 2006 Rusty Russell IBM Corporation | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | #include <linux/types.h> | ||
23 | #include <linux/futex.h> | ||
24 | #include <linux/jhash.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/highmem.h> | ||
27 | #include <linux/uaccess.h> | ||
28 | #include "lg.h" | ||
29 | |||
30 | /*L:300 | ||
31 | * I/O | ||
32 | * | ||
33 | * Getting data in and out of the Guest is quite an art. There are numerous | ||
34 | * ways to do it, and they all suck differently. We try to keep things fairly | ||
35 | * close to "real" hardware so our Guest's drivers don't look like an alien | ||
36 | * visitation in the middle of the Linux code, and yet make sure that Guests | ||
37 | * can talk directly to other Guests, not just the Launcher. | ||
38 | * | ||
39 | * To do this, the Guest gives us a key when it binds or sends DMA buffers. | ||
40 | * The key corresponds to a "physical" address inside the Guest (ie. a virtual | ||
41 | * address inside the Launcher process). We don't, however, use this key | ||
42 | * directly. | ||
43 | * | ||
44 | * We want Guests which share memory to be able to DMA to each other: two | ||
45 | * Launchers can mmap memory the same file, then the Guests can communicate. | ||
46 | * Fortunately, the futex code provides us with a way to get a "union | ||
47 | * futex_key" corresponding to the memory lying at a virtual address: if the | ||
48 | * two processes share memory, the "union futex_key" for that memory will match | ||
49 | * even if the memory is mapped at different addresses in each. So we always | ||
50 | * convert the keys to "union futex_key"s to compare them. | ||
51 | * | ||
52 | * Before we dive into this though, we need to look at another set of helper | ||
53 | * routines used throughout the Host kernel code to access Guest memory. | ||
54 | :*/ | ||
55 | static struct list_head dma_hash[61]; | ||
56 | |||
57 | /* An unfortunate side effect of the Linux double-linked list implementation is | ||
58 | * that there's no good way to statically initialize an array of linked | ||
59 | * lists. */ | ||
60 | void lguest_io_init(void) | ||
61 | { | ||
62 | unsigned int i; | ||
63 | |||
64 | for (i = 0; i < ARRAY_SIZE(dma_hash); i++) | ||
65 | INIT_LIST_HEAD(&dma_hash[i]); | ||
66 | } | ||
67 | |||
68 | /* FIXME: allow multi-page lengths. */ | ||
69 | static int check_dma_list(struct lguest *lg, const struct lguest_dma *dma) | ||
70 | { | ||
71 | unsigned int i; | ||
72 | |||
73 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { | ||
74 | if (!dma->len[i]) | ||
75 | return 1; | ||
76 | if (!lguest_address_ok(lg, dma->addr[i], dma->len[i])) | ||
77 | goto kill; | ||
78 | if (dma->len[i] > PAGE_SIZE) | ||
79 | goto kill; | ||
80 | /* We could do over a page, but is it worth it? */ | ||
81 | if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE) | ||
82 | goto kill; | ||
83 | } | ||
84 | return 1; | ||
85 | |||
86 | kill: | ||
87 | kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]); | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | /*L:330 This is our hash function, using the wonderful Jenkins hash. | ||
92 | * | ||
93 | * The futex key is a union with three parts: an unsigned long word, a pointer, | ||
94 | * and an int "offset". We could use jhash_2words() which takes three u32s. | ||
95 | * (Ok, the hash functions are great: the naming sucks though). | ||
96 | * | ||
97 | * It's nice to be portable to 64-bit platforms, so we use the more generic | ||
98 | * jhash2(), which takes an array of u32, the number of u32s, and an initial | ||
99 | * u32 to roll in. This is uglier, but breaks down to almost the same code on | ||
100 | * 32-bit platforms like this one. | ||
101 | * | ||
102 | * We want a position in the array, so we modulo ARRAY_SIZE(dma_hash) (ie. 61). | ||
103 | */ | ||
104 | static unsigned int hash(const union futex_key *key) | ||
105 | { | ||
106 | return jhash2((u32*)&key->both.word, | ||
107 | (sizeof(key->both.word)+sizeof(key->both.ptr))/4, | ||
108 | key->both.offset) | ||
109 | % ARRAY_SIZE(dma_hash); | ||
110 | } | ||
111 | |||
112 | /* This is a convenience routine to compare two keys. It's a much bemoaned C | ||
113 | * weakness that it doesn't allow '==' on structures or unions, so we have to | ||
114 | * open-code it like this. */ | ||
115 | static inline int key_eq(const union futex_key *a, const union futex_key *b) | ||
116 | { | ||
117 | return (a->both.word == b->both.word | ||
118 | && a->both.ptr == b->both.ptr | ||
119 | && a->both.offset == b->both.offset); | ||
120 | } | ||
121 | |||
122 | /*L:360 OK, when we need to actually free up a Guest's DMA array we do several | ||
123 | * things, so we have a convenient function to do it. | ||
124 | * | ||
125 | * The caller must hold a read lock on dmainfo owner's current->mm->mmap_sem | ||
126 | * for the drop_futex_key_refs(). */ | ||
127 | static void unlink_dma(struct lguest_dma_info *dmainfo) | ||
128 | { | ||
129 | /* You locked this too, right? */ | ||
130 | BUG_ON(!mutex_is_locked(&lguest_lock)); | ||
131 | /* This is how we know that the entry is free. */ | ||
132 | dmainfo->interrupt = 0; | ||
133 | /* Remove it from the hash table. */ | ||
134 | list_del(&dmainfo->list); | ||
135 | /* Drop the references we were holding (to the inode or mm). */ | ||
136 | drop_futex_key_refs(&dmainfo->key); | ||
137 | } | ||
138 | |||
139 | /*L:350 This is the routine which we call when the Guest asks to unregister a | ||
140 | * DMA array attached to a given key. Returns true if the array was found. */ | ||
141 | static int unbind_dma(struct lguest *lg, | ||
142 | const union futex_key *key, | ||
143 | unsigned long dmas) | ||
144 | { | ||
145 | int i, ret = 0; | ||
146 | |||
147 | /* We don't bother with the hash table, just look through all this | ||
148 | * Guest's DMA arrays. */ | ||
149 | for (i = 0; i < LGUEST_MAX_DMA; i++) { | ||
150 | /* In theory it could have more than one array on the same key, | ||
151 | * or one array on multiple keys, so we check both */ | ||
152 | if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { | ||
153 | unlink_dma(&lg->dma[i]); | ||
154 | ret = 1; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | return ret; | ||
159 | } | ||
160 | |||
161 | /*L:340 BIND_DMA: this is the hypercall which sets up an array of "struct | ||
162 | * lguest_dma" for receiving I/O. | ||
163 | * | ||
164 | * The Guest wants to bind an array of "struct lguest_dma"s to a particular key | ||
165 | * to receive input. This only happens when the Guest is setting up a new | ||
166 | * device, so it doesn't have to be very fast. | ||
167 | * | ||
168 | * It returns 1 on a successful registration (it can fail if we hit the limit | ||
169 | * of registrations for this Guest). | ||
170 | */ | ||
171 | int bind_dma(struct lguest *lg, | ||
172 | unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) | ||
173 | { | ||
174 | unsigned int i; | ||
175 | int ret = 0; | ||
176 | union futex_key key; | ||
177 | /* Futex code needs the mmap_sem. */ | ||
178 | struct rw_semaphore *fshared = ¤t->mm->mmap_sem; | ||
179 | |||
180 | /* Invalid interrupt? (We could kill the guest here). */ | ||
181 | if (interrupt >= LGUEST_IRQS) | ||
182 | return 0; | ||
183 | |||
184 | /* We need to grab the Big Lguest Lock, because other Guests may be | ||
185 | * trying to look through this Guest's DMAs to send something while | ||
186 | * we're doing this. */ | ||
187 | mutex_lock(&lguest_lock); | ||
188 | down_read(fshared); | ||
189 | if (get_futex_key(lg->mem_base + ukey, fshared, &key) != 0) { | ||
190 | kill_guest(lg, "bad dma key %#lx", ukey); | ||
191 | goto unlock; | ||
192 | } | ||
193 | |||
194 | /* We want to keep this key valid once we drop mmap_sem, so we have to | ||
195 | * hold a reference. */ | ||
196 | get_futex_key_refs(&key); | ||
197 | |||
198 | /* If the Guest specified an interrupt of 0, that means they want to | ||
199 | * unregister this array of "struct lguest_dma"s. */ | ||
200 | if (interrupt == 0) | ||
201 | ret = unbind_dma(lg, &key, dmas); | ||
202 | else { | ||
203 | /* Look through this Guest's dma array for an unused entry. */ | ||
204 | for (i = 0; i < LGUEST_MAX_DMA; i++) { | ||
205 | /* If the interrupt is non-zero, the entry is already | ||
206 | * used. */ | ||
207 | if (lg->dma[i].interrupt) | ||
208 | continue; | ||
209 | |||
210 | /* OK, a free one! Fill on our details. */ | ||
211 | lg->dma[i].dmas = dmas; | ||
212 | lg->dma[i].num_dmas = numdmas; | ||
213 | lg->dma[i].next_dma = 0; | ||
214 | lg->dma[i].key = key; | ||
215 | lg->dma[i].owner = lg; | ||
216 | lg->dma[i].interrupt = interrupt; | ||
217 | |||
218 | /* Now we add it to the hash table: the position | ||
219 | * depends on the futex key that we got. */ | ||
220 | list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); | ||
221 | /* Success! */ | ||
222 | ret = 1; | ||
223 | goto unlock; | ||
224 | } | ||
225 | } | ||
226 | /* If we didn't find a slot to put the key in, drop the reference | ||
227 | * again. */ | ||
228 | drop_futex_key_refs(&key); | ||
229 | unlock: | ||
230 | /* Unlock and out. */ | ||
231 | up_read(fshared); | ||
232 | mutex_unlock(&lguest_lock); | ||
233 | return ret; | ||
234 | } | ||
235 | |||
236 | /*L:385 Note that our routines to access a different Guest's memory are called | ||
237 | * lgread_other() and lgwrite_other(): these names emphasize that they are only | ||
238 | * used when the Guest is *not* the current Guest. | ||
239 | * | ||
240 | * The interface for copying from another process's memory is called | ||
241 | * access_process_vm(), with a final argument of 0 for a read, and 1 for a | ||
242 | * write. | ||
243 | * | ||
244 | * We need lgread_other() to read the destination Guest's "struct lguest_dma" | ||
245 | * array. */ | ||
246 | static int lgread_other(struct lguest *lg, | ||
247 | void *buf, u32 addr, unsigned bytes) | ||
248 | { | ||
249 | if (!lguest_address_ok(lg, addr, bytes) | ||
250 | || access_process_vm(lg->tsk, (unsigned long)lg->mem_base + addr, | ||
251 | buf, bytes, 0) != bytes) { | ||
252 | memset(buf, 0, bytes); | ||
253 | kill_guest(lg, "bad address in registered DMA struct"); | ||
254 | return 0; | ||
255 | } | ||
256 | return 1; | ||
257 | } | ||
258 | |||
259 | /* "lgwrite()" to another Guest: used to update the destination "used_len" once | ||
260 | * we've transferred data into the buffer. */ | ||
261 | static int lgwrite_other(struct lguest *lg, u32 addr, | ||
262 | const void *buf, unsigned bytes) | ||
263 | { | ||
264 | if (!lguest_address_ok(lg, addr, bytes) | ||
265 | || access_process_vm(lg->tsk, (unsigned long)lg->mem_base + addr, | ||
266 | (void *)buf, bytes, 1) != bytes) { | ||
267 | kill_guest(lg, "bad address writing to registered DMA"); | ||
268 | return 0; | ||
269 | } | ||
270 | return 1; | ||
271 | } | ||
272 | |||
273 | /*L:400 This is the generic engine which copies from a source "struct | ||
274 | * lguest_dma" from this Guest into another Guest's "struct lguest_dma". The | ||
275 | * destination Guest's pages have already been mapped, as contained in the | ||
276 | * pages array. | ||
277 | * | ||
278 | * If you're wondering if there's a nice "copy from one process to another" | ||
279 | * routine, so was I. But Linux isn't really set up to copy between two | ||
280 | * unrelated processes, so we have to write it ourselves. | ||
281 | */ | ||
282 | static u32 copy_data(struct lguest *srclg, | ||
283 | const struct lguest_dma *src, | ||
284 | const struct lguest_dma *dst, | ||
285 | struct page *pages[]) | ||
286 | { | ||
287 | unsigned int totlen, si, di, srcoff, dstoff; | ||
288 | void *maddr = NULL; | ||
289 | |||
290 | /* We return the total length transferred. */ | ||
291 | totlen = 0; | ||
292 | |||
293 | /* We keep indexes into the source and destination "struct lguest_dma", | ||
294 | * and an offset within each region. */ | ||
295 | si = di = 0; | ||
296 | srcoff = dstoff = 0; | ||
297 | |||
298 | /* We loop until the source or destination is exhausted. */ | ||
299 | while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] | ||
300 | && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { | ||
301 | /* We can only transfer the rest of the src buffer, or as much | ||
302 | * as will fit into the destination buffer. */ | ||
303 | u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff); | ||
304 | |||
305 | /* For systems using "highmem" we need to use kmap() to access | ||
306 | * the page we want. We often use the same page over and over, | ||
307 | * so rather than kmap() it on every loop, we set the maddr | ||
308 | * pointer to NULL when we need to move to the next | ||
309 | * destination page. */ | ||
310 | if (!maddr) | ||
311 | maddr = kmap(pages[di]); | ||
312 | |||
313 | /* Copy directly from (this Guest's) source address to the | ||
314 | * destination Guest's kmap()ed buffer. Note that maddr points | ||
315 | * to the start of the page: we need to add the offset of the | ||
316 | * destination address and offset within the buffer. */ | ||
317 | |||
318 | /* FIXME: This is not completely portable. I looked at | ||
319 | * copy_to_user_page(), and some arch's seem to need special | ||
320 | * flushes. x86 is fine. */ | ||
321 | if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, | ||
322 | srclg->mem_base+src->addr[si], len) != 0) { | ||
323 | /* If a copy failed, it's the source's fault. */ | ||
324 | kill_guest(srclg, "bad address in sending DMA"); | ||
325 | totlen = 0; | ||
326 | break; | ||
327 | } | ||
328 | |||
329 | /* Increment the total and src & dst offsets */ | ||
330 | totlen += len; | ||
331 | srcoff += len; | ||
332 | dstoff += len; | ||
333 | |||
334 | /* Presumably we reached the end of the src or dest buffers: */ | ||
335 | if (srcoff == src->len[si]) { | ||
336 | /* Move to the next buffer at offset 0 */ | ||
337 | si++; | ||
338 | srcoff = 0; | ||
339 | } | ||
340 | if (dstoff == dst->len[di]) { | ||
341 | /* We need to unmap that destination page and reset | ||
342 | * maddr ready for the next one. */ | ||
343 | kunmap(pages[di]); | ||
344 | maddr = NULL; | ||
345 | di++; | ||
346 | dstoff = 0; | ||
347 | } | ||
348 | } | ||
349 | |||
350 | /* If we still had a page mapped at the end, unmap now. */ | ||
351 | if (maddr) | ||
352 | kunmap(pages[di]); | ||
353 | |||
354 | return totlen; | ||
355 | } | ||
356 | |||
357 | /*L:390 This is how we transfer a "struct lguest_dma" from the source Guest | ||
358 | * (the current Guest which called SEND_DMA) to another Guest. */ | ||
359 | static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, | ||
360 | struct lguest *dstlg, const struct lguest_dma *dst) | ||
361 | { | ||
362 | int i; | ||
363 | u32 ret; | ||
364 | struct page *pages[LGUEST_MAX_DMA_SECTIONS]; | ||
365 | |||
366 | /* We check that both source and destination "struct lguest_dma"s are | ||
367 | * within the bounds of the source and destination Guests */ | ||
368 | if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) | ||
369 | return 0; | ||
370 | |||
371 | /* We need to map the pages which correspond to each parts of | ||
372 | * destination buffer. */ | ||
373 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { | ||
374 | if (dst->len[i] == 0) | ||
375 | break; | ||
376 | /* get_user_pages() is a complicated function, especially since | ||
377 | * we only want a single page. But it works, and returns the | ||
378 | * number of pages. Note that we're holding the destination's | ||
379 | * mmap_sem, as get_user_pages() requires. */ | ||
380 | if (get_user_pages(dstlg->tsk, dstlg->mm, | ||
381 | (unsigned long)dstlg->mem_base+dst->addr[i], | ||
382 | 1, 1, 1, pages+i, NULL) | ||
383 | != 1) { | ||
384 | /* This means the destination gave us a bogus buffer */ | ||
385 | kill_guest(dstlg, "Error mapping DMA pages"); | ||
386 | ret = 0; | ||
387 | goto drop_pages; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | /* Now copy the data until we run out of src or dst. */ | ||
392 | ret = copy_data(srclg, src, dst, pages); | ||
393 | |||
394 | drop_pages: | ||
395 | while (--i >= 0) | ||
396 | put_page(pages[i]); | ||
397 | return ret; | ||
398 | } | ||
399 | |||
400 | /*L:380 Transferring data from one Guest to another is not as simple as I'd | ||
401 | * like. We've found the "struct lguest_dma_info" bound to the same address as | ||
402 | * the send, we need to copy into it. | ||
403 | * | ||
404 | * This function returns true if the destination array was empty. */ | ||
405 | static int dma_transfer(struct lguest *srclg, | ||
406 | unsigned long udma, | ||
407 | struct lguest_dma_info *dst) | ||
408 | { | ||
409 | struct lguest_dma dst_dma, src_dma; | ||
410 | struct lguest *dstlg; | ||
411 | u32 i, dma = 0; | ||
412 | |||
413 | /* From the "struct lguest_dma_info" we found in the hash, grab the | ||
414 | * Guest. */ | ||
415 | dstlg = dst->owner; | ||
416 | /* Read in the source "struct lguest_dma" handed to SEND_DMA. */ | ||
417 | lgread(srclg, &src_dma, udma, sizeof(src_dma)); | ||
418 | |||
419 | /* We need the destination's mmap_sem, and we already hold the source's | ||
420 | * mmap_sem for the futex key lookup. Normally this would suggest that | ||
421 | * we could deadlock if the destination Guest was trying to send to | ||
422 | * this source Guest at the same time, which is another reason that all | ||
423 | * I/O is done under the big lguest_lock. */ | ||
424 | down_read(&dstlg->mm->mmap_sem); | ||
425 | |||
426 | /* Look through the destination DMA array for an available buffer. */ | ||
427 | for (i = 0; i < dst->num_dmas; i++) { | ||
428 | /* We keep a "next_dma" pointer which often helps us avoid | ||
429 | * looking at lots of previously-filled entries. */ | ||
430 | dma = (dst->next_dma + i) % dst->num_dmas; | ||
431 | if (!lgread_other(dstlg, &dst_dma, | ||
432 | dst->dmas + dma * sizeof(struct lguest_dma), | ||
433 | sizeof(dst_dma))) { | ||
434 | goto fail; | ||
435 | } | ||
436 | if (!dst_dma.used_len) | ||
437 | break; | ||
438 | } | ||
439 | |||
440 | /* If we found a buffer, we do the actual data copy. */ | ||
441 | if (i != dst->num_dmas) { | ||
442 | unsigned long used_lenp; | ||
443 | unsigned int ret; | ||
444 | |||
445 | ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); | ||
446 | /* Put used length in the source "struct lguest_dma"'s used_len | ||
447 | * field. It's a little tricky to figure out where that is, | ||
448 | * though. */ | ||
449 | lgwrite_u32(srclg, | ||
450 | udma+offsetof(struct lguest_dma, used_len), ret); | ||
451 | /* Tranferring 0 bytes is OK if the source buffer was empty. */ | ||
452 | if (ret == 0 && src_dma.len[0] != 0) | ||
453 | goto fail; | ||
454 | |||
455 | /* The destination Guest might be running on a different CPU: | ||
456 | * we have to make sure that it will see the "used_len" field | ||
457 | * change to non-zero *after* it sees the data we copied into | ||
458 | * the buffer. Hence a write memory barrier. */ | ||
459 | wmb(); | ||
460 | /* Figuring out where the destination's used_len field for this | ||
461 | * "struct lguest_dma" in the array is also a little ugly. */ | ||
462 | used_lenp = dst->dmas | ||
463 | + dma * sizeof(struct lguest_dma) | ||
464 | + offsetof(struct lguest_dma, used_len); | ||
465 | lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); | ||
466 | /* Move the cursor for next time. */ | ||
467 | dst->next_dma++; | ||
468 | } | ||
469 | up_read(&dstlg->mm->mmap_sem); | ||
470 | |||
471 | /* We trigger the destination interrupt, even if the destination was | ||
472 | * empty and we didn't transfer anything: this gives them a chance to | ||
473 | * wake up and refill. */ | ||
474 | set_bit(dst->interrupt, dstlg->irqs_pending); | ||
475 | /* Wake up the destination process. */ | ||
476 | wake_up_process(dstlg->tsk); | ||
477 | /* If we passed the last "struct lguest_dma", the receive had no | ||
478 | * buffers left. */ | ||
479 | return i == dst->num_dmas; | ||
480 | |||
481 | fail: | ||
482 | up_read(&dstlg->mm->mmap_sem); | ||
483 | return 0; | ||
484 | } | ||
485 | |||
486 | /*L:370 This is the counter-side to the BIND_DMA hypercall; the SEND_DMA | ||
487 | * hypercall. We find out who's listening, and send to them. */ | ||
488 | void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) | ||
489 | { | ||
490 | union futex_key key; | ||
491 | int empty = 0; | ||
492 | struct rw_semaphore *fshared = ¤t->mm->mmap_sem; | ||
493 | |||
494 | again: | ||
495 | mutex_lock(&lguest_lock); | ||
496 | down_read(fshared); | ||
497 | /* Get the futex key for the key the Guest gave us */ | ||
498 | if (get_futex_key(lg->mem_base + ukey, fshared, &key) != 0) { | ||
499 | kill_guest(lg, "bad sending DMA key"); | ||
500 | goto unlock; | ||
501 | } | ||
502 | /* Since the key must be a multiple of 4, the futex key uses the lower | ||
503 | * bit of the "offset" field (which would always be 0) to indicate a | ||
504 | * mapping which is shared with other processes (ie. Guests). */ | ||
505 | if (key.shared.offset & 1) { | ||
506 | struct lguest_dma_info *i; | ||
507 | /* Look through the hash for other Guests. */ | ||
508 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { | ||
509 | /* Don't send to ourselves (would deadlock). */ | ||
510 | if (i->owner->mm == lg->mm) | ||
511 | continue; | ||
512 | if (!key_eq(&key, &i->key)) | ||
513 | continue; | ||
514 | |||
515 | /* If dma_transfer() tells us the destination has no | ||
516 | * available buffers, we increment "empty". */ | ||
517 | empty += dma_transfer(lg, udma, i); | ||
518 | break; | ||
519 | } | ||
520 | /* If the destination is empty, we release our locks and | ||
521 | * give the destination Guest a brief chance to restock. */ | ||
522 | if (empty == 1) { | ||
523 | /* Give any recipients one chance to restock. */ | ||
524 | up_read(¤t->mm->mmap_sem); | ||
525 | mutex_unlock(&lguest_lock); | ||
526 | /* Next time, we won't try again. */ | ||
527 | empty++; | ||
528 | goto again; | ||
529 | } | ||
530 | } else { | ||
531 | /* Private mapping: Guest is sending to its Launcher. We set | ||
532 | * the "dma_is_pending" flag so that the main loop will exit | ||
533 | * and the Launcher's read() from /dev/lguest will return. */ | ||
534 | lg->dma_is_pending = 1; | ||
535 | lg->pending_dma = udma; | ||
536 | lg->pending_key = ukey; | ||
537 | } | ||
538 | unlock: | ||
539 | up_read(fshared); | ||
540 | mutex_unlock(&lguest_lock); | ||
541 | } | ||
542 | /*:*/ | ||
543 | |||
544 | void release_all_dma(struct lguest *lg) | ||
545 | { | ||
546 | unsigned int i; | ||
547 | |||
548 | BUG_ON(!mutex_is_locked(&lguest_lock)); | ||
549 | |||
550 | down_read(&lg->mm->mmap_sem); | ||
551 | for (i = 0; i < LGUEST_MAX_DMA; i++) { | ||
552 | if (lg->dma[i].interrupt) | ||
553 | unlink_dma(&lg->dma[i]); | ||
554 | } | ||
555 | up_read(&lg->mm->mmap_sem); | ||
556 | } | ||
557 | |||
558 | /*M:007 We only return a single DMA buffer to the Launcher, but it would be | ||
559 | * more efficient to return a pointer to the entire array of DMA buffers, which | ||
560 | * it can cache and choose one whenever it wants. | ||
561 | * | ||
562 | * Currently the Launcher uses a write to /dev/lguest, and the return value is | ||
563 | * the address of the DMA structure with the interrupt number placed in | ||
564 | * dma->used_len. If we wanted to return the entire array, we need to return | ||
565 | * the address, array size and interrupt number: this seems to require an | ||
566 | * ioctl(). :*/ | ||
567 | |||
568 | /*L:320 This routine looks for a DMA buffer registered by the Guest on the | ||
569 | * given key (using the BIND_DMA hypercall). */ | ||
570 | unsigned long get_dma_buffer(struct lguest *lg, | ||
571 | unsigned long ukey, unsigned long *interrupt) | ||
572 | { | ||
573 | unsigned long ret = 0; | ||
574 | union futex_key key; | ||
575 | struct lguest_dma_info *i; | ||
576 | struct rw_semaphore *fshared = ¤t->mm->mmap_sem; | ||
577 | |||
578 | /* Take the Big Lguest Lock to stop other Guests sending this Guest DMA | ||
579 | * at the same time. */ | ||
580 | mutex_lock(&lguest_lock); | ||
581 | /* To match between Guests sharing the same underlying memory we steal | ||
582 | * code from the futex infrastructure. This requires that we hold the | ||
583 | * "mmap_sem" for our process (the Launcher), and pass it to the futex | ||
584 | * code. */ | ||
585 | down_read(fshared); | ||
586 | |||
587 | /* This can fail if it's not a valid address, or if the address is not | ||
588 | * divisible by 4 (the futex code needs that, we don't really). */ | ||
589 | if (get_futex_key(lg->mem_base + ukey, fshared, &key) != 0) { | ||
590 | kill_guest(lg, "bad registered DMA buffer"); | ||
591 | goto unlock; | ||
592 | } | ||
593 | /* Search the hash table for matching entries (the Launcher can only | ||
594 | * send to its own Guest for the moment, so the entry must be for this | ||
595 | * Guest) */ | ||
596 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { | ||
597 | if (key_eq(&key, &i->key) && i->owner == lg) { | ||
598 | unsigned int j; | ||
599 | /* Look through the registered DMA array for an | ||
600 | * available buffer. */ | ||
601 | for (j = 0; j < i->num_dmas; j++) { | ||
602 | struct lguest_dma dma; | ||
603 | |||
604 | ret = i->dmas + j * sizeof(struct lguest_dma); | ||
605 | lgread(lg, &dma, ret, sizeof(dma)); | ||
606 | if (dma.used_len == 0) | ||
607 | break; | ||
608 | } | ||
609 | /* Store the interrupt the Guest wants when the buffer | ||
610 | * is used. */ | ||
611 | *interrupt = i->interrupt; | ||
612 | break; | ||
613 | } | ||
614 | } | ||
615 | unlock: | ||
616 | up_read(fshared); | ||
617 | mutex_unlock(&lguest_lock); | ||
618 | return ret; | ||
619 | } | ||
620 | /*:*/ | ||
621 | |||
622 | /*L:410 This really has completed the Launcher. Not only have we now finished | ||
623 | * the longest chapter in our journey, but this also means we are over halfway | ||
624 | * through! | ||
625 | * | ||
626 | * Enough prevaricating around the bush: it is time for us to dive into the | ||
627 | * core of the Host, in "make Host". | ||
628 | */ | ||
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index e4845d7f068..4d45b7036e8 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -5,7 +5,6 @@
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/stringify.h> | 7 | #include <linux/stringify.h> |
8 | #include <linux/futex.h> | ||
9 | #include <linux/lguest.h> | 8 | #include <linux/lguest.h> |
10 | #include <linux/lguest_launcher.h> | 9 | #include <linux/lguest_launcher.h> |
11 | #include <linux/wait.h> | 10 | #include <linux/wait.h> |
@@ -17,17 +16,6 @@
17 | void free_pagetables(void); | 16 | void free_pagetables(void); |
18 | int init_pagetables(struct page **switcher_page, unsigned int pages); | 17 | int init_pagetables(struct page **switcher_page, unsigned int pages); |
19 | 18 | ||
20 | struct lguest_dma_info | ||
21 | { | ||
22 | struct list_head list; | ||
23 | union futex_key key; | ||
24 | unsigned long dmas; | ||
25 | struct lguest *owner; | ||
26 | u16 next_dma; | ||
27 | u16 num_dmas; | ||
28 | u8 interrupt; /* 0 when not registered */ | ||
29 | }; | ||
30 | |||
31 | struct pgdir | 19 | struct pgdir |
32 | { | 20 | { |
33 | unsigned long gpgdir; | 21 | unsigned long gpgdir; |
@@ -90,15 +78,11 @@ struct lguest
90 | struct task_struct *wake; | 78 | struct task_struct *wake; |
91 | 79 | ||
92 | unsigned long noirq_start, noirq_end; | 80 | unsigned long noirq_start, noirq_end; |
93 | int dma_is_pending; | 81 | unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ |
94 | unsigned long pending_dma; /* struct lguest_dma */ | ||
95 | unsigned long pending_key; /* address they're sending to */ | ||
96 | 82 | ||
97 | unsigned int stack_pages; | 83 | unsigned int stack_pages; |
98 | u32 tsc_khz; | 84 | u32 tsc_khz; |
99 | 85 | ||
100 | struct lguest_dma_info dma[LGUEST_MAX_DMA]; | ||
101 | |||
102 | /* Dead? */ | 86 | /* Dead? */ |
103 | const char *dead; | 87 | const char *dead; |
104 | 88 | ||
@@ -184,15 +168,6 @@ extern char start_switcher_text[], end_switcher_text[], switch_to_guest[];
184 | int lguest_device_init(void); | 168 | int lguest_device_init(void); |
185 | void lguest_device_remove(void); | 169 | void lguest_device_remove(void); |
186 | 170 | ||
187 | /* io.c: */ | ||
188 | void lguest_io_init(void); | ||
189 | int bind_dma(struct lguest *lg, | ||
190 | unsigned long key, unsigned long udma, u16 numdmas, u8 interrupt); | ||
191 | void send_dma(struct lguest *info, unsigned long key, unsigned long udma); | ||
192 | void release_all_dma(struct lguest *lg); | ||
193 | unsigned long get_dma_buffer(struct lguest *lg, unsigned long key, | ||
194 | unsigned long *interrupt); | ||
195 | |||
196 | /* hypercalls.c: */ | 171 | /* hypercalls.c: */ |
197 | void do_hypercalls(struct lguest *lg); | 172 | void do_hypercalls(struct lguest *lg); |
198 | void write_timestamp(struct lguest *lg); | 173 | void write_timestamp(struct lguest *lg); |
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 61b177e1e64..ee405b38383 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -2,37 +2,12 @@
2 | * controls and communicates with the Guest. For example, the first write will | 2 | * controls and communicates with the Guest. For example, the first write will |
3 | * tell us the Guest's memory layout, pagetable, entry point and kernel address | 3 | * tell us the Guest's memory layout, pagetable, entry point and kernel address |
4 | * offset. A read will run the Guest until something happens, such as a signal | 4 | * offset. A read will run the Guest until something happens, such as a signal |
5 | * or the Guest doing a DMA out to the Launcher. Writes are also used to get a | 5 | * or the Guest doing a NOTIFY out to the Launcher. :*/ |
6 | * DMA buffer registered by the Guest and to send the Guest an interrupt. :*/ | ||
7 | #include <linux/uaccess.h> | 6 | #include <linux/uaccess.h> |
8 | #include <linux/miscdevice.h> | 7 | #include <linux/miscdevice.h> |
9 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
10 | #include "lg.h" | 9 | #include "lg.h" |
11 | 10 | ||
12 | /*L:310 To send DMA into the Guest, the Launcher needs to be able to ask for a | ||
13 | * DMA buffer. This is done by writing LHREQ_GETDMA and the key to | ||
14 | * /dev/lguest. */ | ||
15 | static long user_get_dma(struct lguest *lg, const unsigned long __user *input) | ||
16 | { | ||
17 | unsigned long key, udma, irq; | ||
18 | |||
19 | /* Fetch the key they wrote to us. */ | ||
20 | if (get_user(key, input) != 0) | ||
21 | return -EFAULT; | ||
22 | /* Look for a free Guest DMA buffer bound to that key. */ | ||
23 | udma = get_dma_buffer(lg, key, &irq); | ||
24 | if (!udma) | ||
25 | return -ENOENT; | ||
26 | |||
27 | /* We need to tell the Launcher what interrupt the Guest expects after | ||
28 | * the buffer is filled. We stash it in udma->used_len. */ | ||
29 | lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq); | ||
30 | |||
31 | /* The (guest-physical) address of the DMA buffer is returned from | ||
32 | * the write(). */ | ||
33 | return udma; | ||
34 | } | ||
35 | |||
36 | /*L:315 To force the Guest to stop running and return to the Launcher, the | 11 | /*L:315 To force the Guest to stop running and return to the Launcher, the |
37 | * Waker sets writes LHREQ_BREAK and the value "1" to /dev/lguest. The | 12 | * Waker sets writes LHREQ_BREAK and the value "1" to /dev/lguest. The |
38 | * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */ | 13 | * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */ |
@@ -102,10 +77,10 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
102 | return len; | 77 | return len; |
103 | } | 78 | } |
104 | 79 | ||
105 | /* If we returned from read() last time because the Guest sent DMA, | 80 | /* If we returned from read() last time because the Guest notified, |
106 | * clear the flag. */ | 81 | * clear the flag. */ |
107 | if (lg->dma_is_pending) | 82 | if (lg->pending_notify) |
108 | lg->dma_is_pending = 0; | 83 | lg->pending_notify = 0; |
109 | 84 | ||
110 | /* Run the Guest until something interesting happens. */ | 85 | /* Run the Guest until something interesting happens. */ |
111 | return run_guest(lg, (unsigned long __user *)user); | 86 | return run_guest(lg, (unsigned long __user *)user); |
@@ -216,7 +191,7 @@ unlock:
216 | /*L:010 The first operation the Launcher does must be a write. All writes | 191 | /*L:010 The first operation the Launcher does must be a write. All writes |
217 | * start with a 32 bit number: for the first write this must be | 192 | * start with a 32 bit number: for the first write this must be |
218 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use | 193 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use |
219 | * writes of other values to get DMA buffers and send interrupts. */ | 194 | * writes of other values to send interrupts. */ |
220 | static ssize_t write(struct file *file, const char __user *in, | 195 | static ssize_t write(struct file *file, const char __user *in, |
221 | size_t size, loff_t *off) | 196 | size_t size, loff_t *off) |
222 | { | 197 | { |
@@ -245,8 +220,6 @@ static ssize_t write(struct file *file, const char __user *in,
245 | switch (req) { | 220 | switch (req) { |
246 | case LHREQ_INITIALIZE: | 221 | case LHREQ_INITIALIZE: |
247 | return initialize(file, input); | 222 | return initialize(file, input); |
248 | case LHREQ_GETDMA: | ||
249 | return user_get_dma(lg, input); | ||
250 | case LHREQ_IRQ: | 223 | case LHREQ_IRQ: |
251 | return user_send_irq(lg, input); | 224 | return user_send_irq(lg, input); |
252 | case LHREQ_BREAK: | 225 | case LHREQ_BREAK: |
@@ -276,8 +249,6 @@ static int close(struct inode *inode, struct file *file)
276 | mutex_lock(&lguest_lock); | 249 | mutex_lock(&lguest_lock); |
277 | /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ | 250 | /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ |
278 | hrtimer_cancel(&lg->hrt); | 251 | hrtimer_cancel(&lg->hrt); |
279 | /* Free any DMA buffers the Guest had bound. */ | ||
280 | release_all_dma(lg); | ||
281 | /* Free up the shadow page tables for the Guest. */ | 252 | /* Free up the shadow page tables for the Guest. */ |
282 | free_guest_pagetable(lg); | 253 | free_guest_pagetable(lg); |
283 | /* Now all the memory cleanups are done, it's safe to release the | 254 | /* Now all the memory cleanups are done, it's safe to release the |