diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-07-26 13:41:03 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-26 14:35:17 -0400 |
commit | dde797899ac17ebb812b7566044124d785e98dc7 (patch) | |
tree | 531ae7fd415d267e49acfedbbf4f03cf86e5eac1 /drivers/lguest/io.c | |
parent | e2c9784325490c878b7f69aeec1bed98b288bd97 (diff) |
lguest: documentation IV: Launcher
Documentation: The Launcher
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/lguest/io.c')
-rw-r--r-- | drivers/lguest/io.c | 247 |
1 files changed, 230 insertions, 17 deletions
diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c index d2f02f0653ca..da288128e44f 100644 --- a/drivers/lguest/io.c +++ b/drivers/lguest/io.c | |||
@@ -27,8 +27,36 @@ | |||
27 | #include <linux/uaccess.h> | 27 | #include <linux/uaccess.h> |
28 | #include "lg.h" | 28 | #include "lg.h" |
29 | 29 | ||
30 | /*L:300 | ||
31 | * I/O | ||
32 | * | ||
33 | * Getting data in and out of the Guest is quite an art. There are numerous | ||
34 | * ways to do it, and they all suck differently. We try to keep things fairly | ||
35 | * close to "real" hardware so our Guest's drivers don't look like an alien | ||
36 | * visitation in the middle of the Linux code, and yet make sure that Guests | ||
37 | * can talk directly to other Guests, not just the Launcher. | ||
38 | * | ||
39 | * To do this, the Guest gives us a key when it binds or sends DMA buffers. | ||
40 | * The key corresponds to a "physical" address inside the Guest (ie. a virtual | ||
41 | * address inside the Launcher process). We don't, however, use this key | ||
42 | * directly. | ||
43 | * | ||
44 | * We want Guests which share memory to be able to DMA to each other: two | ||
45 | * Launchers can mmap the same file, then the Guests can communicate. | ||
46 | * Fortunately, the futex code provides us with a way to get a "union | ||
47 | * futex_key" corresponding to the memory lying at a virtual address: if the | ||
48 | * two processes share memory, the "union futex_key" for that memory will match | ||
49 | * even if the memory is mapped at different addresses in each. So we always | ||
50 | * convert the keys to "union futex_key"s to compare them. | ||
51 | * | ||
52 | * Before we dive into this though, we need to look at another set of helper | ||
53 | * routines used throughout the Host kernel code to access Guest memory. | ||
54 | :*/ | ||
30 | static struct list_head dma_hash[61]; | 55 | static struct list_head dma_hash[61]; |
31 | 56 | ||
57 | /* An unfortunate side effect of the Linux double-linked list implementation is | ||
58 | * that there's no good way to statically initialize an array of linked | ||
59 | * lists. */ | ||
32 | void lguest_io_init(void) | 60 | void lguest_io_init(void) |
33 | { | 61 | { |
34 | unsigned int i; | 62 | unsigned int i; |
@@ -60,6 +88,19 @@ kill: | |||
60 | return 0; | 88 | return 0; |
61 | } | 89 | } |
62 | 90 | ||
91 | /*L:330 This is our hash function, using the wonderful Jenkins hash. | ||
92 | * | ||
93 | * The futex key is a union with three parts: an unsigned long word, a pointer, | ||
94 | * and an int "offset". We could use jhash_2words() which takes three u32s. | ||
95 | * (Ok, the hash functions are great: the naming sucks though). | ||
96 | * | ||
97 | * It's nice to be portable to 64-bit platforms, so we use the more generic | ||
98 | * jhash2(), which takes an array of u32, the number of u32s, and an initial | ||
99 | * u32 to roll in. This is uglier, but breaks down to almost the same code on | ||
100 | * 32-bit platforms like this one. | ||
101 | * | ||
102 | * We want a position in the array, so we modulo ARRAY_SIZE(dma_hash) (ie. 61). | ||
103 | */ | ||
63 | static unsigned int hash(const union futex_key *key) | 104 | static unsigned int hash(const union futex_key *key) |
64 | { | 105 | { |
65 | return jhash2((u32*)&key->both.word, | 106 | return jhash2((u32*)&key->both.word, |
@@ -68,6 +109,9 @@ static unsigned int hash(const union futex_key *key) | |||
68 | % ARRAY_SIZE(dma_hash); | 109 | % ARRAY_SIZE(dma_hash); |
69 | } | 110 | } |
70 | 111 | ||
112 | /* This is a convenience routine to compare two keys. It's a much bemoaned C | ||
113 | * weakness that it doesn't allow '==' on structures or unions, so we have to | ||
114 | * open-code it like this. */ | ||
71 | static inline int key_eq(const union futex_key *a, const union futex_key *b) | 115 | static inline int key_eq(const union futex_key *a, const union futex_key *b) |
72 | { | 116 | { |
73 | return (a->both.word == b->both.word | 117 | return (a->both.word == b->both.word |
@@ -75,22 +119,36 @@ static inline int key_eq(const union futex_key *a, const union futex_key *b) | |||
75 | && a->both.offset == b->both.offset); | 119 | && a->both.offset == b->both.offset); |
76 | } | 120 | } |
77 | 121 | ||
78 | /* Must hold read lock on dmainfo owner's current->mm->mmap_sem */ | 122 | /*L:360 OK, when we need to actually free up a Guest's DMA array we do several |
123 | * things, so we have a convenient function to do it. | ||
124 | * | ||
125 | * The caller must hold a read lock on dmainfo owner's current->mm->mmap_sem | ||
126 | * for the drop_futex_key_refs(). */ | ||
79 | static void unlink_dma(struct lguest_dma_info *dmainfo) | 127 | static void unlink_dma(struct lguest_dma_info *dmainfo) |
80 | { | 128 | { |
129 | /* You locked this too, right? */ | ||
81 | BUG_ON(!mutex_is_locked(&lguest_lock)); | 130 | BUG_ON(!mutex_is_locked(&lguest_lock)); |
131 | /* This is how we know that the entry is free. */ | ||
82 | dmainfo->interrupt = 0; | 132 | dmainfo->interrupt = 0; |
133 | /* Remove it from the hash table. */ | ||
83 | list_del(&dmainfo->list); | 134 | list_del(&dmainfo->list); |
135 | /* Drop the references we were holding (to the inode or mm). */ | ||
84 | drop_futex_key_refs(&dmainfo->key); | 136 | drop_futex_key_refs(&dmainfo->key); |
85 | } | 137 | } |
86 | 138 | ||
139 | /*L:350 This is the routine which we call when the Guest asks to unregister a | ||
140 | * DMA array attached to a given key. Returns true if the array was found. */ | ||
87 | static int unbind_dma(struct lguest *lg, | 141 | static int unbind_dma(struct lguest *lg, |
88 | const union futex_key *key, | 142 | const union futex_key *key, |
89 | unsigned long dmas) | 143 | unsigned long dmas) |
90 | { | 144 | { |
91 | int i, ret = 0; | 145 | int i, ret = 0; |
92 | 146 | ||
147 | /* We don't bother with the hash table, just look through all this | ||
148 | * Guest's DMA arrays. */ | ||
93 | for (i = 0; i < LGUEST_MAX_DMA; i++) { | 149 | for (i = 0; i < LGUEST_MAX_DMA; i++) { |
150 | /* In theory it could have more than one array on the same key, | ||
151 | * or one array on multiple keys, so we check both */ | ||
94 | if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { | 152 | if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { |
95 | unlink_dma(&lg->dma[i]); | 153 | unlink_dma(&lg->dma[i]); |
96 | ret = 1; | 154 | ret = 1; |
@@ -100,51 +158,91 @@ static int unbind_dma(struct lguest *lg, | |||
100 | return ret; | 158 | return ret; |
101 | } | 159 | } |
102 | 160 | ||
161 | /*L:340 BIND_DMA: this is the hypercall which sets up an array of "struct | ||
162 | * lguest_dma" for receiving I/O. | ||
163 | * | ||
164 | * The Guest wants to bind an array of "struct lguest_dma"s to a particular key | ||
165 | * to receive input. This only happens when the Guest is setting up a new | ||
166 | * device, so it doesn't have to be very fast. | ||
167 | * | ||
168 | * It returns 1 on a successful registration (it can fail if we hit the limit | ||
169 | * of registrations for this Guest). | ||
170 | */ | ||
103 | int bind_dma(struct lguest *lg, | 171 | int bind_dma(struct lguest *lg, |
104 | unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) | 172 | unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) |
105 | { | 173 | { |
106 | unsigned int i; | 174 | unsigned int i; |
107 | int ret = 0; | 175 | int ret = 0; |
108 | union futex_key key; | 176 | union futex_key key; |
177 | /* Futex code needs the mmap_sem. */ | ||
109 | struct rw_semaphore *fshared = &current->mm->mmap_sem; | 178 | struct rw_semaphore *fshared = &current->mm->mmap_sem; |
110 | 179 | ||
180 | /* Invalid interrupt? (We could kill the guest here). */ | ||
111 | if (interrupt >= LGUEST_IRQS) | 181 | if (interrupt >= LGUEST_IRQS) |
112 | return 0; | 182 | return 0; |
113 | 183 | ||
184 | /* We need to grab the Big Lguest Lock, because other Guests may be | ||
185 | * trying to look through this Guest's DMAs to send something while | ||
186 | * we're doing this. */ | ||
114 | mutex_lock(&lguest_lock); | 187 | mutex_lock(&lguest_lock); |
115 | down_read(fshared); | 188 | down_read(fshared); |
116 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { | 189 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { |
117 | kill_guest(lg, "bad dma key %#lx", ukey); | 190 | kill_guest(lg, "bad dma key %#lx", ukey); |
118 | goto unlock; | 191 | goto unlock; |
119 | } | 192 | } |
193 | |||
194 | /* We want to keep this key valid once we drop mmap_sem, so we have to | ||
195 | * hold a reference. */ | ||
120 | get_futex_key_refs(&key); | 196 | get_futex_key_refs(&key); |
121 | 197 | ||
198 | /* If the Guest specified an interrupt of 0, that means they want to | ||
199 | * unregister this array of "struct lguest_dma"s. */ | ||
122 | if (interrupt == 0) | 200 | if (interrupt == 0) |
123 | ret = unbind_dma(lg, &key, dmas); | 201 | ret = unbind_dma(lg, &key, dmas); |
124 | else { | 202 | else { |
203 | /* Look through this Guest's dma array for an unused entry. */ | ||
125 | for (i = 0; i < LGUEST_MAX_DMA; i++) { | 204 | for (i = 0; i < LGUEST_MAX_DMA; i++) { |
205 | /* If the interrupt is non-zero, the entry is already | ||
206 | * used. */ | ||
126 | if (lg->dma[i].interrupt) | 207 | if (lg->dma[i].interrupt) |
127 | continue; | 208 | continue; |
128 | 209 | ||
210 | /* OK, a free one! Fill in our details. */ | ||
129 | lg->dma[i].dmas = dmas; | 211 | lg->dma[i].dmas = dmas; |
130 | lg->dma[i].num_dmas = numdmas; | 212 | lg->dma[i].num_dmas = numdmas; |
131 | lg->dma[i].next_dma = 0; | 213 | lg->dma[i].next_dma = 0; |
132 | lg->dma[i].key = key; | 214 | lg->dma[i].key = key; |
133 | lg->dma[i].guestid = lg->guestid; | 215 | lg->dma[i].guestid = lg->guestid; |
134 | lg->dma[i].interrupt = interrupt; | 216 | lg->dma[i].interrupt = interrupt; |
217 | |||
218 | /* Now we add it to the hash table: the position | ||
219 | * depends on the futex key that we got. */ | ||
135 | list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); | 220 | list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); |
221 | /* Success! */ | ||
136 | ret = 1; | 222 | ret = 1; |
137 | goto unlock; | 223 | goto unlock; |
138 | } | 224 | } |
139 | } | 225 | } |
226 | /* If we didn't find a slot to put the key in, drop the reference | ||
227 | * again. */ | ||
140 | drop_futex_key_refs(&key); | 228 | drop_futex_key_refs(&key); |
141 | unlock: | 229 | unlock: |
230 | /* Unlock and out. */ | ||
142 | up_read(fshared); | 231 | up_read(fshared); |
143 | mutex_unlock(&lguest_lock); | 232 | mutex_unlock(&lguest_lock); |
144 | return ret; | 233 | return ret; |
145 | } | 234 | } |
146 | 235 | ||
147 | /* lgread from another guest */ | 236 | /*L:385 Note that our routines to access a different Guest's memory are called |
237 | * lgread_other() and lgwrite_other(): these names emphasize that they are only | ||
238 | * used when the Guest is *not* the current Guest. | ||
239 | * | ||
240 | * The interface for copying from another process's memory is called | ||
241 | * access_process_vm(), with a final argument of 0 for a read, and 1 for a | ||
242 | * write. | ||
243 | * | ||
244 | * We need lgread_other() to read the destination Guest's "struct lguest_dma" | ||
245 | * array. */ | ||
148 | static int lgread_other(struct lguest *lg, | 246 | static int lgread_other(struct lguest *lg, |
149 | void *buf, u32 addr, unsigned bytes) | 247 | void *buf, u32 addr, unsigned bytes) |
150 | { | 248 | { |
@@ -157,7 +255,8 @@ static int lgread_other(struct lguest *lg, | |||
157 | return 1; | 255 | return 1; |
158 | } | 256 | } |
159 | 257 | ||
160 | /* lgwrite to another guest */ | 258 | /* "lgwrite()" to another Guest: used to update the destination "used_len" once |
259 | * we've transferred data into the buffer. */ | ||
161 | static int lgwrite_other(struct lguest *lg, u32 addr, | 260 | static int lgwrite_other(struct lguest *lg, u32 addr, |
162 | const void *buf, unsigned bytes) | 261 | const void *buf, unsigned bytes) |
163 | { | 262 | { |
@@ -170,6 +269,15 @@ static int lgwrite_other(struct lguest *lg, u32 addr, | |||
170 | return 1; | 269 | return 1; |
171 | } | 270 | } |
172 | 271 | ||
272 | /*L:400 This is the generic engine which copies from a source "struct | ||
273 | * lguest_dma" from this Guest into another Guest's "struct lguest_dma". The | ||
274 | * destination Guest's pages have already been mapped, as contained in the | ||
275 | * pages array. | ||
276 | * | ||
277 | * If you're wondering if there's a nice "copy from one process to another" | ||
278 | * routine, so was I. But Linux isn't really set up to copy between two | ||
279 | * unrelated processes, so we have to write it ourselves. | ||
280 | */ | ||
173 | static u32 copy_data(struct lguest *srclg, | 281 | static u32 copy_data(struct lguest *srclg, |
174 | const struct lguest_dma *src, | 282 | const struct lguest_dma *src, |
175 | const struct lguest_dma *dst, | 283 | const struct lguest_dma *dst, |
@@ -178,33 +286,59 @@ static u32 copy_data(struct lguest *srclg, | |||
178 | unsigned int totlen, si, di, srcoff, dstoff; | 286 | unsigned int totlen, si, di, srcoff, dstoff; |
179 | void *maddr = NULL; | 287 | void *maddr = NULL; |
180 | 288 | ||
289 | /* We return the total length transferred. */ | ||
181 | totlen = 0; | 290 | totlen = 0; |
291 | |||
292 | /* We keep indexes into the source and destination "struct lguest_dma", | ||
293 | * and an offset within each region. */ | ||
182 | si = di = 0; | 294 | si = di = 0; |
183 | srcoff = dstoff = 0; | 295 | srcoff = dstoff = 0; |
296 | |||
297 | /* We loop until the source or destination is exhausted. */ | ||
184 | while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] | 298 | while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] |
185 | && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { | 299 | && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { |
300 | /* We can only transfer the rest of the src buffer, or as much | ||
301 | * as will fit into the destination buffer. */ | ||
186 | u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff); | 302 | u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff); |
187 | 303 | ||
304 | /* For systems using "highmem" we need to use kmap() to access | ||
305 | * the page we want. We often use the same page over and over, | ||
306 | * so rather than kmap() it on every loop, we set the maddr | ||
307 | * pointer to NULL when we need to move to the next | ||
308 | * destination page. */ | ||
188 | if (!maddr) | 309 | if (!maddr) |
189 | maddr = kmap(pages[di]); | 310 | maddr = kmap(pages[di]); |
190 | 311 | ||
191 | /* FIXME: This is not completely portable, since | 312 | /* Copy directly from (this Guest's) source address to the |
192 | archs do different things for copy_to_user_page. */ | 313 | * destination Guest's kmap()ed buffer. Note that maddr points |
314 | * to the start of the page: we need to add the offset of the | ||
315 | * destination address and offset within the buffer. */ | ||
316 | |||
317 | /* FIXME: This is not completely portable. I looked at | ||
318 | * copy_to_user_page(), and some arch's seem to need special | ||
319 | * flushes. x86 is fine. */ | ||
193 | if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, | 320 | if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, |
194 | (void __user *)src->addr[si], len) != 0) { | 321 | (void __user *)src->addr[si], len) != 0) { |
322 | /* If a copy failed, it's the source's fault. */ | ||
195 | kill_guest(srclg, "bad address in sending DMA"); | 323 | kill_guest(srclg, "bad address in sending DMA"); |
196 | totlen = 0; | 324 | totlen = 0; |
197 | break; | 325 | break; |
198 | } | 326 | } |
199 | 327 | ||
328 | /* Increment the total and src & dst offsets */ | ||
200 | totlen += len; | 329 | totlen += len; |
201 | srcoff += len; | 330 | srcoff += len; |
202 | dstoff += len; | 331 | dstoff += len; |
332 | |||
333 | /* Presumably we reached the end of the src or dest buffers: */ | ||
203 | if (srcoff == src->len[si]) { | 334 | if (srcoff == src->len[si]) { |
335 | /* Move to the next buffer at offset 0 */ | ||
204 | si++; | 336 | si++; |
205 | srcoff = 0; | 337 | srcoff = 0; |
206 | } | 338 | } |
207 | if (dstoff == dst->len[di]) { | 339 | if (dstoff == dst->len[di]) { |
340 | /* We need to unmap that destination page and reset | ||
341 | * maddr ready for the next one. */ | ||
208 | kunmap(pages[di]); | 342 | kunmap(pages[di]); |
209 | maddr = NULL; | 343 | maddr = NULL; |
210 | di++; | 344 | di++; |
@@ -212,13 +346,15 @@ static u32 copy_data(struct lguest *srclg, | |||
212 | } | 346 | } |
213 | } | 347 | } |
214 | 348 | ||
349 | /* If we still had a page mapped at the end, unmap now. */ | ||
215 | if (maddr) | 350 | if (maddr) |
216 | kunmap(pages[di]); | 351 | kunmap(pages[di]); |
217 | 352 | ||
218 | return totlen; | 353 | return totlen; |
219 | } | 354 | } |
220 | 355 | ||
221 | /* Src is us, ie. current. */ | 356 | /*L:390 This is how we transfer a "struct lguest_dma" from the source Guest |
357 | * (the current Guest which called SEND_DMA) to another Guest. */ | ||
222 | static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, | 358 | static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, |
223 | struct lguest *dstlg, const struct lguest_dma *dst) | 359 | struct lguest *dstlg, const struct lguest_dma *dst) |
224 | { | 360 | { |
@@ -226,23 +362,31 @@ static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, | |||
226 | u32 ret; | 362 | u32 ret; |
227 | struct page *pages[LGUEST_MAX_DMA_SECTIONS]; | 363 | struct page *pages[LGUEST_MAX_DMA_SECTIONS]; |
228 | 364 | ||
365 | /* We check that both source and destination "struct lguest_dma"s are | ||
366 | * within the bounds of the source and destination Guests */ | ||
229 | if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) | 367 | if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) |
230 | return 0; | 368 | return 0; |
231 | 369 | ||
232 | /* First get the destination pages */ | 370 | /* We need to map the pages which correspond to each part of the |
371 | * destination buffer. */ | ||
233 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { | 372 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { |
234 | if (dst->len[i] == 0) | 373 | if (dst->len[i] == 0) |
235 | break; | 374 | break; |
375 | /* get_user_pages() is a complicated function, especially since | ||
376 | * we only want a single page. But it works, and returns the | ||
377 | * number of pages. Note that we're holding the destination's | ||
378 | * mmap_sem, as get_user_pages() requires. */ | ||
236 | if (get_user_pages(dstlg->tsk, dstlg->mm, | 379 | if (get_user_pages(dstlg->tsk, dstlg->mm, |
237 | dst->addr[i], 1, 1, 1, pages+i, NULL) | 380 | dst->addr[i], 1, 1, 1, pages+i, NULL) |
238 | != 1) { | 381 | != 1) { |
382 | /* This means the destination gave us a bogus buffer */ | ||
239 | kill_guest(dstlg, "Error mapping DMA pages"); | 383 | kill_guest(dstlg, "Error mapping DMA pages"); |
240 | ret = 0; | 384 | ret = 0; |
241 | goto drop_pages; | 385 | goto drop_pages; |
242 | } | 386 | } |
243 | } | 387 | } |
244 | 388 | ||
245 | /* Now copy until we run out of src or dst. */ | 389 | /* Now copy the data until we run out of src or dst. */ |
246 | ret = copy_data(srclg, src, dst, pages); | 390 | ret = copy_data(srclg, src, dst, pages); |
247 | 391 | ||
248 | drop_pages: | 392 | drop_pages: |
@@ -251,6 +395,11 @@ drop_pages: | |||
251 | return ret; | 395 | return ret; |
252 | } | 396 | } |
253 | 397 | ||
398 | /*L:380 Transferring data from one Guest to another is not as simple as I'd | ||
399 | * like. We've found the "struct lguest_dma_info" bound to the same address as | ||
400 | * the send, we need to copy into it. | ||
401 | * | ||
402 | * This function returns true if the destination array was empty. */ | ||
254 | static int dma_transfer(struct lguest *srclg, | 403 | static int dma_transfer(struct lguest *srclg, |
255 | unsigned long udma, | 404 | unsigned long udma, |
256 | struct lguest_dma_info *dst) | 405 | struct lguest_dma_info *dst) |
@@ -259,15 +408,23 @@ static int dma_transfer(struct lguest *srclg, | |||
259 | struct lguest *dstlg; | 408 | struct lguest *dstlg; |
260 | u32 i, dma = 0; | 409 | u32 i, dma = 0; |
261 | 410 | ||
411 | /* From the "struct lguest_dma_info" we found in the hash, grab the | ||
412 | * Guest. */ | ||
262 | dstlg = &lguests[dst->guestid]; | 413 | dstlg = &lguests[dst->guestid]; |
263 | /* Get our dma list. */ | 414 | /* Read in the source "struct lguest_dma" handed to SEND_DMA. */ |
264 | lgread(srclg, &src_dma, udma, sizeof(src_dma)); | 415 | lgread(srclg, &src_dma, udma, sizeof(src_dma)); |
265 | 416 | ||
266 | /* We can't deadlock against them dmaing to us, because this | 417 | /* We need the destination's mmap_sem, and we already hold the source's |
267 | * is all under the lguest_lock. */ | 418 | * mmap_sem for the futex key lookup. Normally this would suggest that |
419 | * we could deadlock if the destination Guest was trying to send to | ||
420 | * this source Guest at the same time, which is another reason that all | ||
421 | * I/O is done under the big lguest_lock. */ | ||
268 | down_read(&dstlg->mm->mmap_sem); | 422 | down_read(&dstlg->mm->mmap_sem); |
269 | 423 | ||
424 | /* Look through the destination DMA array for an available buffer. */ | ||
270 | for (i = 0; i < dst->num_dmas; i++) { | 425 | for (i = 0; i < dst->num_dmas; i++) { |
426 | /* We keep a "next_dma" pointer which often helps us avoid | ||
427 | * looking at lots of previously-filled entries. */ | ||
271 | dma = (dst->next_dma + i) % dst->num_dmas; | 428 | dma = (dst->next_dma + i) % dst->num_dmas; |
272 | if (!lgread_other(dstlg, &dst_dma, | 429 | if (!lgread_other(dstlg, &dst_dma, |
273 | dst->dmas + dma * sizeof(struct lguest_dma), | 430 | dst->dmas + dma * sizeof(struct lguest_dma), |
@@ -277,30 +434,46 @@ static int dma_transfer(struct lguest *srclg, | |||
277 | if (!dst_dma.used_len) | 434 | if (!dst_dma.used_len) |
278 | break; | 435 | break; |
279 | } | 436 | } |
437 | |||
438 | /* If we found a buffer, we do the actual data copy. */ | ||
280 | if (i != dst->num_dmas) { | 439 | if (i != dst->num_dmas) { |
281 | unsigned long used_lenp; | 440 | unsigned long used_lenp; |
282 | unsigned int ret; | 441 | unsigned int ret; |
283 | 442 | ||
284 | ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); | 443 | ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); |
285 | /* Put used length in src. */ | 444 | /* Put used length in the source "struct lguest_dma"'s used_len |
445 | * field. It's a little tricky to figure out where that is, | ||
446 | * though. */ | ||
286 | lgwrite_u32(srclg, | 447 | lgwrite_u32(srclg, |
287 | udma+offsetof(struct lguest_dma, used_len), ret); | 448 | udma+offsetof(struct lguest_dma, used_len), ret); |
449 | /* Transferring 0 bytes is OK if the source buffer was empty. */ | ||
288 | if (ret == 0 && src_dma.len[0] != 0) | 450 | if (ret == 0 && src_dma.len[0] != 0) |
289 | goto fail; | 451 | goto fail; |
290 | 452 | ||
291 | /* Make sure destination sees contents before length. */ | 453 | /* The destination Guest might be running on a different CPU: |
454 | * we have to make sure that it will see the "used_len" field | ||
455 | * change to non-zero *after* it sees the data we copied into | ||
456 | * the buffer. Hence a write memory barrier. */ | ||
292 | wmb(); | 457 | wmb(); |
458 | /* Figuring out where the destination's used_len field for this | ||
459 | * "struct lguest_dma" in the array is also a little ugly. */ | ||
293 | used_lenp = dst->dmas | 460 | used_lenp = dst->dmas |
294 | + dma * sizeof(struct lguest_dma) | 461 | + dma * sizeof(struct lguest_dma) |
295 | + offsetof(struct lguest_dma, used_len); | 462 | + offsetof(struct lguest_dma, used_len); |
296 | lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); | 463 | lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); |
464 | /* Move the cursor for next time. */ | ||
297 | dst->next_dma++; | 465 | dst->next_dma++; |
298 | } | 466 | } |
299 | up_read(&dstlg->mm->mmap_sem); | 467 | up_read(&dstlg->mm->mmap_sem); |
300 | 468 | ||
301 | /* Do this last so dst doesn't simply sleep on lock. */ | 469 | /* We trigger the destination interrupt, even if the destination was |
470 | * empty and we didn't transfer anything: this gives them a chance to | ||
471 | * wake up and refill. */ | ||
302 | set_bit(dst->interrupt, dstlg->irqs_pending); | 472 | set_bit(dst->interrupt, dstlg->irqs_pending); |
473 | /* Wake up the destination process. */ | ||
303 | wake_up_process(dstlg->tsk); | 474 | wake_up_process(dstlg->tsk); |
475 | /* If we passed the last "struct lguest_dma", the receiver had no | ||
476 | * buffers left. */ | ||
304 | return i == dst->num_dmas; | 477 | return i == dst->num_dmas; |
305 | 478 | ||
306 | fail: | 479 | fail: |
@@ -308,6 +481,8 @@ fail: | |||
308 | return 0; | 481 | return 0; |
309 | } | 482 | } |
310 | 483 | ||
484 | /*L:370 This is the counter-side to the BIND_DMA hypercall; the SEND_DMA | ||
485 | * hypercall. We find out who's listening, and send to them. */ | ||
311 | void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) | 486 | void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) |
312 | { | 487 | { |
313 | union futex_key key; | 488 | union futex_key key; |
@@ -317,31 +492,43 @@ void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) | |||
317 | again: | 492 | again: |
318 | mutex_lock(&lguest_lock); | 493 | mutex_lock(&lguest_lock); |
319 | down_read(fshared); | 494 | down_read(fshared); |
495 | /* Get the futex key for the key the Guest gave us */ | ||
320 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { | 496 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { |
321 | kill_guest(lg, "bad sending DMA key"); | 497 | kill_guest(lg, "bad sending DMA key"); |
322 | goto unlock; | 498 | goto unlock; |
323 | } | 499 | } |
324 | /* Shared mapping? Look for other guests... */ | 500 | /* Since the key must be a multiple of 4, the futex key uses the lower |
501 | * bit of the "offset" field (which would always be 0) to indicate a | ||
502 | * mapping which is shared with other processes (ie. Guests). */ | ||
325 | if (key.shared.offset & 1) { | 503 | if (key.shared.offset & 1) { |
326 | struct lguest_dma_info *i; | 504 | struct lguest_dma_info *i; |
505 | /* Look through the hash for other Guests. */ | ||
327 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { | 506 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { |
507 | /* Don't send to ourselves. */ | ||
328 | if (i->guestid == lg->guestid) | 508 | if (i->guestid == lg->guestid) |
329 | continue; | 509 | continue; |
330 | if (!key_eq(&key, &i->key)) | 510 | if (!key_eq(&key, &i->key)) |
331 | continue; | 511 | continue; |
332 | 512 | ||
513 | /* If dma_transfer() tells us the destination has no | ||
514 | * available buffers, we increment "empty". */ | ||
333 | empty += dma_transfer(lg, udma, i); | 515 | empty += dma_transfer(lg, udma, i); |
334 | break; | 516 | break; |
335 | } | 517 | } |
518 | /* If the destination is empty, we release our locks and | ||
519 | * give the destination Guest a brief chance to restock. */ | ||
336 | if (empty == 1) { | 520 | if (empty == 1) { |
337 | /* Give any recipients one chance to restock. */ | 521 | /* Give any recipients one chance to restock. */ |
338 | up_read(&current->mm->mmap_sem); | 522 | up_read(&current->mm->mmap_sem); |
339 | mutex_unlock(&lguest_lock); | 523 | mutex_unlock(&lguest_lock); |
524 | /* Next time, we won't try again. */ | ||
340 | empty++; | 525 | empty++; |
341 | goto again; | 526 | goto again; |
342 | } | 527 | } |
343 | } else { | 528 | } else { |
344 | /* Private mapping: tell our userspace. */ | 529 | /* Private mapping: Guest is sending to its Launcher. We set |
530 | * the "dma_is_pending" flag so that the main loop will exit | ||
531 | * and the Launcher's read() from /dev/lguest will return. */ | ||
345 | lg->dma_is_pending = 1; | 532 | lg->dma_is_pending = 1; |
346 | lg->pending_dma = udma; | 533 | lg->pending_dma = udma; |
347 | lg->pending_key = ukey; | 534 | lg->pending_key = ukey; |
@@ -350,6 +537,7 @@ unlock: | |||
350 | up_read(fshared); | 537 | up_read(fshared); |
351 | mutex_unlock(&lguest_lock); | 538 | mutex_unlock(&lguest_lock); |
352 | } | 539 | } |
540 | /*:*/ | ||
353 | 541 | ||
354 | void release_all_dma(struct lguest *lg) | 542 | void release_all_dma(struct lguest *lg) |
355 | { | 543 | { |
@@ -365,7 +553,8 @@ void release_all_dma(struct lguest *lg) | |||
365 | up_read(&lg->mm->mmap_sem); | 553 | up_read(&lg->mm->mmap_sem); |
366 | } | 554 | } |
367 | 555 | ||
368 | /* Userspace wants a dma buffer from this guest. */ | 556 | /*L:320 This routine looks for a DMA buffer registered by the Guest on the |
557 | * given key (using the BIND_DMA hypercall). */ | ||
369 | unsigned long get_dma_buffer(struct lguest *lg, | 558 | unsigned long get_dma_buffer(struct lguest *lg, |
370 | unsigned long ukey, unsigned long *interrupt) | 559 | unsigned long ukey, unsigned long *interrupt) |
371 | { | 560 | { |
@@ -374,15 +563,29 @@ unsigned long get_dma_buffer(struct lguest *lg, | |||
374 | struct lguest_dma_info *i; | 563 | struct lguest_dma_info *i; |
375 | struct rw_semaphore *fshared = &current->mm->mmap_sem; | 564 | struct rw_semaphore *fshared = &current->mm->mmap_sem; |
376 | 565 | ||
566 | /* Take the Big Lguest Lock to stop other Guests sending this Guest DMA | ||
567 | * at the same time. */ | ||
377 | mutex_lock(&lguest_lock); | 568 | mutex_lock(&lguest_lock); |
569 | /* To match between Guests sharing the same underlying memory we steal | ||
570 | * code from the futex infrastructure. This requires that we hold the | ||
571 | * "mmap_sem" for our process (the Launcher), and pass it to the futex | ||
572 | * code. */ | ||
378 | down_read(fshared); | 573 | down_read(fshared); |
574 | |||
575 | /* This can fail if it's not a valid address, or if the address is not | ||
576 | * divisible by 4 (the futex code needs that, we don't really). */ | ||
379 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { | 577 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { |
380 | kill_guest(lg, "bad registered DMA buffer"); | 578 | kill_guest(lg, "bad registered DMA buffer"); |
381 | goto unlock; | 579 | goto unlock; |
382 | } | 580 | } |
581 | /* Search the hash table for matching entries (the Launcher can only | ||
582 | * send to its own Guest for the moment, so the entry must be for this | ||
583 | * Guest) */ | ||
383 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { | 584 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { |
384 | if (key_eq(&key, &i->key) && i->guestid == lg->guestid) { | 585 | if (key_eq(&key, &i->key) && i->guestid == lg->guestid) { |
385 | unsigned int j; | 586 | unsigned int j; |
587 | /* Look through the registered DMA array for an | ||
588 | * available buffer. */ | ||
386 | for (j = 0; j < i->num_dmas; j++) { | 589 | for (j = 0; j < i->num_dmas; j++) { |
387 | struct lguest_dma dma; | 590 | struct lguest_dma dma; |
388 | 591 | ||
@@ -391,6 +594,8 @@ unsigned long get_dma_buffer(struct lguest *lg, | |||
391 | if (dma.used_len == 0) | 594 | if (dma.used_len == 0) |
392 | break; | 595 | break; |
393 | } | 596 | } |
597 | /* Store the interrupt the Guest wants when the buffer | ||
598 | * is used. */ | ||
394 | *interrupt = i->interrupt; | 599 | *interrupt = i->interrupt; |
395 | break; | 600 | break; |
396 | } | 601 | } |
@@ -400,4 +605,12 @@ unlock: | |||
400 | mutex_unlock(&lguest_lock); | 605 | mutex_unlock(&lguest_lock); |
401 | return ret; | 606 | return ret; |
402 | } | 607 | } |
608 | /*:*/ | ||
403 | 609 | ||
610 | /*L:410 This really has completed the Launcher. Not only have we now finished | ||
611 | * the longest chapter in our journey, but this also means we are over halfway | ||
612 | * through! | ||
613 | * | ||
614 | * Enough prevaricating around the bush: it is time for us to dive into the | ||
615 | * core of the Host, in "make Host". | ||
616 | */ | ||