diff options
| -rw-r--r-- | Documentation/lguest/lguest.c | 12 | ||||
| -rw-r--r-- | drivers/char/hvc_lguest.c | 3 | ||||
| -rw-r--r-- | drivers/lguest/interrupts_and_traps.c | 14 | ||||
| -rw-r--r-- | drivers/lguest/io.c | 10 | ||||
| -rw-r--r-- | drivers/lguest/lguest.c | 8 | ||||
| -rw-r--r-- | drivers/lguest/lguest_asm.S | 14 | ||||
| -rw-r--r-- | drivers/lguest/page_tables.c | 5 | ||||
| -rw-r--r-- | drivers/lguest/segments.c | 4 | ||||
| -rw-r--r-- | drivers/net/lguest_net.c | 19 |
9 files changed, 89 insertions, 0 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index d7e26f025959..f7918401a007 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
| @@ -1510,3 +1510,15 @@ int main(int argc, char *argv[]) | |||
| 1510 | /* Finally, run the Guest. This doesn't return. */ | 1510 | /* Finally, run the Guest. This doesn't return. */ |
| 1511 | run_guest(lguest_fd, &device_list); | 1511 | run_guest(lguest_fd, &device_list); |
| 1512 | } | 1512 | } |
| 1513 | /*:*/ | ||
| 1514 | |||
| 1515 | /*M:999 | ||
| 1516 | * Mastery is done: you now know everything I do. | ||
| 1517 | * | ||
| 1518 | * But surely you have seen code, features and bugs in your wanderings which | ||
| 1519 | * you now yearn to attack? That is the real game, and I look forward to you | ||
| 1520 | * patching and forking lguest into the Your-Name-Here-visor. | ||
| 1521 | * | ||
| 1522 | * Farewell, and good coding! | ||
| 1523 | * Rusty Russell. | ||
| 1524 | */ | ||
diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c index 1de8967cce06..feeccbaec438 100644 --- a/drivers/char/hvc_lguest.c +++ b/drivers/char/hvc_lguest.c | |||
| @@ -13,6 +13,9 @@ | |||
| 13 | * functions. | 13 | * functions. |
| 14 | :*/ | 14 | :*/ |
| 15 | 15 | ||
| 16 | /*M:002 The console can be flooded: while the Guest is processing input the | ||
| 17 | * Host can send more. Buffering in the Host could alleviate this, but it is a | ||
| 18 | * difficult problem in general. :*/ | ||
| 16 | /* Copyright (C) 2006 Rusty Russell, IBM Corporation | 19 | /* Copyright (C) 2006 Rusty Russell, IBM Corporation |
| 17 | * | 20 | * |
| 18 | * This program is free software; you can redistribute it and/or modify | 21 | * This program is free software; you can redistribute it and/or modify |
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 3d9830322646..bd0091bf79ec 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
| @@ -231,6 +231,20 @@ static int direct_trap(const struct lguest *lg, | |||
| 231 | * go direct, of course 8) */ | 231 | * go direct, of course 8) */ |
| 232 | return idt_type(trap->a, trap->b) == 0xF; | 232 | return idt_type(trap->a, trap->b) == 0xF; |
| 233 | } | 233 | } |
| 234 | /*:*/ | ||
| 235 | |||
| 236 | /*M:005 The Guest has the ability to turn its interrupt gates into trap gates, | ||
| 237 | * if it is careful. The Host will let trap gates go directly to the | ||
| 238 | * Guest, but the Guest needs the interrupts atomically disabled for an | ||
| 239 | * interrupt gate. It can do this by pointing the trap gate at instructions | ||
| 240 | * within noirq_start and noirq_end, where it can safely disable interrupts. */ | ||
| 241 | |||
| 242 | /*M:006 The Guests do not use the sysenter (fast system call) instruction, | ||
| 243 | * because it's hardcoded to enter privilege level 0 and so can't go direct. | ||
| 244 | * It's about twice as fast as the older "int 0x80" system call, so it might | ||
| 245 | * still be worthwhile to handle it in the Switcher and lcall down to the | ||
| 246 | * Guest. The sysenter semantics are hairy tho: search for that keyword in | ||
| 247 | * entry.S :*/ | ||
| 234 | 248 | ||
| 235 | /*H:260 When we make traps go directly into the Guest, we need to make sure | 249 | /*H:260 When we make traps go directly into the Guest, we need to make sure |
| 236 | * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the | 250 | * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the |
diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c index da288128e44f..ea68613b43f6 100644 --- a/drivers/lguest/io.c +++ b/drivers/lguest/io.c | |||
| @@ -553,6 +553,16 @@ void release_all_dma(struct lguest *lg) | |||
| 553 | up_read(&lg->mm->mmap_sem); | 553 | up_read(&lg->mm->mmap_sem); |
| 554 | } | 554 | } |
| 555 | 555 | ||
| 556 | /*M:007 We only return a single DMA buffer to the Launcher, but it would be | ||
| 557 | * more efficient to return a pointer to the entire array of DMA buffers, which | ||
| 558 | * it can cache and choose one whenever it wants. | ||
| 559 | * | ||
| 560 | * Currently the Launcher uses a write to /dev/lguest, and the return value is | ||
| 561 | * the address of the DMA structure with the interrupt number placed in | ||
| 562 | * dma->used_len. If we wanted to return the entire array, we would need to return | ||
| 563 | * the address, array size and interrupt number: this seems to require an | ||
| 564 | * ioctl(). :*/ | ||
| 565 | |||
| 556 | /*L:320 This routine looks for a DMA buffer registered by the Guest on the | 566 | /*L:320 This routine looks for a DMA buffer registered by the Guest on the |
| 557 | * given key (using the BIND_DMA hypercall). */ | 567 | * given key (using the BIND_DMA hypercall). */ |
| 558 | unsigned long get_dma_buffer(struct lguest *lg, | 568 | unsigned long get_dma_buffer(struct lguest *lg, |
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index 7e7e9fb3aefd..6dfe568523a2 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c | |||
| @@ -250,6 +250,14 @@ static void irq_enable(void) | |||
| 250 | { | 250 | { |
| 251 | lguest_data.irq_enabled = X86_EFLAGS_IF; | 251 | lguest_data.irq_enabled = X86_EFLAGS_IF; |
| 252 | } | 252 | } |
| 253 | /*:*/ | ||
| 254 | /*M:003 Note that we don't check for outstanding interrupts when we re-enable | ||
| 255 | * them (or when we unmask an interrupt). This seems to work for the moment, | ||
| 256 | * since interrupts are rare and we'll just get the interrupt on the next timer | ||
| 257 | * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way | ||
| 258 | * would be to put the "irq_enabled" field in a page by itself, and have the | ||
| 259 | * Host write-protect it when an interrupt comes in when irqs are disabled. | ||
| 260 | * There will then be a page fault as soon as interrupts are re-enabled. :*/ | ||
| 253 | 261 | ||
| 254 | /*G:034 | 262 | /*G:034 |
| 255 | * The Interrupt Descriptor Table (IDT). | 263 | * The Interrupt Descriptor Table (IDT). |
diff --git a/drivers/lguest/lguest_asm.S b/drivers/lguest/lguest_asm.S index 3126ae923cc0..f182c6a36209 100644 --- a/drivers/lguest/lguest_asm.S +++ b/drivers/lguest/lguest_asm.S | |||
| @@ -39,6 +39,20 @@ LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) | |||
| 39 | .global lguest_noirq_start | 39 | .global lguest_noirq_start |
| 40 | .global lguest_noirq_end | 40 | .global lguest_noirq_end |
| 41 | 41 | ||
| 42 | /*M:004 When the Host reflects a trap or injects an interrupt into the Guest, | ||
| 43 | * it sets the eflags interrupt bit on the stack based on | ||
| 44 | * lguest_data.irq_enabled, so the Guest iret logic does the right thing when | ||
| 45 | * restoring it. However, when the Host sets the Guest up for direct traps, | ||
| 46 | * such as system calls, the processor is the one to push eflags onto the | ||
| 47 | * stack, and the interrupt bit will be 1 (in reality, interrupts are always | ||
| 48 | * enabled in the Guest). | ||
| 49 | * | ||
| 50 | * This turns out to be harmless: the only trap which should happen under Linux | ||
| 51 | * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc | ||
| 52 | * regions), which has to be reflected through the Host anyway. If another | ||
| 53 | * trap *does* go off when interrupts are disabled, the Guest will panic, and | ||
| 54 | * we'll never get to this iret! :*/ | ||
| 55 | |||
| 42 | /*G:045 There is one final paravirt_op that the Guest implements, and glancing | 56 | /*G:045 There is one final paravirt_op that the Guest implements, and glancing |
| 43 | * at it you can see why I left it to last. It's *cool*! It's in *assembler*! | 57 | * at it you can see why I left it to last. It's *cool*! It's in *assembler*! |
| 44 | * | 58 | * |
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index cd047e81cd63..b7a924ace684 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
| @@ -15,6 +15,11 @@ | |||
| 15 | #include <asm/tlbflush.h> | 15 | #include <asm/tlbflush.h> |
| 16 | #include "lg.h" | 16 | #include "lg.h" |
| 17 | 17 | ||
| 18 | /*M:008 We hold references to pages, which prevents them from being swapped. | ||
| 19 | * It'd be nice to have a callback in the "struct mm_struct" when Linux wants | ||
| 20 | * to swap out. If we had this, and a shrinker callback to trim PTE pages, we | ||
| 21 | * could probably consider launching Guests as non-root. :*/ | ||
| 22 | |||
| 18 | /*H:300 | 23 | /*H:300 |
| 19 | * The Page Table Code | 24 | * The Page Table Code |
| 20 | * | 25 | * |
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 4d4e5a4586f9..f675a41a80da 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c | |||
| @@ -94,6 +94,10 @@ static void check_segment_use(struct lguest *lg, unsigned int desc) | |||
| 94 | || lg->regs->ss / 8 == desc) | 94 | || lg->regs->ss / 8 == desc) |
| 95 | kill_guest(lg, "Removed live GDT entry %u", desc); | 95 | kill_guest(lg, "Removed live GDT entry %u", desc); |
| 96 | } | 96 | } |
| 97 | /*:*/ | ||
| 98 | /*M:009 We wouldn't need to check for removal of in-use segments if we handled | ||
| 99 | * faults in the Switcher. However, it's probably not a worthwhile | ||
| 100 | * optimization. :*/ | ||
| 97 | 101 | ||
| 98 | /*H:610 Once the GDT has been changed, we look through the changed entries and | 102 | /*H:610 Once the GDT has been changed, we look through the changed entries and |
| 99 | * see if they're OK. If not, we'll call kill_guest() and the Guest will never | 103 | * see if they're OK. If not, we'll call kill_guest() and the Guest will never |
diff --git a/drivers/net/lguest_net.c b/drivers/net/lguest_net.c index 20df6a848923..cab57911a80e 100644 --- a/drivers/net/lguest_net.c +++ b/drivers/net/lguest_net.c | |||
| @@ -35,6 +35,25 @@ | |||
| 35 | #define MAX_LANS 4 | 35 | #define MAX_LANS 4 |
| 36 | #define NUM_SKBS 8 | 36 | #define NUM_SKBS 8 |
| 37 | 37 | ||
| 38 | /*M:011 Network code master Jeff Garzik points out numerous shortcomings in | ||
| 39 | * this driver if it aspires to greatness. | ||
| 40 | * | ||
| 41 | * Firstly, it doesn't use "NAPI": the networking's New API, and is poorer for | ||
| 42 | * it. As he says "NAPI means system-wide load leveling, across multiple | ||
| 43 | * network interfaces. Lack of NAPI can mean competition at higher loads." | ||
| 44 | * | ||
| 45 | * He also points out that we don't implement set_mac_address, so users cannot | ||
| 46 | * change the device's hardware address. When I asked why one would want to: | ||
| 47 | * "Bonding, and situations where you /do/ want the MAC address to "leak" out | ||
| 48 | * of the host onto the wider net." | ||
| 49 | * | ||
| 50 | * Finally, he would like module unloading: "It is not unrealistic to think of | ||
| 51 | * [un|re|]loading the net support module in an lguest guest. And, adding | ||
| 52 | * module support makes the programmer more responsible, because they now have | ||
| 53 | * to learn to clean up after themselves. Any driver that cannot clean up | ||
| 54 | * after itself is an incomplete driver in my book." | ||
| 55 | :*/ | ||
| 56 | |||
| 38 | /*D:530 The "struct lguestnet_info" contains all the information we need to | 57 | /*D:530 The "struct lguestnet_info" contains all the information we need to |
| 39 | * know about the network device. */ | 58 | * know about the network device. */ |
| 40 | struct lguestnet_info | 59 | struct lguestnet_info |
