diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-07-26 13:41:02 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-26 14:35:16 -0400 |
commit | f938d2c892db0d80d144253d4a7b7083efdbedeb (patch) | |
tree | 1fbc946a9fb59827001a5d4d5224abe5e624e605 | |
parent | dfb68689bf3e3d31dc9fb5c2bde5379a4ca9b0ec (diff) |
lguest: documentation I: Preparation
The netfilter code had very good documentation: the Netfilter Hacking HOWTO.
No one ever read it.
So this time I'm trying something different, using a bit of Knuthiness.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/lguest/extract | 58 | ||||
-rw-r--r-- | Documentation/lguest/lguest.c | 9 | ||||
-rw-r--r-- | drivers/lguest/Makefile | 12 | ||||
-rw-r--r-- | drivers/lguest/README | 47 | ||||
-rw-r--r-- | drivers/lguest/core.c | 7 | ||||
-rw-r--r-- | drivers/lguest/hypercalls.c | 9 | ||||
-rw-r--r-- | drivers/lguest/interrupts_and_traps.c | 13 | ||||
-rw-r--r-- | drivers/lguest/io.c | 8 | ||||
-rw-r--r-- | drivers/lguest/lguest.c | 30 | ||||
-rw-r--r-- | drivers/lguest/lguest_bus.c | 3 | ||||
-rw-r--r-- | drivers/lguest/lguest_user.c | 7 | ||||
-rw-r--r-- | drivers/lguest/page_tables.c | 10 | ||||
-rw-r--r-- | drivers/lguest/segments.c | 11 | ||||
-rw-r--r-- | drivers/lguest/switcher.S | 13 |
14 files changed, 218 insertions, 19 deletions
diff --git a/Documentation/lguest/extract b/Documentation/lguest/extract new file mode 100644 index 000000000000..7730bb6e4b94 --- /dev/null +++ b/Documentation/lguest/extract | |||
@@ -0,0 +1,58 @@ | |||
1 | #! /bin/sh | ||
2 | |||
3 | set -e | ||
4 | |||
5 | PREFIX=$1 | ||
6 | shift | ||
7 | |||
8 | trap 'rm -r $TMPDIR' 0 | ||
9 | TMPDIR=`mktemp -d` | ||
10 | |||
11 | exec 3>/dev/null | ||
12 | for f; do | ||
13 | while IFS=" | ||
14 | " read -r LINE; do | ||
15 | case "$LINE" in | ||
16 | *$PREFIX:[0-9]*:\**) | ||
17 | NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"` | ||
18 | if [ -f $TMPDIR/$NUM ]; then | ||
19 | echo "$TMPDIR/$NUM already exits prior to $f" | ||
20 | exit 1 | ||
21 | fi | ||
22 | exec 3>>$TMPDIR/$NUM | ||
23 | echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM | ||
24 | /bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3 | ||
25 | ;; | ||
26 | *$PREFIX:[0-9]*) | ||
27 | NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"` | ||
28 | if [ -f $TMPDIR/$NUM ]; then | ||
29 | echo "$TMPDIR/$NUM already exits prior to $f" | ||
30 | exit 1 | ||
31 | fi | ||
32 | exec 3>>$TMPDIR/$NUM | ||
33 | echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM | ||
34 | /bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3 | ||
35 | ;; | ||
36 | *:\**) | ||
37 | /bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3 | ||
38 | echo >&3 | ||
39 | exec 3>/dev/null | ||
40 | ;; | ||
41 | *) | ||
42 | /bin/echo "$LINE" >&3 | ||
43 | ;; | ||
44 | esac | ||
45 | done < $f | ||
46 | echo >&3 | ||
47 | exec 3>/dev/null | ||
48 | done | ||
49 | |||
50 | LASTFILE="" | ||
51 | for f in $TMPDIR/*; do | ||
52 | if [ "$LASTFILE" != $(cat $TMPDIR/.$(basename $f) ) ]; then | ||
53 | LASTFILE=$(cat $TMPDIR/.$(basename $f) ) | ||
54 | echo "[ $LASTFILE ]" | ||
55 | fi | ||
56 | cat $f | ||
57 | done | ||
58 | |||
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 62a8133393e1..fc1bf70abfb1 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -1,5 +1,10 @@ | |||
1 | /* Simple program to layout "physical" memory for new lguest guest. | 1 | /*P:100 This is the Launcher code, a simple program which lays out the |
2 | * Linked high to avoid likely physical memory. */ | 2 | * "physical" memory for the new Guest by mapping the kernel image and the |
3 | * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. | ||
4 | * | ||
5 | * The only trick: the Makefile links it at a high address so it will be clear | ||
6 | * of the guest memory region. It means that each Guest cannot have more than | ||
7 | * about 2.5G of memory on a normally configured Host. :*/ | ||
3 | #define _LARGEFILE64_SOURCE | 8 | #define _LARGEFILE64_SOURCE |
4 | #define _GNU_SOURCE | 9 | #define _GNU_SOURCE |
5 | #include <stdio.h> | 10 | #include <stdio.h> |
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile index 55382c7d799c..e5047471c334 100644 --- a/drivers/lguest/Makefile +++ b/drivers/lguest/Makefile | |||
@@ -5,3 +5,15 @@ obj-$(CONFIG_LGUEST_GUEST) += lguest.o lguest_asm.o lguest_bus.o | |||
5 | obj-$(CONFIG_LGUEST) += lg.o | 5 | obj-$(CONFIG_LGUEST) += lg.o |
6 | lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \ | 6 | lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \ |
7 | segments.o io.o lguest_user.o switcher.o | 7 | segments.o io.o lguest_user.o switcher.o |
8 | |||
9 | Preparation Preparation!: PREFIX=P | ||
10 | Guest: PREFIX=G | ||
11 | Drivers: PREFIX=D | ||
12 | Launcher: PREFIX=L | ||
13 | Host: PREFIX=H | ||
14 | Switcher: PREFIX=S | ||
15 | Mastery: PREFIX=M | ||
16 | Beer: | ||
17 | @for f in Preparation Guest Drivers Launcher Host Switcher Mastery; do echo "{==- $$f -==}"; make -s $$f; done; echo "{==-==}" | ||
18 | Preparation Preparation! Guest Drivers Launcher Host Switcher Mastery: | ||
19 | @sh ../../Documentation/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'` | ||
diff --git a/drivers/lguest/README b/drivers/lguest/README new file mode 100644 index 000000000000..b7db39a64c66 --- /dev/null +++ b/drivers/lguest/README | |||
@@ -0,0 +1,47 @@ | |||
1 | Welcome, friend reader, to lguest. | ||
2 | |||
3 | Lguest is an adventure, with you, the reader, as Hero. I can't think of many | ||
4 | 5000-line projects which offer both such capability and glimpses of future | ||
5 | potential; it is an exciting time to be delving into the source! | ||
6 | |||
7 | But be warned; this is an arduous journey of several hours or more! And as we | ||
8 | know, all true Heroes are driven by a Noble Goal. Thus I offer a Beer (or | ||
9 | equivalent) to anyone I meet who has completed this documentation. | ||
10 | |||
11 | So get comfortable and keep your wits about you (both quick and humorous). | ||
12 | Along your way to the Noble Goal, you will also gain masterly insight into | ||
13 | lguest, and hypervisors and x86 virtualization in general. | ||
14 | |||
15 | Our Quest is in seven parts: (best read with C highlighting turned on) | ||
16 | |||
17 | I) Preparation | ||
18 | - In which our potential hero is flown quickly over the landscape for a | ||
19 | taste of its scope. Suitable for the armchair coders and other such | ||
20 | persons of faint constitution. | ||
21 | |||
22 | II) Guest | ||
23 | - Where we encounter the first tantalising wisps of code, and come to | ||
24 | understand the details of the life of a Guest kernel. | ||
25 | |||
26 | III) Drivers | ||
27 | - Whereby the Guest finds its voice and becomes useful, and our | ||
28 | understanding of the Guest is completed. | ||
29 | |||
30 | IV) Launcher | ||
31 | - Where we trace back to the creation of the Guest, and thus begin our | ||
32 | understanding of the Host. | ||
33 | |||
34 | V) Host | ||
35 | - Where we master the Host code, through a long and tortuous journey. | ||
36 | Indeed, it is here that our hero is tested in the Bit of Despair. | ||
37 | |||
38 | VI) Switcher | ||
39 | - Where our understanding of the intertwined nature of Guests and Hosts | ||
40 | is completed. | ||
41 | |||
42 | VII) Mastery | ||
43 | - Where our fully fledged hero grapples with the Great Question: | ||
44 | "What next?" | ||
45 | |||
46 | make Preparation! | ||
47 | Rusty Russell. | ||
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index ce909ec57499..2cea0c80c992 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
@@ -1,5 +1,8 @@ | |||
1 | /* World's simplest hypervisor, to test paravirt_ops and show | 1 | /*P:400 This contains run_guest() which actually calls into the Host<->Guest |
2 | * unbelievers that virtualization is the future. Plus, it's fun! */ | 2 | * Switcher and analyzes the return, such as determining if the Guest wants the |
3 | * Host to do something. This file also contains useful helper routines, and a | ||
4 | * couple of non-obvious setup and teardown pieces which were implemented after | ||
5 | * days of debugging pain. :*/ | ||
3 | #include <linux/module.h> | 6 | #include <linux/module.h> |
4 | #include <linux/stringify.h> | 7 | #include <linux/stringify.h> |
5 | #include <linux/stddef.h> | 8 | #include <linux/stddef.h> |
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index ea52ca451f74..fb546b046445 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
@@ -1,5 +1,10 @@ | |||
1 | /* Actual hypercalls, which allow guests to actually do something. | 1 | /*P:500 Just as userspace programs request kernel operations through a system |
2 | Copyright (C) 2006 Rusty Russell IBM Corporation | 2 | * call, the Guest requests Host operations through a "hypercall". You might |
3 | * notice this nomenclature doesn't really follow any logic, but the name has | ||
4 | * been around for long enough that we're stuck with it. As you'd expect, this | ||
5 | * code is basically one big switch statement. :*/ | ||
6 | |||
7 | /* Copyright (C) 2006 Rusty Russell IBM Corporation | ||
3 | 8 | ||
4 | This program is free software; you can redistribute it and/or modify | 9 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by | 10 | it under the terms of the GNU General Public License as published by |
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index bee029bb2c7b..b2647974e1a7 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
@@ -1,3 +1,16 @@ | |||
1 | /*P:800 Interrupts (traps) are complicated enough to earn their own file. | ||
2 | * There are three classes of interrupts: | ||
3 | * | ||
4 | * 1) Real hardware interrupts which occur while we're running the Guest, | ||
5 | * 2) Interrupts for virtual devices attached to the Guest, and | ||
6 | * 3) Traps and faults from the Guest. | ||
7 | * | ||
8 | * Real hardware interrupts must be delivered to the Host, not the Guest. | ||
9 | * Virtual interrupts must be delivered to the Guest, but we make them look | ||
10 | * just like real hardware would deliver them. Traps from the Guest can be set | ||
11 | * up to go directly back into the Guest, but sometimes the Host wants to see | ||
12 | * them first, so we also have a way of "reflecting" them into the Guest as if | ||
13 | * they had been delivered to it directly. :*/ | ||
1 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
2 | #include "lg.h" | 15 | #include "lg.h" |
3 | 16 | ||
diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c index c8eb79266991..d2f02f0653ca 100644 --- a/drivers/lguest/io.c +++ b/drivers/lguest/io.c | |||
@@ -1,5 +1,9 @@ | |||
1 | /* Simple I/O model for guests, based on shared memory. | 1 | /*P:300 The I/O mechanism in lguest is simple yet flexible, allowing the Guest |
2 | * Copyright (C) 2006 Rusty Russell IBM Corporation | 2 | * to talk to the Launcher or directly to another Guest. It uses familiar |
3 | * concepts of DMA and interrupts, plus some neat code stolen from | ||
4 | * futexes... :*/ | ||
5 | |||
6 | /* Copyright (C) 2006 Rusty Russell IBM Corporation | ||
3 | * | 7 | * |
4 | * This program is free software; you can redistribute it and/or modify | 8 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 9 | * it under the terms of the GNU General Public License as published by |
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index 18dade06d4a9..e7d128312b23 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c | |||
@@ -1,6 +1,32 @@ | |||
1 | /* | 1 | /*P:010 |
2 | * Lguest specific paravirt-ops implementation | 2 | * A hypervisor allows multiple Operating Systems to run on a single machine. |
3 | * To quote David Wheeler: "Any problem in computer science can be solved with | ||
4 | * another layer of indirection." | ||
5 | * | ||
6 | * We keep things simple in two ways. First, we start with a normal Linux | ||
7 | * kernel and insert a module (lg.ko) which allows us to run other Linux | ||
8 | * kernels the same way we'd run processes. We call the first kernel the Host, | ||
9 | * and the others the Guests. The program which sets up and configures Guests | ||
10 | * (such as the example in Documentation/lguest/lguest.c) is called the | ||
11 | * Launcher. | ||
12 | * | ||
13 | * Secondly, we only run specially modified Guests, not normal kernels. When | ||
14 | * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets | ||
15 | * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows | ||
16 | * how to be a Guest. This means that you can use the same kernel you boot | ||
17 | * normally (ie. as a Host) as a Guest. | ||
3 | * | 18 | * |
19 | * These Guests know that they cannot do privileged operations, such as disable | ||
20 | * interrupts, and that they have to ask the Host to do such things explicitly. | ||
21 | * This file consists of all the replacements for such low-level native | ||
22 | * hardware operations: these special Guest versions call the Host. | ||
23 | * | ||
24 | * So how does the kernel know it's a Guest? The Guest starts at a special | ||
25 | * entry point marked with a magic string, which sets up a few things then | ||
26 | * calls here. We replace the native functions in "struct paravirt_ops" | ||
27 | * with our Guest versions, then boot like normal. :*/ | ||
28 | |||
29 | /* | ||
4 | * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. | 30 | * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. |
5 | * | 31 | * |
6 | * This program is free software; you can redistribute it and/or modify | 32 | * This program is free software; you can redistribute it and/or modify |
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c index 18d6ab21a43b..9a22d199502e 100644 --- a/drivers/lguest/lguest_bus.c +++ b/drivers/lguest/lguest_bus.c | |||
@@ -1,3 +1,6 @@ | |||
1 | /*P:050 Lguest guests use a very simple bus for devices. It's a simple array | ||
2 | * of device descriptors contained just above the top of normal memory. The | ||
3 | * lguest bus is 80% tedious boilerplate code. :*/ | ||
1 | #include <linux/init.h> | 4 | #include <linux/init.h> |
2 | #include <linux/bootmem.h> | 5 | #include <linux/bootmem.h> |
3 | #include <linux/lguest_bus.h> | 6 | #include <linux/lguest_bus.h> |
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index e90d7a783daf..6ae86f20ce3d 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -1,4 +1,9 @@ | |||
1 | /* Userspace control of the guest, via /dev/lguest. */ | 1 | /*P:200 This contains all the /dev/lguest code, whereby the userspace launcher |
2 | * controls and communicates with the Guest. For example, the first write will | ||
3 | * tell us the memory size, pagetable, entry point and kernel address offset. | ||
4 | * A read will run the Guest until a signal is pending (-EINTR), or the Guest | ||
5 | * does a DMA out to the Launcher. Writes are also used to get a DMA buffer | ||
6 | * registered by the Guest and to send the Guest an interrupt. :*/ | ||
2 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
3 | #include <linux/miscdevice.h> | 8 | #include <linux/miscdevice.h> |
4 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 1b0ba09b1269..f9ca50d80466 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
@@ -1,5 +1,11 @@ | |||
1 | /* Shadow page table operations. | 1 | /*P:700 The pagetable code, on the other hand, still shows the scars of |
2 | * Copyright (C) Rusty Russell IBM Corporation 2006. | 2 | * previous encounters. It's functional, and as neat as it can be in the |
3 | * circumstances, but be wary, for these things are subtle and break easily. | ||
4 | * The Guest provides a virtual to physical mapping, but we can neither trust | ||
5 | * it nor use it: we verify and convert it here to point the hardware to the | ||
6 | * actual Guest pages when running the Guest. :*/ | ||
7 | |||
8 | /* Copyright (C) Rusty Russell IBM Corporation 2006. | ||
3 | * GPL v2 and any later version */ | 9 | * GPL v2 and any later version */ |
4 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
5 | #include <linux/types.h> | 11 | #include <linux/types.h> |
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 1b2cfe89dcd5..c4fc7293b84b 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c | |||
@@ -1,3 +1,14 @@ | |||
1 | /*P:600 The x86 architecture has segments, which involve a table of descriptors | ||
2 | * which can be used to do funky things with virtual address interpretation. | ||
3 | * We originally used to use segments so the Guest couldn't alter the | ||
4 | * Guest<->Host Switcher, and then we had to trim Guest segments, and restore | ||
5 | * for userspace per-thread segments, but trim again on userspace->kernel | ||
6 | * transitions... This nightmarish creation was contained within this file, | ||
7 | * where we knew not to tread without heavy armament and a change of underwear. | ||
8 | * | ||
9 | * In these modern times, the segment handling code consists of simple sanity | ||
10 | * checks, and the worst you'll experience reading this code is butterfly-rash | ||
11 | * from frolicking through its parklike serenity. :*/ | ||
1 | #include "lg.h" | 12 | #include "lg.h" |
2 | 13 | ||
3 | static int desc_ok(const struct desc_struct *gdt) | 14 | static int desc_ok(const struct desc_struct *gdt) |
diff --git a/drivers/lguest/switcher.S b/drivers/lguest/switcher.S index eadd4cc299d2..e7cb8c123558 100644 --- a/drivers/lguest/switcher.S +++ b/drivers/lguest/switcher.S | |||
@@ -1,10 +1,11 @@ | |||
1 | /* This code sits at 0xFFC00000 to do the low-level guest<->host switch. | 1 | /*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level |
2 | * Guest<->Host switch. It is as simple as it can be made, but it's naturally | ||
3 | * very specific to x86. | ||
4 | * | ||
5 | * You have now completed Preparation. If this has whetted your appetite; if you | ||
6 | * are feeling invigorated and refreshed then the next, more challenging stage | ||
7 | * can be found in "make Guest". :*/ | ||
2 | 8 | ||
3 | There is are two pages above us for this CPU (struct lguest_pages). | ||
4 | The second page (struct lguest_ro_state) becomes read-only after the | ||
5 | context switch. The first page (the stack for traps) remains writable, | ||
6 | but while we're in here, the guest cannot be running. | ||
7 | */ | ||
8 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
9 | #include <asm/asm-offsets.h> | 10 | #include <asm/asm-offsets.h> |
10 | #include "lg.h" | 11 | #include "lg.h" |