diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-07-26 13:41:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-26 14:35:17 -0400 |
commit | f8f0fdcd40449d318f8dc30c1b361b0b7f54134a (patch) | |
tree | 09bdfa13377de602bcd0f363a417a93666115c7e /drivers/lguest/switcher.S | |
parent | bff672e630a015d5b54c8bfb16160b7edc39a57c (diff) |
lguest: documentation VI: Switcher
Documentation: The Switcher
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/lguest/switcher.S')
-rw-r--r-- | drivers/lguest/switcher.S | 271 |
1 files changed, 229 insertions, 42 deletions
diff --git a/drivers/lguest/switcher.S b/drivers/lguest/switcher.S index e7cb8c123558..d418179ea6b5 100644 --- a/drivers/lguest/switcher.S +++ b/drivers/lguest/switcher.S | |||
@@ -6,41 +6,131 @@ | |||
6 | * are feeling invigorated and refreshed then the next, more challenging stage | 6 | * are feeling invigorated and refreshed then the next, more challenging stage |
7 | * can be found in "make Guest". :*/ | 7 | * can be found in "make Guest". :*/ |
8 | 8 | ||
9 | /*S:100 | ||
10 | * Welcome to the Switcher itself! | ||
11 | * | ||
12 | * This file contains the low-level code which changes the CPU to run the Guest | ||
13 | * code, and returns to the Host when something happens. Understand this, and | ||
14 | * you understand the heart of our journey. | ||
15 | * | ||
16 | * Because this is in assembler rather than C, our tale switches from prose to | ||
17 | * verse. First I tried limericks: | ||
18 | * | ||
19 | * There once was an eax reg, | ||
20 | * To which our pointer was fed, | ||
21 | * It needed an add, | ||
22 | * Which asm-offsets.h had | ||
23 | * But this limerick is hurting my head. | ||
24 | * | ||
25 | * Next I tried haikus, but fitting the required reference to the seasons in | ||
26 | * every stanza was quickly becoming tiresome: | ||
27 | * | ||
28 | * The %eax reg | ||
29 | * Holds "struct lguest_pages" now: | ||
30 | * Cherry blossoms fall. | ||
31 | * | ||
32 | * Then I started with Heroic Verse, but the rhyming requirement leeched away | ||
33 | * the content density and led to some uniquely awful oblique rhymes: | ||
34 | * | ||
35 | * These constants are coming from struct offsets | ||
36 | * For use within the asm switcher text. | ||
37 | * | ||
38 | * Finally, I settled for something between heroic hexameter, and normal prose | ||
39 | * with inappropriate linebreaks. Anyway, it ain't no Shakespeare. | ||
40 | */ | ||
41 | |||
42 | // Not all kernel headers work from assembler | ||
43 | // But these ones are needed: the ENTRY() define | ||
44 | // And constants extracted from struct offsets | ||
45 | // To avoid magic numbers and breakage: | ||
46 | // Should they change the compiler can't save us | ||
47 | // Down here in the depths of assembler code. | ||
9 | #include <linux/linkage.h> | 48 | #include <linux/linkage.h> |
10 | #include <asm/asm-offsets.h> | 49 | #include <asm/asm-offsets.h> |
11 | #include "lg.h" | 50 | #include "lg.h" |
12 | 51 | ||
52 | // We mark the start of the code to copy | ||
53 | // It's placed in .text tho it's never run here | ||
54 | // You'll see the trick macro at the end | ||
55 | // Which interleaves data and text to effect. | ||
13 | .text | 56 | .text |
14 | ENTRY(start_switcher_text) | 57 | ENTRY(start_switcher_text) |
15 | 58 | ||
16 | /* %eax points to lguest pages for this CPU. %ebx contains cr3 value. | 59 | // When we reach switch_to_guest we have just left |
17 | All normal registers can be clobbered! */ | 60 | // The safe and comforting shores of C code |
61 | // %eax has the "struct lguest_pages" to use | ||
62 | // Where we save state and still see it from the Guest | ||
63 | // And %ebx holds the Guest shadow pagetable: | ||
64 | // Once set we have truly left Host behind. | ||
18 | ENTRY(switch_to_guest) | 65 | ENTRY(switch_to_guest) |
19 | /* Save host segments on host stack. */ | 66 | // We told gcc all its regs could fade, |
67 | // Clobbered by our journey into the Guest | ||
68 | // We could have saved them, if we tried | ||
69 | // But time is our master and cycles count. | ||
70 | |||
71 | // Segment registers must be saved for the Host | ||
72 | // We push them on the Host stack for later | ||
20 | pushl %es | 73 | pushl %es |
21 | pushl %ds | 74 | pushl %ds |
22 | pushl %gs | 75 | pushl %gs |
23 | pushl %fs | 76 | pushl %fs |
24 | /* With CONFIG_FRAME_POINTER, gcc doesn't let us clobber this! */ | 77 | // But the compiler is fickle, and heeds |
78 | // No warning of %ebp clobbers | ||
79 | // When frame pointers are used. That register | ||
80 | // Must be saved and restored or chaos strikes. | ||
25 | pushl %ebp | 81 | pushl %ebp |
26 | /* Save host stack. */ | 82 | // The Host's stack is done, now save it away |
83 | // In our "struct lguest_pages" at offset | ||
84 | // Distilled into asm-offsets.h | ||
27 | movl %esp, LGUEST_PAGES_host_sp(%eax) | 85 | movl %esp, LGUEST_PAGES_host_sp(%eax) |
28 | /* Switch to guest stack: if we get NMI we expect to be there. */ | 86 | |
87 | // All saved and there's now five steps before us: | ||
88 | // Stack, GDT, IDT, TSS | ||
89 | // And last of all the page tables are flipped. | ||
90 | |||
91 | // Yet beware that our stack pointer must be | ||
92 | // Always valid lest an NMI hits | ||
93 | // %edx does the duty here as we juggle | ||
94 | // %eax is lguest_pages: our stack lies within. | ||
29 | movl %eax, %edx | 95 | movl %eax, %edx |
30 | addl $LGUEST_PAGES_regs, %edx | 96 | addl $LGUEST_PAGES_regs, %edx |
31 | movl %edx, %esp | 97 | movl %edx, %esp |
32 | /* Switch to guest's GDT, IDT. */ | 98 | |
99 | // The Guest's GDT we so carefully | ||
100 | // Placed in the "struct lguest_pages" before | ||
33 | lgdt LGUEST_PAGES_guest_gdt_desc(%eax) | 101 | lgdt LGUEST_PAGES_guest_gdt_desc(%eax) |
102 | |||
103 | // The Guest's IDT we did partially | ||
104 | // Move to the "struct lguest_pages" as well. | ||
34 | lidt LGUEST_PAGES_guest_idt_desc(%eax) | 105 | lidt LGUEST_PAGES_guest_idt_desc(%eax) |
35 | /* Switch to guest's TSS while GDT still writable. */ | 106 | |
107 | // The TSS entry which controls traps | ||
108 | // Must be loaded up with "ltr" now: | ||
109 | // For after we switch over our page tables | ||
110 | // It (as the rest) will be writable no more. | ||
111 | // (The GDT entry TSS needs | ||
112 | // Changes type when we load it: damn Intel!) | ||
36 | movl $(GDT_ENTRY_TSS*8), %edx | 113 | movl $(GDT_ENTRY_TSS*8), %edx |
37 | ltr %dx | 114 | ltr %dx |
38 | /* Set host's TSS GDT entry to available (clear byte 5 bit 2). */ | 115 | |
116 | // Look back now, before we take this last step! | ||
117 | // The Host's TSS entry was also marked used; | ||
118 | // Let's clear it again, ere we return. | ||
119 | // The GDT descriptor of the Host | ||
120 | // Points to the table after two "size" bytes | ||
39 | movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx | 121 | movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx |
122 | // Clear the type field of "used" (byte 5, the second bit: 0x02) | ||
40 | andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx) | 123 | andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx) |
41 | /* Switch to guest page tables: lguest_pages->state now read-only. */ | 124 | |
125 | // Once our page table's switched, the Guest is live! | ||
126 | // The Host fades as we run this final step. | ||
127 | // Our "struct lguest_pages" is now read-only. | ||
42 | movl %ebx, %cr3 | 128 | movl %ebx, %cr3 |
43 | /* Restore guest regs */ | 129 | |
130 | // The page table change did one tricky thing: | ||
131 | // The Guest's register page has been mapped | ||
132 | // Writable onto our %esp (stack) -- | ||
133 | // We can simply pop off all Guest regs. | ||
44 | popl %ebx | 134 | popl %ebx |
45 | popl %ecx | 135 | popl %ecx |
46 | popl %edx | 136 | popl %edx |
@@ -52,12 +142,27 @@ ENTRY(switch_to_guest) | |||
52 | popl %fs | 142 | popl %fs |
53 | popl %ds | 143 | popl %ds |
54 | popl %es | 144 | popl %es |
55 | /* Skip error code and trap number */ | 145 | |
146 | // Near the base of the stack lurk two strange fields | ||
147 | // Which we fill as we exit the Guest | ||
148 | // These are the trap number and its error | ||
149 | // We can simply step past them on our way. | ||
56 | addl $8, %esp | 150 | addl $8, %esp |
151 | |||
152 | // The last five stack slots hold return address | ||
153 | // And everything needed to change privilege | ||
154 | // Into the Guest privilege level of 1, | ||
155 | // And the stack where the Guest had last left it. | ||
156 | // Interrupts are turned back on: we are Guest. | ||
57 | iret | 157 | iret |
58 | 158 | ||
159 | // There are two paths where we switch to the Host | ||
160 | // So we put the routine in a macro. | ||
161 | // We are on our way home, back to the Host | ||
162 | // Interrupted out of the Guest, we come here. | ||
59 | #define SWITCH_TO_HOST \ | 163 | #define SWITCH_TO_HOST \ |
60 | /* Save guest state */ \ | 164 | /* We save the Guest state: all registers first \ |
165 | * Laid out just as "struct lguest_regs" defines */ \ | ||
61 | pushl %es; \ | 166 | pushl %es; \ |
62 | pushl %ds; \ | 167 | pushl %ds; \ |
63 | pushl %fs; \ | 168 | pushl %fs; \ |
@@ -69,58 +174,119 @@ ENTRY(switch_to_guest) | |||
69 | pushl %edx; \ | 174 | pushl %edx; \ |
70 | pushl %ecx; \ | 175 | pushl %ecx; \ |
71 | pushl %ebx; \ | 176 | pushl %ebx; \ |
72 | /* Load lguest ds segment for convenience. */ \ | 177 | /* Our stack and our code are using segments \ |
178 | * Set in the TSS and IDT \ | ||
179 | * Yet if we were to touch data we'd use \ | ||
180 | * Whatever data segment the Guest had. \ | ||
181 | * Load the lguest ds segment for now. */ \ | ||
73 | movl $(LGUEST_DS), %eax; \ | 182 | movl $(LGUEST_DS), %eax; \ |
74 | movl %eax, %ds; \ | 183 | movl %eax, %ds; \ |
75 | /* Figure out where we are, based on stack (at top of regs). */ \ | 184 | /* So where are we? Which CPU, which struct? \ |
185 | * The stack is our clue: our TSS sets \ | ||
186 | * It at the end of "struct lguest_pages" \ | ||
187 | * And we then pushed and pushed and pushed Guest regs: \ | ||
188 | * Now stack points atop the "struct lguest_regs". \ | ||
189 | * Subtract that offset, and we find our struct. */ \ | ||
76 | movl %esp, %eax; \ | 190 | movl %esp, %eax; \ |
77 | subl $LGUEST_PAGES_regs, %eax; \ | 191 | subl $LGUEST_PAGES_regs, %eax; \ |
78 | /* Put trap number in %ebx before we switch cr3 and lose it. */ \ | 192 | /* Save our trap number: the switch will obscure it \ |
193 | * (The Guest regs are not mapped here in the Host) \ | ||
194 | * %ebx holds it safe for deliver_to_host */ \ | ||
79 | movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \ | 195 | movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \ |
80 | /* Switch to host page tables (host GDT, IDT and stack are in host \ | 196 | /* The Host GDT, IDT and stack! \ |
81 | mem, so need this first) */ \ | 197 | * All these lie safely hidden from the Guest: \ |
198 | * We must return to the Host page tables \ | ||
199 | * (Hence that was saved in struct lguest_pages) */ \ | ||
82 | movl LGUEST_PAGES_host_cr3(%eax), %edx; \ | 200 | movl LGUEST_PAGES_host_cr3(%eax), %edx; \ |
83 | movl %edx, %cr3; \ | 201 | movl %edx, %cr3; \ |
84 | /* Set guest's TSS to available (clear byte 5 bit 2). */ \ | 202 | /* As before, when we looked back at the Host \ |
203 | * As we left and marked TSS unused \ | ||
204 | * So must we now for the Guest left behind. */ \ | ||
85 | andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \ | 205 | andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \ |
86 | /* Switch to host's GDT & IDT. */ \ | 206 | /* Switch to Host's GDT, IDT. */ \ |
87 | lgdt LGUEST_PAGES_host_gdt_desc(%eax); \ | 207 | lgdt LGUEST_PAGES_host_gdt_desc(%eax); \ |
88 | lidt LGUEST_PAGES_host_idt_desc(%eax); \ | 208 | lidt LGUEST_PAGES_host_idt_desc(%eax); \ |
89 | /* Switch to host's stack. */ \ | 209 | /* Restore the Host's stack where its saved regs lie */ \ |
90 | movl LGUEST_PAGES_host_sp(%eax), %esp; \ | 210 | movl LGUEST_PAGES_host_sp(%eax), %esp; \ |
91 | /* Switch to host's TSS */ \ | 211 | /* Last the TSS: our Host is complete */ \ |
92 | movl $(GDT_ENTRY_TSS*8), %edx; \ | 212 | movl $(GDT_ENTRY_TSS*8), %edx; \ |
93 | ltr %dx; \ | 213 | ltr %dx; \ |
214 | /* Restore now the regs saved right at the first. */ \ | ||
94 | popl %ebp; \ | 215 | popl %ebp; \ |
95 | popl %fs; \ | 216 | popl %fs; \ |
96 | popl %gs; \ | 217 | popl %gs; \ |
97 | popl %ds; \ | 218 | popl %ds; \ |
98 | popl %es | 219 | popl %es |
99 | 220 | ||
100 | /* Return to run_guest_once. */ | 221 | // Here's where we come when the Guest has just trapped: |
222 | // (Which trap we'll see has been pushed on the stack). | ||
223 | // We need only switch back, and the Host will decode | ||
224 | // Why we came home, and what needs to be done. | ||
101 | return_to_host: | 225 | return_to_host: |
102 | SWITCH_TO_HOST | 226 | SWITCH_TO_HOST |
103 | iret | 227 | iret |
104 | 228 | ||
229 | // An interrupt, with some cause external | ||
230 | // Has ajerked us rudely from the Guest's code | ||
231 | // Again we must return home to the Host | ||
105 | deliver_to_host: | 232 | deliver_to_host: |
106 | SWITCH_TO_HOST | 233 | SWITCH_TO_HOST |
107 | /* Decode IDT and jump to hosts' irq handler. When that does iret, it | 234 | // But now we must go home via that place |
108 | * will return to run_guest_once. This is a feature. */ | 235 | // Where that interrupt was supposed to go |
236 | // Had we not been ensconced, running the Guest. | ||
237 | // Here we see the cleverness of our stack: | ||
238 | // The Host stack is formed like an interrupt | ||
239 | // With EIP, CS and EFLAGS layered. | ||
240 | // Interrupt handlers end with "iret" | ||
241 | // And that will take us home at long long last. | ||
242 | |||
243 | // But first we must find the handler to call! | ||
244 | // The IDT descriptor for the Host | ||
245 | // Has two bytes for size, and four for address: | ||
246 | // %edx will hold it for us for now. | ||
109 | movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx | 247 | movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx |
248 | // We now know the table address we need, | ||
249 | // And saved the trap's number inside %ebx. | ||
250 | // Yet the pointer to the handler is smeared | ||
251 | // Across the bits of the table entry. | ||
252 | // What oracle can tell us how to extract | ||
253 | // From such a convoluted encoding? | ||
254 | // I consulted gcc, and it gave | ||
255 | // These instructions, which I gladly credit: | ||
110 | leal (%edx,%ebx,8), %eax | 256 | leal (%edx,%ebx,8), %eax |
111 | movzwl (%eax),%edx | 257 | movzwl (%eax),%edx |
112 | movl 4(%eax), %eax | 258 | movl 4(%eax), %eax |
113 | xorw %ax, %ax | 259 | xorw %ax, %ax |
114 | orl %eax, %edx | 260 | orl %eax, %edx |
261 | // Now the address of the handler's in %edx | ||
262 | // We call it now: its "iret" takes us home. | ||
115 | jmp *%edx | 263 | jmp *%edx |
116 | 264 | ||
117 | /* Real hardware interrupts are delivered straight to the host. Others | 265 | // Every interrupt can come to us here |
118 | cause us to return to run_guest_once so it can decide what to do. Note | 266 | // But we must truly tell each apart. |
119 | that some of these are overridden by the guest to deliver directly, and | 267 | // They number two hundred and fifty six |
120 | never enter here (see load_guest_idt_entry). */ | 268 | // And each must land in a different spot, |
269 | // Push its number on stack, and join the stream. | ||
270 | |||
271 | // And worse, a mere seven of the traps stand apart | ||
272 | // And push on their stack an addition: | ||
273 | // An error number, thirty two bits long | ||
274 | // So we punish the other two forty-nine | ||
275 | // And make them push a zero so they match. | ||
276 | |||
277 | // Yet two fifty six entries is long | ||
278 | // And all will look most the same as the last | ||
279 | // So we create a macro which can make | ||
280 | // As many entries as we need to fill. | ||
281 | |||
282 | // Note the change to .data then .text: | ||
283 | // We plant the address of each entry | ||
284 | // Into a (data) table for the Host | ||
285 | // To know where each Guest interrupt should go. | ||
121 | .macro IRQ_STUB N TARGET | 286 | .macro IRQ_STUB N TARGET |
122 | .data; .long 1f; .text; 1: | 287 | .data; .long 1f; .text; 1: |
123 | /* Make an error number for most traps, which don't have one. */ | 288 | // Trap eight, ten through fourteen and seventeen |
289 | // Supply an error number. Else zero. | ||
124 | .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17) | 290 | .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17) |
125 | pushl $0 | 291 | pushl $0 |
126 | .endif | 292 | .endif |
@@ -129,6 +295,8 @@ deliver_to_host: | |||
129 | ALIGN | 295 | ALIGN |
130 | .endm | 296 | .endm |
131 | 297 | ||
298 | // This macro creates numerous entries | ||
299 | // Using GAS macros which out-power C's. | ||
132 | .macro IRQ_STUBS FIRST LAST TARGET | 300 | .macro IRQ_STUBS FIRST LAST TARGET |
133 | irq=\FIRST | 301 | irq=\FIRST |
134 | .rept \LAST-\FIRST+1 | 302 | .rept \LAST-\FIRST+1 |
@@ -137,24 +305,43 @@ deliver_to_host: | |||
137 | .endr | 305 | .endr |
138 | .endm | 306 | .endm |
139 | 307 | ||
140 | /* We intercept every interrupt, because we may need to switch back to | 308 | // Here's the marker for our pointer table |
141 | * host. Unfortunately we can't tell them apart except by entry | 309 | // Laid in the data section just before |
142 | * point, so we need 256 entry points. | 310 | // Each macro places the address of code |
143 | */ | 311 | // Forming an array: each one points to text |
312 | // Which handles interrupt in its turn. | ||
144 | .data | 313 | .data |
145 | .global default_idt_entries | 314 | .global default_idt_entries |
146 | default_idt_entries: | 315 | default_idt_entries: |
147 | .text | 316 | .text |
148 | IRQ_STUBS 0 1 return_to_host /* First two traps */ | 317 | // The first two traps go straight back to the Host |
149 | IRQ_STUB 2 handle_nmi /* NMI */ | 318 | IRQ_STUBS 0 1 return_to_host |
150 | IRQ_STUBS 3 31 return_to_host /* Rest of traps */ | 319 | // We'll say nothing, yet, about NMI |
151 | IRQ_STUBS 32 127 deliver_to_host /* Real interrupts */ | 320 | IRQ_STUB 2 handle_nmi |
152 | IRQ_STUB 128 return_to_host /* System call (overridden) */ | 321 | // Other traps also return to the Host |
153 | IRQ_STUBS 129 255 deliver_to_host /* Other real interrupts */ | 322 | IRQ_STUBS 3 31 return_to_host |
154 | 323 | // All interrupts go via their handlers | |
155 | /* We ignore NMI and return. */ | 324 | IRQ_STUBS 32 127 deliver_to_host |
325 | // 'Cept system calls coming from userspace | ||
326 | // Are to go to the Guest, never the Host. | ||
327 | IRQ_STUB 128 return_to_host | ||
328 | IRQ_STUBS 129 255 deliver_to_host | ||
329 | |||
330 | // The NMI, what a fabulous beast | ||
331 | // Which swoops in and stops us no matter that | ||
332 | // We're suspended between heaven and hell, | ||
333 | // (Or more likely between the Host and Guest) | ||
334 | // When in it comes! We are dazed and confused | ||
335 | // So we do the simplest thing which one can. | ||
336 | // Though we've pushed the trap number and zero | ||
337 | // We discard them, return, and hope we live. | ||
156 | handle_nmi: | 338 | handle_nmi: |
157 | addl $8, %esp | 339 | addl $8, %esp |
158 | iret | 340 | iret |
159 | 341 | ||
342 | // We are done; all that's left is Mastery | ||
343 | // And "make Mastery" is a journey long | ||
344 | // Designed to make your fingers itch to code. | ||
345 | |||
346 | // Here ends the text, the file and poem. | ||
160 | ENTRY(end_switcher_text) | 347 | ENTRY(end_switcher_text) |