lguest: documentation update
[deliverable/linux.git] / drivers / lguest / x86 / switcher_32.S
CommitLineData
f938d2c8
RR
1/*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level
2 * Guest<->Host switch. It is as simple as it can be made, but it's naturally
3 * very specific to x86.
4 *
5 * You have now completed Preparation. If this has whetted your appetite; if you
6 * are feeling invigorated and refreshed then the next, more challenging stage
7 * can be found in "make Guest". :*/
d7e28ffe 8
e1e72965
RR
9/*M:012 Lguest is meant to be simple: my rule of thumb is that 1% more LOC must
10 * gain at least 1% more performance. Since neither LOC nor performance can be
11 * measured beforehand, it generally means implementing a feature then deciding
12 * if it's worth it. And once it's implemented, who can say no?
13 *
14 * This is why I haven't implemented this idea myself. I want to, but I
15 * haven't. You could, though.
16 *
17 * The main place where lguest performance sucks is Guest page faulting. When
18 * a Guest userspace process hits an unmapped page we switch back to the Host,
19 * walk the page tables, find it's not mapped, switch back to the Guest page
20 * fault handler, which calls a hypercall to set the page table entry, then
21 * finally returns to userspace. That's two round-trips.
22 *
23 * If we had a small walker in the Switcher, we could quickly check the Guest
24 * page table and if the page isn't mapped, immediately reflect the fault back
25 * into the Guest. This means the Switcher would have to know the top of the
26 * Guest page table and the page fault handler address.
27 *
28 * For simplicity, the Guest should only handle the case where the privilege
29 * level of the fault is 3 and probably only not present or write faults. It
30 * should also detect recursive faults, and hand the original fault to the
31 * Host (which is actually really easy).
32 *
33 * Two questions remain. Would the performance gain outweigh the complexity?
34 * And who would write the verse documenting it? :*/
35
36/*M:011 Lguest64 handles NMI. This gave me NMI envy (until I looked at their
37 * code). It's worth doing though, since it would let us use oprofile in the
38 * Host when a Guest is running. :*/
39
f8f0fdcd
RR
40/*S:100
41 * Welcome to the Switcher itself!
42 *
43 * This file contains the low-level code which changes the CPU to run the Guest
44 * code, and returns to the Host when something happens. Understand this, and
45 * you understand the heart of our journey.
46 *
47 * Because this is in assembler rather than C, our tale switches from prose to
48 * verse. First I tried limericks:
49 *
50 * There once was an eax reg,
51 * To which our pointer was fed,
52 * It needed an add,
53 * Which asm-offsets.h had
54 * But this limerick is hurting my head.
55 *
56 * Next I tried haikus, but fitting the required reference to the seasons in
57 * every stanza was quickly becoming tiresome:
58 *
59 * The %eax reg
60 * Holds "struct lguest_pages" now:
61 * Cherry blossoms fall.
62 *
63 * Then I started with Heroic Verse, but the rhyming requirement leeched away
64 * the content density and led to some uniquely awful oblique rhymes:
65 *
66 * These constants are coming from struct offsets
67 * For use within the asm switcher text.
68 *
69 * Finally, I settled for something between heroic hexameter, and normal prose
70 * with inappropriate linebreaks. Anyway, it aint no Shakespeare.
71 */
72
73// Not all kernel headers work from assembler
74// But these ones are needed: the ENTRY() define
75// And constants extracted from struct offsets
76// To avoid magic numbers and breakage:
77// Should they change the compiler can't save us
78// Down here in the depths of assembler code.
d7e28ffe
RR
79#include <linux/linkage.h>
80#include <asm/asm-offsets.h>
0d027c01 81#include <asm/page.h>
625efab1
JS
82#include <asm/segment.h>
83#include <asm/lguest.h>
d7e28ffe 84
f8f0fdcd
RR
85// We mark the start of the code to copy
86// It's placed in .text tho it's never run here
87// You'll see the trick macro at the end
88// Which interleaves data and text to effect.
d7e28ffe
RR
89.text
90ENTRY(start_switcher_text)
91
f8f0fdcd
RR
92// When we reach switch_to_guest we have just left
93// The safe and comforting shores of C code
94// %eax has the "struct lguest_pages" to use
95// Where we save state and still see it from the Guest
96// And %ebx holds the Guest shadow pagetable:
97// Once set we have truly left Host behind.
d7e28ffe 98ENTRY(switch_to_guest)
f8f0fdcd
RR
// Entry contract (as used by the code below):
//   %eax = address of the "struct lguest_pages" for this CPU
//   %ebx = Guest shadow pagetable, loaded into %cr3
// Saves the Host's %es, %ds, %gs, %fs, %ebp and then %esp (into
// LGUEST_PAGES_host_sp), loads the Guest GDT, IDT and TSS, switches
// %cr3, pops the Guest registers and finally "iret"s into the Guest.
99 // We told gcc all its regs could fade,
100 // Clobbered by our journey into the Guest
101 // We could have saved them, if we tried
102 // But time is our master and cycles count.
103
104 // Segment registers must be saved for the Host
105 // We push them on the Host stack for later
d7e28ffe
RR
106 pushl %es
107 pushl %ds
108 pushl %gs
109 pushl %fs
f8f0fdcd
RR
110 // But the compiler is fickle, and heeds
111 // No warning of %ebp clobbers
112 // When frame pointers are used. That register
113 // Must be saved and restored or chaos strikes.
d7e28ffe 114 pushl %ebp
f8f0fdcd
RR
115 // The Host's stack is done, now save it away
116 // In our "struct lguest_pages" at offset
117 // Distilled into asm-offsets.h
d7e28ffe 118 movl %esp, LGUEST_PAGES_host_sp(%eax)
f8f0fdcd
RR
119
120 // All saved and there's now five steps before us:
121 // Stack, GDT, IDT, TSS
e1e72965 122 // Then last of all the page tables are flipped.
f8f0fdcd
RR
123
124 // Yet beware that our stack pointer must be
125 // Always valid lest an NMI hits
126 // %edx does the duty here as we juggle
127 // %eax is lguest_pages: our stack lies within.
d7e28ffe
RR
128 movl %eax, %edx
129 addl $LGUEST_PAGES_regs, %edx
130 movl %edx, %esp
f8f0fdcd
RR
131
132 // The Guest's GDT we so carefully
133 // Placed in the "struct lguest_pages" before
d7e28ffe 134 lgdt LGUEST_PAGES_guest_gdt_desc(%eax)
f8f0fdcd
RR
135
136 // The Guest's IDT we did partially
e1e72965 137 // Copy to "struct lguest_pages" as well.
d7e28ffe 138 lidt LGUEST_PAGES_guest_idt_desc(%eax)
f8f0fdcd
RR
139
140 // The TSS entry which controls traps
141 // Must be loaded up with "ltr" now:
e1e72965
RR
142 // The GDT entry that TSS uses
143 // Changes type when we load it: damn Intel!
f8f0fdcd 144 // For after we switch over our page tables
e1e72965 145 // That entry will be read-only: we'd crash.
d7e28ffe
RR
146 movl $(GDT_ENTRY_TSS*8), %edx
147 ltr %dx
f8f0fdcd
RR
148
149 // Look back now, before we take this last step!
150 // The Host's TSS entry was also marked used;
e1e72965 151 // Let's clear it again for our return.
f8f0fdcd
RR
152 // The GDT descriptor of the Host
153 // Points to the table after two "size" bytes
d7e28ffe 154 movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx
e1e72965 155 // Clear "used" from type field: byte 5 of the entry, the
// second-lowest bit (value 0x02), hence the 0xFD mask.
d7e28ffe 156 andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx)
f8f0fdcd
RR
157
158 // Once our page table's switched, the Guest is live!
159 // The Host fades as we run this final step.
160 // Our "struct lguest_pages" is now read-only.
d7e28ffe 161 movl %ebx, %cr3
f8f0fdcd
RR
162
163 // The page table change did one tricky thing:
164 // The Guest's register page has been mapped
e1e72965 165 // Writable under our %esp (stack) --
f8f0fdcd 166 // We can simply pop off all Guest regs.
// (The pop order here is the exact reverse of the push order
// used by SWITCH_TO_HOST.)
4614a3a3 167 popl %eax
d7e28ffe
RR
168 popl %ebx
169 popl %ecx
170 popl %edx
171 popl %esi
172 popl %edi
173 popl %ebp
174 popl %gs
d7e28ffe
RR
175 popl %fs
176 popl %ds
177 popl %es
f8f0fdcd
RR
178
179 // Near the base of the stack lurk two strange fields
180 // Which we fill as we exit the Guest
181 // These are the trap number and its error
182 // We can simply step past them on our way.
d7e28ffe 183 addl $8, %esp
f8f0fdcd
RR
184
185 // The last five stack slots hold return address
e1e72965
RR
186 // And everything needed to switch privilege
187 // From Switcher's level 0 to Guest's 1,
f8f0fdcd
RR
188 // And the stack where the Guest had last left it.
189 // Interrupts are turned back on: we are Guest.
d7e28ffe
RR
190 iret
191
e1e72965
RR
192// We treat two paths to switch back to the Host
193// Yet both must save Guest state and restore Host
f8f0fdcd 194// So we put the routine in a macro.
d7e28ffe 195#define SWITCH_TO_HOST \
f8f0fdcd
RR
 /* Summary of what this macro leaves behind: \
 * %eax = start of "struct lguest_pages", %ebx = trap number, \
 * %edx clobbered; Host page tables, GDT, IDT, stack, TSS, \
 * %ebp and segment registers are all restored. */ \
196 /* We save the Guest state: all registers first \
197 * Laid out just as "struct lguest_regs" defines */ \
d7e28ffe
RR
198 pushl %es; \
199 pushl %ds; \
200 pushl %fs; \
d7e28ffe
RR
201 pushl %gs; \
202 pushl %ebp; \
203 pushl %edi; \
204 pushl %esi; \
205 pushl %edx; \
206 pushl %ecx; \
207 pushl %ebx; \
4614a3a3 208 pushl %eax; \
f8f0fdcd
RR
209 /* Our stack and our code are using segments \
210 * Set in the TSS and IDT \
211 * Yet if we were to touch data we'd use \
212 * Whatever data segment the Guest had. \
213 * Load the lguest ds segment for now. */ \
d7e28ffe
RR
214 movl $(LGUEST_DS), %eax; \
215 movl %eax, %ds; \
f8f0fdcd 216 /* So where are we? Which CPU, which struct? \
0d027c01
RR
217 * The stack is our clue: our TSS starts \
218 * It at the end of "struct lguest_pages". \
219 * Or we may have stumbled while restoring \
220 * Our Guest segment regs while in switch_to_guest, \
221 * The fault pushed atop that part-unwound stack. \
222 * If we round the stack down to the page start \
223 * We're at the start of "struct lguest_pages". \
 * The mask is written with explicit parentheses so its value \
 * does not hinge on the assembler ranking "<<" above "-" \
 * (C ranks them the other way round). */ \
d7e28ffe 224 movl %esp, %eax; \
0d027c01 225 andl $(~((1 << PAGE_SHIFT) - 1)), %eax; \
f8f0fdcd 226 /* Save our trap number: the switch will obscure it \
e1e72965 227 * (In the Host the Guest regs are not mapped here) \
f8f0fdcd 228 * %ebx holds it safe for deliver_to_host */ \
d7e28ffe 229 movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \
f8f0fdcd
RR
230 /* The Host GDT, IDT and stack! \
231 * All these lie safely hidden from the Guest: \
232 * We must return to the Host page tables \
233 * (Hence that was saved in struct lguest_pages) */ \
d7e28ffe
RR
234 movl LGUEST_PAGES_host_cr3(%eax), %edx; \
235 movl %edx, %cr3; \
f8f0fdcd
RR
236 /* As before, when we looked back at the Host \
237 * As we left and marked TSS unused \
238 * So must we now for the Guest left behind. */ \
d7e28ffe 239 andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \
f8f0fdcd 240 /* Switch to Host's GDT, IDT. */ \
d7e28ffe
RR
241 lgdt LGUEST_PAGES_host_gdt_desc(%eax); \
242 lidt LGUEST_PAGES_host_idt_desc(%eax); \
e1e72965 243 /* Restore the Host's stack where its saved regs lie */ \
d7e28ffe 244 movl LGUEST_PAGES_host_sp(%eax), %esp; \
e1e72965 245 /* Last the TSS: our Host is returned */ \
d7e28ffe
RR
246 movl $(GDT_ENTRY_TSS*8), %edx; \
247 ltr %dx; \
f8f0fdcd 248 /* Restore now the regs saved right at the first. */ \
d7e28ffe
RR
249 popl %ebp; \
250 popl %fs; \
251 popl %gs; \
252 popl %ds; \
253 popl %es
254
e1e72965
RR
255// The first path is trod when the Guest has trapped:
256// (Which trap it was has been pushed on the stack).
f8f0fdcd
RR
257// We need only switch back, and the Host will decode
258// Why we came home, and what needs to be done.
d7e28ffe
RR
259return_to_host:
// Target of the IRQ_STUBs for Guest traps: the stub has already pushed
// the trap number (and a zero error code where the CPU supplied none).
260 SWITCH_TO_HOST
// SWITCH_TO_HOST has put us back on the Host stack. The comments in
// deliver_to_host describe that stack as formed like an interrupt
// frame (EIP, CS, EFLAGS), so this "iret" resumes the Host.
261 iret
262
e1e72965 263// We are led to the second path like so:
f8f0fdcd
RR
264// An interrupt, with some cause external
265// Has ajerked us rudely from the Guest's code
266// Again we must return home to the Host
d7e28ffe
RR
267deliver_to_host:
268 SWITCH_TO_HOST
f8f0fdcd
RR
// SWITCH_TO_HOST leaves %eax = start of "struct lguest_pages" and
// %ebx = the trap number; both are relied upon below.
269 // But now we must go home via that place
270 // Where that interrupt was supposed to go
271 // Had we not been ensconced, running the Guest.
e1e72965 272 // Here we see the trickness of run_guest_once():
f8f0fdcd
RR
273 // The Host stack is formed like an interrupt
274 // With EIP, CS and EFLAGS layered.
275 // Interrupt handlers end with "iret"
276 // And that will take us home at long long last.
277
278 // But first we must find the handler to call!
279 // The IDT descriptor for the Host
280 // Has two bytes for size, and four for address:
281 // %edx will hold it for us for now.
d7e28ffe 282 movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx
f8f0fdcd
RR
283 // We now know the table address we need,
284 // And saved the trap's number inside %ebx.
285 // Yet the pointer to the handler is smeared
286 // Across the bits of the table entry.
287 // What oracle can tell us how to extract
288 // From such a convoluted encoding?
289 // I consulted gcc, and it gave
290 // These instructions, which I gladly credit:
d7e28ffe
RR
// Step by step:
//   leal   : %eax = table address + 8 * trap number (entry address)
//   movzwl : %edx = bytes 0-1 of the entry, zero-extended
//   movl   : %eax = bytes 4-7 of the entry
//   xorw   : clear the low 16 bits of %eax, keeping bytes 6-7
//   orl    : %edx = (bytes 6-7 << 16) | bytes 0-1 = handler address
291 leal (%edx,%ebx,8), %eax
292 movzwl (%eax),%edx
293 movl 4(%eax), %eax
294 xorw %ax, %ax
295 orl %eax, %edx
f8f0fdcd 296 // Now the address of the handler's in %edx
e1e72965 297 // We call it now: its "iret" drops us home.
d7e28ffe
RR
// (A jump, not a "call": nothing further is pushed on the stack.)
298 jmp *%edx
299
f8f0fdcd
RR
300// Every interrupt can come to us here
301// But we must truly tell each apart.
302// They number two hundred and fifty six
303// And each must land in a different spot,
304// Push its number on stack, and join the stream.
305
306// And worse, a mere seven of the traps stand apart
307// And push on their stack an addition:
308// An error number, thirty two bits long
309// So we punish the other two forty-nine
310// And make them push a zero so they match.
311
312// Yet two fifty six entries is long
313// And all will look most the same as the last
314// So we create a macro which can make
315// As many entries as we need to fill.
316
317// Note the change to .data then .text:
318// We plant the address of each entry
319// Into a (data) table for the Host
320// To know where each Guest interrupt should go.
d7e28ffe
RR
321.macro IRQ_STUB N TARGET
// Emits one entry stub for trap/interrupt number N that jumps to TARGET.
// First, in .data, a .long holding the address of local label "1"
// (the stub's first instruction); then back to .text for the stub.
322 .data; .long 1f; .text; 1:
f8f0fdcd
RR
323 // Trap eight, ten through fourteen and seventeen
324 // Supply an error number. Else zero.
d7e28ffe
RR
// The condition is true for every N except 8, 10-14 and 17 (seven
// traps); only then is a zero pushed, so that the stack looks the same
// in both cases once the trap number is pushed.
325 .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17)
326 pushl $0
327 .endif
328 pushl $\N
329 jmp \TARGET
330 ALIGN
331.endm
332
f8f0fdcd
RR
333// This macro creates numerous entries
334// Using GAS macros which out-power C's.
d7e28ffe
RR
335.macro IRQ_STUBS FIRST LAST TARGET
// Emits one IRQ_STUB for each number from FIRST to LAST inclusive,
// every one of them jumping to TARGET. The assembler symbol "irq" is
// the running counter; it is left at LAST+1 afterwards.
336 irq=\FIRST
337 .rept \LAST-\FIRST+1
338 IRQ_STUB irq \TARGET
339 irq=irq+1
340 .endr
341.endm
342
f8f0fdcd
RR
343// Here's the marker for our pointer table
344// Laid in the data section just before
345// Each macro places the address of code
346// Forming an array: each one points to text
347// Which handles interrupt in its turn.
d7e28ffe
RR
348.data
349.global default_idt_entries
350default_idt_entries:
351.text
f8f0fdcd
RR
// default_idt_entries labels the current position in .data; each
// IRQ_STUB expanded below adds one .long there (the address of its
// stub in .text). The invocations below cover numbers 0 through 255.
352 // The first two traps go straight back to the Host
353 IRQ_STUBS 0 1 return_to_host
354 // We'll say nothing, yet, about NMI
355 IRQ_STUB 2 handle_nmi
356 // Other traps also return to the Host
357 IRQ_STUBS 3 31 return_to_host
358 // All interrupts go via their handlers
359 IRQ_STUBS 32 127 deliver_to_host
360 // 'Cept system calls coming from userspace
361 // Are to go to the Guest, never the Host.
// NOTE(review): the stub for 128 targets return_to_host, the same as
// the traps above, rather than deliver_to_host. How system calls then
// reach the Guest is not visible in this file -- confirm against the
// code that installs the Guest IDT.
362 IRQ_STUB 128 return_to_host
363 IRQ_STUBS 129 255 deliver_to_host
364
365// The NMI, what a fabulous beast
366// Which swoops in and stops us no matter that
367// We're suspended between heaven and hell,
368// (Or more likely between the Host and Guest)
369// When in it comes! We are dazed and confused
370// So we do the simplest thing which one can.
371// Though we've pushed the trap number and zero
372// We discard them, return, and hope we live.
d7e28ffe
RR
373handle_nmi:
// Reached from the IRQ_STUB for trap 2, which pushed a zero and then
// the trap number: two 32-bit words, eight bytes. Drop them so %esp
// points at the frame the CPU pushed, then return from the interrupt
// without saving or switching anything.
374 addl $8, %esp
375 iret
376
f8f0fdcd
RR
377// We are done; all that's left is Mastery
378// And "make Mastery" is a journey long
379// Designed to make your fingers itch to code.
380
381// Here ends the text, the file and poem.
d7e28ffe 382ENTRY(end_switcher_text)
This page took 0.100797 seconds and 5 git commands to generate.