1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
|
/*P:500 Just as userspace programs request kernel operations through a system
* call, the Guest requests Host operations through a "hypercall". You might
* notice this nomenclature doesn't really follow any logic, but the name has
* been around for long enough that we're stuck with it. As you'd expect, this
* code is basically one big switch statement. :*/
/* Copyright (C) 2006 Rusty Russell IBM Corporation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <irq_vectors.h>
#include "lg.h"
/*H:120 This is the core hypercall routine: the Guest either gets what it asked
 * for, or gets killed.  (With LHCALL_CRASH it manages both at once.)
 *
 * Recall the Guest's side of the convention: %eax selects the call, and any
 * arguments travel in %edx, %ebx and %ecx.
 *
 * @lg:   the Guest which made the hypercall.
 * @regs: register image holding the call number and arguments.  Only
 *        LHCALL_BIND_DMA writes back into it (the result goes in eax). */
static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
{
	switch (regs->eax) {
	case LHCALL_FLUSH_ASYNC:
		/* Deliberately empty: merely leaving the Guest is what gets
		 * the queued asynchronous hypercalls processed. */
		break;

	case LHCALL_LGUEST_INIT:
		/* Initialization has already happened by the time we can be
		 * called, so a second request is an error. */
		kill_guest(lg, "already have lguest_data");
		break;

	case LHCALL_CRASH: {
		/* Small enough to handle inline: fetch the Guest's message
		 * and kill the Guest with it. */
		char reason[128];

		/* Should lgread() fail it calls kill_guest() itself, and the
		 * kill_guest() below is then ignored. */
		lgread(lg, reason, regs->edx, sizeof(reason));
		/* The Guest need not have terminated the string for us. */
		reason[sizeof(reason) - 1] = '\0';
		kill_guest(lg, "CRASH: %s", reason);
		break;
	}

	case LHCALL_FLUSH_TLB:
		/* Two flavors, selected by the argument: zero flushes the
		 * user mappings only, anything else clears everything. */
		if (!regs->edx)
			guest_pagetable_flush_user(lg);
		else
			guest_pagetable_clear_all(lg);
		break;

	case LHCALL_BIND_DMA:
		/* The one call wanting four arguments.  The Guest squeezes
		 * the buffer count (upper bits) and the interrupt number (low
		 * 8 bits) into %ecx, and we unpack them here.  Binding can
		 * legitimately fail, because the number of DMA pools a Guest
		 * may have is limited, so the result is handed back in %eax. */
		regs->eax = bind_dma(lg, regs->edx, regs->ebx,
				     regs->ecx >> 8, regs->ecx & 0xFF);
		break;

	/* Everything from here to LHCALL_SET_CLOCKEVENT just forwards its
	 * arguments to the routine that does the real work. */
	case LHCALL_SEND_DMA:
		send_dma(lg, regs->edx, regs->ebx);
		break;

	case LHCALL_LOAD_GDT:
		load_guest_gdt(lg, regs->edx, regs->ebx);
		break;

	case LHCALL_LOAD_IDT_ENTRY:
		load_guest_idt_entry(lg, regs->edx, regs->ebx, regs->ecx);
		break;

	case LHCALL_NEW_PGTABLE:
		guest_new_pagetable(lg, regs->edx);
		break;

	case LHCALL_SET_STACK:
		guest_set_stack(lg, regs->edx, regs->ebx, regs->ecx);
		break;

	case LHCALL_SET_PTE:
		guest_set_pte(lg, regs->edx, regs->ebx, mkgpte(regs->ecx));
		break;

	case LHCALL_SET_PMD:
		guest_set_pmd(lg, regs->edx, regs->ebx);
		break;

	case LHCALL_LOAD_TLS:
		guest_load_tls(lg, regs->edx);
		break;

	case LHCALL_SET_CLOCKEVENT:
		guest_set_clockevent(lg, regs->edx);
		break;

	case LHCALL_TS:
		/* Record the TS flag, as we saw used in run_guest(). */
		lg->ts = regs->edx;
		break;

	case LHCALL_HALT:
		/* Likewise, raise the halted flag for run_guest(). */
		lg->halted = 1;
		break;

	default:
		/* A call number we don't know: the Guest dies. */
		kill_guest(lg, "Bad hypercall %li\n", regs->eax);
		break;
	}
}
/* Asynchronous hypercalls are easy: we just look in the array in the Guest's
 * "struct lguest_data" and see if there are any new ones marked "ready".
 *
 * We are careful to do these in order: obviously we respect the order the
 * Guest put them in the ring, but we also promise the Guest that they will
 * happen before any normal hypercall (which is why we check this before
 * checking for a normal hcall).
 *
 * @lg: the Guest whose hypercall ring is to be drained.  lg->next_hcall is
 *      advanced past every entry we consume. */
static void do_async_hcalls(struct lguest *lg)
{
	unsigned int i;
	u8 st[LHCALL_RING_SIZE];

	/* For simplicity, we copy the entire call status array in at once.
	 * If that copy fails we quietly do nothing. */
	if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
		return;

	/* We go around the hcalls[] ring in "struct lguest_data" at most
	 * once: one iteration per slot in our status snapshot. */
	for (i = 0; i < ARRAY_SIZE(st); i++) {
		struct lguest_regs regs;
		/* We remember where we were up to from last time.  This makes
		 * sure that the hypercalls are done in the order the Guest
		 * places them in the ring. */
		unsigned int n = lg->next_hcall;

		/* 0xFF means there's no call here (yet). */
		if (st[n] == 0xFF)
			break;

		/* OK, we have a hypercall.  Increment the "next_hcall" cursor,
		 * and wrap back to 0 if we reach the end. */
		if (++lg->next_hcall == LHCALL_RING_SIZE)
			lg->next_hcall = 0;

		/* We copy the hypercall arguments into a fake register
		 * structure.  This makes life simple for do_hcall(). */
		if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax)
		    || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx)
		    || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx)
		    || get_user(regs.ebx, &lg->lguest_data->hcalls[n].ebx)) {
			kill_guest(lg, "Fetching async hypercalls");
			break;
		}

		/* Do the hypercall, same as a normal one. */
		do_hcall(lg, &regs);

		/* Mark the hypercall done by resetting its status slot to the
		 * "no call here" value. */
		if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
			kill_guest(lg, "Writing result for async hypercall");
			break;
		}

		/* Stop doing hypercalls if we've just done a DMA to the
		 * Launcher: it needs to service this first. */
		if (lg->dma_is_pending)
			break;
	}
}
/* Last of all, we look at what happens first of all.  The very first time the
 * Guest makes a hypercall, we end up here to set things up.
 *
 * @lg: the Guest making its first hypercall.  lg->regs->eax must be
 *      LHCALL_LGUEST_INIT and lg->regs->edx the Guest address of its
 *      "struct lguest_data".
 *
 * On success lg->lguest_data, lg->noirq_start and lg->noirq_end are filled
 * in.  Any failure kills the Guest via kill_guest(). */
static void initialize(struct lguest *lg)
{
	u32 tsc_speed;

	/* You can't do anything until you're initialized.  The Guest knows the
	 * rules, so we're unforgiving here. */
	if (lg->regs->eax != LHCALL_LGUEST_INIT) {
		kill_guest(lg, "hypercall %li before LGUEST_INIT",
			   lg->regs->eax);
		return;
	}

	/* We insist that the Time Stamp Counter exist and doesn't change with
	 * cpu frequency.  Some devious chip manufacturers decided that TSC
	 * changes could be handled in software.  I decided that time going
	 * backwards might be good for benchmarks, but it's bad for users.
	 *
	 * We also insist that the TSC be stable: the kernel detects unreliable
	 * TSCs for its own purposes, and we use that here.  A speed of 0 is
	 * what we report when the TSC fails either test. */
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
		tsc_speed = tsc_khz;
	else
		tsc_speed = 0;

	/* The pointer to the Guest's "struct lguest_data" is the only
	 * argument.  We check that address now. */
	if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) {
		/* lg->lguest_data is not assigned until after this check (and
		 * the caller visible in this file only gets here while it is
		 * still NULL), so report the address the Guest gave us rather
		 * than a pointer which says nothing. */
		kill_guest(lg, "bad guest page %#lx",
			   (unsigned long)lg->regs->edx);
		return;
	}

	/* Having checked it, we simply set lg->lguest_data to point straight
	 * into the Launcher's memory at the right place and then use
	 * copy_to_user/from_user from now on, instead of lgread/write.  I put
	 * this in to show that I'm not immune to writing stupid
	 * optimizations. */
	lg->lguest_data = lg->mem_base + lg->regs->edx;

	/* The Guest tells us where we're not to deliver interrupts by putting
	 * the range of addresses into "struct lguest_data". */
	if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
	    || get_user(lg->noirq_end, &lg->lguest_data->noirq_end)
	    /* We tell the Guest that it can't use the top 4MB of virtual
	     * addresses used by the Switcher. */
	    || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
	    || put_user(tsc_speed, &lg->lguest_data->tsc_khz))
		kill_guest(lg, "bad guest page %p", lg->lguest_data);

	/* We write the current time into the Guest's data page once now.
	 * Note that we carry on to here even if the accesses above failed. */
	write_timestamp(lg);

	/* This is the one case where the above accesses might have been the
	 * first write to a Guest page.  This may have caused a copy-on-write
	 * fault, but the Guest might be referring to the old (read-only)
	 * page. */
	guest_pagetable_clear_all(lg);
}
/* Now we've examined the hypercall code; our Guest can make requests. There
* is one other way we can do things for the Guest, as we see in
* emulate_insn(). */
/*H:100
 * Hypercalls
 *
 * Remember from the Guest, hypercalls come in two flavors: normal and
 * asynchronous.  This file handles both types.
 *
 * @lg: the Guest with a hypercall pending; lg->hcall is reset to NULL once
 *      that hypercall has been dealt with.
 */
void do_hypercalls(struct lguest *lg)
{
	/* No "struct lguest_data" yet means the Guest is uninitialized, and
	 * this hypercall has to be the one which initializes it. */
	if (unlikely(!lg->lguest_data)) {
		initialize(lg);
		/* Either way, this hypercall is finished with. */
		lg->hcall = NULL;
		return;
	}

	/* The Guest is initialized, so drain the asynchronous hypercall ring
	 * first: those are done ahead of the normal hypercall. */
	do_async_hcalls(lg);

	/* If the ring was abandoned because the Guest did a SEND_DMA to the
	 * Launcher, go back now and leave the normal hypercall pending. */
	if (lg->dma_is_pending)
		return;

	do_hcall(lg, lg->hcall);

	/* Tricky point: clearing the hcall pointer is what marks the
	 * hypercall as "done", since that pointer (not the trap number) is
	 * how a pending hypercall is recognized.  Usually this makes no
	 * difference, because the Guest runs again and updates the trap
	 * number before we return here.
	 *
	 * But if we are signalled, or the Guest sends DMA to the Launcher,
	 * the run_guest() loop exits without running the Guest, and on its
	 * return it would otherwise re-run this same hypercall. */
	lg->hcall = NULL;
}
/* This routine supplies the Guest with time: it's used for wallclock time at
* initial boot and as a rough time source if the TSC isn't available. */
void write_timestamp(struct lguest *lg)
{
struct timespec now;
ktime_get_real_ts(&now);
if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec)))
kill_guest(lg, "Writing timestamp");
}
|