aboutsummaryrefslogtreecommitdiffstats
path: root/include/xen/interface/xen.h
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@xensource.com>2007-07-17 21:37:04 -0400
committerJeremy Fitzhardinge <jeremy@goop.org>2007-07-18 11:47:42 -0400
commita42089dd358a7673a0a23126589a9029e57c2049 (patch)
treeaa076610832f5cdb0ee209c42ea7e40d54534ef4 /include/xen/interface/xen.h
parent24037a8b69dbf15bfed8fd42a2a2e442d7b0395b (diff)
xen: Add Xen interface header files
Add Xen interface header files. These are taken fairly directly from the Xen tree, but somewhat rearranged to suit the kernel's conventions. Define macros and inline functions for doing hypercalls into the hypervisor. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Signed-off-by: Ian Pratt <ian.pratt@xensource.com> Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk> Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Diffstat (limited to 'include/xen/interface/xen.h')
-rw-r--r--include/xen/interface/xen.h447
1 files changed, 447 insertions, 0 deletions
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
new file mode 100644
index 000000000000..518a5bf79ed3
--- /dev/null
+++ b/include/xen/interface/xen.h
@@ -0,0 +1,447 @@
1/******************************************************************************
2 * xen.h
3 *
4 * Guest OS interface to Xen.
5 *
6 * Copyright (c) 2004, K A Fraser
7 */
8
9#ifndef __XEN_PUBLIC_XEN_H__
10#define __XEN_PUBLIC_XEN_H__
11
12#include <asm/xen/interface.h>
13
14/*
15 * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
16 */
17
18/*
19 * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
20 * EAX = return value
21 * (argument registers may be clobbered on return)
22 * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
23 * RAX = return value
24 * (argument registers not clobbered on return; RCX, R11 are)
25 */
26#define __HYPERVISOR_set_trap_table 0
27#define __HYPERVISOR_mmu_update 1
28#define __HYPERVISOR_set_gdt 2
29#define __HYPERVISOR_stack_switch 3
30#define __HYPERVISOR_set_callbacks 4
31#define __HYPERVISOR_fpu_taskswitch 5
32#define __HYPERVISOR_sched_op 6
33#define __HYPERVISOR_dom0_op 7
34#define __HYPERVISOR_set_debugreg 8
35#define __HYPERVISOR_get_debugreg 9
36#define __HYPERVISOR_update_descriptor 10 /* NB. 11 is not assigned in this list. */
37#define __HYPERVISOR_memory_op 12
38#define __HYPERVISOR_multicall 13
39#define __HYPERVISOR_update_va_mapping 14
40#define __HYPERVISOR_set_timer_op 15
41#define __HYPERVISOR_event_channel_op_compat 16
42#define __HYPERVISOR_xen_version 17
43#define __HYPERVISOR_console_io 18
44#define __HYPERVISOR_physdev_op_compat 19
45#define __HYPERVISOR_grant_table_op 20
46#define __HYPERVISOR_vm_assist 21
47#define __HYPERVISOR_update_va_mapping_otherdomain 22
48#define __HYPERVISOR_iret 23 /* x86 only */
49#define __HYPERVISOR_vcpu_op 24
50#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
51#define __HYPERVISOR_mmuext_op 26
52#define __HYPERVISOR_acm_op 27
53#define __HYPERVISOR_nmi_op 28
54#define __HYPERVISOR_sched_op_new 29
55#define __HYPERVISOR_callback_op 30
56#define __HYPERVISOR_xenoprof_op 31
57#define __HYPERVISOR_event_channel_op 32
58#define __HYPERVISOR_physdev_op 33
59#define __HYPERVISOR_hvm_op 34
60
61/*
62 * VIRTUAL INTERRUPTS
63 *
64 * Virtual interrupts that a guest OS may receive from Xen.
65 */
66#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */
67#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */
68#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
69#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
70#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
71#define NR_VIRQS 8
72
73/*
74 * MMU-UPDATE REQUESTS
75 *
76 * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
77 * A foreigndom (FD) can be specified (or DOMID_SELF for none).
78 * Where the FD has some effect, it is described below.
79 * ptr[1:0] specifies the appropriate MMU_* command.
80 *
81 * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
82 * Updates an entry in a page table. If updating an L1 table, and the new
83 * table entry is valid/present, the mapped frame must belong to the FD, if
84 * an FD has been specified. If attempting to map an I/O page then the
85 * caller assumes the privilege of the FD.
86 * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
87 * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
88 * ptr[:2] -- Machine address of the page-table entry to modify.
89 * val -- Value to write.
90 *
91 * ptr[1:0] == MMU_MACHPHYS_UPDATE:
92 * Updates an entry in the machine->pseudo-physical mapping table.
93 * ptr[:2] -- Machine address within the frame whose mapping to modify.
94 * The frame must belong to the FD, if one is specified.
95 * val -- Value to write into the mapping entry.
96 */
97#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
98#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
99
100/*
101 * MMU EXTENDED OPERATIONS
102 *
103 * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
104 * A foreigndom (FD) can be specified (or DOMID_SELF for none).
105 * Where the FD has some effect, it is described below.
106 *
107 * cmd: MMUEXT_(UN)PIN_*_TABLE
108 * mfn: Machine frame number to be (un)pinned as a p.t. page.
109 * The frame must belong to the FD, if one is specified.
110 *
111 * cmd: MMUEXT_NEW_BASEPTR
112 * mfn: Machine frame number of new page-table base to install in MMU.
113 *
114 * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
115 * mfn: Machine frame number of new page-table base to install in MMU
116 * when in user space.
117 *
118 * cmd: MMUEXT_TLB_FLUSH_LOCAL
119 * No additional arguments. Flushes local TLB.
120 *
121 * cmd: MMUEXT_INVLPG_LOCAL
122 * linear_addr: Linear address to be flushed from the local TLB.
123 *
124 * cmd: MMUEXT_TLB_FLUSH_MULTI
125 * vcpumask: Pointer to bitmap of VCPUs to be flushed.
126 *
127 * cmd: MMUEXT_INVLPG_MULTI
128 * linear_addr: Linear address to be flushed.
129 * vcpumask: Pointer to bitmap of VCPUs to be flushed.
130 *
131 * cmd: MMUEXT_TLB_FLUSH_ALL
132 * No additional arguments. Flushes all VCPUs' TLBs.
133 *
134 * cmd: MMUEXT_INVLPG_ALL
135 * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
136 *
137 * cmd: MMUEXT_FLUSH_CACHE
138 * No additional arguments. Writes back and flushes cache contents.
139 *
140 * cmd: MMUEXT_SET_LDT
141 * linear_addr: Linear address of LDT base (NB. must be page-aligned).
142 * nr_ents: Number of entries in LDT.
143 */
144#define MMUEXT_PIN_L1_TABLE 0
145#define MMUEXT_PIN_L2_TABLE 1
146#define MMUEXT_PIN_L3_TABLE 2
147#define MMUEXT_PIN_L4_TABLE 3
148#define MMUEXT_UNPIN_TABLE 4
149#define MMUEXT_NEW_BASEPTR 5
150#define MMUEXT_TLB_FLUSH_LOCAL 6
151#define MMUEXT_INVLPG_LOCAL 7
152#define MMUEXT_TLB_FLUSH_MULTI 8
153#define MMUEXT_INVLPG_MULTI 9
154#define MMUEXT_TLB_FLUSH_ALL 10
155#define MMUEXT_INVLPG_ALL 11
156#define MMUEXT_FLUSH_CACHE 12
157#define MMUEXT_SET_LDT 13 /* NB. 14 is not assigned in this list. */
158#define MMUEXT_NEW_USER_BASEPTR 15 /* x86/64 only */
159
160#ifndef __ASSEMBLY__
161struct mmuext_op {
162 unsigned int cmd; /* One of the MMUEXT_* commands. */
163 union {
164 /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
165 unsigned long mfn;
166 /* INVLPG_LOCAL, INVLPG_MULTI, INVLPG_ALL, SET_LDT */
167 unsigned long linear_addr;
168 } arg1;
169 union {
170 /* SET_LDT */
171 unsigned int nr_ents;
172 /* TLB_FLUSH_MULTI, INVLPG_MULTI */
173 void *vcpumask;
174 } arg2;
175};
176DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
177#endif
178
179/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
180/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */
181/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */
182#define UVMF_NONE (0UL<<0) /* No flushing at all. */
183#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */
184#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */
185#define UVMF_FLUSHTYPE_MASK (3UL<<0) /* Bits [1:0]: covers the flush types above. */
186#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */
187#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. Same value (0) as UVMF_MULTI. */
188#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */
189
190/*
191 * Commands to HYPERVISOR_console_io().
192 */
193#define CONSOLEIO_write 0
194#define CONSOLEIO_read 1
195
196/*
197 * Commands to HYPERVISOR_vm_assist().
198 */
199#define VMASST_CMD_enable 0
200#define VMASST_CMD_disable 1
201#define VMASST_TYPE_4gb_segments 0
202#define VMASST_TYPE_4gb_segments_notify 1
203#define VMASST_TYPE_writable_pagetables 2
204#define VMASST_TYPE_pae_extended_cr3 3
205#define MAX_VMASST_TYPE 3
206
207#ifndef __ASSEMBLY__
208
209typedef uint16_t domid_t;
210
211/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
212#define DOMID_FIRST_RESERVED (0x7FF0U)
213
214/* DOMID_SELF is used in certain contexts to refer to oneself. */
215#define DOMID_SELF (0x7FF0U)
216
217/*
218 * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
219 * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
220 * is useful to ensure that no mappings to the OS's own heap are accidentally
221 * installed. (e.g., in Linux this could cause havoc as reference counts
222 * aren't adjusted on the I/O-mapping code path).
223 * This only makes sense as the foreigndom (FD) of an MMU-update request, but in
224 * that context can be specified by any calling domain.
225 */
226#define DOMID_IO (0x7FF1U)
227
228/*
229 * DOMID_XEN is used to allow privileged domains to map restricted parts of
230 * Xen's heap space (e.g., the machine_to_phys table).
231 * This only makes sense as the foreigndom (FD) of an MMU-update request, and is
232 * only permitted if the caller is privileged.
233 */
234#define DOMID_XEN (0x7FF2U)
235
236/*
237 * Send an array of these to HYPERVISOR_mmu_update().
238 * NB. Both fields are fixed at 64 bits, whatever the native pointer size.
239 */
240struct mmu_update {
241 uint64_t ptr; /* Machine address of PTE; ptr[1:0] holds the MMU_* command. */
242 uint64_t val; /* New contents of PTE. */
243};
244DEFINE_GUEST_HANDLE_STRUCT(mmu_update);
245
246/*
247 * Send an array of these to HYPERVISOR_multicall().
248 * NB. The fields are natural register size for this architecture.
249 */
250struct multicall_entry {
251 unsigned long op; /* NOTE(review): presumably a __HYPERVISOR_* number -- confirm. */
252 long result; /* NOTE(review): presumably the return value of 'op' -- confirm. */
253 unsigned long args[6]; /* Six argument slots (cf. the six x86_64 argument registers). */
254};
255DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
256
257/*
258 * Event channel endpoints per domain:
259 * 1024 if a long is 32 bits; 4096 if a long is 64 bits.
260 */
261#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
262
263struct vcpu_time_info {
264 /*
265 * Updates to the following values are preceded and followed
266 * by an increment of 'version'. The guest can therefore
267 * detect updates by looking for changes to 'version'. If the
268 * least-significant bit of the version number is set then an
269 * update is in progress and the guest must wait to read a
270 * consistent set of values. The correct way to interact with
271 * the version number is similar to Linux's seqlock: see the
272 * implementations of read_seqbegin/read_seqretry.
273 */
274 uint32_t version; /* Odd while an update is in progress (see above). */
275 uint32_t pad0; /* Padding: keeps the 64-bit fields below 8-byte aligned. */
276 uint64_t tsc_timestamp; /* TSC at last update of time vals. */
277 uint64_t system_time; /* Time, in nanosecs, since boot. */
278 /*
279 * Current system time:
280 * system_time + ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32)
281 * CPU frequency (Hz):
282 * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
283 */
284 uint32_t tsc_to_system_mul;
285 int8_t tsc_shift; /* Signed. NOTE(review): negative presumably means shift right -- confirm. */
286 int8_t pad1[3]; /* Padding: rounds the struct up to 32 bytes. */
287}; /* 32 bytes */
288
289struct vcpu_info {
290 /*
291 * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
292 * a pending notification for a particular VCPU. It is then cleared
293 * by the guest OS /before/ checking for pending work, thus avoiding
294 * a set-and-check race. Note that the mask is only accessed by Xen
295 * on the CPU that is currently hosting the VCPU. This means that the
296 * pending and mask flags can be updated by the guest without special
297 * synchronisation (i.e., no need for the x86 LOCK prefix).
298 * This may seem suboptimal because if the pending flag is set by
299 * a different CPU then an IPI may be scheduled even when the mask
300 * is set. However, note:
301 * 1. The task of 'interrupt holdoff' is covered by the per-event-
302 * channel mask bits. A 'noisy' event that is continually being
303 * triggered can be masked at source at this very precise
304 * granularity.
305 * 2. The main purpose of the per-VCPU mask is therefore to restrict
306 * reentrant execution: whether for concurrency control, or to
307 * prevent unbounded stack usage. Whatever the purpose, we expect
308 * that the mask will be asserted only for short periods at a time,
309 * and so the likelihood of a 'spurious' IPI is suitably small.
310 * The mask is read before making an event upcall to the guest: a
311 * non-zero mask therefore guarantees that the VCPU will not receive
312 * an upcall activation. The mask is cleared when the VCPU requests
313 * to block: this avoids wakeup-waiting races.
314 */
315 uint8_t evtchn_upcall_pending; /* Set non-zero by Xen; cleared by the guest. */
316 uint8_t evtchn_upcall_mask; /* Non-zero: no upcall is delivered to this VCPU. */
317 unsigned long evtchn_pending_sel; /* One bit per word of shared_info.evtchn_pending[]. */
318 struct arch_vcpu_info arch; /* Declared outside this file. */
319 struct vcpu_time_info time; /* Per-VCPU time values; see struct vcpu_time_info. */
320}; /* 64 bytes (x86) */
321
322/*
323 * Xen/kernel shared data -- pointer provided in start_info.
324 * NB. We expect that this struct is smaller than a page.
325 */
326struct shared_info {
327 struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
328
329 /*
330 * A domain can create "event channels" on which it can send and receive
331 * asynchronous event notifications. There are three classes of event that
332 * are delivered by this mechanism:
333 * 1. Bi-directional inter- and intra-domain connections. Domains must
334 * arrange out-of-band to set up a connection (usually by allocating
335 * an unbound 'listener' port and advertising that via a storage service
336 * such as xenstore).
337 * 2. Physical interrupts. A domain with suitable hardware-access
338 * privileges can bind an event-channel port to a physical interrupt
339 * source.
340 * 3. Virtual interrupts ('events'). A domain can bind an event-channel
341 * port to a virtual interrupt source, such as the virtual-timer
342 * device or the emergency console.
343 *
344 * Event channels are addressed by a "port index". Each channel is
345 * associated with two bits of information:
346 * 1. PENDING -- notifies the domain that there is a pending notification
347 * to be processed. This bit is cleared by the guest.
348 * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING
349 * will cause an asynchronous upcall to be scheduled. This bit is only
350 * updated by the guest. It is read-only within Xen. If a channel
351 * becomes pending while the channel is masked then the 'edge' is lost
352 * (i.e., when the channel is unmasked, the guest must manually handle
353 * pending notifications as no upcall will be scheduled by Xen).
354 *
355 * To expedite scanning of pending notifications, any 0->1 pending
356 * transition on an unmasked channel causes a corresponding bit in a
357 * per-vcpu selector word to be set. Each bit in the selector covers a
358 * 'C long' in the PENDING bitfield array.
359 */
360 unsigned long evtchn_pending[sizeof(unsigned long) * 8]; /* PENDING bits: NR_EVENT_CHANNELS in total. */
361 unsigned long evtchn_mask[sizeof(unsigned long) * 8]; /* MASK bits: NR_EVENT_CHANNELS in total. */
362
363 /*
364 * Wallclock time: updated only by control software. Guests should base
365 * their gettimeofday() syscall on this wallclock-base value.
366 */
367 uint32_t wc_version; /* Version counter: see struct vcpu_time_info. */
368 uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
369 uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
370
371 struct arch_shared_info arch;
372
373};
374
375/*
376 * Start-of-day memory layout for the initial domain (DOM0):
377 * 1. The domain is started within a contiguous virtual-memory region.
378 * 2. The contiguous region begins and ends on an aligned 4MB boundary.
379 * 3. The region start corresponds to the load address of the OS image.
380 * If the load address is not 4MB aligned then the address is rounded down.
381 * 4. This is the order of bootstrap elements in the initial virtual region:
382 * a. relocated kernel image
383 * b. initial ram disk [mod_start, mod_len]
384 * c. list of allocated page frames [mfn_list, nr_pages]
385 * d. struct start_info [register ESI (x86)]
386 * e. bootstrap page tables [pt_base, CR3 (x86)]
387 * f. bootstrap stack [register ESP (x86)]
388 * 5. Bootstrap elements are packed together, but each is 4kB-aligned.
389 * 6. The initial ram disk may be omitted.
390 * 7. The list of page frames forms a contiguous 'pseudo-physical' memory
391 * layout for the domain. In particular, the bootstrap virtual-memory
392 * region is a 1:1 mapping to the first section of the pseudo-physical map.
393 * 8. All bootstrap elements are mapped read-writable for the guest OS. The
394 * only exception is the bootstrap page table, which is mapped read-only.
395 * 9. There is guaranteed to be at least 512kB padding after the final
396 * bootstrap element. If necessary, the bootstrap virtual region is
397 * extended by an extra 4MB to ensure this.
398 */
399
400#define MAX_GUEST_CMDLINE 1024
401struct start_info {
402 /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
403 char magic[32]; /* "xen-<version>-<platform>". */
404 unsigned long nr_pages; /* Total pages allocated to this domain. */
405 unsigned long shared_info; /* MACHINE address of shared info struct. */
406 uint32_t flags; /* SIF_xxx flags. */
407 unsigned long store_mfn; /* MACHINE page number of shared page. */
408 uint32_t store_evtchn; /* Event channel for store communication. */
409 union {
410 struct {
411 unsigned long mfn; /* MACHINE page number of console page. */
412 uint32_t evtchn; /* Event channel for console page. */
413 } domU;
414 struct {
415 uint32_t info_off; /* Offset of console_info struct. */
416 uint32_t info_size; /* Size of console_info struct from start. */
417 } dom0;
418 } console;
419 /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
420 unsigned long pt_base; /* VIRTUAL address of page directory. */
421 unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */
422 unsigned long mfn_list; /* VIRTUAL address of page-frame list. */
423 unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
424 unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
425 int8_t cmd_line[MAX_GUEST_CMDLINE]; /* Command-line buffer, MAX_GUEST_CMDLINE bytes. */
426};
427
428/* These flags are passed in the 'flags' field of struct start_info. */
429#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
430#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
431
432typedef uint64_t cpumap_t;
433
434typedef uint8_t xen_domain_handle_t[16];
435
436/* Turn a plain number into a C unsigned long constant. */
437#define __mk_unsigned_long(x) x ## UL
438#define mk_unsigned_long(x) __mk_unsigned_long(x)
439
440#else /* __ASSEMBLY__ */
441
442/* In assembly code we cannot use C numeric constant suffixes. */
443#define mk_unsigned_long(x) x
444
445#endif /* !__ASSEMBLY__ */
446
447#endif /* __XEN_PUBLIC_XEN_H__ */