 arch/i386/Makefile               |   3
 arch/i386/kernel/entry.S         |  71
 arch/i386/kernel/head.S          |   5
 arch/i386/kernel/vmlinux.lds.S   |   1
 arch/i386/xen/Makefile           |   1
 arch/i386/xen/enlighten.c        | 745
 arch/i386/xen/features.c         |  29
 arch/i386/xen/multicalls.c       |  89
 arch/i386/xen/multicalls.h       |  45
 arch/i386/xen/setup.c            |  97
 arch/i386/xen/xen-head.S         |  36
 arch/i386/xen/xen-ops.h          |  31
 include/asm-i386/irq.h           |   1
 include/asm-i386/xen/hypercall.h |  18
 include/xen/features.h           |  23
 include/xen/page.h               | 179
 16 files changed, 1373 insertions(+), 1 deletion(-)
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index 181cc29a7c4f..01f0ff0daaf4 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -93,6 +93,9 @@ mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000
 mcore-$(CONFIG_X86_ES7000)	:= mach-default
 core-$(CONFIG_X86_ES7000)	:= arch/i386/mach-es7000/
 
+# Xen paravirtualization support
+core-$(CONFIG_XEN)		+= arch/i386/xen/
+
 # default subarch .h files
 mflags-y += -Iinclude/asm-i386/mach-default
 
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 3c3c220488c9..ffb236544270 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -1023,6 +1023,77 @@ ENTRY(kernel_thread_helper)
 	CFI_ENDPROC
 ENDPROC(kernel_thread_helper)
 
+#ifdef CONFIG_XEN
+ENTRY(xen_hypervisor_callback)
+	CFI_STARTPROC
+	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+	mov %esp, %eax
+	call xen_evtchn_do_upcall
+	jmp  ret_from_intr
+	CFI_ENDPROC
+ENDPROC(xen_hypervisor_callback)
+
+# Hypervisor uses this for application faults while it executes.
+# We get here for two reasons:
+#  1. Fault while reloading DS, ES, FS or GS
+#  2. Fault while executing IRET
+# Category 1 we fix up by reattempting the load, and zeroing the segment
+# register if the load fails.
+# Category 2 we fix up by jumping to do_iret_error. We cannot use the
+# normal Linux return path in this case because if we use the IRET hypercall
+# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+# We distinguish between categories by maintaining a status value in EAX.
+ENTRY(xen_failsafe_callback)
+	CFI_STARTPROC
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	movl $1,%eax
+1:	mov 4(%esp),%ds
+2:	mov 8(%esp),%es
+3:	mov 12(%esp),%fs
+4:	mov 16(%esp),%gs
+	testl %eax,%eax
+	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
+	lea 16(%esp),%esp
+	CFI_ADJUST_CFA_OFFSET -16
+	jz 5f
+	addl $16,%esp
+	jmp iret_exc		# EAX != 0 => Category 2 (Bad IRET)
+5:	pushl $0		# EAX == 0 => Category 1 (Bad segment)
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	jmp ret_from_exception
+	CFI_ENDPROC
+
+.section .fixup,"ax"
+6:	xorl %eax,%eax
+	movl %eax,4(%esp)
+	jmp 1b
+7:	xorl %eax,%eax
+	movl %eax,8(%esp)
+	jmp 2b
+8:	xorl %eax,%eax
+	movl %eax,12(%esp)
+	jmp 3b
+9:	xorl %eax,%eax
+	movl %eax,16(%esp)
+	jmp 4b
+.previous
+.section __ex_table,"a"
+	.align 4
+	.long 1b,6b
+	.long 2b,7b
+	.long 3b,8b
+	.long 4b,9b
+.previous
+ENDPROC(xen_failsafe_callback)
+
+#endif /* CONFIG_XEN */
+
 .section .rodata,"a"
 #include "syscall_table.S"
 
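As a rough C model of the EAX status protocol described in the failsafe comment above (illustrative only, not part of the patch; load_would_fault() is a hypothetical stand-in for the fault that the __ex_table fixup stubs 6:-9: catch):

	/* Model: each 'mov n(%esp),%seg' either succeeds or traps to a fixup
	 * stub that clears EAX, zeroes the saved selector and retries the
	 * load (a null selector always loads, so the retry terminates). */
	static int classify_failsafe_fault(unsigned short sel[4])
	{
		int eax = 1;			/* movl $1,%eax */
		int i;

		for (i = 0; i < 4; i++) {
			if (load_would_fault(sel[i])) {	/* hypothetical */
				eax = 0;	/* fixup: xorl %eax,%eax */
				sel[i] = 0;	/* zero saved selector, retry */
			}
		}
		return eax;	/* 0 => Category 1 (bad segment),
				   1 => Category 2 (bad IRET) */
	}
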
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 82714668d43b..7c52b222207e 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -510,7 +510,8 @@ ENTRY(_stext)
 /*
  * BSS section
  */
-.section ".bss.page_aligned","w"
+.section ".bss.page_aligned","wa"
+	.align PAGE_SIZE_asm
 ENTRY(swapper_pg_dir)
 	.fill 1024,4,0
 ENTRY(swapper_pg_pmd)
@@ -538,6 +539,8 @@ fault_msg:
 	.ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
 	.asciz "Stack: %p %p %p %p %p %p %p %p\n"
 
+#include "../xen/xen-head.S"
+
 /*
  * The IDT and GDT 'descriptors' are a strange 48-bit object
  * only used by the lidt and lgdt instructions. They are not
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index aa87b06c7c82..00f1bc47d3a2 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -88,6 +88,7 @@ SECTIONS
 
 	. = ALIGN(4096);
 	.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+		*(.data.page_aligned)
 		*(.data.idt)
 	}
 
diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile
new file mode 100644
index 000000000000..60bc1cfb101c
--- /dev/null
+++ b/arch/i386/xen/Makefile
@@ -0,0 +1 @@
+obj-y := enlighten.o setup.o features.o multicalls.o
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
new file mode 100644
index 000000000000..2d484f9320de
--- /dev/null
+++ b/arch/i386/xen/enlighten.c
@@ -0,0 +1,745 @@
+/*
+ * Core of Xen paravirt_ops implementation.
+ *
+ * This file contains the xen_paravirt_ops structure itself, and the
+ * implementations for:
+ * - privileged instructions
+ * - interrupt flags
+ * - segment operations
+ * - booting and setup
+ *
+ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/preempt.h>
+#include <linux/percpu.h>
+#include <linux/delay.h>
+#include <linux/start_kernel.h>
+#include <linux/sched.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/physdev.h>
+#include <xen/interface/vcpu.h>
+#include <xen/features.h>
+#include <xen/page.h>
+
+#include <asm/paravirt.h>
+#include <asm/page.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/fixmap.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+
+#include "xen-ops.h"
+#include "multicalls.h"
+
+EXPORT_SYMBOL_GPL(hypercall_page);
+
+DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
+
+DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
+DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+DEFINE_PER_CPU(unsigned long, xen_cr3);
+
+struct start_info *xen_start_info;
+EXPORT_SYMBOL_GPL(xen_start_info);
+
+static void xen_vcpu_setup(int cpu)
+{
+	per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+}
+
+static void __init xen_banner(void)
+{
+	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
+	       paravirt_ops.name);
+	printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
+}
+
+static void xen_cpuid(unsigned int *eax, unsigned int *ebx,
+		      unsigned int *ecx, unsigned int *edx)
+{
+	unsigned maskedx = ~0;
+
+	/*
+	 * Mask out inconvenient features, to try and disable as many
+	 * unsupported kernel subsystems as possible.
+	 */
+	if (*eax == 1)
+		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
+			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
+			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+
+	asm(XEN_EMULATE_PREFIX "cpuid"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (*eax), "2" (*ecx));
+	*edx &= maskedx;
+}
+
+static void xen_set_debugreg(int reg, unsigned long val)
+{
+	HYPERVISOR_set_debugreg(reg, val);
+}
+
+static unsigned long xen_get_debugreg(int reg)
+{
+	return HYPERVISOR_get_debugreg(reg);
+}
+
+static unsigned long xen_save_fl(void)
+{
+	struct vcpu_info *vcpu;
+	unsigned long flags;
+
+	preempt_disable();
+	vcpu = x86_read_percpu(xen_vcpu);
+	/* flag has opposite sense of mask */
+	flags = !vcpu->evtchn_upcall_mask;
+	preempt_enable();
+
+	/* convert to IF type flag
+	   -0 -> 0x00000000
+	   -1 -> 0xffffffff
+	*/
+	return (-flags) & X86_EFLAGS_IF;
+}
+
+static void xen_restore_fl(unsigned long flags)
+{
+	struct vcpu_info *vcpu;
+
+	preempt_disable();
+
+	/* convert from IF type flag */
+	flags = !(flags & X86_EFLAGS_IF);
+	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu->evtchn_upcall_mask = flags;
+
+	if (flags == 0) {
+		/* Unmask then check (avoid races).  We're only protecting
+		   against updates by this CPU, so there's no need for
+		   anything stronger. */
+		barrier();
+
+		if (unlikely(vcpu->evtchn_upcall_pending))
+			force_evtchn_callback();
+		preempt_enable();
+	} else
+		preempt_enable_no_resched();
+}
+
+static void xen_irq_disable(void)
+{
+	struct vcpu_info *vcpu;
+	preempt_disable();
+	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu->evtchn_upcall_mask = 1;
+	preempt_enable_no_resched();
+}
+
+static void xen_irq_enable(void)
+{
+	struct vcpu_info *vcpu;
+
+	preempt_disable();
+	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu->evtchn_upcall_mask = 0;
+
+	/* Unmask then check (avoid races).  We're only protecting
+	   against updates by this CPU, so there's no need for
+	   anything stronger. */
+	barrier();
+
+	if (unlikely(vcpu->evtchn_upcall_pending))
+		force_evtchn_callback();
+	preempt_enable();
+}
+
+static void xen_safe_halt(void)
+{
+	/* Blocking includes an implicit local_irq_enable(). */
+	if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
+		BUG();
+}
+
+static void xen_halt(void)
+{
+	if (irqs_disabled())
+		HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+	else
+		xen_safe_halt();
+}
+
+static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
+{
+	switch (mode) {
+	case PARAVIRT_LAZY_NONE:
+		BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
+		break;
+
+	case PARAVIRT_LAZY_MMU:
+	case PARAVIRT_LAZY_CPU:
+		BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE);
+		break;
+
+	case PARAVIRT_LAZY_FLUSH:
+		/* flush if necessary, but don't change state */
+		if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE)
+			xen_mc_flush();
+		return;
+	}
+
+	xen_mc_flush();
+	x86_write_percpu(xen_lazy_mode, mode);
+}
+
+static unsigned long xen_store_tr(void)
+{
+	return 0;
+}
+
+static void xen_set_ldt(const void *addr, unsigned entries)
+{
+	unsigned long linear_addr = (unsigned long)addr;
+	struct mmuext_op *op;
+	struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+
+	op = mcs.args;
+	op->cmd = MMUEXT_SET_LDT;
+	if (linear_addr) {
+		/* ldt may be vmalloced, use arbitrary_virt_to_machine */
+		xmaddr_t maddr;
+		maddr = arbitrary_virt_to_machine((unsigned long)addr);
+		linear_addr = (unsigned long)maddr.maddr;
+	}
+	op->arg1.linear_addr = linear_addr;
+	op->arg2.nr_ents = entries;
+
+	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+static void xen_load_gdt(const struct Xgt_desc_struct *dtr)
+{
+	unsigned long *frames;
+	unsigned long va = dtr->address;
+	unsigned int size = dtr->size + 1;
+	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	int f;
+	struct multicall_space mcs;
+
+	/* A GDT can be up to 64k in size, which corresponds to 8192
+	   8-byte entries, or 16 4k pages.. */
+
+	BUG_ON(size > 65536);
+	BUG_ON(va & ~PAGE_MASK);
+
+	mcs = xen_mc_entry(sizeof(*frames) * pages);
+	frames = mcs.args;
+
+	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
+		frames[f] = virt_to_mfn(va);
+		make_lowmem_page_readonly((void *)va);
+	}
+
+	MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+static void load_TLS_descriptor(struct thread_struct *t,
+				unsigned int cpu, unsigned int i)
+{
+	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
+	struct multicall_space mc = __xen_mc_entry(0);
+
+	MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
+}
+
+static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+	xen_mc_batch();
+
+	load_TLS_descriptor(t, cpu, 0);
+	load_TLS_descriptor(t, cpu, 1);
+	load_TLS_descriptor(t, cpu, 2);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
+				u32 low, u32 high)
+{
+	unsigned long lp = (unsigned long)&dt[entrynum];
+	xmaddr_t mach_lp = virt_to_machine(lp);
+	u64 entry = (u64)high << 32 | low;
+
+	xen_mc_flush();
+	if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
+		BUG();
+}
+
+static int cvt_gate_to_trap(int vector, u32 low, u32 high,
+			    struct trap_info *info)
+{
+	u8 type, dpl;
+
+	type = (high >> 8) & 0x1f;
+	dpl = (high >> 13) & 3;
+
+	if (type != 0xf && type != 0xe)
+		return 0;
+
+	info->vector = vector;
+	info->address = (high & 0xffff0000) | (low & 0x0000ffff);
+	info->cs = low >> 16;
+	info->flags = dpl;
+	/* interrupt gates clear IF */
+	if (type == 0xe)
+		info->flags |= 4;
+
+	return 1;
+}
+
+/* Locations of each CPU's IDT */
+static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc);
+
+/* Set an IDT entry.  If the entry is part of the current IDT, then
+   also update Xen. */
+static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
+				u32 low, u32 high)
+{
+	int cpu = smp_processor_id();
+	unsigned long p = (unsigned long)&dt[entrynum];
+	unsigned long start = per_cpu(idt_desc, cpu).address;
+	unsigned long end = start + per_cpu(idt_desc, cpu).size + 1;
+
+	xen_mc_flush();
+
+	write_dt_entry(dt, entrynum, low, high);
+
+	if (p >= start && (p + 8) <= end) {
+		struct trap_info info[2];
+
+		info[1].address = 0;
+
+		if (cvt_gate_to_trap(entrynum, low, high, &info[0]))
+			if (HYPERVISOR_set_trap_table(info))
+				BUG();
+	}
+}
+
+/* Load a new IDT into Xen.  In principle this can be per-CPU, so we
+   hold a spinlock to protect the static traps[] array (static because
+   it avoids allocation, and saves stack space). */
+static void xen_load_idt(const struct Xgt_desc_struct *desc)
+{
+	static DEFINE_SPINLOCK(lock);
+	static struct trap_info traps[257];
+
+	int cpu = smp_processor_id();
+	unsigned in, out, count;
+
+	per_cpu(idt_desc, cpu) = *desc;
+
+	count = (desc->size+1) / 8;
+	BUG_ON(count > 256);
+
+	spin_lock(&lock);
+	for (in = out = 0; in < count; in++) {
+		const u32 *entry = (u32 *)(desc->address + in * 8);
+
+		if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
+			out++;
+	}
+	traps[out].address = 0;
+
+	xen_mc_flush();
+	if (HYPERVISOR_set_trap_table(traps))
+		BUG();
+
+	spin_unlock(&lock);
+}
+
+/* Write a GDT descriptor entry.  Ignore LDT descriptors, since
+   they're handled differently. */
+static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
+				u32 low, u32 high)
+{
+	switch ((high >> 8) & 0xff) {
+	case DESCTYPE_LDT:
+	case DESCTYPE_TSS:
+		/* ignore */
+		break;
+
+	default: {
+		xmaddr_t maddr = virt_to_machine(&dt[entry]);
+		u64 desc = (u64)high << 32 | low;
+
+		xen_mc_flush();
+		if (HYPERVISOR_update_descriptor(maddr.maddr, desc))
+			BUG();
+	}
+
+	}
+}
+
+static void xen_load_esp0(struct tss_struct *tss,
+			  struct thread_struct *thread)
+{
+	struct multicall_space mcs = xen_mc_entry(0);
+	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+static void xen_set_iopl_mask(unsigned mask)
+{
+	struct physdev_set_iopl set_iopl;
+
+	/* Force the change at ring 0. */
+	set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
+	HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+}
+
+static void xen_io_delay(void)
+{
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static unsigned long xen_apic_read(unsigned long reg)
+{
+	return 0;
+}
+#endif
+
+static void xen_flush_tlb(void)
+{
+	struct mmuext_op op;
+
+	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+		BUG();
+}
+
+static void xen_flush_tlb_single(unsigned long addr)
+{
+	struct mmuext_op op;
+
+	op.cmd = MMUEXT_INVLPG_LOCAL;
+	op.arg1.linear_addr = addr & PAGE_MASK;
+	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+		BUG();
+}
+
+static unsigned long xen_read_cr2(void)
+{
+	return x86_read_percpu(xen_vcpu)->arch.cr2;
+}
+
+static void xen_write_cr4(unsigned long cr4)
+{
+	/* never allow TSC to be disabled */
+	native_write_cr4(cr4 & ~X86_CR4_TSD);
+}
+
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ *
+ * Note that Xen is using the fact that the pagetable base is always
+ * page-aligned, and putting the 12 MSB of the address into the 12 LSB
+ * of cr3.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
+
+static unsigned long xen_read_cr3(void)
+{
+	return x86_read_percpu(xen_cr3);
+}
+
+static void xen_write_cr3(unsigned long cr3)
+{
+	if (cr3 == x86_read_percpu(xen_cr3)) {
+		/* just a simple tlb flush */
+		xen_flush_tlb();
+		return;
+	}
+
+	x86_write_percpu(xen_cr3, cr3);
+
+
+	{
+		struct mmuext_op *op;
+		struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+		unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+
+		op = mcs.args;
+		op->cmd = MMUEXT_NEW_BASEPTR;
+		op->arg1.mfn = mfn;
+
+		MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+		xen_mc_issue(PARAVIRT_LAZY_CPU);
+	}
+}
+
+static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+{
+	/* XXX pfn isn't necessarily a lowmem page */
+	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+}
+
+static void xen_alloc_pd(u32 pfn)
+{
+	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+}
+
+static void xen_release_pd(u32 pfn)
+{
+	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+}
+
+static void xen_release_pt(u32 pfn)
+{
+	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+}
+
+static void xen_alloc_pd_clone(u32 pfn, u32 clonepfn,
+			       u32 start, u32 count)
+{
+	xen_alloc_pd(pfn);
+}
+
+static __init void xen_pagetable_setup_start(pgd_t *base)
+{
+	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
+
+	init_mm.pgd = base;
+	/*
+	 * copy top-level of Xen-supplied pagetable into place.  For
+	 * !PAE we can use this as-is, but for PAE it is a stand-in
+	 * while we copy the pmd pages.
+	 */
+	memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
+
+	if (PTRS_PER_PMD > 1) {
+		int i;
+		/*
+		 * For PAE, need to allocate new pmds, rather than
+		 * share Xen's, since Xen doesn't like pmd's being
+		 * shared between address spaces.
+		 */
+		for (i = 0; i < PTRS_PER_PGD; i++) {
+			if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
+				pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+
+				memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
+				       PAGE_SIZE);
+
+				xen_alloc_pd(PFN_DOWN(__pa(pmd)));
+
+				set_pgd(&base[i], __pgd(1 + __pa(pmd)));
+			} else
+				pgd_clear(&base[i]);
+		}
+	}
+
+	/* make sure zero_page is mapped RO so we can use it in pagetables */
+	make_lowmem_page_readonly(empty_zero_page);
+	make_lowmem_page_readonly(base);
+	/*
+	 * Switch to new pagetable.  This is done before
+	 * pagetable_init has done anything so that the new pages
+	 * added to the table can be prepared properly for Xen.
+	 */
+	xen_write_cr3(__pa(base));
+}
+
+static __init void xen_pagetable_setup_done(pgd_t *base)
+{
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		/*
+		 * Create a mapping for the shared info page.
+		 * Should be set_fixmap(), but shared_info is a machine
+		 * address with no corresponding pseudo-phys address.
+		 */
+#if 0
+		set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
+			    PFN_DOWN(xen_start_info->shared_info),
+			    PAGE_KERNEL);
+#endif
+
+		HYPERVISOR_shared_info =
+			(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
+
+	} else
+		HYPERVISOR_shared_info =
+			(struct shared_info *)__va(xen_start_info->shared_info);
+
+#if 0
+	xen_pgd_pin(base);
+#endif
+
+	xen_vcpu_setup(smp_processor_id());
+}
+
+static const struct paravirt_ops xen_paravirt_ops __initdata = {
+	.paravirt_enabled = 1,
+	.shared_kernel_pmd = 0,
+
+	.name = "Xen",
+	.banner = xen_banner,
+
+	.patch = paravirt_patch_default,
+
+	.memory_setup = xen_memory_setup,
+	.arch_setup = xen_arch_setup,
+
+	.cpuid = xen_cpuid,
+
+	.set_debugreg = xen_set_debugreg,
+	.get_debugreg = xen_get_debugreg,
+
+	.clts = native_clts,
+
+	.read_cr0 = native_read_cr0,
+	.write_cr0 = native_write_cr0,
+
+	.read_cr2 = xen_read_cr2,
+	.write_cr2 = native_write_cr2,
+
+	.read_cr3 = xen_read_cr3,
+	.write_cr3 = xen_write_cr3,
+
+	.read_cr4 = native_read_cr4,
+	.read_cr4_safe = native_read_cr4_safe,
+	.write_cr4 = xen_write_cr4,
+
+	.save_fl = xen_save_fl,
+	.restore_fl = xen_restore_fl,
+	.irq_disable = xen_irq_disable,
+	.irq_enable = xen_irq_enable,
+	.safe_halt = xen_safe_halt,
+	.halt = xen_halt,
+	.wbinvd = native_wbinvd,
+
+	.read_msr = native_read_msr_safe,
+	.write_msr = native_write_msr_safe,
+	.read_tsc = native_read_tsc,
+	.read_pmc = native_read_pmc,
+
+	.iret = (void *)&hypercall_page[__HYPERVISOR_iret],
+	.irq_enable_sysexit = NULL,  /* never called */
+
+	.load_tr_desc = paravirt_nop,
+	.set_ldt = xen_set_ldt,
+	.load_gdt = xen_load_gdt,
+	.load_idt = xen_load_idt,
+	.load_tls = xen_load_tls,
+
+	.store_gdt = native_store_gdt,
+	.store_idt = native_store_idt,
+	.store_tr = xen_store_tr,
+
+	.write_ldt_entry = xen_write_ldt_entry,
+	.write_gdt_entry = xen_write_gdt_entry,
+	.write_idt_entry = xen_write_idt_entry,
+	.load_esp0 = xen_load_esp0,
+
+	.set_iopl_mask = xen_set_iopl_mask,
+	.io_delay = xen_io_delay,
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	.apic_write = paravirt_nop,
+	.apic_write_atomic = paravirt_nop,
+	.apic_read = xen_apic_read,
+	.setup_boot_clock = paravirt_nop,
+	.setup_secondary_clock = paravirt_nop,
+	.startup_ipi_hook = paravirt_nop,
+#endif
+
+	.flush_tlb_user = xen_flush_tlb,
+	.flush_tlb_kernel = xen_flush_tlb,
+	.flush_tlb_single = xen_flush_tlb_single,
+
+	.pte_update = paravirt_nop,
+	.pte_update_defer = paravirt_nop,
+
+	.pagetable_setup_start = xen_pagetable_setup_start,
+	.pagetable_setup_done = xen_pagetable_setup_done,
+
+	.alloc_pt = xen_alloc_pt,
+	.alloc_pd = xen_alloc_pd,
+	.alloc_pd_clone = xen_alloc_pd_clone,
+	.release_pd = xen_release_pd,
+	.release_pt = xen_release_pt,
+
+	.set_lazy_mode = xen_set_lazy_mode,
+};
+
+/* First C function to be called on Xen boot */
+asmlinkage void __init xen_start_kernel(void)
+{
+	pgd_t *pgd;
+
+	if (!xen_start_info)
+		return;
+
+	BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
+
+	/* Install Xen paravirt ops */
+	paravirt_ops = xen_paravirt_ops;
+
+	xen_setup_features();
+
+	/* Get mfn list */
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
+
+	pgd = (pgd_t *)xen_start_info->pt_base;
+
+	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+
+	init_mm.pgd = pgd; /* use the Xen pagetables to start */
+
+	/* keep using Xen gdt for now; no urgent need to change it */
+
+	x86_write_percpu(xen_cr3, __pa(pgd));
+	xen_vcpu_setup(0);
+
+	paravirt_ops.kernel_rpl = 1;
+	if (xen_feature(XENFEAT_supervisor_mode_kernel))
+		paravirt_ops.kernel_rpl = 0;
+
+	/* set the limit of our address space */
+	reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE);
+
+	/* set up basic CPUID stuff */
+	cpu_detect(&new_cpu_data);
+	new_cpu_data.hard_math = 1;
+	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+
+	/* Poke various useful things into boot_params */
+	LOADER_TYPE = (9 << 4) | 0;
+	INITRD_START = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0;
+	INITRD_SIZE = xen_start_info->mod_len;
+
+	/* Start the world */
+	start_kernel();
+}
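The cr3 packing comment in enlighten.c rewards a concrete check. A standalone userspace sketch of xen_pfn_to_cr3()/xen_cr3_to_pfn() (the macros are copied from the file above; the example pfn is arbitrary):

	#include <assert.h>

	#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
	#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))

	int main(void)
	{
		unsigned pfn = 0x123456;	/* a frame above the 4GB boundary */
		unsigned cr3 = xen_pfn_to_cr3(pfn);

		/* pfn[19:0] lands in cr3[31:12]; pfn[31:20] spills into the
		   page-aligned low bits cr3[11:0] that hardware would ignore. */
		assert(cr3 == 0x23456001);
		assert(xen_cr3_to_pfn(cr3) == pfn);
		return 0;
	}
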
diff --git a/arch/i386/xen/features.c b/arch/i386/xen/features.c
new file mode 100644
index 000000000000..0707714e40d6
--- /dev/null
+++ b/arch/i386/xen/features.c
@@ -0,0 +1,29 @@
+/******************************************************************************
+ * features.c
+ *
+ * Xen feature flags.
+ *
+ * Copyright (c) 2006, Ian Campbell, XenSource Inc.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/features.h>
+
+u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
+EXPORT_SYMBOL_GPL(xen_features);
+
+void xen_setup_features(void)
+{
+	struct xen_feature_info fi;
+	int i, j;
+
+	for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
+		fi.submap_idx = i;
+		if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
+			break;
+		for (j = 0; j < 32; j++)
+			xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
+	}
+}
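Flattening each 32-bit submap into one byte per feature is what makes xen_feature() (in include/xen/features.h below) a single array load. For comparison, a sketch of the unflattened lookup it replaces (not in the patch):

	/* What xen_setup_features() precomputes, done on the fly instead. */
	static int xen_feature_slow(const u32 *submaps, int flag)
	{
		return (submaps[flag / 32] >> (flag % 32)) & 1;
	}
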
diff --git a/arch/i386/xen/multicalls.c b/arch/i386/xen/multicalls.c
new file mode 100644
index 000000000000..869f9833f08f
--- /dev/null
+++ b/arch/i386/xen/multicalls.c
@@ -0,0 +1,89 @@
+/*
+ * Xen hypercall batching.
+ *
+ * Xen allows multiple hypercalls to be issued at once, using the
+ * multicall interface.  This allows the cost of trapping into the
+ * hypervisor to be amortized over several calls.
+ *
+ * This file implements a simple interface for multicalls.  There's a
+ * per-cpu buffer of outstanding multicalls.  When you want to queue a
+ * multicall for issuing, you can allocate a multicall slot for the
+ * call and its arguments, along with storage for any data pointed to
+ * by the arguments (for passing pointers to structures, etc).  When
+ * the multicall is actually issued, all the space for the commands
+ * and allocated memory is freed for reuse.
+ *
+ * Multicalls are flushed whenever any of the buffers get full, or
+ * when explicitly requested.  There's no way to get per-multicall
+ * return results back.  It will BUG if any of the multicalls fail.
+ *
+ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
+ */
+#include <linux/percpu.h>
+
+#include <asm/xen/hypercall.h>
+
+#include "multicalls.h"
+
+#define MC_BATCH	8
+#define MC_ARGS		(MC_BATCH * 32 / sizeof(u64))
+
+struct mc_buffer {
+	struct multicall_entry entries[MC_BATCH];
+	u64 args[MC_ARGS];
+	unsigned mcidx, argidx;
+};
+
+static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
+DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
+
+void xen_mc_flush(void)
+{
+	struct mc_buffer *b = &get_cpu_var(mc_buffer);
+	int ret = 0;
+	unsigned long flags;
+
+	/* Disable interrupts in case someone comes in and queues
+	   something in the middle */
+	local_irq_save(flags);
+
+	if (b->mcidx) {
+		int i;
+
+		if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
+			BUG();
+		for (i = 0; i < b->mcidx; i++)
+			if (b->entries[i].result < 0)
+				ret++;
+		b->mcidx = 0;
+		b->argidx = 0;
+	} else
+		BUG_ON(b->argidx != 0);
+
+	put_cpu_var(mc_buffer);
+	local_irq_restore(flags);
+
+	BUG_ON(ret);
+}
+
+struct multicall_space __xen_mc_entry(size_t args)
+{
+	struct mc_buffer *b = &get_cpu_var(mc_buffer);
+	struct multicall_space ret;
+	unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
+
+	BUG_ON(argspace > MC_ARGS);
+
+	if (b->mcidx == MC_BATCH ||
+	    (b->argidx + argspace) > MC_ARGS)
+		xen_mc_flush();
+
+	ret.mc = &b->entries[b->mcidx];
+	b->mcidx++;
+	ret.args = &b->args[b->argidx];
+	b->argidx += argspace;
+
+	put_cpu_var(mc_buffer);
+
+	return ret;
+}
diff --git a/arch/i386/xen/multicalls.h b/arch/i386/xen/multicalls.h
new file mode 100644
index 000000000000..e6f7530b156c
--- /dev/null
+++ b/arch/i386/xen/multicalls.h
@@ -0,0 +1,45 @@
+#ifndef _XEN_MULTICALLS_H
+#define _XEN_MULTICALLS_H
+
+#include "xen-ops.h"
+
+/* Multicalls */
+struct multicall_space
+{
+	struct multicall_entry *mc;
+	void *args;
+};
+
+/* Allocate room for a multicall and its args */
+struct multicall_space __xen_mc_entry(size_t args);
+
+DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags);
+
+/* Call to start a batch of multiple __xen_mc_entry()s.  Must be
+   paired with xen_mc_issue() */
+static inline void xen_mc_batch(void)
+{
+	/* need to disable interrupts until this entry is complete */
+	local_irq_save(__get_cpu_var(xen_mc_irq_flags));
+}
+
+static inline struct multicall_space xen_mc_entry(size_t args)
+{
+	xen_mc_batch();
+	return __xen_mc_entry(args);
+}
+
+/* Flush all pending multicalls */
+void xen_mc_flush(void);
+
+/* Issue a multicall if we're not in a lazy mode */
+static inline void xen_mc_issue(unsigned mode)
+{
+	if ((xen_get_lazy_mode() & mode) == 0)
+		xen_mc_flush();
+
+	/* restore flags saved in xen_mc_batch */
+	local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
+}
+
+#endif /* _XEN_MULTICALLS_H */
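A minimal usage sketch of this batching API, modeled on xen_set_ldt() and xen_flush_tlb() in enlighten.c (the function name here is invented for illustration):

	static void example_queue_local_tlb_flush(void)
	{
		struct mmuext_op *op;
		/* xen_mc_entry() = xen_mc_batch() + a slot + arg space;
		   IRQs stay disabled until the matching xen_mc_issue(). */
		struct multicall_space mcs = xen_mc_entry(sizeof(*op));

		op = mcs.args;		/* scratch space inside the mc buffer */
		op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
		MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

		/* Hypercall goes out now, unless we're batching under
		   lazy-MMU mode, in which case it waits for the flush. */
		xen_mc_issue(PARAVIRT_LAZY_MMU);
	}
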
diff --git a/arch/i386/xen/setup.c b/arch/i386/xen/setup.c
new file mode 100644
index 000000000000..7da93ee612f6
--- /dev/null
+++ b/arch/i386/xen/setup.c
@@ -0,0 +1,97 @@
+/*
+ * Machine specific setup for xen
+ *
+ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/pm.h>
+
+#include <asm/elf.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/interface/physdev.h>
+#include <xen/features.h>
+
+#include "xen-ops.h"
+
+/* These are code, but not functions.  Defined in entry.S */
+extern const char xen_hypervisor_callback[];
+extern const char xen_failsafe_callback[];
+
+static __initdata struct shared_info init_shared;
+
+/*
+ * Point at some empty memory to start with.  We map the real shared_info
+ * page as soon as fixmap is up and running.
+ */
+struct shared_info *HYPERVISOR_shared_info = &init_shared;
+
+unsigned long *phys_to_machine_mapping;
+EXPORT_SYMBOL(phys_to_machine_mapping);
+
+/**
+ * machine_specific_memory_setup - Hook for machine specific memory setup.
+ **/
+
+char * __init xen_memory_setup(void)
+{
+	unsigned long max_pfn = xen_start_info->nr_pages;
+
+	e820.nr_map = 0;
+	add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
+
+	return "Xen";
+}
+
+static void xen_idle(void)
+{
+	local_irq_disable();
+
+	if (need_resched())
+		local_irq_enable();
+	else {
+		current_thread_info()->status &= ~TS_POLLING;
+		smp_mb__after_clear_bit();
+		safe_halt();
+		current_thread_info()->status |= TS_POLLING;
+	}
+}
+
+void __init xen_arch_setup(void)
+{
+	struct physdev_set_iopl set_iopl;
+	int rc;
+
+	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
+	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
+
+	HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
+				 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
+
+	set_iopl.iopl = 1;
+	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+	if (rc != 0)
+		printk(KERN_INFO "physdev_op failed %d\n", rc);
+
+#ifdef CONFIG_ACPI
+	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
+		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
+		disable_acpi();
+	}
+#endif
+
+	memcpy(boot_command_line, xen_start_info->cmd_line,
+	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
+	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
+
+	pm_idle = xen_idle;
+}
diff --git a/arch/i386/xen/xen-head.S b/arch/i386/xen/xen-head.S
new file mode 100644
index 000000000000..2998d55a0017
--- /dev/null
+++ b/arch/i386/xen/xen-head.S
@@ -0,0 +1,36 @@
+/* Xen-specific pieces of head.S, intended to be included in the right
+   place in head.S */
+
+#ifdef CONFIG_XEN
+
+#include <linux/elfnote.h>
+#include <asm/boot.h>
+#include <xen/interface/elfnote.h>
+
+ENTRY(startup_xen)
+	movl %esi,xen_start_info
+	cld
+	movl $(init_thread_union+THREAD_SIZE),%esp
+	jmp xen_start_kernel
+
+.pushsection ".bss.page_aligned"
+	.align PAGE_SIZE_asm
+ENTRY(hypercall_page)
+	.skip 0x1000
+.popsection
+
+	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
+	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz "2.6")
+	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz "xen-3.0")
+	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long  __PAGE_OFFSET)
+	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long  startup_xen)
+	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long  hypercall_page)
+	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
+#ifdef CONFIG_X86_PAE
+	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
+#else
+	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "no")
+#endif
+	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
+
+#endif /* CONFIG_XEN */
diff --git a/arch/i386/xen/xen-ops.h b/arch/i386/xen/xen-ops.h
new file mode 100644
index 000000000000..79648fe1ab77
--- /dev/null
+++ b/arch/i386/xen/xen-ops.h
@@ -0,0 +1,31 @@
+#ifndef XEN_OPS_H
+#define XEN_OPS_H
+
+#include <linux/init.h>
+#include <linux/clocksource.h>
+
+DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
+DECLARE_PER_CPU(unsigned long, xen_cr3);
+
+extern struct start_info *xen_start_info;
+extern struct shared_info *HYPERVISOR_shared_info;
+
+char * __init xen_memory_setup(void);
+void __init xen_arch_setup(void);
+void __init xen_init_IRQ(void);
+
+unsigned long xen_cpu_khz(void);
+void __init xen_time_init(void);
+unsigned long xen_get_wallclock(void);
+int xen_set_wallclock(unsigned long time);
+cycle_t xen_clocksource_read(void);
+
+DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
+
+static inline unsigned xen_get_lazy_mode(void)
+{
+	return x86_read_percpu(xen_lazy_mode);
+}
+
+#endif /* XEN_OPS_H */
diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h
index 9e15ce0006eb..36f310632c49 100644
--- a/include/asm-i386/irq.h
+++ b/include/asm-i386/irq.h
@@ -41,6 +41,7 @@ extern int irqbalance_disable(char *str);
 extern void fixup_irqs(cpumask_t map);
 #endif
 
+unsigned int do_IRQ(struct pt_regs *regs);
 void init_IRQ(void);
 void __init native_init_IRQ(void);
 
diff --git a/include/asm-i386/xen/hypercall.h b/include/asm-i386/xen/hypercall.h
index 53912859708b..bc0ee7d961ca 100644
--- a/include/asm-i386/xen/hypercall.h
+++ b/include/asm-i386/xen/hypercall.h
@@ -392,4 +392,22 @@ MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
 	mcl->args[2] = (unsigned long)success_count;
 	mcl->args[3] = domid;
 }
+
+static inline void
+MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries)
+{
+	mcl->op = __HYPERVISOR_set_gdt;
+	mcl->args[0] = (unsigned long)frames;
+	mcl->args[1] = entries;
+}
+
+static inline void
+MULTI_stack_switch(struct multicall_entry *mcl,
+		   unsigned long ss, unsigned long esp)
+{
+	mcl->op = __HYPERVISOR_stack_switch;
+	mcl->args[0] = ss;
+	mcl->args[1] = esp;
+}
+
 #endif /* __HYPERCALL_H__ */
diff --git a/include/xen/features.h b/include/xen/features.h
new file mode 100644
index 000000000000..27292d4d2a6a
--- /dev/null
+++ b/include/xen/features.h
@@ -0,0 +1,23 @@
+/******************************************************************************
+ * features.h
+ *
+ * Query the features reported by Xen.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __XEN_FEATURES_H__
+#define __XEN_FEATURES_H__
+
+#include <xen/interface/features.h>
+
+void xen_setup_features(void);
+
+extern u8 xen_features[XENFEAT_NR_SUBMAPS * 32];
+
+static inline int xen_feature(int flag)
+{
+	return xen_features[flag];
+}
+
+#endif /* __XEN_FEATURES_H__ */
diff --git a/include/xen/page.h b/include/xen/page.h
new file mode 100644
index 000000000000..1df6c1930578
--- /dev/null
+++ b/include/xen/page.h
@@ -0,0 +1,179 @@
+#ifndef __XEN_PAGE_H
+#define __XEN_PAGE_H
+
+#include <linux/pfn.h>
+
+#include <asm/uaccess.h>
+
+#include <xen/features.h>
+
+#ifdef CONFIG_X86_PAE
+/* Xen machine address */
+typedef struct xmaddr {
+	unsigned long long maddr;
+} xmaddr_t;
+
+/* Xen pseudo-physical address */
+typedef struct xpaddr {
+	unsigned long long paddr;
+} xpaddr_t;
+#else
+/* Xen machine address */
+typedef struct xmaddr {
+	unsigned long maddr;
+} xmaddr_t;
+
+/* Xen pseudo-physical address */
+typedef struct xpaddr {
+	unsigned long paddr;
+} xpaddr_t;
+#endif
+
+#define XMADDR(x)	((xmaddr_t) { .maddr = (x) })
+#define XPADDR(x)	((xpaddr_t) { .paddr = (x) })
+
+/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY	(~0UL)
+#define FOREIGN_FRAME_BIT	(1UL<<31)
+#define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
+
+extern unsigned long *phys_to_machine_mapping;
+
+static inline unsigned long pfn_to_mfn(unsigned long pfn)
+{
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return pfn;
+
+	return phys_to_machine_mapping[(unsigned int)(pfn)] &
+		~FOREIGN_FRAME_BIT;
+}
+
+static inline int phys_to_machine_mapping_valid(unsigned long pfn)
+{
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 1;
+
+	return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
+}
+
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+	unsigned long pfn;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return mfn;
+
+#if 0
+	if (unlikely((mfn >> machine_to_phys_order) != 0))
+		return max_mapnr;
+#endif
+
+	pfn = 0;
+	/*
+	 * The array access can fail (e.g., device space beyond end of RAM).
+	 * In such cases it doesn't matter what we return (we return garbage),
+	 * but we must handle the fault without crashing!
+	 */
+	__get_user(pfn, &machine_to_phys_mapping[mfn]);
+
+	return pfn;
+}
+
+static inline xmaddr_t phys_to_machine(xpaddr_t phys)
+{
+	unsigned offset = phys.paddr & ~PAGE_MASK;
+	return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
+}
+
+static inline xpaddr_t machine_to_phys(xmaddr_t machine)
+{
+	unsigned offset = machine.maddr & ~PAGE_MASK;
+	return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
+}
+
+/*
+ * We detect special mappings in one of two ways:
+ *  1. If the MFN is an I/O page then Xen will set the m2p entry
+ *     to be outside our maximum possible pseudophys range.
+ *  2. If the MFN belongs to a different domain then we will certainly
+ *     not have MFN in our p2m table. Conversely, if the page is ours,
+ *     then we'll have p2m(m2p(MFN))==MFN.
+ * If we detect a special mapping then it doesn't have a 'struct page'.
+ * We force !pfn_valid() by returning an out-of-range pointer.
+ *
+ * NB. These checks require that, for any MFN that is not in our reservation,
+ * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
+ * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
+ * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
+ *
+ * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
+ *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
+ *      require. In all the cases we care about, the FOREIGN_FRAME bit is
+ *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
+ */
+static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
+{
+	extern unsigned long max_mapnr;
+	unsigned long pfn = mfn_to_pfn(mfn);
+	if ((pfn < max_mapnr)
+	    && !xen_feature(XENFEAT_auto_translated_physmap)
+	    && (phys_to_machine_mapping[pfn] != mfn))
+		return max_mapnr; /* force !pfn_valid() */
+	return pfn;
+}
+
+static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+	phys_to_machine_mapping[pfn] = mfn;
+}
+
+/* VIRT <-> MACHINE conversion */
+#define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
+#define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
+#define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
+
+#ifdef CONFIG_X86_PAE
+#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) |\
+		       (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)))
+
+static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
+{
+	pte_t pte;
+
+	pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) |
+		(pgprot_val(pgprot) >> 32);
+	pte.pte_high &= (__supported_pte_mask >> 32);
+	pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot));
+	pte.pte_low &= __supported_pte_mask;
+
+	return pte;
+}
+
+static inline unsigned long long pte_val_ma(pte_t x)
+{
+	return ((unsigned long long)x.pte_high << 32) | x.pte_low;
+}
+#define pmd_val_ma(v)	((v).pmd)
+#define pud_val_ma(v)	((v).pgd.pgd)
+#define __pte_ma(x)	((pte_t) { .pte_low = (x), .pte_high = (x)>>32 } )
+#define __pmd_ma(x)	((pmd_t) { (x) } )
+#else  /* !X86_PAE */
+#define pte_mfn(_pte)	((_pte).pte_low >> PAGE_SHIFT)
+#define mfn_pte(pfn, prot)	__pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+#define pte_val_ma(x)	((x).pte_low)
+#define pmd_val_ma(v)	((v).pud.pgd.pgd)
+#define __pte_ma(x)	((pte_t) { (x) } )
+#endif	/* CONFIG_X86_PAE */
+
+#define pgd_val_ma(x)	((x).pgd)
+
+
+xmaddr_t arbitrary_virt_to_machine(unsigned long address);
+void make_lowmem_page_readonly(void *vaddr);
+void make_lowmem_page_readwrite(void *vaddr);
+
+#endif /* __XEN_PAGE_H */
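To make the conversion concrete, a worked example of phys_to_machine() under an assumed p2m entry (hypothetical values; auto_translated_physmap off, 4k pages):

	/* Suppose (hypothetically) phys_to_machine_mapping[0x100] == 0x5a3. */
	static void p2m_example(void)
	{
		xpaddr_t p = XPADDR(0x100abc);	/* pfn 0x100, in-page offset 0xabc */
		xmaddr_t m = phys_to_machine(p);

		/* The frame number goes through the p2m array; the
		   in-page offset is carried over untouched. */
		BUG_ON(m.maddr != 0x5a3abc);
	}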