diff options
-rw-r--r-- | arch/i386/Makefile | 3 | ||||
-rw-r--r-- | arch/i386/kernel/entry.S | 71 | ||||
-rw-r--r-- | arch/i386/kernel/head.S | 5 | ||||
-rw-r--r-- | arch/i386/kernel/vmlinux.lds.S | 1 | ||||
-rw-r--r-- | arch/i386/xen/Makefile | 1 | ||||
-rw-r--r-- | arch/i386/xen/enlighten.c | 745 | ||||
-rw-r--r-- | arch/i386/xen/features.c | 29 | ||||
-rw-r--r-- | arch/i386/xen/multicalls.c | 89 | ||||
-rw-r--r-- | arch/i386/xen/multicalls.h | 45 | ||||
-rw-r--r-- | arch/i386/xen/setup.c | 97 | ||||
-rw-r--r-- | arch/i386/xen/xen-head.S | 36 | ||||
-rw-r--r-- | arch/i386/xen/xen-ops.h | 31 | ||||
-rw-r--r-- | include/asm-i386/irq.h | 1 | ||||
-rw-r--r-- | include/asm-i386/xen/hypercall.h | 18 | ||||
-rw-r--r-- | include/xen/features.h | 23 | ||||
-rw-r--r-- | include/xen/page.h | 179 |
16 files changed, 1373 insertions, 1 deletions
diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 181cc29a7c4f..01f0ff0daaf4 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile | |||
@@ -93,6 +93,9 @@ mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000 | |||
93 | mcore-$(CONFIG_X86_ES7000) := mach-default | 93 | mcore-$(CONFIG_X86_ES7000) := mach-default |
94 | core-$(CONFIG_X86_ES7000) := arch/i386/mach-es7000/ | 94 | core-$(CONFIG_X86_ES7000) := arch/i386/mach-es7000/ |
95 | 95 | ||
96 | # Xen paravirtualization support | ||
97 | core-$(CONFIG_XEN) += arch/i386/xen/ | ||
98 | |||
96 | # default subarch .h files | 99 | # default subarch .h files |
97 | mflags-y += -Iinclude/asm-i386/mach-default | 100 | mflags-y += -Iinclude/asm-i386/mach-default |
98 | 101 | ||
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3c3c220488c9..ffb236544270 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -1023,6 +1023,77 @@ ENTRY(kernel_thread_helper) | |||
1023 | CFI_ENDPROC | 1023 | CFI_ENDPROC |
1024 | ENDPROC(kernel_thread_helper) | 1024 | ENDPROC(kernel_thread_helper) |
1025 | 1025 | ||
1026 | #ifdef CONFIG_XEN | ||
/*
 * xen_hypervisor_callback: pushes a zero word, saves the register frame
 * with SAVE_ALL, passes the resulting stack pointer in %eax to
 * xen_evtchn_do_upcall and leaves through ret_from_intr.
 * NOTE(review): the zero word presumably fills the orig_eax/error-code
 * slot of the frame - confirm against SAVE_ALL's layout.
 */
1027 | ENTRY(xen_hypervisor_callback) | ||
1028 | CFI_STARTPROC | ||
1029 | pushl $0 | ||
1030 | CFI_ADJUST_CFA_OFFSET 4 | ||
1031 | SAVE_ALL | ||
1032 | TRACE_IRQS_OFF | ||
1033 | mov %esp, %eax | ||
1034 | call xen_evtchn_do_upcall | ||
1035 | jmp ret_from_intr | ||
1036 | CFI_ENDPROC | ||
1037 | ENDPROC(xen_hypervisor_callback) | ||
1038 | |||
1039 | # Hypervisor uses this for application faults while it executes. | ||
1040 | # We get here for two reasons: | ||
1041 | # 1. Fault while reloading DS, ES, FS or GS | ||
1042 | # 2. Fault while executing IRET | ||
1043 | # Category 1 we fix up by reattempting the load, and zeroing the segment | ||
1044 | # register if the load fails. | ||
1045 | # Category 2 we fix up by jumping to do_iret_error. We cannot use the | ||
1046 | # normal Linux return path in this case because if we use the IRET hypercall | ||
1047 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. | ||
1048 | # We distinguish between categories by maintaining a status value in EAX. | ||
1049 | ENTRY(xen_failsafe_callback) | ||
1050 | CFI_STARTPROC | ||
1051 | pushl %eax | ||
1052 | CFI_ADJUST_CFA_OFFSET 4 | ||
/* EAX starts at 1; any fixup handler below clears it to 0. */
1053 | movl $1,%eax | ||
/* Reload the four saved selectors; each load has an __ex_table entry. */
1054 | 1: mov 4(%esp),%ds | ||
1055 | 2: mov 8(%esp),%es | ||
1056 | 3: mov 12(%esp),%fs | ||
1057 | 4: mov 16(%esp),%gs | ||
1058 | testl %eax,%eax | ||
1059 | popl %eax | ||
1060 | CFI_ADJUST_CFA_OFFSET -4 | ||
/* lea (unlike add) leaves EFLAGS alone, so jz still sees the testl result. */
1061 | lea 16(%esp),%esp | ||
1062 | CFI_ADJUST_CFA_OFFSET -16 | ||
1063 | jz 5f | ||
1064 | addl $16,%esp | ||
1065 | jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) | ||
1066 | 5: pushl $0 # EAX == 0 => Category 1 (Bad segment) | ||
1067 | CFI_ADJUST_CFA_OFFSET 4 | ||
1068 | SAVE_ALL | ||
1069 | jmp ret_from_exception | ||
1070 | CFI_ENDPROC | ||
1071 | | ||
/*
 * Fixup handlers 6-9: each clears EAX, zeroes the saved selector slot that
 * faulted, and jumps back to retry the corresponding load (1-4).
 */
1072 | .section .fixup,"ax" | ||
1073 | 6: xorl %eax,%eax | ||
1074 | movl %eax,4(%esp) | ||
1075 | jmp 1b | ||
1076 | 7: xorl %eax,%eax | ||
1077 | movl %eax,8(%esp) | ||
1078 | jmp 2b | ||
1079 | 8: xorl %eax,%eax | ||
1080 | movl %eax,12(%esp) | ||
1081 | jmp 3b | ||
1082 | 9: xorl %eax,%eax | ||
1083 | movl %eax,16(%esp) | ||
1084 | jmp 4b | ||
1085 | .previous | ||
/* Exception table: (faulting instruction, fixup handler) pairs. */
1086 | .section __ex_table,"a" | ||
1087 | .align 4 | ||
1088 | .long 1b,6b | ||
1089 | .long 2b,7b | ||
1090 | .long 3b,8b | ||
1091 | .long 4b,9b | ||
1092 | .previous | ||
1093 | ENDPROC(xen_failsafe_callback) | ||
1094 | |||
1095 | #endif /* CONFIG_XEN */ | ||
1096 | |||
1026 | .section .rodata,"a" | 1097 | .section .rodata,"a" |
1027 | #include "syscall_table.S" | 1098 | #include "syscall_table.S" |
1028 | 1099 | ||
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 82714668d43b..7c52b222207e 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S | |||
@@ -510,7 +510,8 @@ ENTRY(_stext) | |||
510 | /* | 510 | /* |
511 | * BSS section | 511 | * BSS section |
512 | */ | 512 | */ |
513 | .section ".bss.page_aligned","w" | 513 | .section ".bss.page_aligned","wa" |
514 | .align PAGE_SIZE_asm | ||
514 | ENTRY(swapper_pg_dir) | 515 | ENTRY(swapper_pg_dir) |
515 | .fill 1024,4,0 | 516 | .fill 1024,4,0 |
516 | ENTRY(swapper_pg_pmd) | 517 | ENTRY(swapper_pg_pmd) |
@@ -538,6 +539,8 @@ fault_msg: | |||
538 | .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" | 539 | .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" |
539 | .asciz "Stack: %p %p %p %p %p %p %p %p\n" | 540 | .asciz "Stack: %p %p %p %p %p %p %p %p\n" |
540 | 541 | ||
542 | #include "../xen/xen-head.S" | ||
543 | |||
541 | /* | 544 | /* |
542 | * The IDT and GDT 'descriptors' are a strange 48-bit object | 545 | * The IDT and GDT 'descriptors' are a strange 48-bit object |
543 | * only used by the lidt and lgdt instructions. They are not | 546 | * only used by the lidt and lgdt instructions. They are not |
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index aa87b06c7c82..00f1bc47d3a2 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S | |||
@@ -88,6 +88,7 @@ SECTIONS | |||
88 | 88 | ||
89 | . = ALIGN(4096); | 89 | . = ALIGN(4096); |
90 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { | 90 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { |
91 | *(.data.page_aligned) | ||
91 | *(.data.idt) | 92 | *(.data.idt) |
92 | } | 93 | } |
93 | 94 | ||
diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile new file mode 100644 index 000000000000..60bc1cfb101c --- /dev/null +++ b/arch/i386/xen/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-y := enlighten.o setup.o features.o multicalls.o | |||
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c new file mode 100644 index 000000000000..2d484f9320de --- /dev/null +++ b/arch/i386/xen/enlighten.c | |||
@@ -0,0 +1,745 @@ | |||
1 | /* | ||
2 | * Core of Xen paravirt_ops implementation. | ||
3 | * | ||
4 | * This file contains the xen_paravirt_ops structure itself, and the | ||
5 | * implementations for: | ||
6 | * - privileged instructions | ||
7 | * - interrupt flags | ||
8 | * - segment operations | ||
9 | * - booting and setup | ||
10 | * | ||
11 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | ||
12 | */ | ||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/smp.h> | ||
17 | #include <linux/preempt.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/delay.h> | ||
20 | #include <linux/start_kernel.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/bootmem.h> | ||
23 | #include <linux/module.h> | ||
24 | |||
25 | #include <xen/interface/xen.h> | ||
26 | #include <xen/interface/physdev.h> | ||
27 | #include <xen/interface/vcpu.h> | ||
28 | #include <xen/features.h> | ||
29 | #include <xen/page.h> | ||
30 | |||
31 | #include <asm/paravirt.h> | ||
32 | #include <asm/page.h> | ||
33 | #include <asm/xen/hypercall.h> | ||
34 | #include <asm/xen/hypervisor.h> | ||
35 | #include <asm/fixmap.h> | ||
36 | #include <asm/processor.h> | ||
37 | #include <asm/setup.h> | ||
38 | #include <asm/desc.h> | ||
39 | #include <asm/pgtable.h> | ||
40 | |||
41 | #include "xen-ops.h" | ||
42 | #include "multicalls.h" | ||
43 | |||
44 | EXPORT_SYMBOL_GPL(hypercall_page); | ||
45 | |||
46 | DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); | ||
47 | |||
48 | DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); | ||
49 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); | ||
50 | DEFINE_PER_CPU(unsigned long, xen_cr3); | ||
51 | |||
52 | struct start_info *xen_start_info; | ||
53 | EXPORT_SYMBOL_GPL(xen_start_info); | ||
54 | |||
55 | static void xen_vcpu_setup(int cpu) | ||
56 | { | ||
57 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | ||
58 | } | ||
59 | |||
60 | static void __init xen_banner(void) | ||
61 | { | ||
62 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | ||
63 | paravirt_ops.name); | ||
64 | printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); | ||
65 | } | ||
66 | |||
67 | static void xen_cpuid(unsigned int *eax, unsigned int *ebx, | ||
68 | unsigned int *ecx, unsigned int *edx) | ||
69 | { | ||
70 | unsigned maskedx = ~0; | ||
71 | |||
72 | /* | ||
73 | * Mask out inconvenient features, to try and disable as many | ||
74 | * unsupported kernel subsystems as possible. | ||
75 | */ | ||
76 | if (*eax == 1) | ||
77 | maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ | ||
78 | (1 << X86_FEATURE_ACPI) | /* disable ACPI */ | ||
79 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | ||
80 | |||
81 | asm(XEN_EMULATE_PREFIX "cpuid" | ||
82 | : "=a" (*eax), | ||
83 | "=b" (*ebx), | ||
84 | "=c" (*ecx), | ||
85 | "=d" (*edx) | ||
86 | : "0" (*eax), "2" (*ecx)); | ||
87 | *edx &= maskedx; | ||
88 | } | ||
89 | |||
/* Set debug register @reg to @val via hypercall; the result is not checked. */
static void xen_set_debugreg(int reg, unsigned long val)
{
	(void)HYPERVISOR_set_debugreg(reg, val);
}
94 | |||
/* Return the hypervisor's answer for debug register @reg. */
static unsigned long xen_get_debugreg(int reg)
{
	unsigned long val = HYPERVISOR_get_debugreg(reg);

	return val;
}
99 | |||
100 | static unsigned long xen_save_fl(void) | ||
101 | { | ||
102 | struct vcpu_info *vcpu; | ||
103 | unsigned long flags; | ||
104 | |||
105 | preempt_disable(); | ||
106 | vcpu = x86_read_percpu(xen_vcpu); | ||
107 | /* flag has opposite sense of mask */ | ||
108 | flags = !vcpu->evtchn_upcall_mask; | ||
109 | preempt_enable(); | ||
110 | |||
111 | /* convert to IF type flag | ||
112 | -0 -> 0x00000000 | ||
113 | -1 -> 0xffffffff | ||
114 | */ | ||
115 | return (-flags) & X86_EFLAGS_IF; | ||
116 | } | ||
117 | |||
118 | static void xen_restore_fl(unsigned long flags) | ||
119 | { | ||
120 | struct vcpu_info *vcpu; | ||
121 | |||
122 | preempt_disable(); | ||
123 | |||
124 | /* convert from IF type flag */ | ||
125 | flags = !(flags & X86_EFLAGS_IF); | ||
126 | vcpu = x86_read_percpu(xen_vcpu); | ||
127 | vcpu->evtchn_upcall_mask = flags; | ||
128 | |||
129 | if (flags == 0) { | ||
130 | /* Unmask then check (avoid races). We're only protecting | ||
131 | against updates by this CPU, so there's no need for | ||
132 | anything stronger. */ | ||
133 | barrier(); | ||
134 | |||
135 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
136 | force_evtchn_callback(); | ||
137 | preempt_enable(); | ||
138 | } else | ||
139 | preempt_enable_no_resched(); | ||
140 | } | ||
141 | |||
142 | static void xen_irq_disable(void) | ||
143 | { | ||
144 | struct vcpu_info *vcpu; | ||
145 | preempt_disable(); | ||
146 | vcpu = x86_read_percpu(xen_vcpu); | ||
147 | vcpu->evtchn_upcall_mask = 1; | ||
148 | preempt_enable_no_resched(); | ||
149 | } | ||
150 | |||
151 | static void xen_irq_enable(void) | ||
152 | { | ||
153 | struct vcpu_info *vcpu; | ||
154 | |||
155 | preempt_disable(); | ||
156 | vcpu = x86_read_percpu(xen_vcpu); | ||
157 | vcpu->evtchn_upcall_mask = 0; | ||
158 | |||
159 | /* Unmask then check (avoid races). We're only protecting | ||
160 | against updates by this CPU, so there's no need for | ||
161 | anything stronger. */ | ||
162 | barrier(); | ||
163 | |||
164 | if (unlikely(vcpu->evtchn_upcall_pending)) | ||
165 | force_evtchn_callback(); | ||
166 | preempt_enable(); | ||
167 | } | ||
168 | |||
169 | static void xen_safe_halt(void) | ||
170 | { | ||
171 | /* Blocking includes an implicit local_irq_enable(). */ | ||
172 | if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) | ||
173 | BUG(); | ||
174 | } | ||
175 | |||
176 | static void xen_halt(void) | ||
177 | { | ||
178 | if (irqs_disabled()) | ||
179 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | ||
180 | else | ||
181 | xen_safe_halt(); | ||
182 | } | ||
183 | |||
184 | static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) | ||
185 | { | ||
186 | switch (mode) { | ||
187 | case PARAVIRT_LAZY_NONE: | ||
188 | BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE); | ||
189 | break; | ||
190 | |||
191 | case PARAVIRT_LAZY_MMU: | ||
192 | case PARAVIRT_LAZY_CPU: | ||
193 | BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE); | ||
194 | break; | ||
195 | |||
196 | case PARAVIRT_LAZY_FLUSH: | ||
197 | /* flush if necessary, but don't change state */ | ||
198 | if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE) | ||
199 | xen_mc_flush(); | ||
200 | return; | ||
201 | } | ||
202 | |||
203 | xen_mc_flush(); | ||
204 | x86_write_percpu(xen_lazy_mode, mode); | ||
205 | } | ||
206 | |||
/* Task-register read hook: always reports selector 0. */
static unsigned long xen_store_tr(void)
{
	return 0UL;
}
211 | |||
212 | static void xen_set_ldt(const void *addr, unsigned entries) | ||
213 | { | ||
214 | unsigned long linear_addr = (unsigned long)addr; | ||
215 | struct mmuext_op *op; | ||
216 | struct multicall_space mcs = xen_mc_entry(sizeof(*op)); | ||
217 | |||
218 | op = mcs.args; | ||
219 | op->cmd = MMUEXT_SET_LDT; | ||
220 | if (linear_addr) { | ||
221 | /* ldt my be vmalloced, use arbitrary_virt_to_machine */ | ||
222 | xmaddr_t maddr; | ||
223 | maddr = arbitrary_virt_to_machine((unsigned long)addr); | ||
224 | linear_addr = (unsigned long)maddr.maddr; | ||
225 | } | ||
226 | op->arg1.linear_addr = linear_addr; | ||
227 | op->arg2.nr_ents = entries; | ||
228 | |||
229 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | ||
230 | |||
231 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
232 | } | ||
233 | |||
234 | static void xen_load_gdt(const struct Xgt_desc_struct *dtr) | ||
235 | { | ||
236 | unsigned long *frames; | ||
237 | unsigned long va = dtr->address; | ||
238 | unsigned int size = dtr->size + 1; | ||
239 | unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; | ||
240 | int f; | ||
241 | struct multicall_space mcs; | ||
242 | |||
243 | /* A GDT can be up to 64k in size, which corresponds to 8192 | ||
244 | 8-byte entries, or 16 4k pages.. */ | ||
245 | |||
246 | BUG_ON(size > 65536); | ||
247 | BUG_ON(va & ~PAGE_MASK); | ||
248 | |||
249 | mcs = xen_mc_entry(sizeof(*frames) * pages); | ||
250 | frames = mcs.args; | ||
251 | |||
252 | for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { | ||
253 | frames[f] = virt_to_mfn(va); | ||
254 | make_lowmem_page_readonly((void *)va); | ||
255 | } | ||
256 | |||
257 | MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); | ||
258 | |||
259 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
260 | } | ||
261 | |||
262 | static void load_TLS_descriptor(struct thread_struct *t, | ||
263 | unsigned int cpu, unsigned int i) | ||
264 | { | ||
265 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
266 | xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | ||
267 | struct multicall_space mc = __xen_mc_entry(0); | ||
268 | |||
269 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); | ||
270 | } | ||
271 | |||
272 | static void xen_load_tls(struct thread_struct *t, unsigned int cpu) | ||
273 | { | ||
274 | xen_mc_batch(); | ||
275 | |||
276 | load_TLS_descriptor(t, cpu, 0); | ||
277 | load_TLS_descriptor(t, cpu, 1); | ||
278 | load_TLS_descriptor(t, cpu, 2); | ||
279 | |||
280 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
281 | } | ||
282 | |||
283 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | ||
284 | u32 low, u32 high) | ||
285 | { | ||
286 | unsigned long lp = (unsigned long)&dt[entrynum]; | ||
287 | xmaddr_t mach_lp = virt_to_machine(lp); | ||
288 | u64 entry = (u64)high << 32 | low; | ||
289 | |||
290 | xen_mc_flush(); | ||
291 | if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry)) | ||
292 | BUG(); | ||
293 | } | ||
294 | |||
295 | static int cvt_gate_to_trap(int vector, u32 low, u32 high, | ||
296 | struct trap_info *info) | ||
297 | { | ||
298 | u8 type, dpl; | ||
299 | |||
300 | type = (high >> 8) & 0x1f; | ||
301 | dpl = (high >> 13) & 3; | ||
302 | |||
303 | if (type != 0xf && type != 0xe) | ||
304 | return 0; | ||
305 | |||
306 | info->vector = vector; | ||
307 | info->address = (high & 0xffff0000) | (low & 0x0000ffff); | ||
308 | info->cs = low >> 16; | ||
309 | info->flags = dpl; | ||
310 | /* interrupt gates clear IF */ | ||
311 | if (type == 0xe) | ||
312 | info->flags |= 4; | ||
313 | |||
314 | return 1; | ||
315 | } | ||
316 | |||
317 | /* Locations of each CPU's IDT */ | ||
318 | static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc); | ||
319 | |||
320 | /* Set an IDT entry. If the entry is part of the current IDT, then | ||
321 | also update Xen. */ | ||
322 | static void xen_write_idt_entry(struct desc_struct *dt, int entrynum, | ||
323 | u32 low, u32 high) | ||
324 | { | ||
325 | |||
326 | int cpu = smp_processor_id(); | ||
327 | unsigned long p = (unsigned long)&dt[entrynum]; | ||
328 | unsigned long start = per_cpu(idt_desc, cpu).address; | ||
329 | unsigned long end = start + per_cpu(idt_desc, cpu).size + 1; | ||
330 | |||
331 | xen_mc_flush(); | ||
332 | |||
333 | write_dt_entry(dt, entrynum, low, high); | ||
334 | |||
335 | if (p >= start && (p + 8) <= end) { | ||
336 | struct trap_info info[2]; | ||
337 | |||
338 | info[1].address = 0; | ||
339 | |||
340 | if (cvt_gate_to_trap(entrynum, low, high, &info[0])) | ||
341 | if (HYPERVISOR_set_trap_table(info)) | ||
342 | BUG(); | ||
343 | } | ||
344 | } | ||
345 | |||
346 | /* Load a new IDT into Xen. In principle this can be per-CPU, so we | ||
347 | hold a spinlock to protect the static traps[] array (static because | ||
348 | it avoids allocation, and saves stack space). */ | ||
349 | static void xen_load_idt(const struct Xgt_desc_struct *desc) | ||
350 | { | ||
351 | static DEFINE_SPINLOCK(lock); | ||
352 | static struct trap_info traps[257]; | ||
353 | |||
354 | int cpu = smp_processor_id(); | ||
355 | unsigned in, out, count; | ||
356 | |||
357 | per_cpu(idt_desc, cpu) = *desc; | ||
358 | |||
359 | count = (desc->size+1) / 8; | ||
360 | BUG_ON(count > 256); | ||
361 | |||
362 | spin_lock(&lock); | ||
363 | for (in = out = 0; in < count; in++) { | ||
364 | const u32 *entry = (u32 *)(desc->address + in * 8); | ||
365 | |||
366 | if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out])) | ||
367 | out++; | ||
368 | } | ||
369 | traps[out].address = 0; | ||
370 | |||
371 | xen_mc_flush(); | ||
372 | if (HYPERVISOR_set_trap_table(traps)) | ||
373 | BUG(); | ||
374 | |||
375 | spin_unlock(&lock); | ||
376 | } | ||
377 | |||
378 | /* Write a GDT descriptor entry. Ignore LDT descriptors, since | ||
379 | they're handled differently. */ | ||
380 | static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | ||
381 | u32 low, u32 high) | ||
382 | { | ||
383 | switch ((high >> 8) & 0xff) { | ||
384 | case DESCTYPE_LDT: | ||
385 | case DESCTYPE_TSS: | ||
386 | /* ignore */ | ||
387 | break; | ||
388 | |||
389 | default: { | ||
390 | xmaddr_t maddr = virt_to_machine(&dt[entry]); | ||
391 | u64 desc = (u64)high << 32 | low; | ||
392 | |||
393 | xen_mc_flush(); | ||
394 | if (HYPERVISOR_update_descriptor(maddr.maddr, desc)) | ||
395 | BUG(); | ||
396 | } | ||
397 | |||
398 | } | ||
399 | } | ||
400 | |||
401 | static void xen_load_esp0(struct tss_struct *tss, | ||
402 | struct thread_struct *thread) | ||
403 | { | ||
404 | struct multicall_space mcs = xen_mc_entry(0); | ||
405 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0); | ||
406 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
407 | } | ||
408 | |||
409 | static void xen_set_iopl_mask(unsigned mask) | ||
410 | { | ||
411 | struct physdev_set_iopl set_iopl; | ||
412 | |||
413 | /* Force the change at ring 0. */ | ||
414 | set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; | ||
415 | HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
416 | } | ||
417 | |||
/* I/O delay hook: intentionally does nothing. */
static void xen_io_delay(void)
{
	/* nothing to do */
}
421 | |||
#ifdef CONFIG_X86_LOCAL_APIC
/* APIC read hook: every register reads as 0; @reg is ignored. */
static unsigned long xen_apic_read(unsigned long reg)
{
	return 0UL;
}
#endif
428 | |||
429 | static void xen_flush_tlb(void) | ||
430 | { | ||
431 | struct mmuext_op op; | ||
432 | |||
433 | op.cmd = MMUEXT_TLB_FLUSH_LOCAL; | ||
434 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) | ||
435 | BUG(); | ||
436 | } | ||
437 | |||
438 | static void xen_flush_tlb_single(unsigned long addr) | ||
439 | { | ||
440 | struct mmuext_op op; | ||
441 | |||
442 | op.cmd = MMUEXT_INVLPG_LOCAL; | ||
443 | op.arg1.linear_addr = addr & PAGE_MASK; | ||
444 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) | ||
445 | BUG(); | ||
446 | } | ||
447 | |||
448 | static unsigned long xen_read_cr2(void) | ||
449 | { | ||
450 | return x86_read_percpu(xen_vcpu)->arch.cr2; | ||
451 | } | ||
452 | |||
453 | static void xen_write_cr4(unsigned long cr4) | ||
454 | { | ||
455 | /* never allow TSC to be disabled */ | ||
456 | native_write_cr4(cr4 & ~X86_CR4_TSD); | ||
457 | } | ||
458 | |||
/*
 * Page-directory addresses above 4GB do not fit into the architectural
 * %cr3.  When accessing %cr3, or the equivalent field in
 * vcpu_guest_context, guests must use these accessors to pack/unpack valid
 * MFNs.
 *
 * Xen relies on the pagetable base always being page-aligned and stores the
 * 12 most significant bits of the address in the 12 least significant bits
 * of cr3.  With a 32-bit unsigned, each macro is a rotate: pfn_to_cr3
 * rotates left by 12, cr3_to_pfn rotates right by 12.
 */
#define xen_pfn_to_cr3(pfn) \
	((((unsigned)(pfn)) >> 20) | (((unsigned)(pfn)) << 12))
#define xen_cr3_to_pfn(cr3) \
	((((unsigned)(cr3)) << 20) | (((unsigned)(cr3)) >> 12))
470 | |||
471 | static unsigned long xen_read_cr3(void) | ||
472 | { | ||
473 | return x86_read_percpu(xen_cr3); | ||
474 | } | ||
475 | |||
476 | static void xen_write_cr3(unsigned long cr3) | ||
477 | { | ||
478 | if (cr3 == x86_read_percpu(xen_cr3)) { | ||
479 | /* just a simple tlb flush */ | ||
480 | xen_flush_tlb(); | ||
481 | return; | ||
482 | } | ||
483 | |||
484 | x86_write_percpu(xen_cr3, cr3); | ||
485 | |||
486 | |||
487 | { | ||
488 | struct mmuext_op *op; | ||
489 | struct multicall_space mcs = xen_mc_entry(sizeof(*op)); | ||
490 | unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); | ||
491 | |||
492 | op = mcs.args; | ||
493 | op->cmd = MMUEXT_NEW_BASEPTR; | ||
494 | op->arg1.mfn = mfn; | ||
495 | |||
496 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | ||
497 | |||
498 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
499 | } | ||
500 | } | ||
501 | |||
502 | static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) | ||
503 | { | ||
504 | /* XXX pfn isn't necessarily a lowmem page */ | ||
505 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | ||
506 | } | ||
507 | |||
508 | static void xen_alloc_pd(u32 pfn) | ||
509 | { | ||
510 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | ||
511 | } | ||
512 | |||
513 | static void xen_release_pd(u32 pfn) | ||
514 | { | ||
515 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | ||
516 | } | ||
517 | |||
518 | static void xen_release_pt(u32 pfn) | ||
519 | { | ||
520 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | ||
521 | } | ||
522 | |||
523 | static void xen_alloc_pd_clone(u32 pfn, u32 clonepfn, | ||
524 | u32 start, u32 count) | ||
525 | { | ||
526 | xen_alloc_pd(pfn); | ||
527 | } | ||
528 | |||
529 | static __init void xen_pagetable_setup_start(pgd_t *base) | ||
530 | { | ||
531 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; | ||
532 | |||
533 | init_mm.pgd = base; | ||
534 | /* | ||
535 | * copy top-level of Xen-supplied pagetable into place. For | ||
536 | * !PAE we can use this as-is, but for PAE it is a stand-in | ||
537 | * while we copy the pmd pages. | ||
538 | */ | ||
539 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); | ||
540 | |||
541 | if (PTRS_PER_PMD > 1) { | ||
542 | int i; | ||
543 | /* | ||
544 | * For PAE, need to allocate new pmds, rather than | ||
545 | * share Xen's, since Xen doesn't like pmd's being | ||
546 | * shared between address spaces. | ||
547 | */ | ||
548 | for (i = 0; i < PTRS_PER_PGD; i++) { | ||
549 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { | ||
550 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | ||
551 | |||
552 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), | ||
553 | PAGE_SIZE); | ||
554 | |||
555 | xen_alloc_pd(PFN_DOWN(__pa(pmd))); | ||
556 | |||
557 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); | ||
558 | } else | ||
559 | pgd_clear(&base[i]); | ||
560 | } | ||
561 | } | ||
562 | |||
563 | /* make sure zero_page is mapped RO so we can use it in pagetables */ | ||
564 | make_lowmem_page_readonly(empty_zero_page); | ||
565 | make_lowmem_page_readonly(base); | ||
566 | /* | ||
567 | * Switch to new pagetable. This is done before | ||
568 | * pagetable_init has done anything so that the new pages | ||
569 | * added to the table can be prepared properly for Xen. | ||
570 | */ | ||
571 | xen_write_cr3(__pa(base)); | ||
572 | } | ||
573 | |||
574 | static __init void xen_pagetable_setup_done(pgd_t *base) | ||
575 | { | ||
576 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | ||
577 | /* | ||
578 | * Create a mapping for the shared info page. | ||
579 | * Should be set_fixmap(), but shared_info is a machine | ||
580 | * address with no corresponding pseudo-phys address. | ||
581 | */ | ||
582 | #if 0 | ||
583 | set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP), | ||
584 | PFN_DOWN(xen_start_info->shared_info), | ||
585 | PAGE_KERNEL); | ||
586 | #endif | ||
587 | |||
588 | HYPERVISOR_shared_info = | ||
589 | (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); | ||
590 | |||
591 | } else | ||
592 | HYPERVISOR_shared_info = | ||
593 | (struct shared_info *)__va(xen_start_info->shared_info); | ||
594 | |||
595 | #if 0 | ||
596 | xen_pgd_pin(base); | ||
597 | #endif | ||
598 | |||
599 | xen_vcpu_setup(smp_processor_id()); | ||
600 | } | ||
601 | |||
602 | static const struct paravirt_ops xen_paravirt_ops __initdata = { | ||
603 | .paravirt_enabled = 1, | ||
604 | .shared_kernel_pmd = 0, | ||
605 | |||
606 | .name = "Xen", | ||
607 | .banner = xen_banner, | ||
608 | |||
609 | .patch = paravirt_patch_default, | ||
610 | |||
611 | .memory_setup = xen_memory_setup, | ||
612 | .arch_setup = xen_arch_setup, | ||
613 | |||
614 | .cpuid = xen_cpuid, | ||
615 | |||
616 | .set_debugreg = xen_set_debugreg, | ||
617 | .get_debugreg = xen_get_debugreg, | ||
618 | |||
619 | .clts = native_clts, | ||
620 | |||
621 | .read_cr0 = native_read_cr0, | ||
622 | .write_cr0 = native_write_cr0, | ||
623 | |||
624 | .read_cr2 = xen_read_cr2, | ||
625 | .write_cr2 = native_write_cr2, | ||
626 | |||
627 | .read_cr3 = xen_read_cr3, | ||
628 | .write_cr3 = xen_write_cr3, | ||
629 | |||
630 | .read_cr4 = native_read_cr4, | ||
631 | .read_cr4_safe = native_read_cr4_safe, | ||
632 | .write_cr4 = xen_write_cr4, | ||
633 | |||
634 | .save_fl = xen_save_fl, | ||
635 | .restore_fl = xen_restore_fl, | ||
636 | .irq_disable = xen_irq_disable, | ||
637 | .irq_enable = xen_irq_enable, | ||
638 | .safe_halt = xen_safe_halt, | ||
639 | .halt = xen_halt, | ||
640 | .wbinvd = native_wbinvd, | ||
641 | |||
642 | .read_msr = native_read_msr_safe, | ||
643 | .write_msr = native_write_msr_safe, | ||
644 | .read_tsc = native_read_tsc, | ||
645 | .read_pmc = native_read_pmc, | ||
646 | |||
647 | .iret = (void *)&hypercall_page[__HYPERVISOR_iret], | ||
648 | .irq_enable_sysexit = NULL, /* never called */ | ||
649 | |||
650 | .load_tr_desc = paravirt_nop, | ||
651 | .set_ldt = xen_set_ldt, | ||
652 | .load_gdt = xen_load_gdt, | ||
653 | .load_idt = xen_load_idt, | ||
654 | .load_tls = xen_load_tls, | ||
655 | |||
656 | .store_gdt = native_store_gdt, | ||
657 | .store_idt = native_store_idt, | ||
658 | .store_tr = xen_store_tr, | ||
659 | |||
660 | .write_ldt_entry = xen_write_ldt_entry, | ||
661 | .write_gdt_entry = xen_write_gdt_entry, | ||
662 | .write_idt_entry = xen_write_idt_entry, | ||
663 | .load_esp0 = xen_load_esp0, | ||
664 | |||
665 | .set_iopl_mask = xen_set_iopl_mask, | ||
666 | .io_delay = xen_io_delay, | ||
667 | |||
668 | #ifdef CONFIG_X86_LOCAL_APIC | ||
669 | .apic_write = paravirt_nop, | ||
670 | .apic_write_atomic = paravirt_nop, | ||
671 | .apic_read = xen_apic_read, | ||
672 | .setup_boot_clock = paravirt_nop, | ||
673 | .setup_secondary_clock = paravirt_nop, | ||
674 | .startup_ipi_hook = paravirt_nop, | ||
675 | #endif | ||
676 | |||
677 | .flush_tlb_user = xen_flush_tlb, | ||
678 | .flush_tlb_kernel = xen_flush_tlb, | ||
679 | .flush_tlb_single = xen_flush_tlb_single, | ||
680 | |||
681 | .pte_update = paravirt_nop, | ||
682 | .pte_update_defer = paravirt_nop, | ||
683 | |||
684 | .pagetable_setup_start = xen_pagetable_setup_start, | ||
685 | .pagetable_setup_done = xen_pagetable_setup_done, | ||
686 | |||
687 | .alloc_pt = xen_alloc_pt, | ||
688 | .alloc_pd = xen_alloc_pd, | ||
689 | .alloc_pd_clone = xen_alloc_pd_clone, | ||
690 | .release_pd = xen_release_pd, | ||
691 | .release_pt = xen_release_pt, | ||
692 | |||
693 | .set_lazy_mode = xen_set_lazy_mode, | ||
694 | }; | ||
695 | |||
696 | /* First C function to be called on Xen boot */ | ||
697 | asmlinkage void __init xen_start_kernel(void) | ||
698 | { | ||
699 | pgd_t *pgd; | ||
700 | |||
701 | if (!xen_start_info) | ||
702 | return; | ||
703 | |||
704 | BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0); | ||
705 | |||
706 | /* Install Xen paravirt ops */ | ||
707 | paravirt_ops = xen_paravirt_ops; | ||
708 | |||
709 | xen_setup_features(); | ||
710 | |||
711 | /* Get mfn list */ | ||
712 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
713 | phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; | ||
714 | |||
715 | pgd = (pgd_t *)xen_start_info->pt_base; | ||
716 | |||
717 | init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; | ||
718 | |||
719 | init_mm.pgd = pgd; /* use the Xen pagetables to start */ | ||
720 | |||
721 | /* keep using Xen gdt for now; no urgent need to change it */ | ||
722 | |||
723 | x86_write_percpu(xen_cr3, __pa(pgd)); | ||
724 | xen_vcpu_setup(0); | ||
725 | |||
726 | paravirt_ops.kernel_rpl = 1; | ||
727 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | ||
728 | paravirt_ops.kernel_rpl = 0; | ||
729 | |||
730 | /* set the limit of our address space */ | ||
731 | reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE); | ||
732 | |||
733 | /* set up basic CPUID stuff */ | ||
734 | cpu_detect(&new_cpu_data); | ||
735 | new_cpu_data.hard_math = 1; | ||
736 | new_cpu_data.x86_capability[0] = cpuid_edx(1); | ||
737 | |||
738 | /* Poke various useful things into boot_params */ | ||
739 | LOADER_TYPE = (9 << 4) | 0; | ||
740 | INITRD_START = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0; | ||
741 | INITRD_SIZE = xen_start_info->mod_len; | ||
742 | |||
743 | /* Start the world */ | ||
744 | start_kernel(); | ||
745 | } | ||
diff --git a/arch/i386/xen/features.c b/arch/i386/xen/features.c new file mode 100644 index 000000000000..0707714e40d6 --- /dev/null +++ b/arch/i386/xen/features.c | |||
@@ -0,0 +1,29 @@ | |||
1 | /****************************************************************************** | ||
2 | * features.c | ||
3 | * | ||
4 | * Xen feature flags. | ||
5 | * | ||
6 | * Copyright (c) 2006, Ian Campbell, XenSource Inc. | ||
7 | */ | ||
8 | #include <linux/types.h> | ||
9 | #include <linux/cache.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <asm/xen/hypervisor.h> | ||
12 | #include <xen/features.h> | ||
13 | |||
14 | u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; | ||
15 | EXPORT_SYMBOL_GPL(xen_features); | ||
16 | |||
17 | void xen_setup_features(void) | ||
18 | { | ||
19 | struct xen_feature_info fi; | ||
20 | int i, j; | ||
21 | |||
22 | for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { | ||
23 | fi.submap_idx = i; | ||
24 | if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) | ||
25 | break; | ||
26 | for (j = 0; j < 32; j++) | ||
27 | xen_features[i * 32 + j] = !!(fi.submap & 1<<j); | ||
28 | } | ||
29 | } | ||
diff --git a/arch/i386/xen/multicalls.c b/arch/i386/xen/multicalls.c new file mode 100644 index 000000000000..869f9833f08f --- /dev/null +++ b/arch/i386/xen/multicalls.c | |||
@@ -0,0 +1,89 @@ | |||
1 | /* | ||
2 | * Xen hypercall batching. | ||
3 | * | ||
4 | * Xen allows multiple hypercalls to be issued at once, using the | ||
5 | * multicall interface. This allows the cost of trapping into the | ||
6 | * hypervisor to be amortized over several calls. | ||
7 | * | ||
8 | * This file implements a simple interface for multicalls. There's a | ||
9 | * per-cpu buffer of outstanding multicalls. When you want to queue a | ||
10 | * multicall for issuing, you can allocate a multicall slot for the | ||
11 | * call and its arguments, along with storage for space which is | ||
12 | * pointed to by the arguments (for passing pointers to structures, | ||
13 | * etc). When the multicall is actually issued, all the space for the | ||
14 | * commands and allocated memory is freed for reuse. | ||
15 | * | ||
16 | * Multicalls are flushed whenever any of the buffers get full, or | ||
17 | * when explicitly requested. There's no way to get per-multicall | ||
18 | * return results back. It will BUG if any of the multicalls fail. | ||
19 | * | ||
20 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | ||
21 | */ | ||
22 | #include <linux/percpu.h> | ||
23 | |||
24 | #include <asm/xen/hypercall.h> | ||
25 | |||
26 | #include "multicalls.h" | ||
27 | |||
28 | #define MC_BATCH 8 | ||
29 | #define MC_ARGS (MC_BATCH * 32 / sizeof(u64)) | ||
30 | |||
31 | struct mc_buffer { | ||
32 | struct multicall_entry entries[MC_BATCH]; | ||
33 | u64 args[MC_ARGS]; | ||
34 | unsigned mcidx, argidx; | ||
35 | }; | ||
36 | |||
37 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); | ||
38 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); | ||
39 | |||
40 | void xen_mc_flush(void) | ||
41 | { | ||
42 | struct mc_buffer *b = &get_cpu_var(mc_buffer); | ||
43 | int ret = 0; | ||
44 | unsigned long flags; | ||
45 | |||
46 | /* Disable interrupts in case someone comes in and queues | ||
47 | something in the middle */ | ||
48 | local_irq_save(flags); | ||
49 | |||
50 | if (b->mcidx) { | ||
51 | int i; | ||
52 | |||
53 | if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) | ||
54 | BUG(); | ||
55 | for (i = 0; i < b->mcidx; i++) | ||
56 | if (b->entries[i].result < 0) | ||
57 | ret++; | ||
58 | b->mcidx = 0; | ||
59 | b->argidx = 0; | ||
60 | } else | ||
61 | BUG_ON(b->argidx != 0); | ||
62 | |||
63 | put_cpu_var(mc_buffer); | ||
64 | local_irq_restore(flags); | ||
65 | |||
66 | BUG_ON(ret); | ||
67 | } | ||
68 | |||
69 | struct multicall_space __xen_mc_entry(size_t args) | ||
70 | { | ||
71 | struct mc_buffer *b = &get_cpu_var(mc_buffer); | ||
72 | struct multicall_space ret; | ||
73 | unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); | ||
74 | |||
75 | BUG_ON(argspace > MC_ARGS); | ||
76 | |||
77 | if (b->mcidx == MC_BATCH || | ||
78 | (b->argidx + argspace) > MC_ARGS) | ||
79 | xen_mc_flush(); | ||
80 | |||
81 | ret.mc = &b->entries[b->mcidx]; | ||
82 | b->mcidx++; | ||
83 | ret.args = &b->args[b->argidx]; | ||
84 | b->argidx += argspace; | ||
85 | |||
86 | put_cpu_var(mc_buffer); | ||
87 | |||
88 | return ret; | ||
89 | } | ||
diff --git a/arch/i386/xen/multicalls.h b/arch/i386/xen/multicalls.h new file mode 100644 index 000000000000..e6f7530b156c --- /dev/null +++ b/arch/i386/xen/multicalls.h | |||
@@ -0,0 +1,45 @@ | |||
1 | #ifndef _XEN_MULTICALLS_H | ||
2 | #define _XEN_MULTICALLS_H | ||
3 | |||
4 | #include "xen-ops.h" | ||
5 | |||
/*
 * Multicalls: a reserved multicall slot together with the scratch
 * storage set aside for its arguments.
 */
struct multicall_space {
	struct multicall_entry *mc;	/* the queued call */
	void *args;			/* argument storage for that call */
};
12 | |||
13 | /* Allocate room for a multicall and its args */ | ||
14 | struct multicall_space __xen_mc_entry(size_t args); | ||
15 | |||
16 | DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags); | ||
17 | |||
18 | /* Call to start a batch of multiple __xen_mc_entry()s. Must be | ||
19 | paired with xen_mc_issue() */ | ||
20 | static inline void xen_mc_batch(void) | ||
21 | { | ||
22 | /* need to disable interrupts until this entry is complete */ | ||
23 | local_irq_save(__get_cpu_var(xen_mc_irq_flags)); | ||
24 | } | ||
25 | |||
26 | static inline struct multicall_space xen_mc_entry(size_t args) | ||
27 | { | ||
28 | xen_mc_batch(); | ||
29 | return __xen_mc_entry(args); | ||
30 | } | ||
31 | |||
32 | /* Flush all pending multicalls */ | ||
33 | void xen_mc_flush(void); | ||
34 | |||
35 | /* Issue a multicall if we're not in a lazy mode */ | ||
36 | static inline void xen_mc_issue(unsigned mode) | ||
37 | { | ||
38 | if ((xen_get_lazy_mode() & mode) == 0) | ||
39 | xen_mc_flush(); | ||
40 | |||
41 | /* restore flags saved in xen_mc_batch */ | ||
42 | local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); | ||
43 | } | ||
44 | |||
45 | #endif /* _XEN_MULTICALLS_H */ | ||
diff --git a/arch/i386/xen/setup.c b/arch/i386/xen/setup.c new file mode 100644 index 000000000000..7da93ee612f6 --- /dev/null +++ b/arch/i386/xen/setup.c | |||
@@ -0,0 +1,97 @@ | |||
1 | /* | ||
2 | * Machine specific setup for xen | ||
3 | * | ||
4 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | ||
5 | */ | ||
6 | |||
7 | #include <linux/module.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/pm.h> | ||
11 | |||
12 | #include <asm/elf.h> | ||
13 | #include <asm/e820.h> | ||
14 | #include <asm/setup.h> | ||
15 | #include <asm/xen/hypervisor.h> | ||
16 | #include <asm/xen/hypercall.h> | ||
17 | |||
18 | #include <xen/interface/physdev.h> | ||
19 | #include <xen/features.h> | ||
20 | |||
21 | #include "xen-ops.h" | ||
22 | |||
23 | /* These are code, but not functions. Defined in entry.S */ | ||
24 | extern const char xen_hypervisor_callback[]; | ||
25 | extern const char xen_failsafe_callback[]; | ||
26 | |||
27 | static __initdata struct shared_info init_shared; | ||
28 | |||
29 | /* | ||
30 | * Point at some empty memory to start with. We map the real shared_info | ||
31 | * page as soon as fixmap is up and running. | ||
32 | */ | ||
33 | struct shared_info *HYPERVISOR_shared_info = &init_shared; | ||
34 | |||
35 | unsigned long *phys_to_machine_mapping; | ||
36 | EXPORT_SYMBOL(phys_to_machine_mapping); | ||
37 | |||
38 | /** | ||
39 | * machine_specific_memory_setup - Hook for machine specific memory setup. | ||
40 | **/ | ||
41 | |||
42 | char * __init xen_memory_setup(void) | ||
43 | { | ||
44 | unsigned long max_pfn = xen_start_info->nr_pages; | ||
45 | |||
46 | e820.nr_map = 0; | ||
47 | add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM); | ||
48 | |||
49 | return "Xen"; | ||
50 | } | ||
51 | |||
52 | static void xen_idle(void) | ||
53 | { | ||
54 | local_irq_disable(); | ||
55 | |||
56 | if (need_resched()) | ||
57 | local_irq_enable(); | ||
58 | else { | ||
59 | current_thread_info()->status &= ~TS_POLLING; | ||
60 | smp_mb__after_clear_bit(); | ||
61 | safe_halt(); | ||
62 | current_thread_info()->status |= TS_POLLING; | ||
63 | } | ||
64 | } | ||
65 | |||
66 | void __init xen_arch_setup(void) | ||
67 | { | ||
68 | struct physdev_set_iopl set_iopl; | ||
69 | int rc; | ||
70 | |||
71 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); | ||
72 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); | ||
73 | |||
74 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
75 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); | ||
76 | |||
77 | HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, | ||
78 | __KERNEL_CS, (unsigned long)xen_failsafe_callback); | ||
79 | |||
80 | set_iopl.iopl = 1; | ||
81 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
82 | if (rc != 0) | ||
83 | printk(KERN_INFO "physdev_op failed %d\n", rc); | ||
84 | |||
85 | #ifdef CONFIG_ACPI | ||
86 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | ||
87 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | ||
88 | disable_acpi(); | ||
89 | } | ||
90 | #endif | ||
91 | |||
92 | memcpy(boot_command_line, xen_start_info->cmd_line, | ||
93 | MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? | ||
94 | COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); | ||
95 | |||
96 | pm_idle = xen_idle; | ||
97 | } | ||
diff --git a/arch/i386/xen/xen-head.S b/arch/i386/xen/xen-head.S new file mode 100644 index 000000000000..2998d55a0017 --- /dev/null +++ b/arch/i386/xen/xen-head.S | |||
@@ -0,0 +1,36 @@ | |||
/* Xen-specific pieces of head.S, intended to be included in the right
   place in head.S */

#ifdef CONFIG_XEN

#include <linux/elfnote.h>
#include <asm/boot.h>
#include <xen/interface/elfnote.h>

/*
 * Xen entry point (named by the XEN_ELFNOTE_ENTRY note below): save the
 * value passed in %esi as xen_start_info, switch to the top of
 * init_thread_union as the stack, and jump into C.
 */
ENTRY(startup_xen)
	cld
	movl %esi,xen_start_info
	movl $(init_thread_union+THREAD_SIZE),%esp
	jmp xen_start_kernel

/* One page-aligned page reserved as the hypercall page. */
.pushsection ".bss.page_aligned"
	.align PAGE_SIZE_asm
ENTRY(hypercall_page)
	.skip 0x1000
.popsection

/* ELF notes describing this kernel image to Xen. */
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz "2.6")
	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz "xen-3.0")
	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long  __PAGE_OFFSET)
	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long  startup_xen)
	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long  hypercall_page)
	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
#ifdef CONFIG_X86_PAE
	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
#else
	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "no")
#endif
	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")

#endif /*CONFIG_XEN */
diff --git a/arch/i386/xen/xen-ops.h b/arch/i386/xen/xen-ops.h new file mode 100644 index 000000000000..79648fe1ab77 --- /dev/null +++ b/arch/i386/xen/xen-ops.h | |||
@@ -0,0 +1,31 @@ | |||
1 | #ifndef XEN_OPS_H | ||
2 | #define XEN_OPS_H | ||
3 | |||
4 | #include <linux/init.h> | ||
5 | #include <linux/clocksource.h> | ||
6 | |||
7 | DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); | ||
8 | DECLARE_PER_CPU(unsigned long, xen_cr3); | ||
9 | |||
10 | extern struct start_info *xen_start_info; | ||
11 | extern struct shared_info *HYPERVISOR_shared_info; | ||
12 | |||
13 | char * __init xen_memory_setup(void); | ||
14 | void __init xen_arch_setup(void); | ||
15 | void __init xen_init_IRQ(void); | ||
16 | |||
17 | unsigned long xen_cpu_khz(void); | ||
18 | void __init xen_time_init(void); | ||
19 | unsigned long xen_get_wallclock(void); | ||
20 | int xen_set_wallclock(unsigned long time); | ||
21 | cycle_t xen_clocksource_read(void); | ||
22 | |||
23 | DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); | ||
24 | |||
25 | static inline unsigned xen_get_lazy_mode(void) | ||
26 | { | ||
27 | return x86_read_percpu(xen_lazy_mode); | ||
28 | } | ||
29 | |||
30 | |||
31 | #endif /* XEN_OPS_H */ | ||
diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 9e15ce0006eb..36f310632c49 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h | |||
@@ -41,6 +41,7 @@ extern int irqbalance_disable(char *str); | |||
41 | extern void fixup_irqs(cpumask_t map); | 41 | extern void fixup_irqs(cpumask_t map); |
42 | #endif | 42 | #endif |
43 | 43 | ||
44 | unsigned int do_IRQ(struct pt_regs *regs); | ||
44 | void init_IRQ(void); | 45 | void init_IRQ(void); |
45 | void __init native_init_IRQ(void); | 46 | void __init native_init_IRQ(void); |
46 | 47 | ||
diff --git a/include/asm-i386/xen/hypercall.h b/include/asm-i386/xen/hypercall.h index 53912859708b..bc0ee7d961ca 100644 --- a/include/asm-i386/xen/hypercall.h +++ b/include/asm-i386/xen/hypercall.h | |||
@@ -392,4 +392,22 @@ MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, | |||
392 | mcl->args[2] = (unsigned long)success_count; | 392 | mcl->args[2] = (unsigned long)success_count; |
393 | mcl->args[3] = domid; | 393 | mcl->args[3] = domid; |
394 | } | 394 | } |
395 | |||
396 | static inline void | ||
397 | MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries) | ||
398 | { | ||
399 | mcl->op = __HYPERVISOR_set_gdt; | ||
400 | mcl->args[0] = (unsigned long)frames; | ||
401 | mcl->args[1] = entries; | ||
402 | } | ||
403 | |||
404 | static inline void | ||
405 | MULTI_stack_switch(struct multicall_entry *mcl, | ||
406 | unsigned long ss, unsigned long esp) | ||
407 | { | ||
408 | mcl->op = __HYPERVISOR_stack_switch; | ||
409 | mcl->args[0] = ss; | ||
410 | mcl->args[1] = esp; | ||
411 | } | ||
412 | |||
395 | #endif /* __HYPERCALL_H__ */ | 413 | #endif /* __HYPERCALL_H__ */ |
diff --git a/include/xen/features.h b/include/xen/features.h new file mode 100644 index 000000000000..27292d4d2a6a --- /dev/null +++ b/include/xen/features.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /****************************************************************************** | ||
2 | * features.h | ||
3 | * | ||
4 | * Query the features reported by Xen. | ||
5 | * | ||
6 | * Copyright (c) 2006, Ian Campbell | ||
7 | */ | ||
8 | |||
9 | #ifndef __XEN_FEATURES_H__ | ||
10 | #define __XEN_FEATURES_H__ | ||
11 | |||
12 | #include <xen/interface/features.h> | ||
13 | |||
14 | void xen_setup_features(void); | ||
15 | |||
16 | extern u8 xen_features[XENFEAT_NR_SUBMAPS * 32]; | ||
17 | |||
18 | static inline int xen_feature(int flag) | ||
19 | { | ||
20 | return xen_features[flag]; | ||
21 | } | ||
22 | |||
23 | #endif /* __ASM_XEN_FEATURES_H__ */ | ||
diff --git a/include/xen/page.h b/include/xen/page.h new file mode 100644 index 000000000000..1df6c1930578 --- /dev/null +++ b/include/xen/page.h | |||
@@ -0,0 +1,179 @@ | |||
1 | #ifndef __XEN_PAGE_H | ||
2 | #define __XEN_PAGE_H | ||
3 | |||
4 | #include <linux/pfn.h> | ||
5 | |||
6 | #include <asm/uaccess.h> | ||
7 | |||
8 | #include <xen/features.h> | ||
9 | |||
/*
 * Width of a Xen machine / pseudo-physical address: 64 bits with PAE,
 * the native word otherwise.
 */
#ifdef CONFIG_X86_PAE
typedef unsigned long long xen_addr_word_t;
#else
typedef unsigned long xen_addr_word_t;
#endif

/* Xen machine address */
typedef struct xmaddr {
	xen_addr_word_t maddr;
} xmaddr_t;

/* Xen pseudo-physical address */
typedef struct xpaddr {
	xen_addr_word_t paddr;
} xpaddr_t;

/* Wrap a raw value in the corresponding distinct address type. */
#define XMADDR(x)	((xmaddr_t) { .maddr = (x) })
#define XPADDR(x)	((xpaddr_t) { .paddr = (x) })
34 | |||
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/

/* All-ones p2m entry: the pfn has no valid machine frame. */
#define INVALID_P2M_ENTRY	(~0UL)
/* Bit 31 tags a p2m entry as a foreign frame; pfn_to_mfn() masks it off. */
#define FOREIGN_FRAME_BIT	(1UL<<31)
#define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)

/* pfn -> mfn table used when the physmap is not auto-translated. */
extern unsigned long *phys_to_machine_mapping;
41 | |||
42 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | ||
43 | { | ||
44 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
45 | return pfn; | ||
46 | |||
47 | return phys_to_machine_mapping[(unsigned int)(pfn)] & | ||
48 | ~FOREIGN_FRAME_BIT; | ||
49 | } | ||
50 | |||
51 | static inline int phys_to_machine_mapping_valid(unsigned long pfn) | ||
52 | { | ||
53 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
54 | return 1; | ||
55 | |||
56 | return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); | ||
57 | } | ||
58 | |||
59 | static inline unsigned long mfn_to_pfn(unsigned long mfn) | ||
60 | { | ||
61 | unsigned long pfn; | ||
62 | |||
63 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
64 | return mfn; | ||
65 | |||
66 | #if 0 | ||
67 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | ||
68 | return max_mapnr; | ||
69 | #endif | ||
70 | |||
71 | pfn = 0; | ||
72 | /* | ||
73 | * The array access can fail (e.g., device space beyond end of RAM). | ||
74 | * In such cases it doesn't matter what we return (we return garbage), | ||
75 | * but we must handle the fault without crashing! | ||
76 | */ | ||
77 | __get_user(pfn, &machine_to_phys_mapping[mfn]); | ||
78 | |||
79 | return pfn; | ||
80 | } | ||
81 | |||
82 | static inline xmaddr_t phys_to_machine(xpaddr_t phys) | ||
83 | { | ||
84 | unsigned offset = phys.paddr & ~PAGE_MASK; | ||
85 | return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset); | ||
86 | } | ||
87 | |||
88 | static inline xpaddr_t machine_to_phys(xmaddr_t machine) | ||
89 | { | ||
90 | unsigned offset = machine.maddr & ~PAGE_MASK; | ||
91 | return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * We detect special mappings in one of two ways: | ||
96 | * 1. If the MFN is an I/O page then Xen will set the m2p entry | ||
97 | * to be outside our maximum possible pseudophys range. | ||
98 | * 2. If the MFN belongs to a different domain then we will certainly | ||
99 | * not have MFN in our p2m table. Conversely, if the page is ours, | ||
100 | * then we'll have p2m(m2p(MFN))==MFN. | ||
101 | * If we detect a special mapping then it doesn't have a 'struct page'. | ||
102 | * We force !pfn_valid() by returning an out-of-range pointer. | ||
103 | * | ||
104 | * NB. These checks require that, for any MFN that is not in our reservation, | ||
105 | * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if | ||
106 | * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. | ||
107 | * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. | ||
108 | * | ||
109 | * NB2. When deliberately mapping foreign pages into the p2m table, you *must* | ||
110 | * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we | ||
111 | * require. In all the cases we care about, the FOREIGN_FRAME bit is | ||
112 | * masked (e.g., pfn_to_mfn()) so behaviour there is correct. | ||
113 | */ | ||
114 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | ||
115 | { | ||
116 | extern unsigned long max_mapnr; | ||
117 | unsigned long pfn = mfn_to_pfn(mfn); | ||
118 | if ((pfn < max_mapnr) | ||
119 | && !xen_feature(XENFEAT_auto_translated_physmap) | ||
120 | && (phys_to_machine_mapping[pfn] != mfn)) | ||
121 | return max_mapnr; /* force !pfn_valid() */ | ||
122 | return pfn; | ||
123 | } | ||
124 | |||
125 | static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
126 | { | ||
127 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
128 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
129 | return; | ||
130 | } | ||
131 | phys_to_machine_mapping[pfn] = mfn; | ||
132 | } | ||
133 | |||
/*
 * VIRT <-> MACHINE conversion, built from __pa()/__va() and the p2m/m2p
 * helpers above.
 */
#define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
#define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
#define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
138 | |||
/*
 * Accessors that build and take apart page table entries in terms of
 * machine frames, without any pfn <-> mfn translation.
 */
#ifdef CONFIG_X86_PAE
/* The machine frame number is split across pte_low and pte_high. */
#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) |			\
		       (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)))

/*
 * Build a pte for machine frame @page_nr with protection @pgprot; both
 * halves are filtered through __supported_pte_mask.
 */
static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
	pte_t pte;

	pte.pte_high = ((page_nr >> (32 - PAGE_SHIFT)) |
			(pgprot_val(pgprot) >> 32)) &
		       (__supported_pte_mask >> 32);
	pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) &
		      __supported_pte_mask;

	return pte;
}

/* Raw 64-bit value of a pte, high word in the upper 32 bits. */
static inline unsigned long long pte_val_ma(pte_t x)
{
	unsigned long long high = x.pte_high;

	return (high << 32) | x.pte_low;
}
#define pmd_val_ma(v) ((v).pmd)
#define pud_val_ma(v) ((v).pgd.pgd)
#define __pte_ma(x)	((pte_t) { .pte_low = (x), .pte_high = (x)>>32 } )
#define __pmd_ma(x)	((pmd_t) { (x) } )
#else  /* !X86_PAE */
/* Without PAE a pte is a single word held in pte_low. */
#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
#define mfn_pte(pfn, prot)	__pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pte_val_ma(x)	((x).pte_low)
#define pmd_val_ma(v)	((v).pud.pgd.pgd)
#define __pte_ma(x)	((pte_t) { (x) } )
#endif	/* CONFIG_X86_PAE */

#define pgd_val_ma(x)	((x).pgd)
173 | |||
174 | |||
175 | xmaddr_t arbitrary_virt_to_machine(unsigned long address); | ||
176 | void make_lowmem_page_readonly(void *vaddr); | ||
177 | void make_lowmem_page_readwrite(void *vaddr); | ||
178 | |||
179 | #endif /* __XEN_PAGE_H */ | ||