about | summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/xen
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- arch/x86/xen/Kconfig 2
-rw-r--r-- arch/x86/xen/Makefile 4
-rw-r--r-- arch/x86/xen/enlighten.c 54
-rw-r--r-- arch/x86/xen/events.c 591
-rw-r--r-- arch/x86/xen/features.c 29
-rw-r--r-- arch/x86/xen/grant-table.c 91
-rw-r--r-- arch/x86/xen/mmu.c 143
-rw-r--r-- arch/x86/xen/setup.c 21
-rw-r--r-- arch/x86/xen/smp.c 20
-rw-r--r-- arch/x86/xen/xen-asm.S 42
-rw-r--r-- arch/x86/xen/xen-ops.h 8
11 files changed, 255 insertions, 750 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 4d5f2649bee4..2e641be2737e 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,7 @@ config XEN
6 bool "Xen guest support" 6 bool "Xen guest support"
7 select PARAVIRT 7 select PARAVIRT
8 depends on X86_32 8 depends on X86_32
9 depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER) 9 depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
10 help 10 help
11 This is the Linux Xen port. Enabling this will allow the 11 This is the Linux Xen port. Enabling this will allow the
12 kernel to boot in a paravirtualized environment under the 12 kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 343df246bd3e..3d8df981d5fd 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
1obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ 1obj-y := enlighten.o setup.o multicalls.o mmu.o \
2 events.o time.o manage.o xen-asm.o 2 time.o manage.o xen-asm.o grant-table.o
3 3
4obj-$(CONFIG_SMP) += smp.o 4obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0388220cf97..c8a56e457d61 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -155,7 +155,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
155 if (*ax == 1) 155 if (*ax == 1)
156 maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ 156 maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
157 (1 << X86_FEATURE_ACPI) | /* disable ACPI */ 157 (1 << X86_FEATURE_ACPI) | /* disable ACPI */
158 (1 << X86_FEATURE_SEP) | /* disable SEP */ 158 (1 << X86_FEATURE_MCE) | /* disable MCE */
159 (1 << X86_FEATURE_MCA) | /* disable MCA */
159 (1 << X86_FEATURE_ACC)); /* thermal monitoring */ 160 (1 << X86_FEATURE_ACC)); /* thermal monitoring */
160 161
161 asm(XEN_EMULATE_PREFIX "cpuid" 162 asm(XEN_EMULATE_PREFIX "cpuid"
@@ -531,26 +532,37 @@ static void xen_apic_write(unsigned long reg, u32 val)
531static void xen_flush_tlb(void) 532static void xen_flush_tlb(void)
532{ 533{
533 struct mmuext_op *op; 534 struct mmuext_op *op;
534 struct multicall_space mcs = xen_mc_entry(sizeof(*op)); 535 struct multicall_space mcs;
536
537 preempt_disable();
538
539 mcs = xen_mc_entry(sizeof(*op));
535 540
536 op = mcs.args; 541 op = mcs.args;
537 op->cmd = MMUEXT_TLB_FLUSH_LOCAL; 542 op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
538 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 543 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
539 544
540 xen_mc_issue(PARAVIRT_LAZY_MMU); 545 xen_mc_issue(PARAVIRT_LAZY_MMU);
546
547 preempt_enable();
541} 548}
542 549
543static void xen_flush_tlb_single(unsigned long addr) 550static void xen_flush_tlb_single(unsigned long addr)
544{ 551{
545 struct mmuext_op *op; 552 struct mmuext_op *op;
546 struct multicall_space mcs = xen_mc_entry(sizeof(*op)); 553 struct multicall_space mcs;
554
555 preempt_disable();
547 556
557 mcs = xen_mc_entry(sizeof(*op));
548 op = mcs.args; 558 op = mcs.args;
549 op->cmd = MMUEXT_INVLPG_LOCAL; 559 op->cmd = MMUEXT_INVLPG_LOCAL;
550 op->arg1.linear_addr = addr & PAGE_MASK; 560 op->arg1.linear_addr = addr & PAGE_MASK;
551 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 561 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
552 562
553 xen_mc_issue(PARAVIRT_LAZY_MMU); 563 xen_mc_issue(PARAVIRT_LAZY_MMU);
564
565 preempt_enable();
554} 566}
555 567
556static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, 568static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
@@ -655,15 +667,17 @@ static void xen_write_cr3(unsigned long cr3)
655 667
656/* Early in boot, while setting up the initial pagetable, assume 668/* Early in boot, while setting up the initial pagetable, assume
657 everything is pinned. */ 669 everything is pinned. */
658static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) 670static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
659{ 671{
672#ifdef CONFIG_FLATMEM
660 BUG_ON(mem_map); /* should only be used early */ 673 BUG_ON(mem_map); /* should only be used early */
674#endif
661 make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); 675 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
662} 676}
663 677
664/* Early release_pt assumes that all pts are pinned, since there's 678/* Early release_pte assumes that all pts are pinned, since there's
665 only init_mm and anything attached to that is pinned. */ 679 only init_mm and anything attached to that is pinned. */
666static void xen_release_pt_init(u32 pfn) 680static void xen_release_pte_init(u32 pfn)
667{ 681{
668 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 682 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
669} 683}
@@ -697,12 +711,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
697 } 711 }
698} 712}
699 713
700static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) 714static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
701{ 715{
702 xen_alloc_ptpage(mm, pfn, PT_PTE); 716 xen_alloc_ptpage(mm, pfn, PT_PTE);
703} 717}
704 718
705static void xen_alloc_pd(struct mm_struct *mm, u32 pfn) 719static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
706{ 720{
707 xen_alloc_ptpage(mm, pfn, PT_PMD); 721 xen_alloc_ptpage(mm, pfn, PT_PMD);
708} 722}
@@ -722,12 +736,12 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
722 } 736 }
723} 737}
724 738
725static void xen_release_pt(u32 pfn) 739static void xen_release_pte(u32 pfn)
726{ 740{
727 xen_release_ptpage(pfn, PT_PTE); 741 xen_release_ptpage(pfn, PT_PTE);
728} 742}
729 743
730static void xen_release_pd(u32 pfn) 744static void xen_release_pmd(u32 pfn)
731{ 745{
732 xen_release_ptpage(pfn, PT_PMD); 746 xen_release_ptpage(pfn, PT_PMD);
733} 747}
@@ -849,10 +863,10 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
849{ 863{
850 /* This will work as long as patching hasn't happened yet 864 /* This will work as long as patching hasn't happened yet
851 (which it hasn't) */ 865 (which it hasn't) */
852 pv_mmu_ops.alloc_pt = xen_alloc_pt; 866 pv_mmu_ops.alloc_pte = xen_alloc_pte;
853 pv_mmu_ops.alloc_pd = xen_alloc_pd; 867 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
854 pv_mmu_ops.release_pt = xen_release_pt; 868 pv_mmu_ops.release_pte = xen_release_pte;
855 pv_mmu_ops.release_pd = xen_release_pd; 869 pv_mmu_ops.release_pmd = xen_release_pmd;
856 pv_mmu_ops.set_pte = xen_set_pte; 870 pv_mmu_ops.set_pte = xen_set_pte;
857 871
858 setup_shared_info(); 872 setup_shared_info();
@@ -994,7 +1008,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
994 .read_pmc = native_read_pmc, 1008 .read_pmc = native_read_pmc,
995 1009
996 .iret = xen_iret, 1010 .iret = xen_iret,
997 .irq_enable_syscall_ret = NULL, /* never called */ 1011 .irq_enable_syscall_ret = xen_sysexit,
998 1012
999 .load_tr_desc = paravirt_nop, 1013 .load_tr_desc = paravirt_nop,
1000 .set_ldt = xen_set_ldt, 1014 .set_ldt = xen_set_ldt,
@@ -1059,11 +1073,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1059 .pte_update = paravirt_nop, 1073 .pte_update = paravirt_nop,
1060 .pte_update_defer = paravirt_nop, 1074 .pte_update_defer = paravirt_nop,
1061 1075
1062 .alloc_pt = xen_alloc_pt_init, 1076 .alloc_pte = xen_alloc_pte_init,
1063 .release_pt = xen_release_pt_init, 1077 .release_pte = xen_release_pte_init,
1064 .alloc_pd = xen_alloc_pt_init, 1078 .alloc_pmd = xen_alloc_pte_init,
1065 .alloc_pd_clone = paravirt_nop, 1079 .alloc_pmd_clone = paravirt_nop,
1066 .release_pd = xen_release_pt_init, 1080 .release_pmd = xen_release_pte_init,
1067 1081
1068#ifdef CONFIG_HIGHPTE 1082#ifdef CONFIG_HIGHPTE
1069 .kmap_atomic_pte = xen_kmap_atomic_pte, 1083 .kmap_atomic_pte = xen_kmap_atomic_pte,
diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
deleted file mode 100644
index dcf613e17581..000000000000
--- a/arch/x86/xen/events.c
+++ /dev/null
@@ -1,591 +0,0 @@
1/*
2 * Xen event channels
3 *
4 * Xen models interrupts with abstract event channels. Because each
5 * domain gets 1024 event channels, but NR_IRQ is not that large, we
6 * must dynamically map irqs<->event channels. The event channels
7 * interface with the rest of the kernel by defining a xen interrupt
8 * chip. When an event is received, it is mapped to an irq and sent
9 * through the normal interrupt processing path.
10 *
11 * There are four kinds of events which can be mapped to an event
12 * channel:
13 *
14 * 1. Inter-domain notifications. This includes all the virtual
15 * device events, since they're driven by front-ends in another domain
16 * (typically dom0).
17 * 2. VIRQs, typically used for timers. These are per-cpu events.
18 * 3. IPIs.
19 * 4. Hardware interrupts. Not supported at present.
20 *
21 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
22 */
23
24#include <linux/linkage.h>
25#include <linux/interrupt.h>
26#include <linux/irq.h>
27#include <linux/module.h>
28#include <linux/string.h>
29
30#include <asm/ptrace.h>
31#include <asm/irq.h>
32#include <asm/sync_bitops.h>
33#include <asm/xen/hypercall.h>
34#include <asm/xen/hypervisor.h>
35
36#include <xen/events.h>
37#include <xen/interface/xen.h>
38#include <xen/interface/event_channel.h>
39
40#include "xen-ops.h"
41
42/*
43 * This lock protects updates to the following mapping and reference-count
44 * arrays. The lock does not need to be acquired to read the mapping tables.
45 */
46static DEFINE_SPINLOCK(irq_mapping_update_lock);
47
48/* IRQ <-> VIRQ mapping. */
49static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
50
51/* IRQ <-> IPI mapping */
52static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
53
54/* Packed IRQ information: binding type, sub-type index, and event channel. */
55struct packed_irq
56{
57 unsigned short evtchn;
58 unsigned char index;
59 unsigned char type;
60};
61
62static struct packed_irq irq_info[NR_IRQS];
63
64/* Binding types. */
65enum {
66 IRQT_UNBOUND,
67 IRQT_PIRQ,
68 IRQT_VIRQ,
69 IRQT_IPI,
70 IRQT_EVTCHN
71};
72
73/* Convenient shorthand for packed representation of an unbound IRQ. */
74#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
75
76static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
77 [0 ... NR_EVENT_CHANNELS-1] = -1
78};
79static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
80static u8 cpu_evtchn[NR_EVENT_CHANNELS];
81
82/* Reference counts for bindings to IRQs. */
83static int irq_bindcount[NR_IRQS];
84
85/* Xen will never allocate port zero for any purpose. */
86#define VALID_EVTCHN(chn) ((chn) != 0)
87
88/*
89 * Force a proper event-channel callback from Xen after clearing the
90 * callback mask. We do this in a very simple manner, by making a call
91 * down into Xen. The pending flag will be checked by Xen on return.
92 */
93void force_evtchn_callback(void)
94{
95 (void)HYPERVISOR_xen_version(0, NULL);
96}
97EXPORT_SYMBOL_GPL(force_evtchn_callback);
98
99static struct irq_chip xen_dynamic_chip;
100
101/* Constructor for packed IRQ information. */
102static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
103{
104 return (struct packed_irq) { evtchn, index, type };
105}
106
107/*
108 * Accessors for packed IRQ information.
109 */
110static inline unsigned int evtchn_from_irq(int irq)
111{
112 return irq_info[irq].evtchn;
113}
114
115static inline unsigned int index_from_irq(int irq)
116{
117 return irq_info[irq].index;
118}
119
120static inline unsigned int type_from_irq(int irq)
121{
122 return irq_info[irq].type;
123}
124
125static inline unsigned long active_evtchns(unsigned int cpu,
126 struct shared_info *sh,
127 unsigned int idx)
128{
129 return (sh->evtchn_pending[idx] &
130 cpu_evtchn_mask[cpu][idx] &
131 ~sh->evtchn_mask[idx]);
132}
133
134static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
135{
136 int irq = evtchn_to_irq[chn];
137
138 BUG_ON(irq == -1);
139#ifdef CONFIG_SMP
140 irq_desc[irq].affinity = cpumask_of_cpu(cpu);
141#endif
142
143 __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
144 __set_bit(chn, cpu_evtchn_mask[cpu]);
145
146 cpu_evtchn[chn] = cpu;
147}
148
149static void init_evtchn_cpu_bindings(void)
150{
151#ifdef CONFIG_SMP
152 int i;
153 /* By default all event channels notify CPU#0. */
154 for (i = 0; i < NR_IRQS; i++)
155 irq_desc[i].affinity = cpumask_of_cpu(0);
156#endif
157
158 memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
159 memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
160}
161
162static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
163{
164 return cpu_evtchn[evtchn];
165}
166
167static inline void clear_evtchn(int port)
168{
169 struct shared_info *s = HYPERVISOR_shared_info;
170 sync_clear_bit(port, &s->evtchn_pending[0]);
171}
172
173static inline void set_evtchn(int port)
174{
175 struct shared_info *s = HYPERVISOR_shared_info;
176 sync_set_bit(port, &s->evtchn_pending[0]);
177}
178
179
180/**
181 * notify_remote_via_irq - send event to remote end of event channel via irq
182 * @irq: irq of event channel to send event to
183 *
184 * Unlike notify_remote_via_evtchn(), this is safe to use across
185 * save/restore. Notifications on a broken connection are silently
186 * dropped.
187 */
188void notify_remote_via_irq(int irq)
189{
190 int evtchn = evtchn_from_irq(irq);
191
192 if (VALID_EVTCHN(evtchn))
193 notify_remote_via_evtchn(evtchn);
194}
195EXPORT_SYMBOL_GPL(notify_remote_via_irq);
196
197static void mask_evtchn(int port)
198{
199 struct shared_info *s = HYPERVISOR_shared_info;
200 sync_set_bit(port, &s->evtchn_mask[0]);
201}
202
203static void unmask_evtchn(int port)
204{
205 struct shared_info *s = HYPERVISOR_shared_info;
206 unsigned int cpu = get_cpu();
207
208 BUG_ON(!irqs_disabled());
209
210 /* Slow path (hypercall) if this is a non-local port. */
211 if (unlikely(cpu != cpu_from_evtchn(port))) {
212 struct evtchn_unmask unmask = { .port = port };
213 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
214 } else {
215 struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
216
217 sync_clear_bit(port, &s->evtchn_mask[0]);
218
219 /*
220 * The following is basically the equivalent of
221 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
222 * the interrupt edge' if the channel is masked.
223 */
224 if (sync_test_bit(port, &s->evtchn_pending[0]) &&
225 !sync_test_and_set_bit(port / BITS_PER_LONG,
226 &vcpu_info->evtchn_pending_sel))
227 vcpu_info->evtchn_upcall_pending = 1;
228 }
229
230 put_cpu();
231}
232
233static int find_unbound_irq(void)
234{
235 int irq;
236
237 /* Only allocate from dynirq range */
238 for (irq = 0; irq < NR_IRQS; irq++)
239 if (irq_bindcount[irq] == 0)
240 break;
241
242 if (irq == NR_IRQS)
243 panic("No available IRQ to bind to: increase NR_IRQS!\n");
244
245 return irq;
246}
247
248int bind_evtchn_to_irq(unsigned int evtchn)
249{
250 int irq;
251
252 spin_lock(&irq_mapping_update_lock);
253
254 irq = evtchn_to_irq[evtchn];
255
256 if (irq == -1) {
257 irq = find_unbound_irq();
258
259 dynamic_irq_init(irq);
260 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
261 handle_level_irq, "event");
262
263 evtchn_to_irq[evtchn] = irq;
264 irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
265 }
266
267 irq_bindcount[irq]++;
268
269 spin_unlock(&irq_mapping_update_lock);
270
271 return irq;
272}
273EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
274
275static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
276{
277 struct evtchn_bind_ipi bind_ipi;
278 int evtchn, irq;
279
280 spin_lock(&irq_mapping_update_lock);
281
282 irq = per_cpu(ipi_to_irq, cpu)[ipi];
283 if (irq == -1) {
284 irq = find_unbound_irq();
285 if (irq < 0)
286 goto out;
287
288 dynamic_irq_init(irq);
289 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
290 handle_level_irq, "ipi");
291
292 bind_ipi.vcpu = cpu;
293 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
294 &bind_ipi) != 0)
295 BUG();
296 evtchn = bind_ipi.port;
297
298 evtchn_to_irq[evtchn] = irq;
299 irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
300
301 per_cpu(ipi_to_irq, cpu)[ipi] = irq;
302
303 bind_evtchn_to_cpu(evtchn, cpu);
304 }
305
306 irq_bindcount[irq]++;
307
308 out:
309 spin_unlock(&irq_mapping_update_lock);
310 return irq;
311}
312
313
314static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
315{
316 struct evtchn_bind_virq bind_virq;
317 int evtchn, irq;
318
319 spin_lock(&irq_mapping_update_lock);
320
321 irq = per_cpu(virq_to_irq, cpu)[virq];
322
323 if (irq == -1) {
324 bind_virq.virq = virq;
325 bind_virq.vcpu = cpu;
326 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
327 &bind_virq) != 0)
328 BUG();
329 evtchn = bind_virq.port;
330
331 irq = find_unbound_irq();
332
333 dynamic_irq_init(irq);
334 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
335 handle_level_irq, "virq");
336
337 evtchn_to_irq[evtchn] = irq;
338 irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
339
340 per_cpu(virq_to_irq, cpu)[virq] = irq;
341
342 bind_evtchn_to_cpu(evtchn, cpu);
343 }
344
345 irq_bindcount[irq]++;
346
347 spin_unlock(&irq_mapping_update_lock);
348
349 return irq;
350}
351
352static void unbind_from_irq(unsigned int irq)
353{
354 struct evtchn_close close;
355 int evtchn = evtchn_from_irq(irq);
356
357 spin_lock(&irq_mapping_update_lock);
358
359 if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
360 close.port = evtchn;
361 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
362 BUG();
363
364 switch (type_from_irq(irq)) {
365 case IRQT_VIRQ:
366 per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
367 [index_from_irq(irq)] = -1;
368 break;
369 default:
370 break;
371 }
372
373 /* Closed ports are implicitly re-bound to VCPU0. */
374 bind_evtchn_to_cpu(evtchn, 0);
375
376 evtchn_to_irq[evtchn] = -1;
377 irq_info[irq] = IRQ_UNBOUND;
378
379 dynamic_irq_init(irq);
380 }
381
382 spin_unlock(&irq_mapping_update_lock);
383}
384
385int bind_evtchn_to_irqhandler(unsigned int evtchn,
386 irq_handler_t handler,
387 unsigned long irqflags,
388 const char *devname, void *dev_id)
389{
390 unsigned int irq;
391 int retval;
392
393 irq = bind_evtchn_to_irq(evtchn);
394 retval = request_irq(irq, handler, irqflags, devname, dev_id);
395 if (retval != 0) {
396 unbind_from_irq(irq);
397 return retval;
398 }
399
400 return irq;
401}
402EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
403
404int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
405 irq_handler_t handler,
406 unsigned long irqflags, const char *devname, void *dev_id)
407{
408 unsigned int irq;
409 int retval;
410
411 irq = bind_virq_to_irq(virq, cpu);
412 retval = request_irq(irq, handler, irqflags, devname, dev_id);
413 if (retval != 0) {
414 unbind_from_irq(irq);
415 return retval;
416 }
417
418 return irq;
419}
420EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
421
422int bind_ipi_to_irqhandler(enum ipi_vector ipi,
423 unsigned int cpu,
424 irq_handler_t handler,
425 unsigned long irqflags,
426 const char *devname,
427 void *dev_id)
428{
429 int irq, retval;
430
431 irq = bind_ipi_to_irq(ipi, cpu);
432 if (irq < 0)
433 return irq;
434
435 retval = request_irq(irq, handler, irqflags, devname, dev_id);
436 if (retval != 0) {
437 unbind_from_irq(irq);
438 return retval;
439 }
440
441 return irq;
442}
443
444void unbind_from_irqhandler(unsigned int irq, void *dev_id)
445{
446 free_irq(irq, dev_id);
447 unbind_from_irq(irq);
448}
449EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
450
451void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
452{
453 int irq = per_cpu(ipi_to_irq, cpu)[vector];
454 BUG_ON(irq < 0);
455 notify_remote_via_irq(irq);
456}
457
458
459/*
460 * Search the CPUs pending events bitmasks. For each one found, map
461 * the event number to an irq, and feed it into do_IRQ() for
462 * handling.
463 *
464 * Xen uses a two-level bitmap to speed searching. The first level is
465 * a bitset of words which contain pending event bits. The second
466 * level is a bitset of pending events themselves.
467 */
468void xen_evtchn_do_upcall(struct pt_regs *regs)
469{
470 int cpu = get_cpu();
471 struct shared_info *s = HYPERVISOR_shared_info;
472 struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
473 unsigned long pending_words;
474
475 vcpu_info->evtchn_upcall_pending = 0;
476
477 /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
478 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
479 while (pending_words != 0) {
480 unsigned long pending_bits;
481 int word_idx = __ffs(pending_words);
482 pending_words &= ~(1UL << word_idx);
483
484 while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
485 int bit_idx = __ffs(pending_bits);
486 int port = (word_idx * BITS_PER_LONG) + bit_idx;
487 int irq = evtchn_to_irq[port];
488
489 if (irq != -1) {
490 regs->orig_ax = ~irq;
491 do_IRQ(regs);
492 }
493 }
494 }
495
496 put_cpu();
497}
498
499/* Rebind an evtchn so that it gets delivered to a specific cpu */
500static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
501{
502 struct evtchn_bind_vcpu bind_vcpu;
503 int evtchn = evtchn_from_irq(irq);
504
505 if (!VALID_EVTCHN(evtchn))
506 return;
507
508 /* Send future instances of this interrupt to other vcpu. */
509 bind_vcpu.port = evtchn;
510 bind_vcpu.vcpu = tcpu;
511
512 /*
513 * If this fails, it usually just indicates that we're dealing with a
514 * virq or IPI channel, which don't actually need to be rebound. Ignore
515 * it, but don't do the xenlinux-level rebind in that case.
516 */
517 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
518 bind_evtchn_to_cpu(evtchn, tcpu);
519}
520
521
522static void set_affinity_irq(unsigned irq, cpumask_t dest)
523{
524 unsigned tcpu = first_cpu(dest);
525 rebind_irq_to_cpu(irq, tcpu);
526}
527
528static void enable_dynirq(unsigned int irq)
529{
530 int evtchn = evtchn_from_irq(irq);
531
532 if (VALID_EVTCHN(evtchn))
533 unmask_evtchn(evtchn);
534}
535
536static void disable_dynirq(unsigned int irq)
537{
538 int evtchn = evtchn_from_irq(irq);
539
540 if (VALID_EVTCHN(evtchn))
541 mask_evtchn(evtchn);
542}
543
544static void ack_dynirq(unsigned int irq)
545{
546 int evtchn = evtchn_from_irq(irq);
547
548 move_native_irq(irq);
549
550 if (VALID_EVTCHN(evtchn))
551 clear_evtchn(evtchn);
552}
553
554static int retrigger_dynirq(unsigned int irq)
555{
556 int evtchn = evtchn_from_irq(irq);
557 int ret = 0;
558
559 if (VALID_EVTCHN(evtchn)) {
560 set_evtchn(evtchn);
561 ret = 1;
562 }
563
564 return ret;
565}
566
567static struct irq_chip xen_dynamic_chip __read_mostly = {
568 .name = "xen-dyn",
569 .mask = disable_dynirq,
570 .unmask = enable_dynirq,
571 .ack = ack_dynirq,
572 .set_affinity = set_affinity_irq,
573 .retrigger = retrigger_dynirq,
574};
575
576void __init xen_init_IRQ(void)
577{
578 int i;
579
580 init_evtchn_cpu_bindings();
581
582 /* No event channels are 'live' right now. */
583 for (i = 0; i < NR_EVENT_CHANNELS; i++)
584 mask_evtchn(i);
585
586 /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
587 for (i = 0; i < NR_IRQS; i++)
588 irq_bindcount[i] = 0;
589
590 irq_ctx_init(smp_processor_id());
591}
diff --git a/arch/x86/xen/features.c b/arch/x86/xen/features.c
deleted file mode 100644
index 0707714e40d6..000000000000
--- a/arch/x86/xen/features.c
+++ /dev/null
@@ -1,29 +0,0 @@
1/******************************************************************************
2 * features.c
3 *
4 * Xen feature flags.
5 *
6 * Copyright (c) 2006, Ian Campbell, XenSource Inc.
7 */
8#include <linux/types.h>
9#include <linux/cache.h>
10#include <linux/module.h>
11#include <asm/xen/hypervisor.h>
12#include <xen/features.h>
13
14u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
15EXPORT_SYMBOL_GPL(xen_features);
16
17void xen_setup_features(void)
18{
19 struct xen_feature_info fi;
20 int i, j;
21
22 for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
23 fi.submap_idx = i;
24 if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
25 break;
26 for (j = 0; j < 32; j++)
27 xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
28 }
29}
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
new file mode 100644
index 000000000000..49ba9b5224d1
--- /dev/null
+++ b/arch/x86/xen/grant-table.c
@@ -0,0 +1,91 @@
1/******************************************************************************
2 * grant_table.c
3 * x86 specific part
4 *
5 * Granting foreign access to our memory reservation.
6 *
7 * Copyright (c) 2005-2006, Christopher Clark
8 * Copyright (c) 2004-2005, K A Fraser
9 * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
10 * VA Linux Systems Japan. Split out x86 specific part.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
17 *
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
24 *
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
34 * IN THE SOFTWARE.
35 */
36
37#include <linux/sched.h>
38#include <linux/mm.h>
39#include <linux/vmalloc.h>
40
41#include <xen/interface/xen.h>
42#include <xen/page.h>
43#include <xen/grant_table.h>
44
45#include <asm/pgtable.h>
46
47static int map_pte_fn(pte_t *pte, struct page *pmd_page,
48 unsigned long addr, void *data)
49{
50 unsigned long **frames = (unsigned long **)data;
51
52 set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
53 (*frames)++;
54 return 0;
55}
56
57static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
58 unsigned long addr, void *data)
59{
60
61 set_pte_at(&init_mm, addr, pte, __pte(0));
62 return 0;
63}
64
65int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
66 unsigned long max_nr_gframes,
67 struct grant_entry **__shared)
68{
69 int rc;
70 struct grant_entry *shared = *__shared;
71
72 if (shared == NULL) {
73 struct vm_struct *area =
74 xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
75 BUG_ON(area == NULL);
76 shared = area->addr;
77 *__shared = shared;
78 }
79
80 rc = apply_to_page_range(&init_mm, (unsigned long)shared,
81 PAGE_SIZE * nr_gframes,
82 map_pte_fn, &frames);
83 return rc;
84}
85
86void arch_gnttab_unmap_shared(struct grant_entry *shared,
87 unsigned long nr_gframes)
88{
89 apply_to_page_range(&init_mm, (unsigned long)shared,
90 PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
91}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2a054ef2a3da..6cbcf65609ad 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -156,6 +156,10 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
156void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 156void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
157 pte_t *ptep, pte_t pteval) 157 pte_t *ptep, pte_t pteval)
158{ 158{
159 /* updates to init_mm may be done without lock */
160 if (mm == &init_mm)
161 preempt_disable();
162
159 if (mm == current->mm || mm == &init_mm) { 163 if (mm == current->mm || mm == &init_mm) {
160 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { 164 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
161 struct multicall_space mcs; 165 struct multicall_space mcs;
@@ -163,14 +167,61 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
163 167
164 MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); 168 MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
165 xen_mc_issue(PARAVIRT_LAZY_MMU); 169 xen_mc_issue(PARAVIRT_LAZY_MMU);
166 return; 170 goto out;
167 } else 171 } else
168 if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) 172 if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
169 return; 173 goto out;
170 } 174 }
171 xen_set_pte(ptep, pteval); 175 xen_set_pte(ptep, pteval);
176
177out:
178 if (mm == &init_mm)
179 preempt_enable();
180}
181
182pteval_t xen_pte_val(pte_t pte)
183{
184 pteval_t ret = pte.pte;
185
186 if (ret & _PAGE_PRESENT)
187 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
188
189 return ret;
190}
191
192pgdval_t xen_pgd_val(pgd_t pgd)
193{
194 pgdval_t ret = pgd.pgd;
195 if (ret & _PAGE_PRESENT)
196 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
197 return ret;
198}
199
200pte_t xen_make_pte(pteval_t pte)
201{
202 if (pte & _PAGE_PRESENT) {
203 pte = phys_to_machine(XPADDR(pte)).maddr;
204 pte &= ~(_PAGE_PCD | _PAGE_PWT);
205 }
206
207 return (pte_t){ .pte = pte };
172} 208}
173 209
210pgd_t xen_make_pgd(pgdval_t pgd)
211{
212 if (pgd & _PAGE_PRESENT)
213 pgd = phys_to_machine(XPADDR(pgd)).maddr;
214
215 return (pgd_t){ pgd };
216}
217
218pmdval_t xen_pmd_val(pmd_t pmd)
219{
220 pmdval_t ret = native_pmd_val(pmd);
221 if (ret & _PAGE_PRESENT)
222 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
223 return ret;
224}
174#ifdef CONFIG_X86_PAE 225#ifdef CONFIG_X86_PAE
175void xen_set_pud(pud_t *ptr, pud_t val) 226void xen_set_pud(pud_t *ptr, pud_t val)
176{ 227{
@@ -214,100 +265,18 @@ void xen_pmd_clear(pmd_t *pmdp)
214 xen_set_pmd(pmdp, __pmd(0)); 265 xen_set_pmd(pmdp, __pmd(0));
215} 266}
216 267
217unsigned long long xen_pte_val(pte_t pte) 268pmd_t xen_make_pmd(pmdval_t pmd)
218{ 269{
219 unsigned long long ret = 0; 270 if (pmd & _PAGE_PRESENT)
220
221 if (pte.pte_low) {
222 ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
223 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
224 }
225
226 return ret;
227}
228
229unsigned long long xen_pmd_val(pmd_t pmd)
230{
231 unsigned long long ret = pmd.pmd;
232 if (ret)
233 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
234 return ret;
235}
236
237unsigned long long xen_pgd_val(pgd_t pgd)
238{
239 unsigned long long ret = pgd.pgd;
240 if (ret)
241 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
242 return ret;
243}
244
245pte_t xen_make_pte(unsigned long long pte)
246{
247 if (pte & _PAGE_PRESENT) {
248 pte = phys_to_machine(XPADDR(pte)).maddr;
249 pte &= ~(_PAGE_PCD | _PAGE_PWT);
250 }
251
252 return (pte_t){ .pte = pte };
253}
254
255pmd_t xen_make_pmd(unsigned long long pmd)
256{
257 if (pmd & 1)
258 pmd = phys_to_machine(XPADDR(pmd)).maddr; 271 pmd = phys_to_machine(XPADDR(pmd)).maddr;
259 272
260 return (pmd_t){ pmd }; 273 return native_make_pmd(pmd);
261}
262
263pgd_t xen_make_pgd(unsigned long long pgd)
264{
265 if (pgd & _PAGE_PRESENT)
266 pgd = phys_to_machine(XPADDR(pgd)).maddr;
267
268 return (pgd_t){ pgd };
269} 274}
270#else /* !PAE */ 275#else /* !PAE */
271void xen_set_pte(pte_t *ptep, pte_t pte) 276void xen_set_pte(pte_t *ptep, pte_t pte)
272{ 277{
273 *ptep = pte; 278 *ptep = pte;
274} 279}
275
276unsigned long xen_pte_val(pte_t pte)
277{
278 unsigned long ret = pte.pte_low;
279
280 if (ret & _PAGE_PRESENT)
281 ret = machine_to_phys(XMADDR(ret)).paddr;
282
283 return ret;
284}
285
286unsigned long xen_pgd_val(pgd_t pgd)
287{
288 unsigned long ret = pgd.pgd;
289 if (ret)
290 ret = machine_to_phys(XMADDR(ret)).paddr | 1;
291 return ret;
292}
293
294pte_t xen_make_pte(unsigned long pte)
295{
296 if (pte & _PAGE_PRESENT) {
297 pte = phys_to_machine(XPADDR(pte)).maddr;
298 pte &= ~(_PAGE_PCD | _PAGE_PWT);
299 }
300
301 return (pte_t){ pte };
302}
303
304pgd_t xen_make_pgd(unsigned long pgd)
305{
306 if (pgd & _PAGE_PRESENT)
307 pgd = phys_to_machine(XPADDR(pgd)).maddr;
308
309 return (pgd_t){ pgd };
310}
311#endif /* CONFIG_X86_PAE */ 280#endif /* CONFIG_X86_PAE */
312 281
313/* 282/*
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2341492bf7a0..82517e4a752a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
16#include <asm/xen/hypervisor.h> 16#include <asm/xen/hypervisor.h>
17#include <asm/xen/hypercall.h> 17#include <asm/xen/hypercall.h>
18 18
19#include <xen/interface/callback.h>
19#include <xen/interface/physdev.h> 20#include <xen/interface/physdev.h>
20#include <xen/features.h> 21#include <xen/features.h>
21 22
@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void)
68 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; 69 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
69} 70}
70 71
72void xen_enable_sysenter(void)
73{
74 int cpu = smp_processor_id();
75 extern void xen_sysenter_target(void);
76 /* Mask events on entry, even though they get enabled immediately */
77 static struct callback_register sysenter = {
78 .type = CALLBACKTYPE_sysenter,
79 .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
80 .flags = CALLBACKF_mask_events,
81 };
82
83 if (!boot_cpu_has(X86_FEATURE_SEP) ||
84 HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
85 clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
86 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
87 }
88}
89
71void __init xen_arch_setup(void) 90void __init xen_arch_setup(void)
72{ 91{
73 struct physdev_set_iopl set_iopl; 92 struct physdev_set_iopl set_iopl;
@@ -82,6 +101,8 @@ void __init xen_arch_setup(void)
82 HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, 101 HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
83 __KERNEL_CS, (unsigned long)xen_failsafe_callback); 102 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
84 103
104 xen_enable_sysenter();
105
85 set_iopl.iopl = 1; 106 set_iopl.iopl = 1;
86 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 107 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
87 if (rc != 0) 108 if (rc != 0)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index e340ff92f6b6..92dd3dbf3ffb 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,8 +36,9 @@
36#include "mmu.h" 36#include "mmu.h"
37 37
38static cpumask_t xen_cpu_initialized_map; 38static cpumask_t xen_cpu_initialized_map;
39static DEFINE_PER_CPU(int, resched_irq); 39static DEFINE_PER_CPU(int, resched_irq) = -1;
40static DEFINE_PER_CPU(int, callfunc_irq); 40static DEFINE_PER_CPU(int, callfunc_irq) = -1;
41static DEFINE_PER_CPU(int, debug_irq) = -1;
41 42
42/* 43/*
43 * Structure and data for smp_call_function(). This is designed to minimise 44 * Structure and data for smp_call_function(). This is designed to minimise
@@ -72,6 +73,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
72 int cpu = smp_processor_id(); 73 int cpu = smp_processor_id();
73 74
74 cpu_init(); 75 cpu_init();
76 xen_enable_sysenter();
75 77
76 preempt_disable(); 78 preempt_disable();
77 per_cpu(cpu_state, cpu) = CPU_ONLINE; 79 per_cpu(cpu_state, cpu) = CPU_ONLINE;
@@ -88,9 +90,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
88static int xen_smp_intr_init(unsigned int cpu) 90static int xen_smp_intr_init(unsigned int cpu)
89{ 91{
90 int rc; 92 int rc;
91 const char *resched_name, *callfunc_name; 93 const char *resched_name, *callfunc_name, *debug_name;
92
93 per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
94 94
95 resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); 95 resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
96 rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, 96 rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -114,6 +114,14 @@ static int xen_smp_intr_init(unsigned int cpu)
114 goto fail; 114 goto fail;
115 per_cpu(callfunc_irq, cpu) = rc; 115 per_cpu(callfunc_irq, cpu) = rc;
116 116
117 debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
118 rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
119 IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
120 debug_name, NULL);
121 if (rc < 0)
122 goto fail;
123 per_cpu(debug_irq, cpu) = rc;
124
117 return 0; 125 return 0;
118 126
119 fail: 127 fail:
@@ -121,6 +129,8 @@ static int xen_smp_intr_init(unsigned int cpu)
121 unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); 129 unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
122 if (per_cpu(callfunc_irq, cpu) >= 0) 130 if (per_cpu(callfunc_irq, cpu) >= 0)
123 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); 131 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
132 if (per_cpu(debug_irq, cpu) >= 0)
133 unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
124 return rc; 134 return rc;
125} 135}
126 136
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index fe161ed4b01e..2497a30f41de 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -108,6 +108,20 @@ ENDPATCH(xen_restore_fl_direct)
108 RELOC(xen_restore_fl_direct, 2b+1) 108 RELOC(xen_restore_fl_direct, 2b+1)
109 109
110/* 110/*
111 We can't use sysexit directly, because we're not running in ring0.
112 But we can easily fake it up using iret. Assuming xen_sysexit
113 is jumped to with a standard stack frame, we can just strip it
114 back to a standard iret frame and use iret.
115 */
116ENTRY(xen_sysexit)
117 movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
118 orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
119 lea PT_EIP(%esp), %esp
120
121 jmp xen_iret
122ENDPROC(xen_sysexit)
123
124/*
111 This is run where a normal iret would be run, with the same stack setup: 125 This is run where a normal iret would be run, with the same stack setup:
112 8: eflags 126 8: eflags
113 4: cs 127 4: cs
@@ -184,8 +198,12 @@ iret_restore_end:
184 region is OK. */ 198 region is OK. */
185 je xen_hypervisor_callback 199 je xen_hypervisor_callback
186 200
187 iret 2011: iret
188xen_iret_end_crit: 202xen_iret_end_crit:
203.section __ex_table,"a"
204 .align 4
205 .long 1b,iret_exc
206.previous
189 207
190hyper_iret: 208hyper_iret:
191 /* put this out of line since its very rarely used */ 209 /* put this out of line since its very rarely used */
@@ -219,9 +237,7 @@ hyper_iret:
219 ds } SAVE_ALL state 237 ds } SAVE_ALL state
220 eax } 238 eax }
221 : : 239 : :
222 ebx } 240 ebx }<- esp
223 ----------------
224 return addr <- esp
225 ---------------- 241 ----------------
226 242
227 In order to deliver the nested exception properly, we need to shift 243 In order to deliver the nested exception properly, we need to shift
@@ -236,10 +252,8 @@ hyper_iret:
236 it's usermode state which we eventually need to restore. 252 it's usermode state which we eventually need to restore.
237 */ 253 */
238ENTRY(xen_iret_crit_fixup) 254ENTRY(xen_iret_crit_fixup)
239 /* offsets +4 for return address */
240
241 /* 255 /*
242 Paranoia: Make sure we're really coming from userspace. 256 Paranoia: Make sure we're really coming from kernel space.
243 One could imagine a case where userspace jumps into the 257 One could imagine a case where userspace jumps into the
244 critical range address, but just before the CPU delivers a GP, 258 critical range address, but just before the CPU delivers a GP,
245 it decides to deliver an interrupt instead. Unlikely? 259 it decides to deliver an interrupt instead. Unlikely?
@@ -248,32 +262,32 @@ ENTRY(xen_iret_crit_fixup)
248 jump instruction itself, not the destination, but some virtual 262 jump instruction itself, not the destination, but some virtual
249 environments get this wrong. 263 environments get this wrong.
250 */ 264 */
251 movl PT_CS+4(%esp), %ecx 265 movl PT_CS(%esp), %ecx
252 andl $SEGMENT_RPL_MASK, %ecx 266 andl $SEGMENT_RPL_MASK, %ecx
253 cmpl $USER_RPL, %ecx 267 cmpl $USER_RPL, %ecx
254 je 2f 268 je 2f
255 269
256 lea PT_ORIG_EAX+4(%esp), %esi 270 lea PT_ORIG_EAX(%esp), %esi
257 lea PT_EFLAGS+4(%esp), %edi 271 lea PT_EFLAGS(%esp), %edi
258 272
259 /* If eip is before iret_restore_end then stack 273 /* If eip is before iret_restore_end then stack
260 hasn't been restored yet. */ 274 hasn't been restored yet. */
261 cmp $iret_restore_end, %eax 275 cmp $iret_restore_end, %eax
262 jae 1f 276 jae 1f
263 277
264 movl 0+4(%edi),%eax /* copy EAX */ 278 movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */
265 movl %eax, PT_EAX+4(%esp) 279 movl %eax, PT_EAX(%esp)
266 280
267 lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ 281 lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */
268 282
269 /* set up the copy */ 283 /* set up the copy */
2701: std 2841: std
271 mov $(PT_EIP+4) / 4, %ecx /* copy ret+saved regs up to orig_eax */ 285 mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */
272 rep movsl 286 rep movsl
273 cld 287 cld
274 288
275 lea 4(%edi),%esp /* point esp to new frame */ 289 lea 4(%edi),%esp /* point esp to new frame */
2762: ret 2902: jmp xen_do_upcall
277 291
278 292
279/* 293/*
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 956a491ea998..f1063ae08037 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -2,6 +2,8 @@
2#define XEN_OPS_H 2#define XEN_OPS_H
3 3
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/irqreturn.h>
6#include <xen/xen-ops.h>
5 7
6/* These are code, but not functions. Defined in entry.S */ 8/* These are code, but not functions. Defined in entry.S */
7extern const char xen_hypervisor_callback[]; 9extern const char xen_hypervisor_callback[];
@@ -9,7 +11,6 @@ extern const char xen_failsafe_callback[];
9 11
10void xen_copy_trap_info(struct trap_info *traps); 12void xen_copy_trap_info(struct trap_info *traps);
11 13
12DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
13DECLARE_PER_CPU(unsigned long, xen_cr3); 14DECLARE_PER_CPU(unsigned long, xen_cr3);
14DECLARE_PER_CPU(unsigned long, xen_current_cr3); 15DECLARE_PER_CPU(unsigned long, xen_current_cr3);
15 16
@@ -19,6 +20,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
19char * __init xen_memory_setup(void); 20char * __init xen_memory_setup(void);
20void __init xen_arch_setup(void); 21void __init xen_arch_setup(void);
21void __init xen_init_IRQ(void); 22void __init xen_init_IRQ(void);
23void xen_enable_sysenter(void);
22 24
23void xen_setup_timer(int cpu); 25void xen_setup_timer(int cpu);
24void xen_setup_cpu_clockevents(void); 26void xen_setup_cpu_clockevents(void);
@@ -28,6 +30,8 @@ unsigned long xen_get_wallclock(void);
28int xen_set_wallclock(unsigned long time); 30int xen_set_wallclock(unsigned long time);
29unsigned long long xen_sched_clock(void); 31unsigned long long xen_sched_clock(void);
30 32
33irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
34
31bool xen_vcpu_stolen(int vcpu); 35bool xen_vcpu_stolen(int vcpu);
32 36
33void xen_mark_init_mm_pinned(void); 37void xen_mark_init_mm_pinned(void);
@@ -64,4 +68,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
64DECL_ASM(void, xen_restore_fl_direct, unsigned long); 68DECL_ASM(void, xen_restore_fl_direct, unsigned long);
65 69
66void xen_iret(void); 70void xen_iret(void);
71void xen_sysexit(void);
72
67#endif /* XEN_OPS_H */ 73#endif /* XEN_OPS_H */