path: root/drivers/xen
author    Linus Torvalds <torvalds@linux-foundation.org>  2014-01-23 01:00:18 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-01-23 01:00:18 -0500
commit    84621c9b18d0bb6cb267e3395c7f3131ecf4d39c (patch)
tree      28566fe0211798143136b5cd154e2239d38a7b68 /drivers/xen
parent    7ebd3faa9b5b42caf2d5aa1352a93dcfa0098011 (diff)
parent    c9f6e9977e38de15da96b732a8dec0ef56cbf977 (diff)
Merge tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull Xen updates from Konrad Rzeszutek Wilk:
 "Two major features that the Xen community is excited about:

  The first is event channel scalability by David Vrabel - we switch
  over from a two-level per-cpu bitmap of events (IRQs) to a FIFO queue
  with priorities.  This lets us handle more events, with lower latency
  and better scalability.  Good stuff.

  The other is PVH by Mukesh Rathor.  In short, PV is a mode where the
  kernel lets the hypervisor program page-tables, segments, etc.  With
  EPT/NPT capabilities in current processors, the overhead of doing this
  in an HVM (Hardware Virtual Machine) container is much lower than the
  hypervisor doing it for us.

  In short, we let a PV guest run without doing page-table, segment,
  syscall, etc updates through the hypervisor - instead it is all done
  within the guest container.  It is a "hybrid" PV - hence the 'PVH'
  name - a PV guest within an HVM container.

  The major benefits are less code to deal with - for example we only
  use one function from the pv_mmu_ops (which has 39 function calls);
  faster performance for syscalls (no context switches into the
  hypervisor); fewer traps on various operations; etc.  It is still
  being baked - the ABI is not yet set in stone.  But it is pretty
  awesome and we are excited about it.

  Lastly, there are some changes to ARM code - you should get a simple
  conflict, which has been resolved in #linux-next.

  In short, this pull has awesome features.

  Features:
   - FIFO event channels.  Key advantages: support for over 100,000
     events (2^17), 16 different event priorities, and improved fairness
     in event latency through the use of FIFOs.
   - Xen PVH support.  "It's a fully PV kernel mode, running with
     paravirtualized disk and network, paravirtualized interrupts and
     timers, no emulated devices of any kind (and thus no qemu), no BIOS
     or legacy boot -- but instead of requiring PV MMU, it uses the HVM
     hardware extensions to virtualize the pagetables, as well as system
     calls and other privileged operations."  (from "The
     Paravirtualization Spectrum, Part 2: From poles to a spectrum")

  Bug-fixes:
   - Fixes in the balloon driver (refactor and make it work under ARM)
   - Allow xenfb to be used in HVM guests.
   - Allow xen_platform_pci=0 to work properly.
   - Refactors in event channels"

* tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (52 commits)
  xen/pvh: Set X86_CR0_WP and others in CR0 (v2)
  MAINTAINERS: add git repository for Xen
  xen/pvh: Use 'depend' instead of 'select'.
  xen: delete new instances of __cpuinit usage
  xen/fb: allow xenfb initialization for hvm guests
  xen/evtchn_fifo: fix error return code in evtchn_fifo_setup()
  xen-platform: fix error return code in platform_pci_init()
  xen/pvh: remove duplicated include from enlighten.c
  xen/pvh: Fix compile issues with xen_pvh_domain()
  xen: Use dev_is_pci() to check whether it is pci device
  xen/grant-table: Force to use v1 of grants.
  xen/pvh: Support ParaVirtualized Hardware extensions (v3).
  xen/pvh: Piggyback on PVHVM XenBus.
  xen/pvh: Piggyback on PVHVM for grant driver (v4)
  xen/grant: Implement an grant frame array struct (v3).
  xen/grant-table: Refactor gnttab_init
  xen/grants: Remove gnttab_max_grant_frames dependency on gnttab_init.
  xen/pvh: Piggyback on PVHVM for event channels (v2)
  xen/pvh: Update E820 to work with PVH (v2)
  xen/pvh: Secondary VCPU bringup (non-bootup CPUs)
  ...
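To illustrate the event priority interface this series adds, here is a minimal
sketch of how a frontend driver might bind an event channel it obtained from
its backend via xenbus and request a higher delivery priority.  The demo_*
names are hypothetical and error handling is trimmed; bind_evtchn_to_irqhandler()
is the long-standing binding helper, while xen_set_irq_priority() and the
XEN_IRQ_PRIORITY_* constants come from this series and only take effect on the
new FIFO ABI.

    #include <linux/interrupt.h>
    #include <xen/events.h>

    static irqreturn_t demo_evtchn_handler(int irq, void *dev_id)
    {
            /* Acknowledge and process the event for this device. */
            return IRQ_HANDLED;
    }

    static int demo_bind_evtchn(unsigned int evtchn, void *dev)
    {
            int irq;

            /* Bind the port to a Linux irq and install the handler. */
            irq = bind_evtchn_to_irqhandler(evtchn, demo_evtchn_handler,
                                            0, "demo-frontend", dev);
            if (irq < 0)
                    return irq;

            /*
             * Priorities are only honoured by the FIFO event channel ABI;
             * on the 2-level ABI the hypercall fails and the event simply
             * keeps the default priority.
             */
            if (xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX))
                    pr_info("demo-frontend: event priorities not supported\n");

            return irq;
    }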
Diffstat (limited to 'drivers/xen')
-rw-r--r-- drivers/xen/Kconfig | 1
-rw-r--r-- drivers/xen/Makefile | 3
-rw-r--r-- drivers/xen/balloon.c | 9
-rw-r--r-- drivers/xen/dbgp.c | 2
-rw-r--r-- drivers/xen/events/Makefile | 5
-rw-r--r-- drivers/xen/events/events_2l.c | 372
-rw-r--r-- drivers/xen/events/events_base.c (renamed from drivers/xen/events.c) | 797
-rw-r--r-- drivers/xen/events/events_fifo.c | 428
-rw-r--r-- drivers/xen/events/events_internal.h | 150
-rw-r--r-- drivers/xen/evtchn.c | 2
-rw-r--r-- drivers/xen/gntdev.c | 2
-rw-r--r-- drivers/xen/grant-table.c | 90
-rw-r--r-- drivers/xen/pci.c | 2
-rw-r--r-- drivers/xen/platform-pci.c | 11
-rw-r--r-- drivers/xen/xenbus/xenbus_client.c | 3
-rw-r--r-- drivers/xen/xenbus/xenbus_probe_frontend.c | 2
16 files changed, 1328 insertions, 551 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 12ba6db65142..38fb36e1c592 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -3,7 +3,6 @@ menu "Xen driver support"
3 3
4config XEN_BALLOON 4config XEN_BALLOON
5 bool "Xen memory balloon driver" 5 bool "Xen memory balloon driver"
6 depends on !ARM
7 default y 6 default y
8 help 7 help
9 The balloon driver allows the Xen domain to request more memory from 8 The balloon driver allows the Xen domain to request more memory from
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 14fe79d8634a..d75c811bfa56 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -2,7 +2,8 @@ ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
2obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o 2obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
3endif 3endif
4obj-$(CONFIG_X86) += fallback.o 4obj-$(CONFIG_X86) += fallback.o
5obj-y += grant-table.o features.o events.o balloon.o manage.o 5obj-y += grant-table.o features.o balloon.o manage.o
6obj-y += events/
6obj-y += xenbus/ 7obj-y += xenbus/
7 8
8nostackp := $(call cc-option, -fno-stack-protector) 9nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 4c02e2b94103..37d06ea624aa 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -157,13 +157,6 @@ static struct page *balloon_retrieve(bool prefer_highmem)
157 return page; 157 return page;
158} 158}
159 159
160static struct page *balloon_first_page(void)
161{
162 if (list_empty(&ballooned_pages))
163 return NULL;
164 return list_entry(ballooned_pages.next, struct page, lru);
165}
166
167static struct page *balloon_next_page(struct page *page) 160static struct page *balloon_next_page(struct page *page)
168{ 161{
169 struct list_head *next = page->lru.next; 162 struct list_head *next = page->lru.next;
@@ -328,7 +321,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
328 if (nr_pages > ARRAY_SIZE(frame_list)) 321 if (nr_pages > ARRAY_SIZE(frame_list))
329 nr_pages = ARRAY_SIZE(frame_list); 322 nr_pages = ARRAY_SIZE(frame_list);
330 323
331 page = balloon_first_page(); 324 page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
332 for (i = 0; i < nr_pages; i++) { 325 for (i = 0; i < nr_pages; i++) {
333 if (!page) { 326 if (!page) {
334 nr_pages = i; 327 nr_pages = i;
diff --git a/drivers/xen/dbgp.c b/drivers/xen/dbgp.c
index f3ccc80a455f..8145a59fd9f6 100644
--- a/drivers/xen/dbgp.c
+++ b/drivers/xen/dbgp.c
@@ -19,7 +19,7 @@ static int xen_dbgp_op(struct usb_hcd *hcd, int op)
19 dbgp.op = op; 19 dbgp.op = op;
20 20
21#ifdef CONFIG_PCI 21#ifdef CONFIG_PCI
22 if (ctrlr->bus == &pci_bus_type) { 22 if (dev_is_pci(ctrlr)) {
23 const struct pci_dev *pdev = to_pci_dev(ctrlr); 23 const struct pci_dev *pdev = to_pci_dev(ctrlr);
24 24
25 dbgp.u.pci.seg = pci_domain_nr(pdev->bus); 25 dbgp.u.pci.seg = pci_domain_nr(pdev->bus);
diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile
new file mode 100644
index 000000000000..62be55cd981d
--- /dev/null
+++ b/drivers/xen/events/Makefile
@@ -0,0 +1,5 @@
1obj-y += events.o
2
3events-y += events_base.o
4events-y += events_2l.o
5events-y += events_fifo.o
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
new file mode 100644
index 000000000000..d7ff91757307
--- /dev/null
+++ b/drivers/xen/events/events_2l.c
@@ -0,0 +1,372 @@
1/*
2 * Xen event channels (2-level ABI)
3 *
4 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
5 */
6
7#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
8
9#include <linux/linkage.h>
10#include <linux/interrupt.h>
11#include <linux/irq.h>
12#include <linux/module.h>
13
14#include <asm/sync_bitops.h>
15#include <asm/xen/hypercall.h>
16#include <asm/xen/hypervisor.h>
17
18#include <xen/xen.h>
19#include <xen/xen-ops.h>
20#include <xen/events.h>
21#include <xen/interface/xen.h>
22#include <xen/interface/event_channel.h>
23
24#include "events_internal.h"
25
26/*
27 * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
28 * careful to only use bitops which allow for this (e.g
29 * test_bit/find_first_bit and friends but not __ffs) and to pass
30 * BITS_PER_EVTCHN_WORD as the bitmask length.
31 */
32#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
33/*
34 * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
35 * array. Primarily to avoid long lines (hence the terse name).
36 */
37#define BM(x) (unsigned long *)(x)
38/* Find the first set bit in a evtchn mask */
39#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
40
41static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD],
42 cpu_evtchn_mask);
43
44static unsigned evtchn_2l_max_channels(void)
45{
46 return EVTCHN_2L_NR_CHANNELS;
47}
48
49static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu)
50{
51 clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu)));
52 set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
53}
54
55static void evtchn_2l_clear_pending(unsigned port)
56{
57 struct shared_info *s = HYPERVISOR_shared_info;
58 sync_clear_bit(port, BM(&s->evtchn_pending[0]));
59}
60
61static void evtchn_2l_set_pending(unsigned port)
62{
63 struct shared_info *s = HYPERVISOR_shared_info;
64 sync_set_bit(port, BM(&s->evtchn_pending[0]));
65}
66
67static bool evtchn_2l_is_pending(unsigned port)
68{
69 struct shared_info *s = HYPERVISOR_shared_info;
70 return sync_test_bit(port, BM(&s->evtchn_pending[0]));
71}
72
73static bool evtchn_2l_test_and_set_mask(unsigned port)
74{
75 struct shared_info *s = HYPERVISOR_shared_info;
76 return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
77}
78
79static void evtchn_2l_mask(unsigned port)
80{
81 struct shared_info *s = HYPERVISOR_shared_info;
82 sync_set_bit(port, BM(&s->evtchn_mask[0]));
83}
84
85static void evtchn_2l_unmask(unsigned port)
86{
87 struct shared_info *s = HYPERVISOR_shared_info;
88 unsigned int cpu = get_cpu();
89 int do_hypercall = 0, evtchn_pending = 0;
90
91 BUG_ON(!irqs_disabled());
92
93 if (unlikely((cpu != cpu_from_evtchn(port))))
94 do_hypercall = 1;
95 else {
96 /*
97 * Need to clear the mask before checking pending to
98 * avoid a race with an event becoming pending.
99 *
100 * EVTCHNOP_unmask will only trigger an upcall if the
101 * mask bit was set, so if a hypercall is needed
102 * remask the event.
103 */
104 sync_clear_bit(port, BM(&s->evtchn_mask[0]));
105 evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
106
107 if (unlikely(evtchn_pending && xen_hvm_domain())) {
108 sync_set_bit(port, BM(&s->evtchn_mask[0]));
109 do_hypercall = 1;
110 }
111 }
112
113 /* Slow path (hypercall) if this is a non-local port or if this is
114 * an hvm domain and an event is pending (hvm domains don't have
115 * their own implementation of irq_enable). */
116 if (do_hypercall) {
117 struct evtchn_unmask unmask = { .port = port };
118 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
119 } else {
120 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
121
122 /*
123 * The following is basically the equivalent of
124 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
125 * the interrupt edge' if the channel is masked.
126 */
127 if (evtchn_pending &&
128 !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
129 BM(&vcpu_info->evtchn_pending_sel)))
130 vcpu_info->evtchn_upcall_pending = 1;
131 }
132
133 put_cpu();
134}
135
136static DEFINE_PER_CPU(unsigned int, current_word_idx);
137static DEFINE_PER_CPU(unsigned int, current_bit_idx);
138
139/*
140 * Mask out the i least significant bits of w
141 */
142#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
143
144static inline xen_ulong_t active_evtchns(unsigned int cpu,
145 struct shared_info *sh,
146 unsigned int idx)
147{
148 return sh->evtchn_pending[idx] &
149 per_cpu(cpu_evtchn_mask, cpu)[idx] &
150 ~sh->evtchn_mask[idx];
151}
152
153/*
154 * Search the CPU's pending events bitmasks. For each one found, map
155 * the event number to an irq, and feed it into do_IRQ() for handling.
156 *
157 * Xen uses a two-level bitmap to speed searching. The first level is
158 * a bitset of words which contain pending event bits. The second
159 * level is a bitset of pending events themselves.
160 */
161static void evtchn_2l_handle_events(unsigned cpu)
162{
163 int irq;
164 xen_ulong_t pending_words;
165 xen_ulong_t pending_bits;
166 int start_word_idx, start_bit_idx;
167 int word_idx, bit_idx;
168 int i;
169 struct irq_desc *desc;
170 struct shared_info *s = HYPERVISOR_shared_info;
171 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
172
173 /* Timer interrupt has highest priority. */
174 irq = irq_from_virq(cpu, VIRQ_TIMER);
175 if (irq != -1) {
176 unsigned int evtchn = evtchn_from_irq(irq);
177 word_idx = evtchn / BITS_PER_LONG;
178 bit_idx = evtchn % BITS_PER_LONG;
179 if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) {
180 desc = irq_to_desc(irq);
181 if (desc)
182 generic_handle_irq_desc(irq, desc);
183 }
184 }
185
186 /*
187 * Master flag must be cleared /before/ clearing
188 * selector flag. xchg_xen_ulong must contain an
189 * appropriate barrier.
190 */
191 pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
192
193 start_word_idx = __this_cpu_read(current_word_idx);
194 start_bit_idx = __this_cpu_read(current_bit_idx);
195
196 word_idx = start_word_idx;
197
198 for (i = 0; pending_words != 0; i++) {
199 xen_ulong_t words;
200
201 words = MASK_LSBS(pending_words, word_idx);
202
203 /*
204 * If we masked out all events, wrap to beginning.
205 */
206 if (words == 0) {
207 word_idx = 0;
208 bit_idx = 0;
209 continue;
210 }
211 word_idx = EVTCHN_FIRST_BIT(words);
212
213 pending_bits = active_evtchns(cpu, s, word_idx);
214 bit_idx = 0; /* usually scan entire word from start */
215 /*
216 * We scan the starting word in two parts.
217 *
218 * 1st time: start in the middle, scanning the
219 * upper bits.
220 *
221 * 2nd time: scan the whole word (not just the
222 * parts skipped in the first pass) -- if an
223 * event in the previously scanned bits is
224 * pending again it would just be scanned on
225 * the next loop anyway.
226 */
227 if (word_idx == start_word_idx) {
228 if (i == 0)
229 bit_idx = start_bit_idx;
230 }
231
232 do {
233 xen_ulong_t bits;
234 int port;
235
236 bits = MASK_LSBS(pending_bits, bit_idx);
237
238 /* If we masked out all events, move on. */
239 if (bits == 0)
240 break;
241
242 bit_idx = EVTCHN_FIRST_BIT(bits);
243
244 /* Process port. */
245 port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
246 irq = get_evtchn_to_irq(port);
247
248 if (irq != -1) {
249 desc = irq_to_desc(irq);
250 if (desc)
251 generic_handle_irq_desc(irq, desc);
252 }
253
254 bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
255
256 /* Next caller starts at last processed + 1 */
257 __this_cpu_write(current_word_idx,
258 bit_idx ? word_idx :
259 (word_idx+1) % BITS_PER_EVTCHN_WORD);
260 __this_cpu_write(current_bit_idx, bit_idx);
261 } while (bit_idx != 0);
262
263 /* Scan start_l1i twice; all others once. */
264 if ((word_idx != start_word_idx) || (i != 0))
265 pending_words &= ~(1UL << word_idx);
266
267 word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
268 }
269}
270
271irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
272{
273 struct shared_info *sh = HYPERVISOR_shared_info;
274 int cpu = smp_processor_id();
275 xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
276 int i;
277 unsigned long flags;
278 static DEFINE_SPINLOCK(debug_lock);
279 struct vcpu_info *v;
280
281 spin_lock_irqsave(&debug_lock, flags);
282
283 printk("\nvcpu %d\n ", cpu);
284
285 for_each_online_cpu(i) {
286 int pending;
287 v = per_cpu(xen_vcpu, i);
288 pending = (get_irq_regs() && i == cpu)
289 ? xen_irqs_disabled(get_irq_regs())
290 : v->evtchn_upcall_mask;
291 printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
292 pending, v->evtchn_upcall_pending,
293 (int)(sizeof(v->evtchn_pending_sel)*2),
294 v->evtchn_pending_sel);
295 }
296 v = per_cpu(xen_vcpu, cpu);
297
298 printk("\npending:\n ");
299 for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
300 printk("%0*"PRI_xen_ulong"%s",
301 (int)sizeof(sh->evtchn_pending[0])*2,
302 sh->evtchn_pending[i],
303 i % 8 == 0 ? "\n " : " ");
304 printk("\nglobal mask:\n ");
305 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
306 printk("%0*"PRI_xen_ulong"%s",
307 (int)(sizeof(sh->evtchn_mask[0])*2),
308 sh->evtchn_mask[i],
309 i % 8 == 0 ? "\n " : " ");
310
311 printk("\nglobally unmasked:\n ");
312 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
313 printk("%0*"PRI_xen_ulong"%s",
314 (int)(sizeof(sh->evtchn_mask[0])*2),
315 sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
316 i % 8 == 0 ? "\n " : " ");
317
318 printk("\nlocal cpu%d mask:\n ", cpu);
319 for (i = (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
320 printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
321 cpu_evtchn[i],
322 i % 8 == 0 ? "\n " : " ");
323
324 printk("\nlocally unmasked:\n ");
325 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
326 xen_ulong_t pending = sh->evtchn_pending[i]
327 & ~sh->evtchn_mask[i]
328 & cpu_evtchn[i];
329 printk("%0*"PRI_xen_ulong"%s",
330 (int)(sizeof(sh->evtchn_mask[0])*2),
331 pending, i % 8 == 0 ? "\n " : " ");
332 }
333
334 printk("\npending list:\n");
335 for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
336 if (sync_test_bit(i, BM(sh->evtchn_pending))) {
337 int word_idx = i / BITS_PER_EVTCHN_WORD;
338 printk(" %d: event %d -> irq %d%s%s%s\n",
339 cpu_from_evtchn(i), i,
340 get_evtchn_to_irq(i),
341 sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
342 ? "" : " l2-clear",
343 !sync_test_bit(i, BM(sh->evtchn_mask))
344 ? "" : " globally-masked",
345 sync_test_bit(i, BM(cpu_evtchn))
346 ? "" : " locally-masked");
347 }
348 }
349
350 spin_unlock_irqrestore(&debug_lock, flags);
351
352 return IRQ_HANDLED;
353}
354
355static const struct evtchn_ops evtchn_ops_2l = {
356 .max_channels = evtchn_2l_max_channels,
357 .nr_channels = evtchn_2l_max_channels,
358 .bind_to_cpu = evtchn_2l_bind_to_cpu,
359 .clear_pending = evtchn_2l_clear_pending,
360 .set_pending = evtchn_2l_set_pending,
361 .is_pending = evtchn_2l_is_pending,
362 .test_and_set_mask = evtchn_2l_test_and_set_mask,
363 .mask = evtchn_2l_mask,
364 .unmask = evtchn_2l_unmask,
365 .handle_events = evtchn_2l_handle_events,
366};
367
368void __init xen_evtchn_2l_init(void)
369{
370 pr_info("Using 2-level ABI\n");
371 evtchn_ops = &evtchn_ops_2l;
372}
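The 2-level backend above plugs into the generic code purely through the
evtchn_ops pointer it assigns in xen_evtchn_2l_init(); events_fifo.c is
expected to register itself the same way.  As a rough sketch of the dispatch
layer assumed to live in events_internal.h (not shown in this diff), the
generic code calls small wrappers like these instead of touching the
ABI-specific bitmaps or FIFO queues directly; xen_evtchn_nr_channels() in
events_base.c, visible later in this diff, follows the same pattern.

    /* Illustrative wrappers around the backend ops pointer. */
    extern const struct evtchn_ops *evtchn_ops;  /* set by the backend init */

    static inline unsigned xen_evtchn_max_channels(void)
    {
            return evtchn_ops->max_channels();
    }

    static inline void xen_evtchn_handle_events(unsigned cpu)
    {
            evtchn_ops->handle_events(cpu);
    }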
diff --git a/drivers/xen/events.c b/drivers/xen/events/events_base.c
index 4035e833ea26..4672e003c0ad 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events/events_base.c
@@ -59,6 +59,10 @@
59#include <xen/interface/vcpu.h> 59#include <xen/interface/vcpu.h>
60#include <asm/hw_irq.h> 60#include <asm/hw_irq.h>
61 61
62#include "events_internal.h"
63
64const struct evtchn_ops *evtchn_ops;
65
62/* 66/*
63 * This lock protects updates to the following mapping and reference-count 67 * This lock protects updates to the following mapping and reference-count
64 * arrays. The lock does not need to be acquired to read the mapping tables. 68 * arrays. The lock does not need to be acquired to read the mapping tables.
@@ -73,71 +77,15 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
73/* IRQ <-> IPI mapping */ 77/* IRQ <-> IPI mapping */
74static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; 78static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
75 79
76/* Interrupt types. */ 80int **evtchn_to_irq;
77enum xen_irq_type {
78 IRQT_UNBOUND = 0,
79 IRQT_PIRQ,
80 IRQT_VIRQ,
81 IRQT_IPI,
82 IRQT_EVTCHN
83};
84
85/*
86 * Packed IRQ information:
87 * type - enum xen_irq_type
88 * event channel - irq->event channel mapping
89 * cpu - cpu this event channel is bound to
90 * index - type-specific information:
91 * PIRQ - physical IRQ, GSI, flags, and owner domain
92 * VIRQ - virq number
93 * IPI - IPI vector
94 * EVTCHN -
95 */
96struct irq_info {
97 struct list_head list;
98 int refcnt;
99 enum xen_irq_type type; /* type */
100 unsigned irq;
101 unsigned short evtchn; /* event channel */
102 unsigned short cpu; /* cpu bound */
103
104 union {
105 unsigned short virq;
106 enum ipi_vector ipi;
107 struct {
108 unsigned short pirq;
109 unsigned short gsi;
110 unsigned char flags;
111 uint16_t domid;
112 } pirq;
113 } u;
114};
115#define PIRQ_NEEDS_EOI (1 << 0)
116#define PIRQ_SHAREABLE (1 << 1)
117
118static int *evtchn_to_irq;
119#ifdef CONFIG_X86 81#ifdef CONFIG_X86
120static unsigned long *pirq_eoi_map; 82static unsigned long *pirq_eoi_map;
121#endif 83#endif
122static bool (*pirq_needs_eoi)(unsigned irq); 84static bool (*pirq_needs_eoi)(unsigned irq);
123 85
124/* 86#define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
125 * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be 87#define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
126 * careful to only use bitops which allow for this (e.g 88#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
127 * test_bit/find_first_bit and friends but not __ffs) and to pass
128 * BITS_PER_EVTCHN_WORD as the bitmask length.
129 */
130#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
131/*
132 * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
133 * array. Primarily to avoid long lines (hence the terse name).
134 */
135#define BM(x) (unsigned long *)(x)
136/* Find the first set bit in a evtchn mask */
137#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
138
139static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD],
140 cpu_evtchn_mask);
141 89
142/* Xen will never allocate port zero for any purpose. */ 90/* Xen will never allocate port zero for any purpose. */
143#define VALID_EVTCHN(chn) ((chn) != 0) 91#define VALID_EVTCHN(chn) ((chn) != 0)
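To put concrete numbers on the EVTCHN_ROW()/EVTCHN_COL() mapping introduced
above: assuming 4 KiB pages and 4-byte ints, EVTCHN_PER_ROW is 4096 / 4 = 1024,
so port 70000 lands in row 70000 / 1024 = 68, column 70000 % 1024 = 368.
Covering the FIFO ABI's 2^17 ports therefore needs only 128 row pointers, with
each 4 KiB row allocated lazily the first time a port in that row is bound
(see set_evtchn_to_irq() below), instead of the old flat array sized for every
possible channel up front.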
@@ -148,19 +96,75 @@ static struct irq_chip xen_pirq_chip;
148static void enable_dynirq(struct irq_data *data); 96static void enable_dynirq(struct irq_data *data);
149static void disable_dynirq(struct irq_data *data); 97static void disable_dynirq(struct irq_data *data);
150 98
99static void clear_evtchn_to_irq_row(unsigned row)
100{
101 unsigned col;
102
103 for (col = 0; col < EVTCHN_PER_ROW; col++)
104 evtchn_to_irq[row][col] = -1;
105}
106
107static void clear_evtchn_to_irq_all(void)
108{
109 unsigned row;
110
111 for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
112 if (evtchn_to_irq[row] == NULL)
113 continue;
114 clear_evtchn_to_irq_row(row);
115 }
116}
117
118static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
119{
120 unsigned row;
121 unsigned col;
122
123 if (evtchn >= xen_evtchn_max_channels())
124 return -EINVAL;
125
126 row = EVTCHN_ROW(evtchn);
127 col = EVTCHN_COL(evtchn);
128
129 if (evtchn_to_irq[row] == NULL) {
130 /* Unallocated irq entries return -1 anyway */
131 if (irq == -1)
132 return 0;
133
134 evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
135 if (evtchn_to_irq[row] == NULL)
136 return -ENOMEM;
137
138 clear_evtchn_to_irq_row(row);
139 }
140
141 evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq;
142 return 0;
143}
144
145int get_evtchn_to_irq(unsigned evtchn)
146{
147 if (evtchn >= xen_evtchn_max_channels())
148 return -1;
149 if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
150 return -1;
151 return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
152}
153
151/* Get info for IRQ */ 154/* Get info for IRQ */
152static struct irq_info *info_for_irq(unsigned irq) 155struct irq_info *info_for_irq(unsigned irq)
153{ 156{
154 return irq_get_handler_data(irq); 157 return irq_get_handler_data(irq);
155} 158}
156 159
157/* Constructors for packed IRQ information. */ 160/* Constructors for packed IRQ information. */
158static void xen_irq_info_common_init(struct irq_info *info, 161static int xen_irq_info_common_setup(struct irq_info *info,
159 unsigned irq, 162 unsigned irq,
160 enum xen_irq_type type, 163 enum xen_irq_type type,
161 unsigned short evtchn, 164 unsigned evtchn,
162 unsigned short cpu) 165 unsigned short cpu)
163{ 166{
167 int ret;
164 168
165 BUG_ON(info->type != IRQT_UNBOUND && info->type != type); 169 BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
166 170
@@ -169,68 +173,78 @@ static void xen_irq_info_common_init(struct irq_info *info,
169 info->evtchn = evtchn; 173 info->evtchn = evtchn;
170 info->cpu = cpu; 174 info->cpu = cpu;
171 175
172 evtchn_to_irq[evtchn] = irq; 176 ret = set_evtchn_to_irq(evtchn, irq);
177 if (ret < 0)
178 return ret;
173 179
174 irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); 180 irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
181
182 return xen_evtchn_port_setup(info);
175} 183}
176 184
177static void xen_irq_info_evtchn_init(unsigned irq, 185static int xen_irq_info_evtchn_setup(unsigned irq,
178 unsigned short evtchn) 186 unsigned evtchn)
179{ 187{
180 struct irq_info *info = info_for_irq(irq); 188 struct irq_info *info = info_for_irq(irq);
181 189
182 xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0); 190 return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
183} 191}
184 192
185static void xen_irq_info_ipi_init(unsigned cpu, 193static int xen_irq_info_ipi_setup(unsigned cpu,
186 unsigned irq, 194 unsigned irq,
187 unsigned short evtchn, 195 unsigned evtchn,
188 enum ipi_vector ipi) 196 enum ipi_vector ipi)
189{ 197{
190 struct irq_info *info = info_for_irq(irq); 198 struct irq_info *info = info_for_irq(irq);
191 199
192 xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
193
194 info->u.ipi = ipi; 200 info->u.ipi = ipi;
195 201
196 per_cpu(ipi_to_irq, cpu)[ipi] = irq; 202 per_cpu(ipi_to_irq, cpu)[ipi] = irq;
203
204 return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
197} 205}
198 206
199static void xen_irq_info_virq_init(unsigned cpu, 207static int xen_irq_info_virq_setup(unsigned cpu,
200 unsigned irq, 208 unsigned irq,
201 unsigned short evtchn, 209 unsigned evtchn,
202 unsigned short virq) 210 unsigned virq)
203{ 211{
204 struct irq_info *info = info_for_irq(irq); 212 struct irq_info *info = info_for_irq(irq);
205 213
206 xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
207
208 info->u.virq = virq; 214 info->u.virq = virq;
209 215
210 per_cpu(virq_to_irq, cpu)[virq] = irq; 216 per_cpu(virq_to_irq, cpu)[virq] = irq;
217
218 return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
211} 219}
212 220
213static void xen_irq_info_pirq_init(unsigned irq, 221static int xen_irq_info_pirq_setup(unsigned irq,
214 unsigned short evtchn, 222 unsigned evtchn,
215 unsigned short pirq, 223 unsigned pirq,
216 unsigned short gsi, 224 unsigned gsi,
217 uint16_t domid, 225 uint16_t domid,
218 unsigned char flags) 226 unsigned char flags)
219{ 227{
220 struct irq_info *info = info_for_irq(irq); 228 struct irq_info *info = info_for_irq(irq);
221 229
222 xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
223
224 info->u.pirq.pirq = pirq; 230 info->u.pirq.pirq = pirq;
225 info->u.pirq.gsi = gsi; 231 info->u.pirq.gsi = gsi;
226 info->u.pirq.domid = domid; 232 info->u.pirq.domid = domid;
227 info->u.pirq.flags = flags; 233 info->u.pirq.flags = flags;
234
235 return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
236}
237
238static void xen_irq_info_cleanup(struct irq_info *info)
239{
240 set_evtchn_to_irq(info->evtchn, -1);
241 info->evtchn = 0;
228} 242}
229 243
230/* 244/*
231 * Accessors for packed IRQ information. 245 * Accessors for packed IRQ information.
232 */ 246 */
233static unsigned int evtchn_from_irq(unsigned irq) 247unsigned int evtchn_from_irq(unsigned irq)
234{ 248{
235 if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq))) 249 if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
236 return 0; 250 return 0;
@@ -240,10 +254,15 @@ static unsigned int evtchn_from_irq(unsigned irq)
240 254
241unsigned irq_from_evtchn(unsigned int evtchn) 255unsigned irq_from_evtchn(unsigned int evtchn)
242{ 256{
243 return evtchn_to_irq[evtchn]; 257 return get_evtchn_to_irq(evtchn);
244} 258}
245EXPORT_SYMBOL_GPL(irq_from_evtchn); 259EXPORT_SYMBOL_GPL(irq_from_evtchn);
246 260
261int irq_from_virq(unsigned int cpu, unsigned int virq)
262{
263 return per_cpu(virq_to_irq, cpu)[virq];
264}
265
247static enum ipi_vector ipi_from_irq(unsigned irq) 266static enum ipi_vector ipi_from_irq(unsigned irq)
248{ 267{
249 struct irq_info *info = info_for_irq(irq); 268 struct irq_info *info = info_for_irq(irq);
@@ -279,14 +298,14 @@ static enum xen_irq_type type_from_irq(unsigned irq)
279 return info_for_irq(irq)->type; 298 return info_for_irq(irq)->type;
280} 299}
281 300
282static unsigned cpu_from_irq(unsigned irq) 301unsigned cpu_from_irq(unsigned irq)
283{ 302{
284 return info_for_irq(irq)->cpu; 303 return info_for_irq(irq)->cpu;
285} 304}
286 305
287static unsigned int cpu_from_evtchn(unsigned int evtchn) 306unsigned int cpu_from_evtchn(unsigned int evtchn)
288{ 307{
289 int irq = evtchn_to_irq[evtchn]; 308 int irq = get_evtchn_to_irq(evtchn);
290 unsigned ret = 0; 309 unsigned ret = 0;
291 310
292 if (irq != -1) 311 if (irq != -1)
@@ -310,67 +329,29 @@ static bool pirq_needs_eoi_flag(unsigned irq)
310 return info->u.pirq.flags & PIRQ_NEEDS_EOI; 329 return info->u.pirq.flags & PIRQ_NEEDS_EOI;
311} 330}
312 331
313static inline xen_ulong_t active_evtchns(unsigned int cpu,
314 struct shared_info *sh,
315 unsigned int idx)
316{
317 return sh->evtchn_pending[idx] &
318 per_cpu(cpu_evtchn_mask, cpu)[idx] &
319 ~sh->evtchn_mask[idx];
320}
321
322static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) 332static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
323{ 333{
324 int irq = evtchn_to_irq[chn]; 334 int irq = get_evtchn_to_irq(chn);
335 struct irq_info *info = info_for_irq(irq);
325 336
326 BUG_ON(irq == -1); 337 BUG_ON(irq == -1);
327#ifdef CONFIG_SMP 338#ifdef CONFIG_SMP
328 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); 339 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
329#endif 340#endif
330 341
331 clear_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)))); 342 xen_evtchn_port_bind_to_cpu(info, cpu);
332 set_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu)));
333
334 info_for_irq(irq)->cpu = cpu;
335}
336
337static void init_evtchn_cpu_bindings(void)
338{
339 int i;
340#ifdef CONFIG_SMP
341 struct irq_info *info;
342
343 /* By default all event channels notify CPU#0. */
344 list_for_each_entry(info, &xen_irq_list_head, list) {
345 struct irq_desc *desc = irq_to_desc(info->irq);
346 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
347 }
348#endif
349
350 for_each_possible_cpu(i)
351 memset(per_cpu(cpu_evtchn_mask, i),
352 (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8);
353}
354 343
355static inline void clear_evtchn(int port) 344 info->cpu = cpu;
356{
357 struct shared_info *s = HYPERVISOR_shared_info;
358 sync_clear_bit(port, BM(&s->evtchn_pending[0]));
359} 345}
360 346
361static inline void set_evtchn(int port) 347static void xen_evtchn_mask_all(void)
362{ 348{
363 struct shared_info *s = HYPERVISOR_shared_info; 349 unsigned int evtchn;
364 sync_set_bit(port, BM(&s->evtchn_pending[0]));
365}
366 350
367static inline int test_evtchn(int port) 351 for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
368{ 352 mask_evtchn(evtchn);
369 struct shared_info *s = HYPERVISOR_shared_info;
370 return sync_test_bit(port, BM(&s->evtchn_pending[0]));
371} 353}
372 354
373
374/** 355/**
375 * notify_remote_via_irq - send event to remote end of event channel via irq 356 * notify_remote_via_irq - send event to remote end of event channel via irq
376 * @irq: irq of event channel to send event to 357 * @irq: irq of event channel to send event to
@@ -388,63 +369,6 @@ void notify_remote_via_irq(int irq)
388} 369}
389EXPORT_SYMBOL_GPL(notify_remote_via_irq); 370EXPORT_SYMBOL_GPL(notify_remote_via_irq);
390 371
391static void mask_evtchn(int port)
392{
393 struct shared_info *s = HYPERVISOR_shared_info;
394 sync_set_bit(port, BM(&s->evtchn_mask[0]));
395}
396
397static void unmask_evtchn(int port)
398{
399 struct shared_info *s = HYPERVISOR_shared_info;
400 unsigned int cpu = get_cpu();
401 int do_hypercall = 0, evtchn_pending = 0;
402
403 BUG_ON(!irqs_disabled());
404
405 if (unlikely((cpu != cpu_from_evtchn(port))))
406 do_hypercall = 1;
407 else {
408 /*
409 * Need to clear the mask before checking pending to
410 * avoid a race with an event becoming pending.
411 *
412 * EVTCHNOP_unmask will only trigger an upcall if the
413 * mask bit was set, so if a hypercall is needed
414 * remask the event.
415 */
416 sync_clear_bit(port, BM(&s->evtchn_mask[0]));
417 evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
418
419 if (unlikely(evtchn_pending && xen_hvm_domain())) {
420 sync_set_bit(port, BM(&s->evtchn_mask[0]));
421 do_hypercall = 1;
422 }
423 }
424
425 /* Slow path (hypercall) if this is a non-local port or if this is
426 * an hvm domain and an event is pending (hvm domains don't have
427 * their own implementation of irq_enable). */
428 if (do_hypercall) {
429 struct evtchn_unmask unmask = { .port = port };
430 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
431 } else {
432 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
433
434 /*
435 * The following is basically the equivalent of
436 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
437 * the interrupt edge' if the channel is masked.
438 */
439 if (evtchn_pending &&
440 !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
441 BM(&vcpu_info->evtchn_pending_sel)))
442 vcpu_info->evtchn_upcall_pending = 1;
443 }
444
445 put_cpu();
446}
447
448static void xen_irq_init(unsigned irq) 372static void xen_irq_init(unsigned irq)
449{ 373{
450 struct irq_info *info; 374 struct irq_info *info;
@@ -538,6 +462,18 @@ static void xen_free_irq(unsigned irq)
538 irq_free_desc(irq); 462 irq_free_desc(irq);
539} 463}
540 464
465static void xen_evtchn_close(unsigned int port)
466{
467 struct evtchn_close close;
468
469 close.port = port;
470 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
471 BUG();
472
473 /* Closed ports are implicitly re-bound to VCPU0. */
474 bind_evtchn_to_cpu(port, 0);
475}
476
541static void pirq_query_unmask(int irq) 477static void pirq_query_unmask(int irq)
542{ 478{
543 struct physdev_irq_status_query irq_status; 479 struct physdev_irq_status_query irq_status;
@@ -610,7 +546,13 @@ static unsigned int __startup_pirq(unsigned int irq)
610 546
611 pirq_query_unmask(irq); 547 pirq_query_unmask(irq);
612 548
613 evtchn_to_irq[evtchn] = irq; 549 rc = set_evtchn_to_irq(evtchn, irq);
550 if (rc != 0) {
551 pr_err("irq%d: Failed to set port to irq mapping (%d)\n",
552 irq, rc);
553 xen_evtchn_close(evtchn);
554 return 0;
555 }
614 bind_evtchn_to_cpu(evtchn, 0); 556 bind_evtchn_to_cpu(evtchn, 0);
615 info->evtchn = evtchn; 557 info->evtchn = evtchn;
616 558
@@ -628,10 +570,9 @@ static unsigned int startup_pirq(struct irq_data *data)
628 570
629static void shutdown_pirq(struct irq_data *data) 571static void shutdown_pirq(struct irq_data *data)
630{ 572{
631 struct evtchn_close close;
632 unsigned int irq = data->irq; 573 unsigned int irq = data->irq;
633 struct irq_info *info = info_for_irq(irq); 574 struct irq_info *info = info_for_irq(irq);
634 int evtchn = evtchn_from_irq(irq); 575 unsigned evtchn = evtchn_from_irq(irq);
635 576
636 BUG_ON(info->type != IRQT_PIRQ); 577 BUG_ON(info->type != IRQT_PIRQ);
637 578
@@ -639,14 +580,8 @@ static void shutdown_pirq(struct irq_data *data)
639 return; 580 return;
640 581
641 mask_evtchn(evtchn); 582 mask_evtchn(evtchn);
642 583 xen_evtchn_close(evtchn);
643 close.port = evtchn; 584 xen_irq_info_cleanup(info);
644 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
645 BUG();
646
647 bind_evtchn_to_cpu(evtchn, 0);
648 evtchn_to_irq[evtchn] = -1;
649 info->evtchn = 0;
650} 585}
651 586
652static void enable_pirq(struct irq_data *data) 587static void enable_pirq(struct irq_data *data)
@@ -675,6 +610,41 @@ int xen_irq_from_gsi(unsigned gsi)
675} 610}
676EXPORT_SYMBOL_GPL(xen_irq_from_gsi); 611EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
677 612
613static void __unbind_from_irq(unsigned int irq)
614{
615 int evtchn = evtchn_from_irq(irq);
616 struct irq_info *info = irq_get_handler_data(irq);
617
618 if (info->refcnt > 0) {
619 info->refcnt--;
620 if (info->refcnt != 0)
621 return;
622 }
623
624 if (VALID_EVTCHN(evtchn)) {
625 unsigned int cpu = cpu_from_irq(irq);
626
627 xen_evtchn_close(evtchn);
628
629 switch (type_from_irq(irq)) {
630 case IRQT_VIRQ:
631 per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
632 break;
633 case IRQT_IPI:
634 per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
635 break;
636 default:
637 break;
638 }
639
640 xen_irq_info_cleanup(info);
641 }
642
643 BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
644
645 xen_free_irq(irq);
646}
647
678/* 648/*
679 * Do not make any assumptions regarding the relationship between the 649 * Do not make any assumptions regarding the relationship between the
680 * IRQ number returned here and the Xen pirq argument. 650 * IRQ number returned here and the Xen pirq argument.
@@ -690,6 +660,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
690{ 660{
691 int irq = -1; 661 int irq = -1;
692 struct physdev_irq irq_op; 662 struct physdev_irq irq_op;
663 int ret;
693 664
694 mutex_lock(&irq_mapping_update_lock); 665 mutex_lock(&irq_mapping_update_lock);
695 666
@@ -717,8 +688,13 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
717 goto out; 688 goto out;
718 } 689 }
719 690
720 xen_irq_info_pirq_init(irq, 0, pirq, gsi, DOMID_SELF, 691 ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
721 shareable ? PIRQ_SHAREABLE : 0); 692 shareable ? PIRQ_SHAREABLE : 0);
693 if (ret < 0) {
694 __unbind_from_irq(irq);
695 irq = ret;
696 goto out;
697 }
722 698
723 pirq_query_unmask(irq); 699 pirq_query_unmask(irq);
724 /* We try to use the handler with the appropriate semantic for the 700 /* We try to use the handler with the appropriate semantic for the
@@ -778,7 +754,9 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
778 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, 754 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
779 name); 755 name);
780 756
781 xen_irq_info_pirq_init(irq, 0, pirq, 0, domid, 0); 757 ret = xen_irq_info_pirq_setup(irq, 0, pirq, 0, domid, 0);
758 if (ret < 0)
759 goto error_irq;
782 ret = irq_set_msi_desc(irq, msidesc); 760 ret = irq_set_msi_desc(irq, msidesc);
783 if (ret < 0) 761 if (ret < 0)
784 goto error_irq; 762 goto error_irq;
@@ -786,8 +764,8 @@ out:
786 mutex_unlock(&irq_mapping_update_lock); 764 mutex_unlock(&irq_mapping_update_lock);
787 return irq; 765 return irq;
788error_irq: 766error_irq:
767 __unbind_from_irq(irq);
789 mutex_unlock(&irq_mapping_update_lock); 768 mutex_unlock(&irq_mapping_update_lock);
790 xen_free_irq(irq);
791 return ret; 769 return ret;
792} 770}
793#endif 771#endif
@@ -857,13 +835,18 @@ int xen_pirq_from_irq(unsigned irq)
857 return pirq_from_irq(irq); 835 return pirq_from_irq(irq);
858} 836}
859EXPORT_SYMBOL_GPL(xen_pirq_from_irq); 837EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
838
860int bind_evtchn_to_irq(unsigned int evtchn) 839int bind_evtchn_to_irq(unsigned int evtchn)
861{ 840{
862 int irq; 841 int irq;
842 int ret;
843
844 if (evtchn >= xen_evtchn_max_channels())
845 return -ENOMEM;
863 846
864 mutex_lock(&irq_mapping_update_lock); 847 mutex_lock(&irq_mapping_update_lock);
865 848
866 irq = evtchn_to_irq[evtchn]; 849 irq = get_evtchn_to_irq(evtchn);
867 850
868 if (irq == -1) { 851 if (irq == -1) {
869 irq = xen_allocate_irq_dynamic(); 852 irq = xen_allocate_irq_dynamic();
@@ -873,7 +856,12 @@ int bind_evtchn_to_irq(unsigned int evtchn)
873 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, 856 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
874 handle_edge_irq, "event"); 857 handle_edge_irq, "event");
875 858
876 xen_irq_info_evtchn_init(irq, evtchn); 859 ret = xen_irq_info_evtchn_setup(irq, evtchn);
860 if (ret < 0) {
861 __unbind_from_irq(irq);
862 irq = ret;
863 goto out;
864 }
877 } else { 865 } else {
878 struct irq_info *info = info_for_irq(irq); 866 struct irq_info *info = info_for_irq(irq);
879 WARN_ON(info == NULL || info->type != IRQT_EVTCHN); 867 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -890,6 +878,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
890{ 878{
891 struct evtchn_bind_ipi bind_ipi; 879 struct evtchn_bind_ipi bind_ipi;
892 int evtchn, irq; 880 int evtchn, irq;
881 int ret;
893 882
894 mutex_lock(&irq_mapping_update_lock); 883 mutex_lock(&irq_mapping_update_lock);
895 884
@@ -909,8 +898,12 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
909 BUG(); 898 BUG();
910 evtchn = bind_ipi.port; 899 evtchn = bind_ipi.port;
911 900
912 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); 901 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
913 902 if (ret < 0) {
903 __unbind_from_irq(irq);
904 irq = ret;
905 goto out;
906 }
914 bind_evtchn_to_cpu(evtchn, cpu); 907 bind_evtchn_to_cpu(evtchn, cpu);
915 } else { 908 } else {
916 struct irq_info *info = info_for_irq(irq); 909 struct irq_info *info = info_for_irq(irq);
@@ -943,7 +936,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
943 int port, rc = -ENOENT; 936 int port, rc = -ENOENT;
944 937
945 memset(&status, 0, sizeof(status)); 938 memset(&status, 0, sizeof(status));
946 for (port = 0; port <= NR_EVENT_CHANNELS; port++) { 939 for (port = 0; port < xen_evtchn_max_channels(); port++) {
947 status.dom = DOMID_SELF; 940 status.dom = DOMID_SELF;
948 status.port = port; 941 status.port = port;
949 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); 942 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
@@ -959,6 +952,19 @@ static int find_virq(unsigned int virq, unsigned int cpu)
959 return rc; 952 return rc;
960} 953}
961 954
955/**
956 * xen_evtchn_nr_channels - number of usable event channel ports
957 *
958 * This may be less than the maximum supported by the current
959 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
960 * supported.
961 */
962unsigned xen_evtchn_nr_channels(void)
963{
964 return evtchn_ops->nr_channels();
965}
966EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
967
962int bind_virq_to_irq(unsigned int virq, unsigned int cpu) 968int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
963{ 969{
964 struct evtchn_bind_virq bind_virq; 970 struct evtchn_bind_virq bind_virq;
@@ -989,7 +995,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
989 evtchn = ret; 995 evtchn = ret;
990 } 996 }
991 997
992 xen_irq_info_virq_init(cpu, irq, evtchn, virq); 998 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
999 if (ret < 0) {
1000 __unbind_from_irq(irq);
1001 irq = ret;
1002 goto out;
1003 }
993 1004
994 bind_evtchn_to_cpu(evtchn, cpu); 1005 bind_evtchn_to_cpu(evtchn, cpu);
995 } else { 1006 } else {
@@ -1005,50 +1016,8 @@ out:
1005 1016
1006static void unbind_from_irq(unsigned int irq) 1017static void unbind_from_irq(unsigned int irq)
1007{ 1018{
1008 struct evtchn_close close;
1009 int evtchn = evtchn_from_irq(irq);
1010 struct irq_info *info = irq_get_handler_data(irq);
1011
1012 if (WARN_ON(!info))
1013 return;
1014
1015 mutex_lock(&irq_mapping_update_lock); 1019 mutex_lock(&irq_mapping_update_lock);
1016 1020 __unbind_from_irq(irq);
1017 if (info->refcnt > 0) {
1018 info->refcnt--;
1019 if (info->refcnt != 0)
1020 goto done;
1021 }
1022
1023 if (VALID_EVTCHN(evtchn)) {
1024 close.port = evtchn;
1025 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
1026 BUG();
1027
1028 switch (type_from_irq(irq)) {
1029 case IRQT_VIRQ:
1030 per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
1031 [virq_from_irq(irq)] = -1;
1032 break;
1033 case IRQT_IPI:
1034 per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
1035 [ipi_from_irq(irq)] = -1;
1036 break;
1037 default:
1038 break;
1039 }
1040
1041 /* Closed ports are implicitly re-bound to VCPU0. */
1042 bind_evtchn_to_cpu(evtchn, 0);
1043
1044 evtchn_to_irq[evtchn] = -1;
1045 }
1046
1047 BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
1048
1049 xen_free_irq(irq);
1050
1051 done:
1052 mutex_unlock(&irq_mapping_update_lock); 1021 mutex_unlock(&irq_mapping_update_lock);
1053} 1022}
1054 1023
@@ -1148,9 +1117,26 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1148} 1117}
1149EXPORT_SYMBOL_GPL(unbind_from_irqhandler); 1118EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1150 1119
1120/**
1121 * xen_set_irq_priority() - set an event channel priority.
1122 * @irq:irq bound to an event channel.
1123 * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1124 */
1125int xen_set_irq_priority(unsigned irq, unsigned priority)
1126{
1127 struct evtchn_set_priority set_priority;
1128
1129 set_priority.port = evtchn_from_irq(irq);
1130 set_priority.priority = priority;
1131
1132 return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1133 &set_priority);
1134}
1135EXPORT_SYMBOL_GPL(xen_set_irq_priority);
1136
1151int evtchn_make_refcounted(unsigned int evtchn) 1137int evtchn_make_refcounted(unsigned int evtchn)
1152{ 1138{
1153 int irq = evtchn_to_irq[evtchn]; 1139 int irq = get_evtchn_to_irq(evtchn);
1154 struct irq_info *info; 1140 struct irq_info *info;
1155 1141
1156 if (irq == -1) 1142 if (irq == -1)
@@ -1175,12 +1161,12 @@ int evtchn_get(unsigned int evtchn)
1175 struct irq_info *info; 1161 struct irq_info *info;
1176 int err = -ENOENT; 1162 int err = -ENOENT;
1177 1163
1178 if (evtchn >= NR_EVENT_CHANNELS) 1164 if (evtchn >= xen_evtchn_max_channels())
1179 return -EINVAL; 1165 return -EINVAL;
1180 1166
1181 mutex_lock(&irq_mapping_update_lock); 1167 mutex_lock(&irq_mapping_update_lock);
1182 1168
1183 irq = evtchn_to_irq[evtchn]; 1169 irq = get_evtchn_to_irq(evtchn);
1184 if (irq == -1) 1170 if (irq == -1)
1185 goto done; 1171 goto done;
1186 1172
@@ -1204,7 +1190,7 @@ EXPORT_SYMBOL_GPL(evtchn_get);
1204 1190
1205void evtchn_put(unsigned int evtchn) 1191void evtchn_put(unsigned int evtchn)
1206{ 1192{
1207 int irq = evtchn_to_irq[evtchn]; 1193 int irq = get_evtchn_to_irq(evtchn);
1208 if (WARN_ON(irq == -1)) 1194 if (WARN_ON(irq == -1))
1209 return; 1195 return;
1210 unbind_from_irq(irq); 1196 unbind_from_irq(irq);
@@ -1228,222 +1214,21 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1228 notify_remote_via_irq(irq); 1214 notify_remote_via_irq(irq);
1229} 1215}
1230 1216
1231irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
1232{
1233 struct shared_info *sh = HYPERVISOR_shared_info;
1234 int cpu = smp_processor_id();
1235 xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
1236 int i;
1237 unsigned long flags;
1238 static DEFINE_SPINLOCK(debug_lock);
1239 struct vcpu_info *v;
1240
1241 spin_lock_irqsave(&debug_lock, flags);
1242
1243 printk("\nvcpu %d\n ", cpu);
1244
1245 for_each_online_cpu(i) {
1246 int pending;
1247 v = per_cpu(xen_vcpu, i);
1248 pending = (get_irq_regs() && i == cpu)
1249 ? xen_irqs_disabled(get_irq_regs())
1250 : v->evtchn_upcall_mask;
1251 printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
1252 pending, v->evtchn_upcall_pending,
1253 (int)(sizeof(v->evtchn_pending_sel)*2),
1254 v->evtchn_pending_sel);
1255 }
1256 v = per_cpu(xen_vcpu, cpu);
1257
1258 printk("\npending:\n ");
1259 for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
1260 printk("%0*"PRI_xen_ulong"%s",
1261 (int)sizeof(sh->evtchn_pending[0])*2,
1262 sh->evtchn_pending[i],
1263 i % 8 == 0 ? "\n " : " ");
1264 printk("\nglobal mask:\n ");
1265 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
1266 printk("%0*"PRI_xen_ulong"%s",
1267 (int)(sizeof(sh->evtchn_mask[0])*2),
1268 sh->evtchn_mask[i],
1269 i % 8 == 0 ? "\n " : " ");
1270
1271 printk("\nglobally unmasked:\n ");
1272 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
1273 printk("%0*"PRI_xen_ulong"%s",
1274 (int)(sizeof(sh->evtchn_mask[0])*2),
1275 sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
1276 i % 8 == 0 ? "\n " : " ");
1277
1278 printk("\nlocal cpu%d mask:\n ", cpu);
1279 for (i = (NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
1280 printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
1281 cpu_evtchn[i],
1282 i % 8 == 0 ? "\n " : " ");
1283
1284 printk("\nlocally unmasked:\n ");
1285 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
1286 xen_ulong_t pending = sh->evtchn_pending[i]
1287 & ~sh->evtchn_mask[i]
1288 & cpu_evtchn[i];
1289 printk("%0*"PRI_xen_ulong"%s",
1290 (int)(sizeof(sh->evtchn_mask[0])*2),
1291 pending, i % 8 == 0 ? "\n " : " ");
1292 }
1293
1294 printk("\npending list:\n");
1295 for (i = 0; i < NR_EVENT_CHANNELS; i++) {
1296 if (sync_test_bit(i, BM(sh->evtchn_pending))) {
1297 int word_idx = i / BITS_PER_EVTCHN_WORD;
1298 printk(" %d: event %d -> irq %d%s%s%s\n",
1299 cpu_from_evtchn(i), i,
1300 evtchn_to_irq[i],
1301 sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
1302 ? "" : " l2-clear",
1303 !sync_test_bit(i, BM(sh->evtchn_mask))
1304 ? "" : " globally-masked",
1305 sync_test_bit(i, BM(cpu_evtchn))
1306 ? "" : " locally-masked");
1307 }
1308 }
1309
1310 spin_unlock_irqrestore(&debug_lock, flags);
1311
1312 return IRQ_HANDLED;
1313}
1314
1315static DEFINE_PER_CPU(unsigned, xed_nesting_count); 1217static DEFINE_PER_CPU(unsigned, xed_nesting_count);
1316static DEFINE_PER_CPU(unsigned int, current_word_idx);
1317static DEFINE_PER_CPU(unsigned int, current_bit_idx);
1318
1319/*
1320 * Mask out the i least significant bits of w
1321 */
1322#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
1323 1218
1324/*
1325 * Search the CPUs pending events bitmasks. For each one found, map
1326 * the event number to an irq, and feed it into do_IRQ() for
1327 * handling.
1328 *
1329 * Xen uses a two-level bitmap to speed searching. The first level is
1330 * a bitset of words which contain pending event bits. The second
1331 * level is a bitset of pending events themselves.
1332 */
1333static void __xen_evtchn_do_upcall(void) 1219static void __xen_evtchn_do_upcall(void)
1334{ 1220{
1335 int start_word_idx, start_bit_idx;
1336 int word_idx, bit_idx;
1337 int i, irq;
1338 int cpu = get_cpu();
1339 struct shared_info *s = HYPERVISOR_shared_info;
1340 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); 1221 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1222 int cpu = get_cpu();
1341 unsigned count; 1223 unsigned count;
1342 1224
1343 do { 1225 do {
1344 xen_ulong_t pending_words;
1345 xen_ulong_t pending_bits;
1346 struct irq_desc *desc;
1347
1348 vcpu_info->evtchn_upcall_pending = 0; 1226 vcpu_info->evtchn_upcall_pending = 0;
1349 1227
1350 if (__this_cpu_inc_return(xed_nesting_count) - 1) 1228 if (__this_cpu_inc_return(xed_nesting_count) - 1)
1351 goto out; 1229 goto out;
1352 1230
1353 /* 1231 xen_evtchn_handle_events(cpu);
1354 * Master flag must be cleared /before/ clearing
1355 * selector flag. xchg_xen_ulong must contain an
1356 * appropriate barrier.
1357 */
1358 if ((irq = per_cpu(virq_to_irq, cpu)[VIRQ_TIMER]) != -1) {
1359 int evtchn = evtchn_from_irq(irq);
1360 word_idx = evtchn / BITS_PER_LONG;
1361 pending_bits = evtchn % BITS_PER_LONG;
1362 if (active_evtchns(cpu, s, word_idx) & (1ULL << pending_bits)) {
1363 desc = irq_to_desc(irq);
1364 if (desc)
1365 generic_handle_irq_desc(irq, desc);
1366 }
1367 }
1368
1369 pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
1370
1371 start_word_idx = __this_cpu_read(current_word_idx);
1372 start_bit_idx = __this_cpu_read(current_bit_idx);
1373
1374 word_idx = start_word_idx;
1375
1376 for (i = 0; pending_words != 0; i++) {
1377 xen_ulong_t words;
1378
1379 words = MASK_LSBS(pending_words, word_idx);
1380
1381 /*
1382 * If we masked out all events, wrap to beginning.
1383 */
1384 if (words == 0) {
1385 word_idx = 0;
1386 bit_idx = 0;
1387 continue;
1388 }
1389 word_idx = EVTCHN_FIRST_BIT(words);
1390
1391 pending_bits = active_evtchns(cpu, s, word_idx);
1392 bit_idx = 0; /* usually scan entire word from start */
1393 /*
1394 * We scan the starting word in two parts.
1395 *
1396 * 1st time: start in the middle, scanning the
1397 * upper bits.
1398 *
1399 * 2nd time: scan the whole word (not just the
1400 * parts skipped in the first pass) -- if an
1401 * event in the previously scanned bits is
1402 * pending again it would just be scanned on
1403 * the next loop anyway.
1404 */
1405 if (word_idx == start_word_idx) {
1406 if (i == 0)
1407 bit_idx = start_bit_idx;
1408 }
1409
1410 do {
1411 xen_ulong_t bits;
1412 int port;
1413
1414 bits = MASK_LSBS(pending_bits, bit_idx);
1415
1416 /* If we masked out all events, move on. */
1417 if (bits == 0)
1418 break;
1419
1420 bit_idx = EVTCHN_FIRST_BIT(bits);
1421
1422 /* Process port. */
1423 port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
1424 irq = evtchn_to_irq[port];
1425
1426 if (irq != -1) {
1427 desc = irq_to_desc(irq);
1428 if (desc)
1429 generic_handle_irq_desc(irq, desc);
1430 }
1431
1432 bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
1433
1434 /* Next caller starts at last processed + 1 */
1435 __this_cpu_write(current_word_idx,
1436 bit_idx ? word_idx :
1437 (word_idx+1) % BITS_PER_EVTCHN_WORD);
1438 __this_cpu_write(current_bit_idx, bit_idx);
1439 } while (bit_idx != 0);
1440
1441 /* Scan start_l1i twice; all others once. */
1442 if ((word_idx != start_word_idx) || (i != 0))
1443 pending_words &= ~(1UL << word_idx);
1444
1445 word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
1446 }
1447 1232
1448 BUG_ON(!irqs_disabled()); 1233 BUG_ON(!irqs_disabled());
1449 1234
@@ -1492,12 +1277,12 @@ void rebind_evtchn_irq(int evtchn, int irq)
1492 mutex_lock(&irq_mapping_update_lock); 1277 mutex_lock(&irq_mapping_update_lock);
1493 1278
1494 /* After resume the irq<->evtchn mappings are all cleared out */ 1279 /* After resume the irq<->evtchn mappings are all cleared out */
1495 BUG_ON(evtchn_to_irq[evtchn] != -1); 1280 BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1496 /* Expect irq to have been bound before, 1281 /* Expect irq to have been bound before,
1497 so there should be a proper type */ 1282 so there should be a proper type */
1498 BUG_ON(info->type == IRQT_UNBOUND); 1283 BUG_ON(info->type == IRQT_UNBOUND);
1499 1284
1500 xen_irq_info_evtchn_init(irq, evtchn); 1285 (void)xen_irq_info_evtchn_setup(irq, evtchn);
1501 1286
1502 mutex_unlock(&irq_mapping_update_lock); 1287 mutex_unlock(&irq_mapping_update_lock);
1503 1288
@@ -1511,7 +1296,6 @@ void rebind_evtchn_irq(int evtchn, int irq)
1511/* Rebind an evtchn so that it gets delivered to a specific cpu */ 1296/* Rebind an evtchn so that it gets delivered to a specific cpu */
1512static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) 1297static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1513{ 1298{
1514 struct shared_info *s = HYPERVISOR_shared_info;
1515 struct evtchn_bind_vcpu bind_vcpu; 1299 struct evtchn_bind_vcpu bind_vcpu;
1516 int evtchn = evtchn_from_irq(irq); 1300 int evtchn = evtchn_from_irq(irq);
1517 int masked; 1301 int masked;
@@ -1534,7 +1318,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1534 * Mask the event while changing the VCPU binding to prevent 1318 * Mask the event while changing the VCPU binding to prevent
1535 * it being delivered on an unexpected VCPU. 1319 * it being delivered on an unexpected VCPU.
1536 */ 1320 */
1537 masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); 1321 masked = test_and_set_mask(evtchn);
1538 1322
1539 /* 1323 /*
1540 * If this fails, it usually just indicates that we're dealing with a 1324 * If this fails, it usually just indicates that we're dealing with a
@@ -1558,22 +1342,26 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1558 return rebind_irq_to_cpu(data->irq, tcpu); 1342 return rebind_irq_to_cpu(data->irq, tcpu);
1559} 1343}
1560 1344
1561int resend_irq_on_evtchn(unsigned int irq) 1345static int retrigger_evtchn(int evtchn)
1562{ 1346{
1563 int masked, evtchn = evtchn_from_irq(irq); 1347 int masked;
1564 struct shared_info *s = HYPERVISOR_shared_info;
1565 1348
1566 if (!VALID_EVTCHN(evtchn)) 1349 if (!VALID_EVTCHN(evtchn))
1567 return 1; 1350 return 0;
1568 1351
1569 masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); 1352 masked = test_and_set_mask(evtchn);
1570 sync_set_bit(evtchn, BM(s->evtchn_pending)); 1353 set_evtchn(evtchn);
1571 if (!masked) 1354 if (!masked)
1572 unmask_evtchn(evtchn); 1355 unmask_evtchn(evtchn);
1573 1356
1574 return 1; 1357 return 1;
1575} 1358}
1576 1359
1360int resend_irq_on_evtchn(unsigned int irq)
1361{
1362 return retrigger_evtchn(evtchn_from_irq(irq));
1363}
1364
1577static void enable_dynirq(struct irq_data *data) 1365static void enable_dynirq(struct irq_data *data)
1578{ 1366{
1579 int evtchn = evtchn_from_irq(data->irq); 1367 int evtchn = evtchn_from_irq(data->irq);
@@ -1608,21 +1396,7 @@ static void mask_ack_dynirq(struct irq_data *data)
1608 1396
1609static int retrigger_dynirq(struct irq_data *data) 1397static int retrigger_dynirq(struct irq_data *data)
1610{ 1398{
1611 int evtchn = evtchn_from_irq(data->irq); 1399 return retrigger_evtchn(evtchn_from_irq(data->irq));
1612 struct shared_info *sh = HYPERVISOR_shared_info;
1613 int ret = 0;
1614
1615 if (VALID_EVTCHN(evtchn)) {
1616 int masked;
1617
1618 masked = sync_test_and_set_bit(evtchn, BM(sh->evtchn_mask));
1619 sync_set_bit(evtchn, BM(sh->evtchn_pending));
1620 if (!masked)
1621 unmask_evtchn(evtchn);
1622 ret = 1;
1623 }
1624
1625 return ret;
1626} 1400}
1627 1401
1628static void restore_pirqs(void) 1402static void restore_pirqs(void)
@@ -1683,7 +1457,7 @@ static void restore_cpu_virqs(unsigned int cpu)
1683 evtchn = bind_virq.port; 1457 evtchn = bind_virq.port;
1684 1458
1685 /* Record the new mapping. */ 1459 /* Record the new mapping. */
1686 xen_irq_info_virq_init(cpu, irq, evtchn, virq); 1460 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1687 bind_evtchn_to_cpu(evtchn, cpu); 1461 bind_evtchn_to_cpu(evtchn, cpu);
1688 } 1462 }
1689} 1463}
@@ -1707,7 +1481,7 @@ static void restore_cpu_ipis(unsigned int cpu)
1707 evtchn = bind_ipi.port; 1481 evtchn = bind_ipi.port;
1708 1482
1709 /* Record the new mapping. */ 1483 /* Record the new mapping. */
1710 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); 1484 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1711 bind_evtchn_to_cpu(evtchn, cpu); 1485 bind_evtchn_to_cpu(evtchn, cpu);
1712 } 1486 }
1713} 1487}
@@ -1784,21 +1558,18 @@ EXPORT_SYMBOL_GPL(xen_test_irq_shared);
1784 1558
1785void xen_irq_resume(void) 1559void xen_irq_resume(void)
1786{ 1560{
1787 unsigned int cpu, evtchn; 1561 unsigned int cpu;
1788 struct irq_info *info; 1562 struct irq_info *info;
1789 1563
1790 init_evtchn_cpu_bindings();
1791
1792 /* New event-channel space is not 'live' yet. */ 1564 /* New event-channel space is not 'live' yet. */
1793 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) 1565 xen_evtchn_mask_all();
1794 mask_evtchn(evtchn); 1566 xen_evtchn_resume();
1795 1567
1796 /* No IRQ <-> event-channel mappings. */ 1568 /* No IRQ <-> event-channel mappings. */
1797 list_for_each_entry(info, &xen_irq_list_head, list) 1569 list_for_each_entry(info, &xen_irq_list_head, list)
1798 info->evtchn = 0; /* zap event-channel binding */ 1570 info->evtchn = 0; /* zap event-channel binding */
1799 1571
1800 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) 1572 clear_evtchn_to_irq_all();
1801 evtchn_to_irq[evtchn] = -1;
1802 1573
1803 for_each_possible_cpu(cpu) { 1574 for_each_possible_cpu(cpu) {
1804 restore_cpu_virqs(cpu); 1575 restore_cpu_virqs(cpu);
@@ -1889,27 +1660,40 @@ void xen_callback_vector(void)
1889void xen_callback_vector(void) {} 1660void xen_callback_vector(void) {}
1890#endif 1661#endif
1891 1662
1663#undef MODULE_PARAM_PREFIX
1664#define MODULE_PARAM_PREFIX "xen."
1665
1666static bool fifo_events = true;
1667module_param(fifo_events, bool, 0);
1668
1892void __init xen_init_IRQ(void) 1669void __init xen_init_IRQ(void)
1893{ 1670{
1894 int i; 1671 int ret = -EINVAL;
1895 1672
1896 evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), 1673 if (fifo_events)
1897 GFP_KERNEL); 1674 ret = xen_evtchn_fifo_init();
1898 BUG_ON(!evtchn_to_irq); 1675 if (ret < 0)
1899 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1676 xen_evtchn_2l_init();
1900 evtchn_to_irq[i] = -1;
1901 1677
1902 init_evtchn_cpu_bindings(); 1678 evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
1679 sizeof(*evtchn_to_irq), GFP_KERNEL);
1680 BUG_ON(!evtchn_to_irq);
1903 1681
1904 /* No event channels are 'live' right now. */ 1682 /* No event channels are 'live' right now. */
1905 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1683 xen_evtchn_mask_all();
1906 mask_evtchn(i);
1907 1684
1908 pirq_needs_eoi = pirq_needs_eoi_flag; 1685 pirq_needs_eoi = pirq_needs_eoi_flag;
1909 1686
1910#ifdef CONFIG_X86 1687#ifdef CONFIG_X86
1911 if (xen_hvm_domain()) { 1688 if (xen_pv_domain()) {
1689 irq_ctx_init(smp_processor_id());
1690 if (xen_initial_domain())
1691 pci_xen_initial_domain();
1692 }
1693 if (xen_feature(XENFEAT_hvm_callback_vector))
1912 xen_callback_vector(); 1694 xen_callback_vector();
1695
1696 if (xen_hvm_domain()) {
1913 native_init_IRQ(); 1697 native_init_IRQ();
1914 /* pci_xen_hvm_init must be called after native_init_IRQ so that 1698 /* pci_xen_hvm_init must be called after native_init_IRQ so that
1915 * __acpi_register_gsi can point at the right function */ 1699 * __acpi_register_gsi can point at the right function */
@@ -1918,13 +1702,10 @@ void __init xen_init_IRQ(void)
1918 int rc; 1702 int rc;
1919 struct physdev_pirq_eoi_gmfn eoi_gmfn; 1703 struct physdev_pirq_eoi_gmfn eoi_gmfn;
1920 1704
1921 irq_ctx_init(smp_processor_id());
1922 if (xen_initial_domain())
1923 pci_xen_initial_domain();
1924
1925 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); 1705 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
1926 eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map); 1706 eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
1927 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); 1707 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
1708 /* TODO: No PVH support for PIRQ EOI */
1928 if (rc != 0) { 1709 if (rc != 0) {
1929 free_page((unsigned long) pirq_eoi_map); 1710 free_page((unsigned long) pirq_eoi_map);
1930 pirq_eoi_map = NULL; 1711 pirq_eoi_map = NULL;
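
A note on the xen_init_IRQ() rework above: with MODULE_PARAM_PREFIX redefined to "xen." and fifo_events declared via module_param(), the ABI choice becomes a boot-time knob. xen_evtchn_fifo_init() is tried first and the 2-level code is only selected when that fails, presumably because the hypervisor rejects EVTCHNOP_init_control (the exact failure mode is my assumption; the fallback path itself is in the hunk). Since this code is built in, the parameter is spelled on the kernel command line as:

    xen.fifo_events=0        /* force the legacy 2-level event-channel ABI */

Also visible in the same hunk: irq_ctx_init()/pci_xen_initial_domain() now run only for xen_pv_domain(), and the callback vector is installed whenever XENFEAT_hvm_callback_vector is advertised rather than only for xen_hvm_domain(), which, as I read it, is what lets an auto-translated guest such as PVH receive events without the rest of the HVM IRQ setup.
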
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
new file mode 100644
index 000000000000..1de2a191b395
--- /dev/null
+++ b/drivers/xen/events/events_fifo.c
@@ -0,0 +1,428 @@
1/*
2 * Xen event channels (FIFO-based ABI)
3 *
4 * Copyright (C) 2013 Citrix Systems R&D ltd.
5 *
6 * This source code is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
10 *
11 * Or, when distributed separately from the Linux kernel or
12 * incorporated into other software packages, subject to the following
13 * license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
35
36#include <linux/linkage.h>
37#include <linux/interrupt.h>
38#include <linux/irq.h>
39#include <linux/module.h>
40#include <linux/smp.h>
41#include <linux/percpu.h>
42#include <linux/cpu.h>
43
44#include <asm/sync_bitops.h>
45#include <asm/xen/hypercall.h>
46#include <asm/xen/hypervisor.h>
47#include <asm/xen/page.h>
48
49#include <xen/xen.h>
50#include <xen/xen-ops.h>
51#include <xen/events.h>
52#include <xen/interface/xen.h>
53#include <xen/interface/event_channel.h>
54
55#include "events_internal.h"
56
57#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
58#define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE)
59
60struct evtchn_fifo_queue {
61 uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
62};
63
64static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
65static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue);
66static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES] __read_mostly;
67static unsigned event_array_pages __read_mostly;
68
69#define BM(w) ((unsigned long *)(w))
70
71static inline event_word_t *event_word_from_port(unsigned port)
72{
73 unsigned i = port / EVENT_WORDS_PER_PAGE;
74
75 return event_array[i] + port % EVENT_WORDS_PER_PAGE;
76}
77
78static unsigned evtchn_fifo_max_channels(void)
79{
80 return EVTCHN_FIFO_NR_CHANNELS;
81}
82
83static unsigned evtchn_fifo_nr_channels(void)
84{
85 return event_array_pages * EVENT_WORDS_PER_PAGE;
86}
87
88static void free_unused_array_pages(void)
89{
90 unsigned i;
91
92 for (i = event_array_pages; i < MAX_EVENT_ARRAY_PAGES; i++) {
93 if (!event_array[i])
94 break;
95 free_page((unsigned long)event_array[i]);
96 event_array[i] = NULL;
97 }
98}
99
100static void init_array_page(event_word_t *array_page)
101{
102 unsigned i;
103
104 for (i = 0; i < EVENT_WORDS_PER_PAGE; i++)
105 array_page[i] = 1 << EVTCHN_FIFO_MASKED;
106}
107
108static int evtchn_fifo_setup(struct irq_info *info)
109{
110 unsigned port = info->evtchn;
111 unsigned new_array_pages;
112 int ret;
113
114 new_array_pages = port / EVENT_WORDS_PER_PAGE + 1;
115
116 if (new_array_pages > MAX_EVENT_ARRAY_PAGES)
117 return -EINVAL;
118
119 while (event_array_pages < new_array_pages) {
120 void *array_page;
121 struct evtchn_expand_array expand_array;
122
123 /* Might already have a page if we've resumed. */
124 array_page = event_array[event_array_pages];
125 if (!array_page) {
126 array_page = (void *)__get_free_page(GFP_KERNEL);
127 if (array_page == NULL) {
128 ret = -ENOMEM;
129 goto error;
130 }
131 event_array[event_array_pages] = array_page;
132 }
133
134 /* Mask all events in this page before adding it. */
135 init_array_page(array_page);
136
137 expand_array.array_gfn = virt_to_mfn(array_page);
138
139 ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
140 if (ret < 0)
141 goto error;
142
143 event_array_pages++;
144 }
145 return 0;
146
147 error:
148 if (event_array_pages == 0)
149 panic("xen: unable to expand event array with initial page (%d)\n", ret);
150 else
151 pr_err("unable to expand event array (%d)\n", ret);
152 free_unused_array_pages();
153 return ret;
154}
155
156static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu)
157{
158 /* no-op */
159}
160
161static void evtchn_fifo_clear_pending(unsigned port)
162{
163 event_word_t *word = event_word_from_port(port);
164 sync_clear_bit(EVTCHN_FIFO_PENDING, BM(word));
165}
166
167static void evtchn_fifo_set_pending(unsigned port)
168{
169 event_word_t *word = event_word_from_port(port);
170 sync_set_bit(EVTCHN_FIFO_PENDING, BM(word));
171}
172
173static bool evtchn_fifo_is_pending(unsigned port)
174{
175 event_word_t *word = event_word_from_port(port);
176 return sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
177}
178
179static bool evtchn_fifo_test_and_set_mask(unsigned port)
180{
181 event_word_t *word = event_word_from_port(port);
182 return sync_test_and_set_bit(EVTCHN_FIFO_MASKED, BM(word));
183}
184
185static void evtchn_fifo_mask(unsigned port)
186{
187 event_word_t *word = event_word_from_port(port);
188 sync_set_bit(EVTCHN_FIFO_MASKED, BM(word));
189}
190
191/*
192 * Clear MASKED, spinning if BUSY is set.
193 */
194static void clear_masked(volatile event_word_t *word)
195{
196 event_word_t new, old, w;
197
198 w = *word;
199
200 do {
201 old = w & ~(1 << EVTCHN_FIFO_BUSY);
202 new = old & ~(1 << EVTCHN_FIFO_MASKED);
203 w = sync_cmpxchg(word, old, new);
204 } while (w != old);
205}
206
207static void evtchn_fifo_unmask(unsigned port)
208{
209 event_word_t *word = event_word_from_port(port);
210
211 BUG_ON(!irqs_disabled());
212
213 clear_masked(word);
214 if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))) {
215 struct evtchn_unmask unmask = { .port = port };
216 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
217 }
218}
219
220static uint32_t clear_linked(volatile event_word_t *word)
221{
222 event_word_t new, old, w;
223
224 w = *word;
225
226 do {
227 old = w;
228 new = (w & ~((1 << EVTCHN_FIFO_LINKED)
229 | EVTCHN_FIFO_LINK_MASK));
230 } while ((w = sync_cmpxchg(word, old, new)) != old);
231
232 return w & EVTCHN_FIFO_LINK_MASK;
233}
234
235static void handle_irq_for_port(unsigned port)
236{
237 int irq;
238 struct irq_desc *desc;
239
240 irq = get_evtchn_to_irq(port);
241 if (irq != -1) {
242 desc = irq_to_desc(irq);
243 if (desc)
244 generic_handle_irq_desc(irq, desc);
245 }
246}
247
248static void consume_one_event(unsigned cpu,
249 struct evtchn_fifo_control_block *control_block,
250 unsigned priority, uint32_t *ready)
251{
252 struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
253 uint32_t head;
254 unsigned port;
255 event_word_t *word;
256
257 head = q->head[priority];
258
259 /*
260 * Reached the tail last time? Read the new HEAD from the
261 * control block.
262 */
263 if (head == 0) {
264 rmb(); /* Ensure word is up-to-date before reading head. */
265 head = control_block->head[priority];
266 }
267
268 port = head;
269 word = event_word_from_port(port);
270 head = clear_linked(word);
271
272 /*
273 * If the link is non-zero, there are more events in the
274 * queue, otherwise the queue is empty.
275 *
276 * If the queue is empty, clear this priority from our local
277 * copy of the ready word.
278 */
279 if (head == 0)
280 clear_bit(priority, BM(ready));
281
282 if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))
283 && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word)))
284 handle_irq_for_port(port);
285
286 q->head[priority] = head;
287}
288
289static void evtchn_fifo_handle_events(unsigned cpu)
290{
291 struct evtchn_fifo_control_block *control_block;
292 uint32_t ready;
293 unsigned q;
294
295 control_block = per_cpu(cpu_control_block, cpu);
296
297 ready = xchg(&control_block->ready, 0);
298
299 while (ready) {
300 q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES);
301 consume_one_event(cpu, control_block, q, &ready);
302 ready |= xchg(&control_block->ready, 0);
303 }
304}
305
306static void evtchn_fifo_resume(void)
307{
308 unsigned cpu;
309
310 for_each_possible_cpu(cpu) {
311 void *control_block = per_cpu(cpu_control_block, cpu);
312 struct evtchn_init_control init_control;
313 int ret;
314
315 if (!control_block)
316 continue;
317
318 /*
319 * If this CPU is offline, take the opportunity to
320 * free the control block while it is not being
321 * used.
322 */
323 if (!cpu_online(cpu)) {
324 free_page((unsigned long)control_block);
325 per_cpu(cpu_control_block, cpu) = NULL;
326 continue;
327 }
328
329 init_control.control_gfn = virt_to_mfn(control_block);
330 init_control.offset = 0;
331 init_control.vcpu = cpu;
332
333 ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control,
334 &init_control);
335 if (ret < 0)
336 BUG();
337 }
338
339 /*
340 * The event array starts out as empty again and is extended
341 * as normal when events are bound. The existing pages will
342 * be reused.
343 */
344 event_array_pages = 0;
345}
346
347static const struct evtchn_ops evtchn_ops_fifo = {
348 .max_channels = evtchn_fifo_max_channels,
349 .nr_channels = evtchn_fifo_nr_channels,
350 .setup = evtchn_fifo_setup,
351 .bind_to_cpu = evtchn_fifo_bind_to_cpu,
352 .clear_pending = evtchn_fifo_clear_pending,
353 .set_pending = evtchn_fifo_set_pending,
354 .is_pending = evtchn_fifo_is_pending,
355 .test_and_set_mask = evtchn_fifo_test_and_set_mask,
356 .mask = evtchn_fifo_mask,
357 .unmask = evtchn_fifo_unmask,
358 .handle_events = evtchn_fifo_handle_events,
359 .resume = evtchn_fifo_resume,
360};
361
362static int evtchn_fifo_init_control_block(unsigned cpu)
363{
364 struct page *control_block = NULL;
365 struct evtchn_init_control init_control;
366 int ret = -ENOMEM;
367
368 control_block = alloc_page(GFP_KERNEL|__GFP_ZERO);
369 if (control_block == NULL)
370 goto error;
371
372 init_control.control_gfn = virt_to_mfn(page_address(control_block));
373 init_control.offset = 0;
374 init_control.vcpu = cpu;
375
376 ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
377 if (ret < 0)
378 goto error;
379
380 per_cpu(cpu_control_block, cpu) = page_address(control_block);
381
382 return 0;
383
384 error:
385 __free_page(control_block);
386 return ret;
387}
388
389static int evtchn_fifo_cpu_notification(struct notifier_block *self,
390 unsigned long action,
391 void *hcpu)
392{
393 int cpu = (long)hcpu;
394 int ret = 0;
395
396 switch (action) {
397 case CPU_UP_PREPARE:
398 if (!per_cpu(cpu_control_block, cpu))
399 ret = evtchn_fifo_init_control_block(cpu);
400 break;
401 default:
402 break;
403 }
404 return ret < 0 ? NOTIFY_BAD : NOTIFY_OK;
405}
406
407static struct notifier_block evtchn_fifo_cpu_notifier = {
408 .notifier_call = evtchn_fifo_cpu_notification,
409};
410
411int __init xen_evtchn_fifo_init(void)
412{
413 int cpu = get_cpu();
414 int ret;
415
416 ret = evtchn_fifo_init_control_block(cpu);
417 if (ret < 0)
418 goto out;
419
420 pr_info("Using FIFO-based ABI\n");
421
422 evtchn_ops = &evtchn_ops_fifo;
423
424 register_cpu_notifier(&evtchn_fifo_cpu_notifier);
425out:
426 put_cpu();
427 return ret;
428}
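
Two things in the new file deserve a closer look. First, event_word_from_port() treats the event array as a flat sequence of 32-bit words spread over pages: port / EVENT_WORDS_PER_PAGE picks the page, the remainder picks the word. Second, the queues are manipulated without locks: clear_linked() detaches the head of a per-priority queue with a sync_cmpxchg() loop and hands back the link to the next port. The standalone model below re-implements that update in plain C so the link extraction is easier to follow; the bit positions (PENDING is bit 31, LINKED bit 28) and the 17-bit link field are taken from the FIFO ABI header and are assumptions here, not part of this patch.

    /* Userspace model of clear_linked(): atomically clear LINKED and the
     * link field, returning the previous link (the next port in the queue). */
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t event_word_t;

    #define FIFO_PENDING   31               /* assumed ABI bit positions */
    #define FIFO_LINKED    28
    #define FIFO_LINK_MASK ((1u << 17) - 1)

    static uint32_t model_clear_linked(volatile event_word_t *word)
    {
            event_word_t new, old, w = *word;

            do {
                    old = w;
                    new = w & ~((1u << FIFO_LINKED) | FIFO_LINK_MASK);
            } while ((w = __sync_val_compare_and_swap(word, old, new)) != old);

            return w & FIFO_LINK_MASK;
    }

    int main(void)
    {
            /* A word for a port that is pending and linked to port 42. */
            event_word_t word = (1u << FIFO_PENDING) | (1u << FIFO_LINKED) | 42;

            printf("next port in queue: %u\n", model_clear_linked(&word));
            printf("word afterwards:    %#x\n", word);  /* PENDING still set */
            return 0;
    }

After the unlink the PENDING and MASKED bits are untouched, which is exactly what consume_one_event() relies on when deciding whether to call handle_irq_for_port(). evtchn_fifo_handle_events() then drains the lowest-numbered (highest-priority, per the ABI) non-empty queue first via find_first_bit() and re-reads the ready word after every event, so a freshly raised higher-priority event is handled ahead of lower-priority backlog.
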
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
new file mode 100644
index 000000000000..677f41a0fff9
--- /dev/null
+++ b/drivers/xen/events/events_internal.h
@@ -0,0 +1,150 @@
1/*
2 * Xen Event Channels (internal header)
3 *
4 * Copyright (C) 2013 Citrix Systems R&D Ltd.
5 *
6 * This source code is licensed under the GNU General Public License,
7 * Version 2 or later. See the file COPYING for more details.
8 */
9#ifndef __EVENTS_INTERNAL_H__
10#define __EVENTS_INTERNAL_H__
11
12/* Interrupt types. */
13enum xen_irq_type {
14 IRQT_UNBOUND = 0,
15 IRQT_PIRQ,
16 IRQT_VIRQ,
17 IRQT_IPI,
18 IRQT_EVTCHN
19};
20
21/*
22 * Packed IRQ information:
23 * type - enum xen_irq_type
24 * event channel - irq->event channel mapping
25 * cpu - cpu this event channel is bound to
26 * index - type-specific information:
 27 * PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
28 * guest, or GSI (real passthrough IRQ) of the device.
29 * VIRQ - virq number
30 * IPI - IPI vector
31 * EVTCHN -
32 */
33struct irq_info {
34 struct list_head list;
35 int refcnt;
36 enum xen_irq_type type; /* type */
37 unsigned irq;
38 unsigned int evtchn; /* event channel */
39 unsigned short cpu; /* cpu bound */
40
41 union {
42 unsigned short virq;
43 enum ipi_vector ipi;
44 struct {
45 unsigned short pirq;
46 unsigned short gsi;
47 unsigned char vector;
48 unsigned char flags;
49 uint16_t domid;
50 } pirq;
51 } u;
52};
53
54#define PIRQ_NEEDS_EOI (1 << 0)
55#define PIRQ_SHAREABLE (1 << 1)
56
57struct evtchn_ops {
58 unsigned (*max_channels)(void);
59 unsigned (*nr_channels)(void);
60
61 int (*setup)(struct irq_info *info);
62 void (*bind_to_cpu)(struct irq_info *info, unsigned cpu);
63
64 void (*clear_pending)(unsigned port);
65 void (*set_pending)(unsigned port);
66 bool (*is_pending)(unsigned port);
67 bool (*test_and_set_mask)(unsigned port);
68 void (*mask)(unsigned port);
69 void (*unmask)(unsigned port);
70
71 void (*handle_events)(unsigned cpu);
72 void (*resume)(void);
73};
74
75extern const struct evtchn_ops *evtchn_ops;
76
77extern int **evtchn_to_irq;
78int get_evtchn_to_irq(unsigned int evtchn);
79
80struct irq_info *info_for_irq(unsigned irq);
81unsigned cpu_from_irq(unsigned irq);
82unsigned cpu_from_evtchn(unsigned int evtchn);
83
84static inline unsigned xen_evtchn_max_channels(void)
85{
86 return evtchn_ops->max_channels();
87}
88
89/*
90 * Do any ABI specific setup for a bound event channel before it can
91 * be unmasked and used.
92 */
93static inline int xen_evtchn_port_setup(struct irq_info *info)
94{
95 if (evtchn_ops->setup)
96 return evtchn_ops->setup(info);
97 return 0;
98}
99
100static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info,
101 unsigned cpu)
102{
103 evtchn_ops->bind_to_cpu(info, cpu);
104}
105
106static inline void clear_evtchn(unsigned port)
107{
108 evtchn_ops->clear_pending(port);
109}
110
111static inline void set_evtchn(unsigned port)
112{
113 evtchn_ops->set_pending(port);
114}
115
116static inline bool test_evtchn(unsigned port)
117{
118 return evtchn_ops->is_pending(port);
119}
120
121static inline bool test_and_set_mask(unsigned port)
122{
123 return evtchn_ops->test_and_set_mask(port);
124}
125
126static inline void mask_evtchn(unsigned port)
127{
128 return evtchn_ops->mask(port);
129}
130
131static inline void unmask_evtchn(unsigned port)
132{
133 return evtchn_ops->unmask(port);
134}
135
136static inline void xen_evtchn_handle_events(unsigned cpu)
137{
138 return evtchn_ops->handle_events(cpu);
139}
140
141static inline void xen_evtchn_resume(void)
142{
143 if (evtchn_ops->resume)
144 evtchn_ops->resume();
145}
146
147void xen_evtchn_2l_init(void);
148int xen_evtchn_fifo_init(void);
149
150#endif /* #ifndef __EVENTS_INTERNAL_H__ */
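
The header also shows that the old flat evtchn_to_irq[NR_EVENT_CHANNELS] array is gone: evtchn_to_irq is now an int ** reached through get_evtchn_to_irq(), and xen_init_IRQ() above sizes only the row-pointer table (EVTCHN_ROW(xen_evtchn_max_channels()) entries), so the 2^17-port FIFO space does not have to be populated up front. A minimal userspace sketch of that two-level layout follows; the row width and the allocate-rows-on-demand behaviour are assumptions for illustration, since the real EVTCHN_ROW/EVTCHN_COL helpers live in events_base.c and are not part of this hunk.

    #include <stdio.h>
    #include <stdlib.h>

    #define EVTCHN_PER_ROW 1024             /* assumed: one page of ints */
    #define MAX_CHANNELS   (1 << 17)        /* FIFO ABI port space */
    #define NR_ROWS        (MAX_CHANNELS / EVTCHN_PER_ROW)

    static int *rows[NR_ROWS];              /* the "int **" top level */

    static int get_evtchn_to_irq(unsigned evtchn)
    {
            if (!rows[evtchn / EVTCHN_PER_ROW])
                    return -1;              /* row never populated */
            return rows[evtchn / EVTCHN_PER_ROW][evtchn % EVTCHN_PER_ROW];
    }

    static int set_evtchn_to_irq(unsigned evtchn, int irq)
    {
            unsigned row = evtchn / EVTCHN_PER_ROW;
            unsigned col = evtchn % EVTCHN_PER_ROW;
            unsigned i;

            if (!rows[row]) {               /* allocate a row on demand */
                    rows[row] = malloc(EVTCHN_PER_ROW * sizeof(int));
                    if (!rows[row])
                            return -1;
                    for (i = 0; i < EVTCHN_PER_ROW; i++)
                            rows[row][i] = -1;
            }
            rows[row][col] = irq;
            return 0;
    }

    int main(void)
    {
            set_evtchn_to_irq(70000, 25);
            printf("port 70000 -> irq %d\n", get_evtchn_to_irq(70000));
            printf("port 3     -> irq %d\n", get_evtchn_to_irq(3));
            return 0;
    }

One related distinction worth keeping in mind for the rest of the series: max_channels() is the ABI ceiling (what lookup structures must be able to index), while nr_channels() is what is currently usable; for the FIFO ABI the latter grows as event-array pages are added, and it is the value the evtchn device below now checks user-supplied ports against.
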
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 5de2063e16d3..00f40f051d95 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -417,7 +417,7 @@ static long evtchn_ioctl(struct file *file,
417 break; 417 break;
418 418
419 rc = -EINVAL; 419 rc = -EINVAL;
420 if (unbind.port >= NR_EVENT_CHANNELS) 420 if (unbind.port >= xen_evtchn_nr_channels())
421 break; 421 break;
422 422
423 rc = -ENOTCONN; 423 rc = -ENOTCONN;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e41c79c986ea..073b4a19a8b0 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -846,7 +846,7 @@ static int __init gntdev_init(void)
846 if (!xen_domain()) 846 if (!xen_domain())
847 return -ENODEV; 847 return -ENODEV;
848 848
849 use_ptemod = xen_pv_domain(); 849 use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);
850 850
851 err = misc_register(&gntdev_miscdev); 851 err = misc_register(&gntdev_miscdev);
852 if (err != 0) { 852 if (err != 0) {
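
The gntdev change above is the first of several hunks in this pull (gnttab_map() and gnttab_setup() in grant-table.c and xenbus_ring_ops_init() further below follow the same pattern) that stop asking "is this a PV or an HVM guest?" and instead ask how the guest physmap is managed. That is the piece that lets PVH, a PV guest running with auto-translated paging, simply reuse the HVM-style paths. The shape of the test, with the two helper names being hypothetical placeholders for illustration only:

    if (xen_feature(XENFEAT_auto_translated_physmap)) {
            /* PVHVM and PVH: the hypervisor maintains the p2m, so work in
             * terms of GFNs and never edit guest PTEs directly. */
            use_gfn_based_path();           /* hypothetical helper */
    } else {
            /* Classic PV: kernel-managed page tables, PTE-level mapping. */
            use_pte_based_path();           /* hypothetical helper */
    }
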
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index aa846a48f400..1ce1c40331f3 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -62,12 +62,10 @@
62 62
63static grant_ref_t **gnttab_list; 63static grant_ref_t **gnttab_list;
64static unsigned int nr_grant_frames; 64static unsigned int nr_grant_frames;
65static unsigned int boot_max_nr_grant_frames;
66static int gnttab_free_count; 65static int gnttab_free_count;
67static grant_ref_t gnttab_free_head; 66static grant_ref_t gnttab_free_head;
68static DEFINE_SPINLOCK(gnttab_list_lock); 67static DEFINE_SPINLOCK(gnttab_list_lock);
69unsigned long xen_hvm_resume_frames; 68struct grant_frames xen_auto_xlat_grant_frames;
70EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
71 69
72static union { 70static union {
73 struct grant_entry_v1 *v1; 71 struct grant_entry_v1 *v1;
@@ -827,6 +825,11 @@ static unsigned int __max_nr_grant_frames(void)
827unsigned int gnttab_max_grant_frames(void) 825unsigned int gnttab_max_grant_frames(void)
828{ 826{
829 unsigned int xen_max = __max_nr_grant_frames(); 827 unsigned int xen_max = __max_nr_grant_frames();
828 static unsigned int boot_max_nr_grant_frames;
829
830 /* First time, initialize it properly. */
831 if (!boot_max_nr_grant_frames)
832 boot_max_nr_grant_frames = __max_nr_grant_frames();
830 833
831 if (xen_max > boot_max_nr_grant_frames) 834 if (xen_max > boot_max_nr_grant_frames)
832 return boot_max_nr_grant_frames; 835 return boot_max_nr_grant_frames;
@@ -834,6 +837,51 @@ unsigned int gnttab_max_grant_frames(void)
834} 837}
835EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); 838EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
836 839
840int gnttab_setup_auto_xlat_frames(unsigned long addr)
841{
842 xen_pfn_t *pfn;
843 unsigned int max_nr_gframes = __max_nr_grant_frames();
844 unsigned int i;
845 void *vaddr;
846
847 if (xen_auto_xlat_grant_frames.count)
848 return -EINVAL;
849
850 vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes);
851 if (vaddr == NULL) {
852 pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n",
853 addr);
854 return -ENOMEM;
855 }
856 pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
857 if (!pfn) {
858 xen_unmap(vaddr);
859 return -ENOMEM;
860 }
861 for (i = 0; i < max_nr_gframes; i++)
862 pfn[i] = PFN_DOWN(addr) + i;
863
864 xen_auto_xlat_grant_frames.vaddr = vaddr;
865 xen_auto_xlat_grant_frames.pfn = pfn;
866 xen_auto_xlat_grant_frames.count = max_nr_gframes;
867
868 return 0;
869}
870EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);
871
872void gnttab_free_auto_xlat_frames(void)
873{
874 if (!xen_auto_xlat_grant_frames.count)
875 return;
876 kfree(xen_auto_xlat_grant_frames.pfn);
877 xen_unmap(xen_auto_xlat_grant_frames.vaddr);
878
879 xen_auto_xlat_grant_frames.pfn = NULL;
880 xen_auto_xlat_grant_frames.count = 0;
881 xen_auto_xlat_grant_frames.vaddr = NULL;
882}
883EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
884
837/* Handling of paged out grant targets (GNTST_eagain) */ 885/* Handling of paged out grant targets (GNTST_eagain) */
838#define MAX_DELAY 256 886#define MAX_DELAY 256
839static inline void 887static inline void
@@ -1060,10 +1108,11 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1060 unsigned int nr_gframes = end_idx + 1; 1108 unsigned int nr_gframes = end_idx + 1;
1061 int rc; 1109 int rc;
1062 1110
1063 if (xen_hvm_domain()) { 1111 if (xen_feature(XENFEAT_auto_translated_physmap)) {
1064 struct xen_add_to_physmap xatp; 1112 struct xen_add_to_physmap xatp;
1065 unsigned int i = end_idx; 1113 unsigned int i = end_idx;
1066 rc = 0; 1114 rc = 0;
1115 BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
1067 /* 1116 /*
1068 * Loop backwards, so that the first hypercall has the largest 1117 * Loop backwards, so that the first hypercall has the largest
1069 * index, ensuring that the table will grow only once. 1118 * index, ensuring that the table will grow only once.
@@ -1072,7 +1121,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1072 xatp.domid = DOMID_SELF; 1121 xatp.domid = DOMID_SELF;
1073 xatp.idx = i; 1122 xatp.idx = i;
1074 xatp.space = XENMAPSPACE_grant_table; 1123 xatp.space = XENMAPSPACE_grant_table;
1075 xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i; 1124 xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
1076 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); 1125 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
1077 if (rc != 0) { 1126 if (rc != 0) {
1078 pr_warn("grant table add_to_physmap failed, err=%d\n", 1127 pr_warn("grant table add_to_physmap failed, err=%d\n",
@@ -1135,10 +1184,8 @@ static void gnttab_request_version(void)
1135 int rc; 1184 int rc;
1136 struct gnttab_set_version gsv; 1185 struct gnttab_set_version gsv;
1137 1186
1138 if (xen_hvm_domain()) 1187 gsv.version = 1;
1139 gsv.version = 1; 1188
1140 else
1141 gsv.version = 2;
1142 rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1); 1189 rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1143 if (rc == 0 && gsv.version == 2) { 1190 if (rc == 0 && gsv.version == 2) {
1144 grant_table_version = 2; 1191 grant_table_version = 2;
@@ -1169,22 +1216,15 @@ static int gnttab_setup(void)
1169 if (max_nr_gframes < nr_grant_frames) 1216 if (max_nr_gframes < nr_grant_frames)
1170 return -ENOSYS; 1217 return -ENOSYS;
1171 1218
1172 if (xen_pv_domain()) 1219 if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
1173 return gnttab_map(0, nr_grant_frames - 1); 1220 gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
1174
1175 if (gnttab_shared.addr == NULL) {
1176 gnttab_shared.addr = xen_remap(xen_hvm_resume_frames,
1177 PAGE_SIZE * max_nr_gframes);
1178 if (gnttab_shared.addr == NULL) { 1221 if (gnttab_shared.addr == NULL) {
1179 pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n", 1222 pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
1180 xen_hvm_resume_frames); 1223 (unsigned long)xen_auto_xlat_grant_frames.vaddr);
1181 return -ENOMEM; 1224 return -ENOMEM;
1182 } 1225 }
1183 } 1226 }
1184 1227 return gnttab_map(0, nr_grant_frames - 1);
1185 gnttab_map(0, nr_grant_frames - 1);
1186
1187 return 0;
1188} 1228}
1189 1229
1190int gnttab_resume(void) 1230int gnttab_resume(void)
@@ -1227,13 +1267,12 @@ int gnttab_init(void)
1227 1267
1228 gnttab_request_version(); 1268 gnttab_request_version();
1229 nr_grant_frames = 1; 1269 nr_grant_frames = 1;
1230 boot_max_nr_grant_frames = __max_nr_grant_frames();
1231 1270
1232 /* Determine the maximum number of frames required for the 1271 /* Determine the maximum number of frames required for the
1233 * grant reference free list on the current hypervisor. 1272 * grant reference free list on the current hypervisor.
1234 */ 1273 */
1235 BUG_ON(grefs_per_grant_frame == 0); 1274 BUG_ON(grefs_per_grant_frame == 0);
1236 max_nr_glist_frames = (boot_max_nr_grant_frames * 1275 max_nr_glist_frames = (gnttab_max_grant_frames() *
1237 grefs_per_grant_frame / RPP); 1276 grefs_per_grant_frame / RPP);
1238 1277
1239 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), 1278 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
@@ -1286,5 +1325,6 @@ static int __gnttab_init(void)
1286 1325
1287 return gnttab_init(); 1326 return gnttab_init();
1288} 1327}
1289 1328/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
1290core_initcall(__gnttab_init); 1329 * beforehand to initialize xen_auto_xlat_grant_frames. */
1330core_initcall_sync(__gnttab_init);
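
The new gnttab_setup_auto_xlat_frames()/gnttab_free_auto_xlat_frames() pair replaces the xen_hvm_resume_frames global: an auto-translated guest first registers where its shared grant frames live, and gnttab_setup() then takes gnttab_shared.addr from xen_auto_xlat_grant_frames instead of doing its own xen_remap(). That ordering is also why __gnttab_init moved from core_initcall to core_initcall_sync, as the closing comment says, so PVH setup code can register frames beforehand. Condensed from the platform-pci.c hunk below, the sequence a caller follows looks like this (error handling simplified):

    grant_frames = alloc_xen_mmio(PAGE_SIZE * gnttab_max_grant_frames());

    ret = gnttab_setup_auto_xlat_frames(grant_frames);  /* register first */
    if (ret)
            goto out;

    ret = gnttab_init();                                /* then map + init */
    if (ret)
            gnttab_free_auto_xlat_frames();             /* unwind on failure */
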
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 188825122aae..dd9c249ea311 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -26,7 +26,9 @@
26#include <asm/xen/hypervisor.h> 26#include <asm/xen/hypervisor.h>
27#include <asm/xen/hypercall.h> 27#include <asm/xen/hypercall.h>
28#include "../pci/pci.h" 28#include "../pci/pci.h"
29#ifdef CONFIG_PCI_MMCONFIG
29#include <asm/pci_x86.h> 30#include <asm/pci_x86.h>
31#endif
30 32
31static bool __read_mostly pci_seg_supported = true; 33static bool __read_mostly pci_seg_supported = true;
32 34
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 2f3528e93cb9..a1361c312c06 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -108,6 +108,7 @@ static int platform_pci_init(struct pci_dev *pdev,
108 long ioaddr; 108 long ioaddr;
109 long mmio_addr, mmio_len; 109 long mmio_addr, mmio_len;
110 unsigned int max_nr_gframes; 110 unsigned int max_nr_gframes;
111 unsigned long grant_frames;
111 112
112 if (!xen_domain()) 113 if (!xen_domain())
113 return -ENODEV; 114 return -ENODEV;
@@ -154,13 +155,17 @@ static int platform_pci_init(struct pci_dev *pdev,
154 } 155 }
155 156
156 max_nr_gframes = gnttab_max_grant_frames(); 157 max_nr_gframes = gnttab_max_grant_frames();
157 xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); 158 grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
158 ret = gnttab_init(); 159 ret = gnttab_setup_auto_xlat_frames(grant_frames);
159 if (ret) 160 if (ret)
160 goto out; 161 goto out;
162 ret = gnttab_init();
163 if (ret)
164 goto grant_out;
161 xenbus_probe(NULL); 165 xenbus_probe(NULL);
162 return 0; 166 return 0;
163 167grant_out:
168 gnttab_free_auto_xlat_frames();
164out: 169out:
165 pci_release_region(pdev, 0); 170 pci_release_region(pdev, 0);
166mem_out: 171mem_out:
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index ec097d6f964d..01d59e66565d 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -45,6 +45,7 @@
45#include <xen/grant_table.h> 45#include <xen/grant_table.h>
46#include <xen/xenbus.h> 46#include <xen/xenbus.h>
47#include <xen/xen.h> 47#include <xen/xen.h>
48#include <xen/features.h>
48 49
49#include "xenbus_probe.h" 50#include "xenbus_probe.h"
50 51
@@ -743,7 +744,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
743 744
744void __init xenbus_ring_ops_init(void) 745void __init xenbus_ring_ops_init(void)
745{ 746{
746 if (xen_pv_domain()) 747 if (!xen_feature(XENFEAT_auto_translated_physmap))
747 ring_ops = &ring_ops_pv; 748 ring_ops = &ring_ops_pv;
748 else 749 else
749 ring_ops = &ring_ops_hvm; 750 ring_ops = &ring_ops_hvm;
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 129bf84c19ec..cb385c10d2b1 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -496,7 +496,7 @@ subsys_initcall(xenbus_probe_frontend_init);
496#ifndef MODULE 496#ifndef MODULE
497static int __init boot_wait_for_devices(void) 497static int __init boot_wait_for_devices(void)
498{ 498{
499 if (xen_hvm_domain() && !xen_platform_pci_unplug) 499 if (!xen_has_pv_devices())
500 return -ENODEV; 500 return -ENODEV;
501 501
502 ready_to_wait_for_devices = 1; 502 ready_to_wait_for_devices = 1;