diff options
author | Jeremy Fitzhardinge <jeremy@xensource.com> | 2007-07-17 21:37:05 -0400 |
---|---|---|
committer | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-07-18 11:47:42 -0400 |
commit | e46cdb66c8fc1c8d61cfae0f219ff47ac4b9d531 (patch) | |
tree | 7d9cdfef91e69fcfcba762a5a70cd58900308a5b /arch/i386/xen/events.c | |
parent | 3b827c1b3aadf3adb4c602d19863f2d24e7cbc18 (diff) |
xen: event channels
Xen implements interrupts in terms of event channels. Each guest
domain gets 1024 event channels which can be used for a variety of
purposes, such as Xen timer events, inter-domain events,
inter-processor events (IPI) or for real hardware IRQs.
Within the kernel, we map the event channels to IRQs, and implement
the whole interrupt handling using a Xen irq_chip.
Rather than setting NR_IRQ to 1024 under PARAVIRT in order to
accommodate Xen, we create a dynamic mapping between event channels and
IRQs. Ideally, Linux will eventually move towards dynamically
allocating per-irq structures, and we can use a 1:1 mapping between
event channels and irqs.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Diffstat (limited to 'arch/i386/xen/events.c')
-rw-r--r-- | arch/i386/xen/events.c | 511 |
1 files changed, 511 insertions, 0 deletions
diff --git a/arch/i386/xen/events.c b/arch/i386/xen/events.c new file mode 100644 index 000000000000..e7c5d00ab4fe --- /dev/null +++ b/arch/i386/xen/events.c | |||
@@ -0,0 +1,511 @@ | |||
1 | /* | ||
2 | * Xen event channels | ||
3 | * | ||
4 | * Xen models interrupts with abstract event channels. Because each | ||
5 | * domain gets 1024 event channels, but NR_IRQ is not that large, we | ||
6 | * must dynamically map irqs<->event channels. The event channels | ||
7 | * interface with the rest of the kernel by defining a xen interrupt | ||
8 | * chip. When an event is received, it is mapped to an irq and sent | |
9 | * through the normal interrupt processing path. | ||
10 | * | ||
11 | * There are four kinds of events which can be mapped to an event | ||
12 | * channel: | ||
13 | * | ||
14 | * 1. Inter-domain notifications. This includes all the virtual | ||
15 | * device events, since they're driven by front-ends in another domain | ||
16 | * (typically dom0). | ||
17 | * 2. VIRQs, typically used for timers. These are per-cpu events. | ||
18 | * 3. IPIs. | ||
19 | * 4. Hardware interrupts. Not supported at present. | ||
20 | * | ||
21 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | ||
22 | */ | ||
23 | |||
24 | #include <linux/linkage.h> | ||
25 | #include <linux/interrupt.h> | ||
26 | #include <linux/irq.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/string.h> | ||
29 | |||
30 | #include <asm/ptrace.h> | ||
31 | #include <asm/irq.h> | ||
32 | #include <asm/sync_bitops.h> | ||
33 | #include <asm/xen/hypercall.h> | ||
34 | |||
35 | #include <xen/events.h> | ||
36 | #include <xen/interface/xen.h> | ||
37 | #include <xen/interface/event_channel.h> | ||
38 | |||
39 | #include "xen-ops.h" | ||
40 | |||
41 | /* | ||
42 | * This lock protects updates to the following mapping and reference-count | ||
43 | * arrays. The lock does not need to be acquired to read the mapping tables. | ||
44 | */ | ||
45 | static DEFINE_SPINLOCK(irq_mapping_update_lock); | ||
46 | |||
47 | /* IRQ <-> VIRQ mapping. */ | ||
48 | static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; | ||
49 | |||
/*
 * Compact per-IRQ record: the event channel bound to the irq, an index
 * within the binding type, and the binding type itself.
 */
struct packed_irq {
	unsigned short evtchn;	/* event channel (port) number */
	unsigned char index;	/* sub-type index, e.g. the virq number */
	unsigned char type;	/* one of the IRQT_* binding types */
};
57 | |||
58 | static struct packed_irq irq_info[NR_IRQS]; | ||
59 | |||
60 | /* Binding types. */ | ||
61 | enum { IRQT_UNBOUND, IRQT_PIRQ, IRQT_VIRQ, IRQT_IPI, IRQT_EVTCHN }; | ||
62 | |||
63 | /* Convenient shorthand for packed representation of an unbound IRQ. */ | ||
64 | #define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) | ||
65 | |||
66 | static int evtchn_to_irq[NR_EVENT_CHANNELS] = { | ||
67 | [0 ... NR_EVENT_CHANNELS-1] = -1 | ||
68 | }; | ||
69 | static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; | ||
70 | static u8 cpu_evtchn[NR_EVENT_CHANNELS]; | ||
71 | |||
72 | /* Reference counts for bindings to IRQs. */ | ||
73 | static int irq_bindcount[NR_IRQS]; | ||
74 | |||
75 | /* Xen will never allocate port zero for any purpose. */ | ||
76 | #define VALID_EVTCHN(chn) ((chn) != 0) | ||
77 | |||
78 | /* | ||
79 | * Force a proper event-channel callback from Xen after clearing the | ||
80 | * callback mask. We do this in a very simple manner, by making a call | ||
81 | * down into Xen. The pending flag will be checked by Xen on return. | ||
82 | */ | ||
83 | void force_evtchn_callback(void) | ||
84 | { | ||
85 | (void)HYPERVISOR_xen_version(0, NULL); | ||
86 | } | ||
87 | EXPORT_SYMBOL_GPL(force_evtchn_callback); | ||
88 | |||
89 | static struct irq_chip xen_dynamic_chip; | ||
90 | |||
91 | /* Constructor for packed IRQ information. */ | ||
92 | static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) | ||
93 | { | ||
94 | return (struct packed_irq) { evtchn, index, type }; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Accessors for packed IRQ information. | ||
99 | */ | ||
100 | static inline unsigned int evtchn_from_irq(int irq) | ||
101 | { | ||
102 | return irq_info[irq].evtchn; | ||
103 | } | ||
104 | |||
105 | static inline unsigned int index_from_irq(int irq) | ||
106 | { | ||
107 | return irq_info[irq].index; | ||
108 | } | ||
109 | |||
110 | static inline unsigned int type_from_irq(int irq) | ||
111 | { | ||
112 | return irq_info[irq].type; | ||
113 | } | ||
114 | |||
115 | static inline unsigned long active_evtchns(unsigned int cpu, | ||
116 | struct shared_info *sh, | ||
117 | unsigned int idx) | ||
118 | { | ||
119 | return (sh->evtchn_pending[idx] & | ||
120 | cpu_evtchn_mask[cpu][idx] & | ||
121 | ~sh->evtchn_mask[idx]); | ||
122 | } | ||
123 | |||
124 | static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) | ||
125 | { | ||
126 | int irq = evtchn_to_irq[chn]; | ||
127 | |||
128 | BUG_ON(irq == -1); | ||
129 | #ifdef CONFIG_SMP | ||
130 | irq_desc[irq].affinity = cpumask_of_cpu(cpu); | ||
131 | #endif | ||
132 | |||
133 | __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); | ||
134 | __set_bit(chn, cpu_evtchn_mask[cpu]); | ||
135 | |||
136 | cpu_evtchn[chn] = cpu; | ||
137 | } | ||
138 | |||
139 | static void init_evtchn_cpu_bindings(void) | ||
140 | { | ||
141 | #ifdef CONFIG_SMP | ||
142 | int i; | ||
143 | /* By default all event channels notify CPU#0. */ | ||
144 | for (i = 0; i < NR_IRQS; i++) | ||
145 | irq_desc[i].affinity = cpumask_of_cpu(0); | ||
146 | #endif | ||
147 | |||
148 | memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); | ||
149 | memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); | ||
150 | } | ||
151 | |||
152 | static inline unsigned int cpu_from_evtchn(unsigned int evtchn) | ||
153 | { | ||
154 | return cpu_evtchn[evtchn]; | ||
155 | } | ||
156 | |||
157 | static inline void clear_evtchn(int port) | ||
158 | { | ||
159 | struct shared_info *s = HYPERVISOR_shared_info; | ||
160 | sync_clear_bit(port, &s->evtchn_pending[0]); | ||
161 | } | ||
162 | |||
163 | static inline void set_evtchn(int port) | ||
164 | { | ||
165 | struct shared_info *s = HYPERVISOR_shared_info; | ||
166 | sync_set_bit(port, &s->evtchn_pending[0]); | ||
167 | } | ||
168 | |||
169 | |||
/**
 * notify_remote_via_irq - notify the far end of the channel bound to an irq
 * @irq: irq whose event channel should be signalled
 *
 * The event channel is looked up from @irq at call time, which is what
 * makes this safe across save/restore, unlike
 * notify_remote_via_evtchn().  If @irq currently has no valid event
 * channel the notification is silently dropped.
 */
void notify_remote_via_irq(int irq)
{
	int evtchn = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return;

	notify_remote_via_evtchn(evtchn);
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
186 | |||
187 | static void mask_evtchn(int port) | ||
188 | { | ||
189 | struct shared_info *s = HYPERVISOR_shared_info; | ||
190 | sync_set_bit(port, &s->evtchn_mask[0]); | ||
191 | } | ||
192 | |||
193 | static void unmask_evtchn(int port) | ||
194 | { | ||
195 | struct shared_info *s = HYPERVISOR_shared_info; | ||
196 | unsigned int cpu = get_cpu(); | ||
197 | |||
198 | BUG_ON(!irqs_disabled()); | ||
199 | |||
200 | /* Slow path (hypercall) if this is a non-local port. */ | ||
201 | if (unlikely(cpu != cpu_from_evtchn(port))) { | ||
202 | struct evtchn_unmask unmask = { .port = port }; | ||
203 | (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); | ||
204 | } else { | ||
205 | struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); | ||
206 | |||
207 | sync_clear_bit(port, &s->evtchn_mask[0]); | ||
208 | |||
209 | /* | ||
210 | * The following is basically the equivalent of | ||
211 | * 'hw_resend_irq'. Just like a real IO-APIC we 'lose | ||
212 | * the interrupt edge' if the channel is masked. | ||
213 | */ | ||
214 | if (sync_test_bit(port, &s->evtchn_pending[0]) && | ||
215 | !sync_test_and_set_bit(port / BITS_PER_LONG, | ||
216 | &vcpu_info->evtchn_pending_sel)) | ||
217 | vcpu_info->evtchn_upcall_pending = 1; | ||
218 | } | ||
219 | |||
220 | put_cpu(); | ||
221 | } | ||
222 | |||
223 | static int find_unbound_irq(void) | ||
224 | { | ||
225 | int irq; | ||
226 | |||
227 | /* Only allocate from dynirq range */ | ||
228 | for (irq = 0; irq < NR_IRQS; irq++) | ||
229 | if (irq_bindcount[irq] == 0) | ||
230 | break; | ||
231 | |||
232 | if (irq == NR_IRQS) | ||
233 | panic("No available IRQ to bind to: increase NR_IRQS!\n"); | ||
234 | |||
235 | return irq; | ||
236 | } | ||
237 | |||
238 | static int bind_evtchn_to_irq(unsigned int evtchn) | ||
239 | { | ||
240 | int irq; | ||
241 | |||
242 | spin_lock(&irq_mapping_update_lock); | ||
243 | |||
244 | irq = evtchn_to_irq[evtchn]; | ||
245 | |||
246 | if (irq == -1) { | ||
247 | irq = find_unbound_irq(); | ||
248 | |||
249 | dynamic_irq_init(irq); | ||
250 | set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, | ||
251 | handle_level_irq, "event"); | ||
252 | |||
253 | evtchn_to_irq[evtchn] = irq; | ||
254 | irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); | ||
255 | } | ||
256 | |||
257 | irq_bindcount[irq]++; | ||
258 | |||
259 | spin_unlock(&irq_mapping_update_lock); | ||
260 | |||
261 | return irq; | ||
262 | } | ||
263 | |||
264 | static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) | ||
265 | { | ||
266 | struct evtchn_bind_virq bind_virq; | ||
267 | int evtchn, irq; | ||
268 | |||
269 | spin_lock(&irq_mapping_update_lock); | ||
270 | |||
271 | irq = per_cpu(virq_to_irq, cpu)[virq]; | ||
272 | |||
273 | if (irq == -1) { | ||
274 | bind_virq.virq = virq; | ||
275 | bind_virq.vcpu = cpu; | ||
276 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | ||
277 | &bind_virq) != 0) | ||
278 | BUG(); | ||
279 | evtchn = bind_virq.port; | ||
280 | |||
281 | irq = find_unbound_irq(); | ||
282 | |||
283 | dynamic_irq_init(irq); | ||
284 | set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, | ||
285 | handle_level_irq, "virq"); | ||
286 | |||
287 | evtchn_to_irq[evtchn] = irq; | ||
288 | irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); | ||
289 | |||
290 | per_cpu(virq_to_irq, cpu)[virq] = irq; | ||
291 | |||
292 | bind_evtchn_to_cpu(evtchn, cpu); | ||
293 | } | ||
294 | |||
295 | irq_bindcount[irq]++; | ||
296 | |||
297 | spin_unlock(&irq_mapping_update_lock); | ||
298 | |||
299 | return irq; | ||
300 | } | ||
301 | |||
302 | static void unbind_from_irq(unsigned int irq) | ||
303 | { | ||
304 | struct evtchn_close close; | ||
305 | int evtchn = evtchn_from_irq(irq); | ||
306 | |||
307 | spin_lock(&irq_mapping_update_lock); | ||
308 | |||
309 | if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { | ||
310 | close.port = evtchn; | ||
311 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | ||
312 | BUG(); | ||
313 | |||
314 | switch (type_from_irq(irq)) { | ||
315 | case IRQT_VIRQ: | ||
316 | per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) | ||
317 | [index_from_irq(irq)] = -1; | ||
318 | break; | ||
319 | default: | ||
320 | break; | ||
321 | } | ||
322 | |||
323 | /* Closed ports are implicitly re-bound to VCPU0. */ | ||
324 | bind_evtchn_to_cpu(evtchn, 0); | ||
325 | |||
326 | evtchn_to_irq[evtchn] = -1; | ||
327 | irq_info[irq] = IRQ_UNBOUND; | ||
328 | |||
329 | dynamic_irq_init(irq); | ||
330 | } | ||
331 | |||
332 | spin_unlock(&irq_mapping_update_lock); | ||
333 | } | ||
334 | |||
335 | int bind_evtchn_to_irqhandler(unsigned int evtchn, | ||
336 | irqreturn_t (*handler)(int, void *), | ||
337 | unsigned long irqflags, | ||
338 | const char *devname, void *dev_id) | ||
339 | { | ||
340 | unsigned int irq; | ||
341 | int retval; | ||
342 | |||
343 | irq = bind_evtchn_to_irq(evtchn); | ||
344 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | ||
345 | if (retval != 0) { | ||
346 | unbind_from_irq(irq); | ||
347 | return retval; | ||
348 | } | ||
349 | |||
350 | return irq; | ||
351 | } | ||
352 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); | ||
353 | |||
354 | int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, | ||
355 | irqreturn_t (*handler)(int, void *), | ||
356 | unsigned long irqflags, const char *devname, void *dev_id) | ||
357 | { | ||
358 | unsigned int irq; | ||
359 | int retval; | ||
360 | |||
361 | irq = bind_virq_to_irq(virq, cpu); | ||
362 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | ||
363 | if (retval != 0) { | ||
364 | unbind_from_irq(irq); | ||
365 | return retval; | ||
366 | } | ||
367 | |||
368 | return irq; | ||
369 | } | ||
370 | EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); | ||
371 | |||
/*
 * Undo one of the bind_*_to_irqhandler() calls: remove the handler
 * registered with @dev_id from @irq, then drop the binding reference.
 */
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
	free_irq(irq, dev_id);
	unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
378 | |||
379 | /* | ||
380 | * Search the CPUs pending events bitmasks. For each one found, map | ||
381 | * the event number to an irq, and feed it into do_IRQ() for | ||
382 | * handling. | ||
383 | * | ||
384 | * Xen uses a two-level bitmap to speed searching. The first level is | ||
385 | * a bitset of words which contain pending event bits. The second | ||
386 | * level is a bitset of pending events themselves. | ||
387 | */ | ||
388 | fastcall void xen_evtchn_do_upcall(struct pt_regs *regs) | ||
389 | { | ||
390 | int cpu = get_cpu(); | ||
391 | struct shared_info *s = HYPERVISOR_shared_info; | ||
392 | struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); | ||
393 | unsigned long pending_words; | ||
394 | |||
395 | vcpu_info->evtchn_upcall_pending = 0; | ||
396 | |||
397 | /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ | ||
398 | pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); | ||
399 | while (pending_words != 0) { | ||
400 | unsigned long pending_bits; | ||
401 | int word_idx = __ffs(pending_words); | ||
402 | pending_words &= ~(1UL << word_idx); | ||
403 | |||
404 | while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { | ||
405 | int bit_idx = __ffs(pending_bits); | ||
406 | int port = (word_idx * BITS_PER_LONG) + bit_idx; | ||
407 | int irq = evtchn_to_irq[port]; | ||
408 | |||
409 | if (irq != -1) { | ||
410 | regs->orig_eax = ~irq; | ||
411 | do_IRQ(regs); | ||
412 | } | ||
413 | } | ||
414 | } | ||
415 | |||
416 | put_cpu(); | ||
417 | } | ||
418 | |||
419 | /* Rebind an evtchn so that it gets delivered to a specific cpu */ | ||
420 | static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) | ||
421 | { | ||
422 | struct evtchn_bind_vcpu bind_vcpu; | ||
423 | int evtchn = evtchn_from_irq(irq); | ||
424 | |||
425 | if (!VALID_EVTCHN(evtchn)) | ||
426 | return; | ||
427 | |||
428 | /* Send future instances of this interrupt to other vcpu. */ | ||
429 | bind_vcpu.port = evtchn; | ||
430 | bind_vcpu.vcpu = tcpu; | ||
431 | |||
432 | /* | ||
433 | * If this fails, it usually just indicates that we're dealing with a | ||
434 | * virq or IPI channel, which don't actually need to be rebound. Ignore | ||
435 | * it, but don't do the xenlinux-level rebind in that case. | ||
436 | */ | ||
437 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) | ||
438 | bind_evtchn_to_cpu(evtchn, tcpu); | ||
439 | } | ||
440 | |||
441 | |||
442 | static void set_affinity_irq(unsigned irq, cpumask_t dest) | ||
443 | { | ||
444 | unsigned tcpu = first_cpu(dest); | ||
445 | rebind_irq_to_cpu(irq, tcpu); | ||
446 | } | ||
447 | |||
/* irq_chip unmask hook: unmask the event channel behind @irq, if any. */
static void enable_dynirq(unsigned int irq)
{
	int evtchn = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return;

	unmask_evtchn(evtchn);
}
455 | |||
/* irq_chip mask hook: mask the event channel behind @irq, if any. */
static void disable_dynirq(unsigned int irq)
{
	int evtchn = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return;

	mask_evtchn(evtchn);
}
463 | |||
/*
 * irq_chip ack hook: call move_native_irq() for @irq, then clear the
 * pending bit of its event channel, if it has one.
 */
static void ack_dynirq(unsigned int irq)
{
	int evtchn = evtchn_from_irq(irq);

	move_native_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return;

	clear_evtchn(evtchn);
}
473 | |||
474 | static int retrigger_dynirq(unsigned int irq) | ||
475 | { | ||
476 | int evtchn = evtchn_from_irq(irq); | ||
477 | int ret = 0; | ||
478 | |||
479 | if (VALID_EVTCHN(evtchn)) { | ||
480 | set_evtchn(evtchn); | ||
481 | ret = 1; | ||
482 | } | ||
483 | |||
484 | return ret; | ||
485 | } | ||
486 | |||
487 | static struct irq_chip xen_dynamic_chip __read_mostly = { | ||
488 | .name = "xen-dyn", | ||
489 | .mask = disable_dynirq, | ||
490 | .unmask = enable_dynirq, | ||
491 | .ack = ack_dynirq, | ||
492 | .set_affinity = set_affinity_irq, | ||
493 | .retrigger = retrigger_dynirq, | ||
494 | }; | ||
495 | |||
496 | void __init xen_init_IRQ(void) | ||
497 | { | ||
498 | int i; | ||
499 | |||
500 | init_evtchn_cpu_bindings(); | ||
501 | |||
502 | /* No event channels are 'live' right now. */ | ||
503 | for (i = 0; i < NR_EVENT_CHANNELS; i++) | ||
504 | mask_evtchn(i); | ||
505 | |||
506 | /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ | ||
507 | for (i = 0; i < NR_IRQS; i++) | ||
508 | irq_bindcount[i] = 0; | ||
509 | |||
510 | irq_ctx_init(smp_processor_id()); | ||
511 | } | ||