author	Eric W. Biederman <ebiederm@xmission.com>	2007-02-23 06:40:58 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-26 13:34:08 -0500
commit	610142927b5bc149da92b03c7ab08b8b5f205b74 (patch)
tree	600c115072fc5f28c07dddfbe52f0dcf376d8504
parent	bc5e81a1519abc69472bb67deace7bb1ac09d65a (diff)
[PATCH] x86_64 irq: Safely cleanup an irq after moving it.
The problem: After moving an interrupt, when is it safe to tear down the data structures for receiving the interrupt at the old location?

With a normal pci device it is possible to issue a read to the device to flush all posted writes. This does not work for the oldest ioapics, because they are on a 3-wire apic bus, which is a completely different data path. For some more modern ioapics, when everything is using front side bus delivery, you can flush interrupts by simply issuing a read to the ioapic. For other modern ioapics, empirical testing has shown that this does not work.

So it appears the only reliable way to know that the last of the irqs sent before the ioapic was reprogrammed have been received is to receive the first irq from the ioapic after it was reprogrammed. Once we know the last irq message has been received from an ioapic into a local apic, we then need to know that the irq message has been processed through the local apics.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
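In outline, the patch makes teardown a two-step handshake: the first irq that arrives at the new destination proves that the old ioapic programming has drained, so irq_complete_move() sends a lowest-priority cleanup IPI to every cpu in the old domain, and each of those cpus then frees its stale vector in smp_irq_move_cleanup_interrupt(). The following is a minimal user-space sketch of that handshake, assuming a plain unsigned bitmask in place of cpumask_t and direct function calls in place of real IPIs; it models the idea, not the kernel code itself:

/*
 * Minimal user-space model of the two-step cleanup handshake.
 * The names mirror the patch (irq_cfg, irq_complete_move,
 * smp_irq_move_cleanup_interrupt), but the types are stand-ins:
 * a plain unsigned bitmask replaces cpumask_t and ordinary calls
 * replace real IPIs.
 */
#include <stdio.h>

struct irq_cfg {
	unsigned domain;		/* cpus receiving the irq now */
	unsigned old_domain;		/* cpus that used to receive it */
	unsigned move_cleanup_count;	/* outstanding cleanups */
	int move_in_progress;
};

/*
 * Step 1: runs in the ack path.  Only the first irq that arrives at
 * the *new* destination proves the old programming has drained, so
 * only then do we "IPI" every cpu in old_domain to free its vector.
 */
static void irq_complete_move(struct irq_cfg *cfg, int cpu)
{
	if (!cfg->move_in_progress)
		return;
	if (!(cfg->domain & (1u << cpu)))
		return;		/* irq hit the old location; keep waiting */

	cfg->move_cleanup_count = __builtin_popcount(cfg->old_domain);
	cfg->move_in_progress = 0;
	printf("cpu %d saw first irq at new location; cleanup IPI sent\n", cpu);
}

/*
 * Step 2: runs on each cpu in old_domain when the lowest-priority
 * cleanup "IPI" arrives, i.e. after all earlier irqs were processed.
 */
static void smp_irq_move_cleanup_interrupt(struct irq_cfg *cfg, int cpu)
{
	if (!cfg->move_cleanup_count)
		return;
	cfg->move_cleanup_count--;
	printf("cpu %d freed its old vector (%u left)\n",
	       cpu, cfg->move_cleanup_count);
}

int main(void)
{
	struct irq_cfg cfg = {
		.domain = 1u << 2,	/* irq moved to cpu 2 ... */
		.old_domain = 1u << 0,	/* ... away from cpu 0   */
		.move_in_progress = 1,
	};

	irq_complete_move(&cfg, 2);		 /* first irq at new cpu */
	smp_irq_move_cleanup_interrupt(&cfg, 0); /* cleanup at old cpu   */
	return 0;
}

Because IRQ_MOVE_CLEANUP_VECTOR sits at the lowest usable priority level (0x20 - 0x2f), the local apic delivers the cleanup interrupt only after all earlier, higher-priority irq messages have been handled, which is exactly the ordering the teardown depends on.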
 arch/x86_64/kernel/entry.S   |  3 +++
 arch/x86_64/kernel/i8259.c   |  6 +++++-
 arch/x86_64/kernel/io_apic.c | 78 ++++++++++++++++++++++++++++++++++++++++---
 include/asm-x86_64/hw_irq.h  |  9 +++++++--
 4 files changed, 88 insertions(+), 8 deletions(-)
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 9f5dac64aa8f..ed4350ced3d0 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -675,6 +675,9 @@ END(invalidate_interrupt\num)
 ENTRY(call_function_interrupt)
 	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
 END(call_function_interrupt)
+ENTRY(irq_move_cleanup_interrupt)
+	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
+END(irq_move_cleanup_interrupt)
 #endif
 
 ENTRY(apic_timer_interrupt)
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 45d85630196a..21d95b747437 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -450,6 +450,7 @@ void spurious_interrupt(void);
 void error_interrupt(void);
 void reschedule_interrupt(void);
 void call_function_interrupt(void);
+void irq_move_cleanup_interrupt(void);
 void invalidate_interrupt0(void);
 void invalidate_interrupt1(void);
 void invalidate_interrupt2(void);
@@ -537,7 +538,10 @@ void __init init_IRQ(void)
 
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-#endif
+
+	/* Low priority IPI to cleanup after moving an irq */
+	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+#endif
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 	set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 8dede0bd2267..48593f6b708f 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -36,6 +36,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 
+#include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
@@ -49,7 +50,10 @@
 
 struct irq_cfg {
 	cpumask_t domain;
+	cpumask_t old_domain;
+	unsigned move_cleanup_count;
 	u8 vector;
+	u8 move_in_progress : 1;
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -652,7 +656,6 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-	cpumask_t old_mask = CPU_MASK_NONE;
 	unsigned int old_vector;
 	int cpu;
 	struct irq_cfg *cfg;
@@ -663,18 +666,20 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	/* Only try and allocate irqs on cpus that are present */
 	cpus_and(mask, mask, cpu_online_map);
 
+	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+		return -EBUSY;
+
 	old_vector = cfg->vector;
 	if (old_vector) {
 		cpumask_t tmp;
 		cpus_and(tmp, cfg->domain, mask);
 		if (!cpus_empty(tmp))
 			return 0;
-		cpus_and(old_mask, cfg->domain, cpu_online_map);
 	}
 
 	for_each_cpu_mask(cpu, mask) {
 		cpumask_t domain, new_mask;
-		int new_cpu, old_cpu;
+		int new_cpu;
 		int vector, offset;
 
 		domain = vector_allocation_domain(cpu);
@@ -699,8 +704,10 @@ next:
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		for_each_cpu_mask(old_cpu, old_mask)
-			per_cpu(vector_irq, old_cpu)[old_vector] = -1;
+		if (old_vector) {
+			cfg->move_in_progress = 1;
+			cfg->old_domain = cfg->domain;
+		}
 		for_each_cpu_mask(new_cpu, new_mask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		cfg->vector = vector;
@@ -1360,8 +1367,68 @@ static int ioapic_retrigger_irq(unsigned int irq)
  * races.
  */
 
+#ifdef CONFIG_SMP
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+	ack_APIC_irq();
+	exit_idle();
+	irq_enter();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		unsigned int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+		if (irq >= NR_IRQS)
+			continue;
+
+		desc = irq_desc + irq;
+		cfg = irq_cfg + irq;
+		spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+			goto unlock;
+
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cfg->move_cleanup_count--;
+unlock:
+		spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned vector, me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	vector = ~get_irq_regs()->orig_rax;
+	me = smp_processor_id();
+	if ((vector == cfg->vector) &&
+	    cpu_isset(smp_processor_id(), cfg->domain)) {
+		cpumask_t cleanup_mask;
+
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+}
+#else
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
 static void ack_apic_edge(unsigned int irq)
 {
+	irq_complete_move(irq);
 	move_native_irq(irq);
 	ack_APIC_irq();
 }
@@ -1370,6 +1437,7 @@ static void ack_apic_level(unsigned int irq)
 {
 	int do_unmask_irq = 0;
 
+	irq_complete_move(irq);
 #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
 	/* If we are moving the irq we need to mask it */
 	if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h
index dc395edc2f2a..2e4b7a5ed1c4 100644
--- a/include/asm-x86_64/hw_irq.h
+++ b/include/asm-x86_64/hw_irq.h
@@ -32,10 +32,15 @@
 #define IA32_SYSCALL_VECTOR	0x80
 
 
+/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
+ * cleanup after irq migration.
+ */
+#define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
+
 /*
  * Vectors 0x20-0x2f are used for ISA interrupts.
  */
-#define IRQ0_VECTOR		FIRST_EXTERNAL_VECTOR
+#define IRQ0_VECTOR		FIRST_EXTERNAL_VECTOR + 0x10
 #define IRQ1_VECTOR		IRQ0_VECTOR + 1
 #define IRQ2_VECTOR		IRQ0_VECTOR + 2
 #define IRQ3_VECTOR		IRQ0_VECTOR + 3
@@ -82,7 +87,7 @@
 
 /*
  * First APIC vector available to drivers: (vectors 0x30-0xee)
- * we start at 0x31 to spread out vectors evenly between priority
+ * we start at 0x41 to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
  */
 #define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
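For reference, with FIRST_EXTERNAL_VECTOR at 0x20 (its value in these x86_64 headers, not shown in this diff), the new layout works out to: IRQ_MOVE_CLEANUP_VECTOR = 0x20, IRQ0_VECTOR = 0x20 + 0x10 = 0x30, IRQ15_VECTOR = 0x3f, and FIRST_DEVICE_VECTOR = IRQ15_VECTOR + 2 = 0x41, which is why the comment above now says device vectors start at 0x41.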