diff options
author | Grant Grundler <grundler@parisc-linux.org> | 2007-06-10 18:31:41 -0400 |
---|---|---|
committer | Kyle McMartin <kyle@minerva.i.cabal.ca> | 2007-06-12 01:23:30 -0400 |
commit | 462b529f91b618f4bd144bbc6184f616dfb58a1e (patch) | |
tree | eb6cd254ef87ee5b9e3b875023f6368f9747e669 /arch/parisc | |
parent | c3d4ed4e3e5aa8d9e6b4b795f004a7028ce780e9 (diff) |
[PARISC] remove global_ack_eiem
Kudos to Thibaut Varene for spotting the (mis)use of the appropriately named
global_ack_eiem. This took a long time to figure out, and insight
from myself, Kyle McMartin, and James Bottomley was required to narrow
down which bit of code could have this race condition.
The symptom was interrupts stopped getting delivered while some workload
was generating IO interrupts on two different CPUs. One of the interrupt
sources would get masked off and stay masked. The problem was that
global_ack_eiem was accessed with a read/modify/write sequence and was not
protected by a spinlock.
PA-RISC doesn't need a global ack flag though. External Interrupts
are _always_ delivered to a single CPU (except for "global broadcast
interrupt" which AFAIK currently is not used.) So we don't have to worry
about any given IRQ vector getting delivered to more than one CPU.
Tested on a500 and rp34xx boxen. rsync to/from gsyprf11 (a500)
would lock up the box since NIC (tg3) interrupt and SCSI (sym2)
were on "opposite" CPUs (2 CPU system). Put them on the same CPU
or apply this patch and 10GB of data would rsync completely.
Please apply the following critical patch.
thanks,
grant
Signed-off-by: Grant Grundler <grundler@parisc-linux.org>
Acked-by: Thibaut VARENE <T-Bone@parisc-linux.org>
Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>
Diffstat (limited to 'arch/parisc')
-rw-r--r-- | arch/parisc/kernel/irq.c | 26 |
1 files changed, 8 insertions, 18 deletions
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index c5c9125dacec..76ce5e3b0050 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c | |||
@@ -46,14 +46,10 @@ extern irqreturn_t ipi_interrupt(int, void *); | |||
46 | static volatile unsigned long cpu_eiem = 0; | 46 | static volatile unsigned long cpu_eiem = 0; |
47 | 47 | ||
48 | /* | 48 | /* |
49 | ** ack bitmap ... habitually set to 1, but reset to zero | 49 | ** local ACK bitmap ... habitually set to 1, but reset to zero |
50 | ** between ->ack() and ->end() of the interrupt to prevent | 50 | ** between ->ack() and ->end() of the interrupt to prevent |
51 | ** re-interruption of a processing interrupt. | 51 | ** re-interruption of a processing interrupt. |
52 | */ | 52 | */ |
53 | static volatile unsigned long global_ack_eiem = ~0UL; | ||
54 | /* | ||
55 | ** Local bitmap, same as above but for per-cpu interrupts | ||
56 | */ | ||
57 | static DEFINE_PER_CPU(unsigned long, local_ack_eiem) = ~0UL; | 53 | static DEFINE_PER_CPU(unsigned long, local_ack_eiem) = ~0UL; |
58 | 54 | ||
59 | static void cpu_disable_irq(unsigned int irq) | 55 | static void cpu_disable_irq(unsigned int irq) |
@@ -94,13 +90,11 @@ void cpu_ack_irq(unsigned int irq) | |||
94 | int cpu = smp_processor_id(); | 90 | int cpu = smp_processor_id(); |
95 | 91 | ||
96 | /* Clear in EIEM so we can no longer process */ | 92 | /* Clear in EIEM so we can no longer process */ |
97 | if (CHECK_IRQ_PER_CPU(irq_desc[irq].status)) | 93 | per_cpu(local_ack_eiem, cpu) &= ~mask; |
98 | per_cpu(local_ack_eiem, cpu) &= ~mask; | ||
99 | else | ||
100 | global_ack_eiem &= ~mask; | ||
101 | 94 | ||
102 | /* disable the interrupt */ | 95 | /* disable the interrupt */ |
103 | set_eiem(cpu_eiem & global_ack_eiem & per_cpu(local_ack_eiem, cpu)); | 96 | set_eiem(cpu_eiem & per_cpu(local_ack_eiem, cpu)); |
97 | |||
104 | /* and now ack it */ | 98 | /* and now ack it */ |
105 | mtctl(mask, 23); | 99 | mtctl(mask, 23); |
106 | } | 100 | } |
@@ -111,13 +105,10 @@ void cpu_end_irq(unsigned int irq) | |||
111 | int cpu = smp_processor_id(); | 105 | int cpu = smp_processor_id(); |
112 | 106 | ||
113 | /* set it in the eiems---it's no longer in process */ | 107 | /* set it in the eiems---it's no longer in process */ |
114 | if (CHECK_IRQ_PER_CPU(irq_desc[irq].status)) | 108 | per_cpu(local_ack_eiem, cpu) |= mask; |
115 | per_cpu(local_ack_eiem, cpu) |= mask; | ||
116 | else | ||
117 | global_ack_eiem |= mask; | ||
118 | 109 | ||
119 | /* enable the interrupt */ | 110 | /* enable the interrupt */ |
120 | set_eiem(cpu_eiem & global_ack_eiem & per_cpu(local_ack_eiem, cpu)); | 111 | set_eiem(cpu_eiem & per_cpu(local_ack_eiem, cpu)); |
121 | } | 112 | } |
122 | 113 | ||
123 | #ifdef CONFIG_SMP | 114 | #ifdef CONFIG_SMP |
@@ -354,8 +345,7 @@ void do_cpu_irq_mask(struct pt_regs *regs) | |||
354 | local_irq_disable(); | 345 | local_irq_disable(); |
355 | irq_enter(); | 346 | irq_enter(); |
356 | 347 | ||
357 | eirr_val = mfctl(23) & cpu_eiem & global_ack_eiem & | 348 | eirr_val = mfctl(23) & cpu_eiem & per_cpu(local_ack_eiem, cpu); |
358 | per_cpu(local_ack_eiem, cpu); | ||
359 | if (!eirr_val) | 349 | if (!eirr_val) |
360 | goto set_out; | 350 | goto set_out; |
361 | irq = eirr_to_irq(eirr_val); | 351 | irq = eirr_to_irq(eirr_val); |
@@ -381,7 +371,7 @@ void do_cpu_irq_mask(struct pt_regs *regs) | |||
381 | return; | 371 | return; |
382 | 372 | ||
383 | set_out: | 373 | set_out: |
384 | set_eiem(cpu_eiem & global_ack_eiem & per_cpu(local_ack_eiem, cpu)); | 374 | set_eiem(cpu_eiem & per_cpu(local_ack_eiem, cpu)); |
385 | goto out; | 375 | goto out; |
386 | } | 376 | } |
387 | 377 | ||