diff options
-rw-r--r-- | Documentation/IRQ-affinity.txt | 37 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 29 | ||||
-rw-r--r-- | arch/alpha/kernel/irq.c | 5 | ||||
-rw-r--r-- | include/linux/interrupt.h | 5 | ||||
-rw-r--r-- | include/linux/irq.h | 9 | ||||
-rw-r--r-- | kernel/irq/manage.c | 28 | ||||
-rw-r--r-- | kernel/irq/proc.c | 59 |
7 files changed, 134 insertions, 38 deletions
diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt index 938d7dd05490..b4a615b78403 100644 --- a/Documentation/IRQ-affinity.txt +++ b/Documentation/IRQ-affinity.txt | |||
@@ -1,17 +1,26 @@ | |||
1 | ChangeLog: | ||
2 | Started by Ingo Molnar <mingo@redhat.com> | ||
3 | Update by Max Krasnyansky <maxk@qualcomm.com> | ||
1 | 4 | ||
2 | SMP IRQ affinity, started by Ingo Molnar <mingo@redhat.com> | 5 | SMP IRQ affinity |
3 | |||
4 | 6 | ||
5 | /proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted | 7 | /proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted |
6 | for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed | 8 | for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed |
7 | to turn off all CPUs, and if an IRQ controller does not support IRQ | 9 | to turn off all CPUs, and if an IRQ controller does not support IRQ |
8 | affinity then the value will not change from the default 0xffffffff. | 10 | affinity then the value will not change from the default 0xffffffff. |
9 | 11 | ||
12 | /proc/irq/default_smp_affinity specifies the default affinity mask that applies | ||
13 | to all non-active IRQs. Once an IRQ is allocated/activated its affinity bitmask | ||
14 | will be set to the default mask. It can then be changed as described above. | ||
15 | The default mask is 0xffffffff. | ||
16 | |||
10 | Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting | 17 | Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting |
11 | the IRQ to CPU4-7 (this is an 8-CPU SMP box): | 18 | it to CPU4-7 (this is an 8-CPU SMP box): |
12 | 19 | ||
20 | [root@moon 44]# cd /proc/irq/44 | ||
13 | [root@moon 44]# cat smp_affinity | 21 | [root@moon 44]# cat smp_affinity |
14 | ffffffff | 22 | ffffffff |
23 | |||
15 | [root@moon 44]# echo 0f > smp_affinity | 24 | [root@moon 44]# echo 0f > smp_affinity |
16 | [root@moon 44]# cat smp_affinity | 25 | [root@moon 44]# cat smp_affinity |
17 | 0000000f | 26 | 0000000f |
@@ -21,17 +30,27 @@ PING hell (195.4.7.3): 56 data bytes | |||
21 | --- hell ping statistics --- | 30 | --- hell ping statistics --- |
22 | 6029 packets transmitted, 6027 packets received, 0% packet loss | 31 | 6029 packets transmitted, 6027 packets received, 0% packet loss |
23 | round-trip min/avg/max = 0.1/0.1/0.4 ms | 32 | round-trip min/avg/max = 0.1/0.1/0.4 ms |
24 | [root@moon 44]# cat /proc/interrupts | grep 44: | 33 | [root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:' |
25 | 44: 0 1785 1785 1783 1783 1 | 34 | CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 |
26 | 1 0 IO-APIC-level eth1 | 35 | 44: 1068 1785 1785 1783 0 0 0 0 IO-APIC-level eth1 |
36 | |||
37 | As can be seen from the line above IRQ44 was delivered only to the first four | ||
38 | processors (0-3). | ||
39 | Now let's restrict that IRQ to CPU(4-7). | ||
40 | |||
27 | [root@moon 44]# echo f0 > smp_affinity | 41 | [root@moon 44]# echo f0 > smp_affinity |
42 | [root@moon 44]# cat smp_affinity | ||
43 | 000000f0 | ||
28 | [root@moon 44]# ping -f h | 44 | [root@moon 44]# ping -f h |
29 | PING hell (195.4.7.3): 56 data bytes | 45 | PING hell (195.4.7.3): 56 data bytes |
30 | .. | 46 | .. |
31 | --- hell ping statistics --- | 47 | --- hell ping statistics --- |
32 | 2779 packets transmitted, 2777 packets received, 0% packet loss | 48 | 2779 packets transmitted, 2777 packets received, 0% packet loss |
33 | round-trip min/avg/max = 0.1/0.5/585.4 ms | 49 | round-trip min/avg/max = 0.1/0.5/585.4 ms |
34 | [root@moon 44]# cat /proc/interrupts | grep 44: | 50 | [root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:' |
35 | 44: 1068 1785 1785 1784 1784 1069 1070 1069 IO-APIC-level eth1 | 51 | CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 |
36 | [root@moon 44]# | 52 | 44: 1068 1785 1785 1783 1784 1069 1070 1069 IO-APIC-level eth1 |
53 | |||
54 | This time around IRQ44 was delivered only to the last four processors. | ||
55 | i.e. the counters for CPU0-3 did not change. | ||
37 | 56 | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index dbc3c6a3650f..7f268f327d75 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -380,28 +380,35 @@ i386 and x86_64 platforms support the new IRQ vector displays. | |||
380 | Of some interest is the introduction of the /proc/irq directory to 2.4. | 380 | Of some interest is the introduction of the /proc/irq directory to 2.4. |
381 | It could be used to set IRQ to CPU affinity, this means that you can "hook" an | 381 | It could be used to set IRQ to CPU affinity, this means that you can "hook" an |
382 | IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the | 382 | IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the |
383 | irq subdir is one subdir for each IRQ, and one file; prof_cpu_mask | 383 | irq subdir is one subdir for each IRQ, and two files; default_smp_affinity and |
384 | prof_cpu_mask. | ||
384 | 385 | ||
385 | For example | 386 | For example |
386 | > ls /proc/irq/ | 387 | > ls /proc/irq/ |
387 | 0 10 12 14 16 18 2 4 6 8 prof_cpu_mask | 388 | 0 10 12 14 16 18 2 4 6 8 prof_cpu_mask |
388 | 1 11 13 15 17 19 3 5 7 9 | 389 | 1 11 13 15 17 19 3 5 7 9 default_smp_affinity |
389 | > ls /proc/irq/0/ | 390 | > ls /proc/irq/0/ |
390 | smp_affinity | 391 | smp_affinity |
391 | 392 | ||
392 | The contents of the prof_cpu_mask file and each smp_affinity file for each IRQ | 393 | smp_affinity is a bitmask, in which you can specify which CPUs can handle the |
393 | is the same by default: | 394 | IRQ, you can set it by doing: |
394 | 395 | ||
395 | > cat /proc/irq/0/smp_affinity | 396 | > echo 1 > /proc/irq/10/smp_affinity |
396 | ffffffff | 397 | |
398 | This means that only the first CPU will handle the IRQ, but you can also echo | ||
399 | 5 which means that only the first and third CPU can handle the IRQ. | ||
397 | 400 | ||
398 | It's a bitmask, in which you can specify which CPUs can handle the IRQ, you can | 401 | The contents of each smp_affinity file is the same by default: |
399 | set it by doing: | 402 | |
403 | > cat /proc/irq/0/smp_affinity | ||
404 | ffffffff | ||
400 | 405 | ||
401 | > echo 1 > /proc/irq/prof_cpu_mask | 406 | The default_smp_affinity mask applies to all non-active IRQs, which are the |
407 | IRQs which have not yet been allocated/activated, and hence which lack a | ||
408 | /proc/irq/[0-9]* directory. | ||
402 | 409 | ||
403 | This means that only the first CPU will handle the IRQ, but you can also echo 5 | 410 | prof_cpu_mask specifies which CPUs are to be profiled by the system wide |
404 | which means that only the first and fourth CPU can handle the IRQ. | 411 | profiler. Default value is ffffffff (all cpus). |
405 | 412 | ||
406 | The way IRQs are routed is handled by the IO-APIC, and it's Round Robin | 413 | The way IRQs are routed is handled by the IO-APIC, and it's Round Robin |
407 | between all the CPUs which are allowed to handle it. As usual the kernel has | 414 | between all the CPUs which are allowed to handle it. As usual the kernel has |
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index facf82a5499a..c626a821cdcb 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c | |||
@@ -42,8 +42,7 @@ void ack_bad_irq(unsigned int irq) | |||
42 | #ifdef CONFIG_SMP | 42 | #ifdef CONFIG_SMP |
43 | static char irq_user_affinity[NR_IRQS]; | 43 | static char irq_user_affinity[NR_IRQS]; |
44 | 44 | ||
45 | int | 45 | int irq_select_affinity(unsigned int irq) |
46 | select_smp_affinity(unsigned int irq) | ||
47 | { | 46 | { |
48 | static int last_cpu; | 47 | static int last_cpu; |
49 | int cpu = last_cpu + 1; | 48 | int cpu = last_cpu + 1; |
@@ -51,7 +50,7 @@ select_smp_affinity(unsigned int irq) | |||
51 | if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq]) | 50 | if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq]) |
52 | return 1; | 51 | return 1; |
53 | 52 | ||
54 | while (!cpu_possible(cpu)) | 53 | while (!cpu_possible(cpu) || !cpu_isset(cpu, irq_default_affinity)) |
55 | cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); | 54 | cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); |
56 | last_cpu = cpu; | 55 | last_cpu = cpu; |
57 | 56 | ||
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f1fc7470d26c..043400f3d458 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -104,8 +104,11 @@ extern void enable_irq(unsigned int irq); | |||
104 | 104 | ||
105 | #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) | 105 | #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) |
106 | 106 | ||
107 | extern cpumask_t irq_default_affinity; | ||
108 | |||
107 | extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); | 109 | extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); |
108 | extern int irq_can_set_affinity(unsigned int irq); | 110 | extern int irq_can_set_affinity(unsigned int irq); |
111 | extern int irq_select_affinity(unsigned int irq); | ||
109 | 112 | ||
110 | #else /* CONFIG_SMP */ | 113 | #else /* CONFIG_SMP */ |
111 | 114 | ||
@@ -119,6 +122,8 @@ static inline int irq_can_set_affinity(unsigned int irq) | |||
119 | return 0; | 122 | return 0; |
120 | } | 123 | } |
121 | 124 | ||
125 | static inline int irq_select_affinity(unsigned int irq) { return 0; } | ||
126 | |||
122 | #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */ | 127 | #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */ |
123 | 128 | ||
124 | #ifdef CONFIG_GENERIC_HARDIRQS | 129 | #ifdef CONFIG_GENERIC_HARDIRQS |
diff --git a/include/linux/irq.h b/include/linux/irq.h index 552e0ec269c9..8ccb462ea42c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h | |||
@@ -244,15 +244,6 @@ static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) | |||
244 | } | 244 | } |
245 | #endif | 245 | #endif |
246 | 246 | ||
247 | #ifdef CONFIG_AUTO_IRQ_AFFINITY | ||
248 | extern int select_smp_affinity(unsigned int irq); | ||
249 | #else | ||
250 | static inline int select_smp_affinity(unsigned int irq) | ||
251 | { | ||
252 | return 1; | ||
253 | } | ||
254 | #endif | ||
255 | |||
256 | extern int no_irq_affinity; | 247 | extern int no_irq_affinity; |
257 | 248 | ||
258 | static inline int irq_balancing_disabled(unsigned int irq) | 249 | static inline int irq_balancing_disabled(unsigned int irq) |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 46d6611a33bb..469814e9b9ee 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -17,6 +17,8 @@ | |||
17 | 17 | ||
18 | #ifdef CONFIG_SMP | 18 | #ifdef CONFIG_SMP |
19 | 19 | ||
20 | cpumask_t irq_default_affinity = CPU_MASK_ALL; | ||
21 | |||
20 | /** | 22 | /** |
21 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) | 23 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) |
22 | * @irq: interrupt number to wait for | 24 | * @irq: interrupt number to wait for |
@@ -95,6 +97,27 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) | |||
95 | return 0; | 97 | return 0; |
96 | } | 98 | } |
97 | 99 | ||
100 | #ifndef CONFIG_AUTO_IRQ_AFFINITY | ||
101 | /* | ||
102 | * Generic version of the affinity autoselector. | ||
103 | */ | ||
104 | int irq_select_affinity(unsigned int irq) | ||
105 | { | ||
106 | cpumask_t mask; | ||
107 | |||
108 | if (!irq_can_set_affinity(irq)) | ||
109 | return 0; | ||
110 | |||
111 | cpus_and(mask, cpu_online_map, irq_default_affinity); | ||
112 | |||
113 | irq_desc[irq].affinity = mask; | ||
114 | irq_desc[irq].chip->set_affinity(irq, mask); | ||
115 | |||
116 | set_balance_irq_affinity(irq, mask); | ||
117 | return 0; | ||
118 | } | ||
119 | #endif | ||
120 | |||
98 | #endif | 121 | #endif |
99 | 122 | ||
100 | /** | 123 | /** |
@@ -382,6 +405,9 @@ int setup_irq(unsigned int irq, struct irqaction *new) | |||
382 | } else | 405 | } else |
383 | /* Undo nested disables: */ | 406 | /* Undo nested disables: */ |
384 | desc->depth = 1; | 407 | desc->depth = 1; |
408 | |||
409 | /* Set default affinity mask once everything is setup */ | ||
410 | irq_select_affinity(irq); | ||
385 | } | 411 | } |
386 | /* Reset broken irq detection when installing new handler */ | 412 | /* Reset broken irq detection when installing new handler */ |
387 | desc->irq_count = 0; | 413 | desc->irq_count = 0; |
@@ -571,8 +597,6 @@ int request_irq(unsigned int irq, irq_handler_t handler, | |||
571 | action->next = NULL; | 597 | action->next = NULL; |
572 | action->dev_id = dev_id; | 598 | action->dev_id = dev_id; |
573 | 599 | ||
574 | select_smp_affinity(irq); | ||
575 | |||
576 | #ifdef CONFIG_DEBUG_SHIRQ | 600 | #ifdef CONFIG_DEBUG_SHIRQ |
577 | if (irqflags & IRQF_SHARED) { | 601 | if (irqflags & IRQF_SHARED) { |
578 | /* | 602 | /* |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index c2f2ccb0549a..6c6d35d68ee9 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -44,7 +44,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, | |||
44 | unsigned long count, void *data) | 44 | unsigned long count, void *data) |
45 | { | 45 | { |
46 | unsigned int irq = (int)(long)data, full_count = count, err; | 46 | unsigned int irq = (int)(long)data, full_count = count, err; |
47 | cpumask_t new_value, tmp; | 47 | cpumask_t new_value; |
48 | 48 | ||
49 | if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || | 49 | if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || |
50 | irq_balancing_disabled(irq)) | 50 | irq_balancing_disabled(irq)) |
@@ -62,17 +62,51 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, | |||
62 | * way to make the system unusable accidentally :-) At least | 62 | * way to make the system unusable accidentally :-) At least |
63 | * one online CPU still has to be targeted. | 63 | * one online CPU still has to be targeted. |
64 | */ | 64 | */ |
65 | cpus_and(tmp, new_value, cpu_online_map); | 65 | if (!cpus_intersects(new_value, cpu_online_map)) |
66 | if (cpus_empty(tmp)) | ||
67 | /* Special case for empty set - allow the architecture | 66 | /* Special case for empty set - allow the architecture |
68 | code to set default SMP affinity. */ | 67 | code to set default SMP affinity. */ |
69 | return select_smp_affinity(irq) ? -EINVAL : full_count; | 68 | return irq_select_affinity(irq) ? -EINVAL : full_count; |
70 | 69 | ||
71 | irq_set_affinity(irq, new_value); | 70 | irq_set_affinity(irq, new_value); |
72 | 71 | ||
73 | return full_count; | 72 | return full_count; |
74 | } | 73 | } |
75 | 74 | ||
75 | static int default_affinity_read(char *page, char **start, off_t off, | ||
76 | int count, int *eof, void *data) | ||
77 | { | ||
78 | int len = cpumask_scnprintf(page, count, irq_default_affinity); | ||
79 | if (count - len < 2) | ||
80 | return -EINVAL; | ||
81 | len += sprintf(page + len, "\n"); | ||
82 | return len; | ||
83 | } | ||
84 | |||
85 | static int default_affinity_write(struct file *file, const char __user *buffer, | ||
86 | unsigned long count, void *data) | ||
87 | { | ||
88 | unsigned int full_count = count, err; | ||
89 | cpumask_t new_value; | ||
90 | |||
91 | err = cpumask_parse_user(buffer, count, new_value); | ||
92 | if (err) | ||
93 | return err; | ||
94 | |||
95 | if (!is_affinity_mask_valid(new_value)) | ||
96 | return -EINVAL; | ||
97 | |||
98 | /* | ||
99 | * Do not allow disabling IRQs completely - it's a too easy | ||
100 | * way to make the system unusable accidentally :-) At least | ||
101 | * one online CPU still has to be targeted. | ||
102 | */ | ||
103 | if (!cpus_intersects(new_value, cpu_online_map)) | ||
104 | return -EINVAL; | ||
105 | |||
106 | irq_default_affinity = new_value; | ||
107 | |||
108 | return full_count; | ||
109 | } | ||
76 | #endif | 110 | #endif |
77 | 111 | ||
78 | static int irq_spurious_read(char *page, char **start, off_t off, | 112 | static int irq_spurious_read(char *page, char **start, off_t off, |
@@ -171,6 +205,21 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action) | |||
171 | remove_proc_entry(action->dir->name, irq_desc[irq].dir); | 205 | remove_proc_entry(action->dir->name, irq_desc[irq].dir); |
172 | } | 206 | } |
173 | 207 | ||
208 | void register_default_affinity_proc(void) | ||
209 | { | ||
210 | #ifdef CONFIG_SMP | ||
211 | struct proc_dir_entry *entry; | ||
212 | |||
213 | /* create /proc/irq/default_smp_affinity */ | ||
214 | entry = create_proc_entry("default_smp_affinity", 0600, root_irq_dir); | ||
215 | if (entry) { | ||
216 | entry->data = NULL; | ||
217 | entry->read_proc = default_affinity_read; | ||
218 | entry->write_proc = default_affinity_write; | ||
219 | } | ||
220 | #endif | ||
221 | } | ||
222 | |||
174 | void init_irq_proc(void) | 223 | void init_irq_proc(void) |
175 | { | 224 | { |
176 | int i; | 225 | int i; |
@@ -180,6 +229,8 @@ void init_irq_proc(void) | |||
180 | if (!root_irq_dir) | 229 | if (!root_irq_dir) |
181 | return; | 230 | return; |
182 | 231 | ||
232 | register_default_affinity_proc(); | ||
233 | |||
183 | /* | 234 | /* |
184 | * Create entries for all existing IRQs. | 235 | * Create entries for all existing IRQs. |
185 | */ | 236 | */ |