diff options
author | Yinghai Lu <yinghai@kernel.org> | 2008-12-11 03:15:01 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-12-16 18:14:01 -0500 |
commit | 48a1b10aff588833b73994704c47bbd0deb73e9c (patch) | |
tree | deb3c7b486346c3afa54014b3c3516344c2708f2 /arch/x86 | |
parent | 13bd41bc227a48d6cf8992a3286bf6eba3c71a0c (diff) |
x86, sparseirq: move irq_desc according to smp_affinity, v7
Impact: improve NUMA handling by migrating irq_desc on smp_affinity changes
if CONFIG_NUMA_MIGRATE_IRQ_DESC is set:
- make irq_desc follow the irq's affinity, i.e. migrate irq_desc when smp_affinity changes
- call move_irq_desc in irq_complete_move()
- legacy irq_descs are not moved, because they are allocated from a static array
For logical APIC mode, we need to add move_desc_in_progress_in_same_domain;
otherwise the irq_desc will not be moved. It may also take two phases to get
the irq_desc moved.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 9 | ||||
-rw-r--r-- | arch/x86/kernel/io_apic.c | 142 |
2 files changed, 150 insertions, 1 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8943c13502c6..29073532f94c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -248,6 +248,15 @@ config SPARSE_IRQ | |||
248 | 248 | ||
249 | If you don't know what to do here, say Y. | 249 | If you don't know what to do here, say Y. |
250 | 250 | ||
251 | config NUMA_MIGRATE_IRQ_DESC | ||
252 | bool "Move irq desc when changing irq smp_affinity" | ||
253 | depends on SPARSE_IRQ && SMP | ||
254 | default n | ||
255 | help | ||
256 | This enables moving irq_desc to the cpu/node that will handle the irq. | ||
257 | |||
258 | If you don't know what to do here, say N. | ||
259 | |||
251 | config X86_FIND_SMP_CONFIG | 260 | config X86_FIND_SMP_CONFIG |
252 | def_bool y | 261 | def_bool y |
253 | depends on X86_MPPARSE || X86_VOYAGER | 262 | depends on X86_MPPARSE || X86_VOYAGER |
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index a1a2e070f31a..bfe1245b1a3e 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c | |||
@@ -141,6 +141,9 @@ struct irq_cfg { | |||
141 | unsigned move_cleanup_count; | 141 | unsigned move_cleanup_count; |
142 | u8 vector; | 142 | u8 vector; |
143 | u8 move_in_progress : 1; | 143 | u8 move_in_progress : 1; |
144 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
145 | u8 move_desc_pending : 1; | ||
146 | #endif | ||
144 | }; | 147 | }; |
145 | 148 | ||
146 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 149 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
@@ -223,6 +226,121 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu) | |||
223 | } | 226 | } |
224 | } | 227 | } |
225 | 228 | ||
229 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
230 | |||
231 | static void | ||
232 | init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) | ||
233 | { | ||
234 | struct irq_pin_list *old_entry, *head, *tail, *entry; | ||
235 | |||
236 | cfg->irq_2_pin = NULL; | ||
237 | old_entry = old_cfg->irq_2_pin; | ||
238 | if (!old_entry) | ||
239 | return; | ||
240 | |||
241 | entry = get_one_free_irq_2_pin(cpu); | ||
242 | if (!entry) | ||
243 | return; | ||
244 | |||
245 | entry->apic = old_entry->apic; | ||
246 | entry->pin = old_entry->pin; | ||
247 | head = entry; | ||
248 | tail = entry; | ||
249 | old_entry = old_entry->next; | ||
250 | while (old_entry) { | ||
251 | entry = get_one_free_irq_2_pin(cpu); | ||
252 | if (!entry) { | ||
253 | entry = head; | ||
254 | while (entry) { | ||
255 | head = entry->next; | ||
256 | kfree(entry); | ||
257 | entry = head; | ||
258 | } | ||
259 | /* still use the old one */ | ||
260 | return; | ||
261 | } | ||
262 | entry->apic = old_entry->apic; | ||
263 | entry->pin = old_entry->pin; | ||
264 | tail->next = entry; | ||
265 | tail = entry; | ||
266 | old_entry = old_entry->next; | ||
267 | } | ||
268 | |||
269 | tail->next = NULL; | ||
270 | cfg->irq_2_pin = head; | ||
271 | } | ||
272 | |||
273 | static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) | ||
274 | { | ||
275 | struct irq_pin_list *entry, *next; | ||
276 | |||
277 | if (old_cfg->irq_2_pin == cfg->irq_2_pin) | ||
278 | return; | ||
279 | |||
280 | entry = old_cfg->irq_2_pin; | ||
281 | |||
282 | while (entry) { | ||
283 | next = entry->next; | ||
284 | kfree(entry); | ||
285 | entry = next; | ||
286 | } | ||
287 | old_cfg->irq_2_pin = NULL; | ||
288 | } | ||
289 | |||
290 | void arch_init_copy_chip_data(struct irq_desc *old_desc, | ||
291 | struct irq_desc *desc, int cpu) | ||
292 | { | ||
293 | struct irq_cfg *cfg; | ||
294 | struct irq_cfg *old_cfg; | ||
295 | |||
296 | cfg = get_one_free_irq_cfg(cpu); | ||
297 | |||
298 | if (!cfg) | ||
299 | return; | ||
300 | |||
301 | desc->chip_data = cfg; | ||
302 | |||
303 | old_cfg = old_desc->chip_data; | ||
304 | |||
305 | memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); | ||
306 | |||
307 | init_copy_irq_2_pin(old_cfg, cfg, cpu); | ||
308 | } | ||
309 | |||
310 | static void free_irq_cfg(struct irq_cfg *old_cfg) | ||
311 | { | ||
312 | kfree(old_cfg); | ||
313 | } | ||
314 | |||
315 | void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | ||
316 | { | ||
317 | struct irq_cfg *old_cfg, *cfg; | ||
318 | |||
319 | old_cfg = old_desc->chip_data; | ||
320 | cfg = desc->chip_data; | ||
321 | |||
322 | if (old_cfg == cfg) | ||
323 | return; | ||
324 | |||
325 | if (old_cfg) { | ||
326 | free_irq_2_pin(old_cfg, cfg); | ||
327 | free_irq_cfg(old_cfg); | ||
328 | old_desc->chip_data = NULL; | ||
329 | } | ||
330 | } | ||
331 | |||
332 | static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) | ||
333 | { | ||
334 | struct irq_cfg *cfg = desc->chip_data; | ||
335 | |||
336 | if (!cfg->move_in_progress) { | ||
337 | /* it means that domain is not changed */ | ||
338 | if (!cpus_intersects(desc->affinity, mask)) | ||
339 | cfg->move_desc_pending = 1; | ||
340 | } | ||
341 | } | ||
342 | #endif | ||
343 | |||
226 | #else | 344 | #else |
227 | static struct irq_cfg *irq_cfg(unsigned int irq) | 345 | static struct irq_cfg *irq_cfg(unsigned int irq) |
228 | { | 346 | { |
@@ -231,9 +349,11 @@ static struct irq_cfg *irq_cfg(unsigned int irq) | |||
231 | 349 | ||
232 | #endif | 350 | #endif |
233 | 351 | ||
352 | #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
234 | static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) | 353 | static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) |
235 | { | 354 | { |
236 | } | 355 | } |
356 | #endif | ||
237 | 357 | ||
238 | struct io_apic { | 358 | struct io_apic { |
239 | unsigned int index; | 359 | unsigned int index; |
@@ -2346,14 +2466,34 @@ static void irq_complete_move(struct irq_desc **descp) | |||
2346 | struct irq_cfg *cfg = desc->chip_data; | 2466 | struct irq_cfg *cfg = desc->chip_data; |
2347 | unsigned vector, me; | 2467 | unsigned vector, me; |
2348 | 2468 | ||
2349 | if (likely(!cfg->move_in_progress)) | 2469 | if (likely(!cfg->move_in_progress)) { |
2470 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
2471 | if (likely(!cfg->move_desc_pending)) | ||
2472 | return; | ||
2473 | |||
2474 | /* domain is not change, but affinity is changed */ | ||
2475 | me = smp_processor_id(); | ||
2476 | if (cpu_isset(me, desc->affinity)) { | ||
2477 | *descp = desc = move_irq_desc(desc, me); | ||
2478 | /* get the new one */ | ||
2479 | cfg = desc->chip_data; | ||
2480 | cfg->move_desc_pending = 0; | ||
2481 | } | ||
2482 | #endif | ||
2350 | return; | 2483 | return; |
2484 | } | ||
2351 | 2485 | ||
2352 | vector = ~get_irq_regs()->orig_ax; | 2486 | vector = ~get_irq_regs()->orig_ax; |
2353 | me = smp_processor_id(); | 2487 | me = smp_processor_id(); |
2354 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { | 2488 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { |
2355 | cpumask_t cleanup_mask; | 2489 | cpumask_t cleanup_mask; |
2356 | 2490 | ||
2491 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
2492 | *descp = desc = move_irq_desc(desc, me); | ||
2493 | /* get the new one */ | ||
2494 | cfg = desc->chip_data; | ||
2495 | #endif | ||
2496 | |||
2357 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 2497 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); |
2358 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | 2498 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); |
2359 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | 2499 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); |