aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
author: Suresh Siddha <suresh.b.siddha@intel.com> 2008-07-10 14:16:57 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-07-12 02:45:05 -0400
commit: 75c46fa61bc5b4ccd20a168ff325c58771248fcd (patch)
tree: ff5abfe689fe732ad73a198e1f3e56b8c4ca6024 /arch/x86/kernel
parent: 89027d35aa5b8f45ce0f7fa0911db85b46563da0 (diff)
x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure
MSI and MSI-X support for the interrupt-remapping infrastructure.

The MSI address register will be programmed with the interrupt-remapping table entry (IRTE) index, and the IRTE will contain information about the vector, CPU destination, etc. For MSI-X, all the IRTEs will be allocated consecutively in the table; the address registers will contain the starting index of the block, and the data register will contain the subindex within that block.

This also introduces a new irq_chip for cleaner irq migration: the migration is done in process context, as opposed to the current irq migration in the context of an interrupt (the interrupt-remapping infrastructure helps us achieve this). As MSI is edge triggered, irq migration is a simple atomic update (of vector and CPU destination) of the IRTE, followed by flushing the hardware cache.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- arch/x86/kernel/io_apic_64.c 230
1 files changed, 222 insertions, 8 deletions
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 9bd02ef049a0..877aa2e9d7e8 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -2297,6 +2297,9 @@ void destroy_irq(unsigned int irq)
2297 2297
2298 dynamic_irq_cleanup(irq); 2298 dynamic_irq_cleanup(irq);
2299 2299
2300#ifdef CONFIG_INTR_REMAP
2301 free_irte(irq);
2302#endif
2300 spin_lock_irqsave(&vector_lock, flags); 2303 spin_lock_irqsave(&vector_lock, flags);
2301 __clear_irq_vector(irq); 2304 __clear_irq_vector(irq);
2302 spin_unlock_irqrestore(&vector_lock, flags); 2305 spin_unlock_irqrestore(&vector_lock, flags);
@@ -2315,11 +2318,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2315 2318
2316 tmp = TARGET_CPUS; 2319 tmp = TARGET_CPUS;
2317 err = assign_irq_vector(irq, tmp); 2320 err = assign_irq_vector(irq, tmp);
2318 if (!err) { 2321 if (err)
2319 cpus_and(tmp, cfg->domain, tmp); 2322 return err;
2320 dest = cpu_mask_to_apicid(tmp); 2323
2324 cpus_and(tmp, cfg->domain, tmp);
2325 dest = cpu_mask_to_apicid(tmp);
2326
2327#ifdef CONFIG_INTR_REMAP
2328 if (irq_remapped(irq)) {
2329 struct irte irte;
2330 int ir_index;
2331 u16 sub_handle;
2332
2333 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2334 BUG_ON(ir_index == -1);
2335
2336 memset (&irte, 0, sizeof(irte));
2337
2338 irte.present = 1;
2339 irte.dst_mode = INT_DEST_MODE;
2340 irte.trigger_mode = 0; /* edge */
2341 irte.dlvry_mode = INT_DELIVERY_MODE;
2342 irte.vector = cfg->vector;
2343 irte.dest_id = IRTE_DEST(dest);
2344
2345 modify_irte(irq, &irte);
2321 2346
2322 msg->address_hi = MSI_ADDR_BASE_HI; 2347 msg->address_hi = MSI_ADDR_BASE_HI;
2348 msg->data = sub_handle;
2349 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2350 MSI_ADDR_IR_SHV |
2351 MSI_ADDR_IR_INDEX1(ir_index) |
2352 MSI_ADDR_IR_INDEX2(ir_index);
2353 } else
2354#endif
2355 {
2356 msg->address_hi = MSI_ADDR_BASE_HI;
2323 msg->address_lo = 2357 msg->address_lo =
2324 MSI_ADDR_BASE_LO | 2358 MSI_ADDR_BASE_LO |
2325 ((INT_DEST_MODE == 0) ? 2359 ((INT_DEST_MODE == 0) ?
@@ -2369,6 +2403,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2369 write_msi_msg(irq, &msg); 2403 write_msi_msg(irq, &msg);
2370 irq_desc[irq].affinity = mask; 2404 irq_desc[irq].affinity = mask;
2371} 2405}
2406
2407#ifdef CONFIG_INTR_REMAP
2408/*
2409 * Migrate the MSI irq to another cpumask. This migration is
2410 * done in the process context using interrupt-remapping hardware.
2411 */
2412static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2413{
2414 struct irq_cfg *cfg = irq_cfg + irq;
2415 unsigned int dest;
2416 cpumask_t tmp, cleanup_mask;
2417 struct irte irte;
2418
2419 cpus_and(tmp, mask, cpu_online_map);
2420 if (cpus_empty(tmp))
2421 return;
2422
2423 if (get_irte(irq, &irte))
2424 return;
2425
2426 if (assign_irq_vector(irq, mask))
2427 return;
2428
2429 cpus_and(tmp, cfg->domain, mask);
2430 dest = cpu_mask_to_apicid(tmp);
2431
2432 irte.vector = cfg->vector;
2433 irte.dest_id = IRTE_DEST(dest);
2434
2435 /*
2436 * atomically update the IRTE with the new destination and vector.
2437 */
2438 modify_irte(irq, &irte);
2439
2440 /*
2441 * After this point, all the interrupts will start arriving
2442 * at the new destination. So, time to cleanup the previous
2443 * vector allocation.
2444 */
2445 if (cfg->move_in_progress) {
2446 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2447 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2448 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2449 cfg->move_in_progress = 0;
2450 }
2451
2452 irq_desc[irq].affinity = mask;
2453}
2454#endif
2372#endif /* CONFIG_SMP */ 2455#endif /* CONFIG_SMP */
2373 2456
2374/* 2457/*
@@ -2386,26 +2469,157 @@ static struct irq_chip msi_chip = {
2386 .retrigger = ioapic_retrigger_irq, 2469 .retrigger = ioapic_retrigger_irq,
2387}; 2470};
2388 2471
2389int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2472#ifdef CONFIG_INTR_REMAP
2473static struct irq_chip msi_ir_chip = {
2474 .name = "IR-PCI-MSI",
2475 .unmask = unmask_msi_irq,
2476 .mask = mask_msi_irq,
2477 .ack = ack_x2apic_edge,
2478#ifdef CONFIG_SMP
2479 .set_affinity = ir_set_msi_irq_affinity,
2480#endif
2481 .retrigger = ioapic_retrigger_irq,
2482};
2483
2484/*
2485 * Map the PCI dev to the corresponding remapping hardware unit
2486 * and allocate 'nvec' consecutive interrupt-remapping table entries
2487 * in it.
2488 */
2489static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2490{
2491 struct intel_iommu *iommu;
2492 int index;
2493
2494 iommu = map_dev_to_ir(dev);
2495 if (!iommu) {
2496 printk(KERN_ERR
2497 "Unable to map PCI %s to iommu\n", pci_name(dev));
2498 return -ENOENT;
2499 }
2500
2501 index = alloc_irte(iommu, irq, nvec);
2502 if (index < 0) {
2503 printk(KERN_ERR
2504 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2505 pci_name(dev));
2506 return -ENOSPC;
2507 }
2508 return index;
2509}
2510#endif
2511
2512static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2390{ 2513{
2514 int ret;
2391 struct msi_msg msg; 2515 struct msi_msg msg;
2516
2517 ret = msi_compose_msg(dev, irq, &msg);
2518 if (ret < 0)
2519 return ret;
2520
2521 set_irq_msi(irq, desc);
2522 write_msi_msg(irq, &msg);
2523
2524#ifdef CONFIG_INTR_REMAP
2525 if (irq_remapped(irq)) {
2526 struct irq_desc *desc = irq_desc + irq;
2527 /*
2528 * irq migration in process context
2529 */
2530 desc->status |= IRQ_MOVE_PCNTXT;
2531 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2532 } else
2533#endif
2534 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2535
2536 return 0;
2537}
2538
2539int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2540{
2392 int irq, ret; 2541 int irq, ret;
2542
2393 irq = create_irq(); 2543 irq = create_irq();
2394 if (irq < 0) 2544 if (irq < 0)
2395 return irq; 2545 return irq;
2396 2546
2397 ret = msi_compose_msg(dev, irq, &msg); 2547#ifdef CONFIG_INTR_REMAP
2548 if (!intr_remapping_enabled)
2549 goto no_ir;
2550
2551 ret = msi_alloc_irte(dev, irq, 1);
2552 if (ret < 0)
2553 goto error;
2554no_ir:
2555#endif
2556 ret = setup_msi_irq(dev, desc, irq);
2398 if (ret < 0) { 2557 if (ret < 0) {
2399 destroy_irq(irq); 2558 destroy_irq(irq);
2400 return ret; 2559 return ret;
2401 } 2560 }
2561 return 0;
2402 2562
2403 set_irq_msi(irq, desc); 2563#ifdef CONFIG_INTR_REMAP
2404 write_msi_msg(irq, &msg); 2564error:
2565 destroy_irq(irq);
2566 return ret;
2567#endif
2568}
2405 2569
2406 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2570int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2571{
2572 int irq, ret, sub_handle;
2573 struct msi_desc *desc;
2574#ifdef CONFIG_INTR_REMAP
2575 struct intel_iommu *iommu = 0;
2576 int index = 0;
2577#endif
2407 2578
2579 sub_handle = 0;
2580 list_for_each_entry(desc, &dev->msi_list, list) {
2581 irq = create_irq();
2582 if (irq < 0)
2583 return irq;
2584#ifdef CONFIG_INTR_REMAP
2585 if (!intr_remapping_enabled)
2586 goto no_ir;
2587
2588 if (!sub_handle) {
2589 /*
2590 * allocate the consecutive block of IRTE's
2591 * for 'nvec'
2592 */
2593 index = msi_alloc_irte(dev, irq, nvec);
2594 if (index < 0) {
2595 ret = index;
2596 goto error;
2597 }
2598 } else {
2599 iommu = map_dev_to_ir(dev);
2600 if (!iommu) {
2601 ret = -ENOENT;
2602 goto error;
2603 }
2604 /*
2605 * setup the mapping between the irq and the IRTE
2606 * base index, the sub_handle pointing to the
2607 * appropriate interrupt remap table entry.
2608 */
2609 set_irte_irq(irq, iommu, index, sub_handle);
2610 }
2611no_ir:
2612#endif
2613 ret = setup_msi_irq(dev, desc, irq);
2614 if (ret < 0)
2615 goto error;
2616 sub_handle++;
2617 }
2408 return 0; 2618 return 0;
2619
2620error:
2621 destroy_irq(irq);
2622 return ret;
2409} 2623}
2410 2624
2411void arch_teardown_msi_irq(unsigned int irq) 2625void arch_teardown_msi_irq(unsigned int irq)