author		Marc Zyngier <marc.zyngier@arm.com>	2018-05-27 11:14:15 -0400
committer	Marc Zyngier <marc.zyngier@arm.com>	2018-07-16 09:22:19 -0400
commit		880cb3cddd164128522146d1c087fbd1811ed0cc (patch)
tree		c9c7e86419af439fe180bab9da21d917ab4d2e15
parent		9d3cce1e8b8561fed5f383d22a4d6949db4eadbe (diff)
irqchip/gic-v3-its: Refactor LPI allocator
Our current LPI allocator relies on a bitmap, each bit representing a chunk of 32 LPIs, meaning that each device gets allocated LPIs in multiples of 32. It has served us well so far, but new use cases now require much finer-grained allocations, down to the individual LPI.

Given the size of the IntID space (up to 32 bits), it isn't practical to continue using a bitmap, so let's use a different data structure altogether.

We switch to a list, where each element represents a contiguous range of LPIs. On allocation, we simply grab the first group big enough to satisfy the allocation, and subtract what we need from it. If the group becomes empty, we just remove it. On freeing interrupts, we insert a new group of interrupts into the list, sort it and fuse adjacent groups. This makes freeing interrupts much more expensive than allocating them (an unusual behaviour), which is fine as long as freeing interrupts remains an extremely rare event.

We still allocate interrupts in blocks of 32 for the time being, but subsequent patches will relax this.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
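To make the range-list scheme concrete, here is a minimal, self-contained userspace sketch of the same idea. It is not the kernel code: the names (struct free_range, range_alloc(), range_free()) and the plain singly linked list are invented for this illustration, and there is no locking. The patch below implements the real thing with struct lpi_range, alloc_lpi_range() and free_lpi_range() on a list_head sorted by list_sort() under a mutex.

/*
 * Minimal sketch of the range-list allocator described above.
 * All names here are invented for illustration only.
 */
#include <stdio.h>
#include <stdlib.h>

struct free_range {
	unsigned int base;		/* first free ID in this range */
	unsigned int span;		/* number of free IDs */
	struct free_range *next;	/* next range, kept sorted by base */
};

static struct free_range *free_list;

/* Allocation: take from the first range big enough, drop it once empty. */
static int range_alloc(unsigned int nr, unsigned int *base)
{
	struct free_range **pp, *r;

	for (pp = &free_list; (r = *pp); pp = &r->next) {
		if (r->span < nr)
			continue;
		*base = r->base;
		r->base += nr;
		r->span -= nr;
		if (!r->span) {
			*pp = r->next;
			free(r);
		}
		return 0;
	}
	return -1;	/* no contiguous range large enough */
}

/*
 * Freeing: insert a range at its sorted position, then fuse it with its
 * neighbours if they are adjacent.  This is the expensive path, which is
 * acceptable as long as freeing is rare.
 */
static int range_free(unsigned int base, unsigned int nr)
{
	struct free_range *prev = NULL, *r = free_list;
	struct free_range *new = malloc(sizeof(*new));

	if (!new)
		return -1;
	new->base = base;
	new->span = nr;

	while (r && r->base < base) {
		prev = r;
		r = r->next;
	}
	new->next = r;
	if (prev)
		prev->next = new;
	else
		free_list = new;

	if (r && new->base + new->span == r->base) {	/* fuse with next */
		new->span += r->span;
		new->next = r->next;
		free(r);
	}
	if (prev && prev->base + prev->span == new->base) {	/* fuse with prev */
		prev->span += new->span;
		prev->next = new->next;
		free(new);
	}
	return 0;
}

int main(void)
{
	unsigned int base;

	/* Initialising the allocator == freeing the whole ID space:
	 * 2^16 LPIs, minus the 8192 IDs reserved for SGIs/PPIs/SPIs. */
	range_free(8192, 65536 - 8192);

	if (!range_alloc(32, &base))
		printf("allocated 32 IDs at %u\n", base);	/* 8192 */

	/* Freeing merges the block back into a single range. */
	range_free(base, 32);
	printf("free list: %u..%u\n",
	       free_list->base, free_list->base + free_list->span - 1);
	return 0;
}

Running the sketch allocates 32 IDs starting at 8192 and, after freeing them, the list collapses back to a single range covering the whole ID space; the sort-and-merge cost lands entirely on the free path, which the commit message argues is rare. Note that, just as in the patch, initialising the allocator is nothing more than freeing the full ID range.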
-rw-r--r--	drivers/irqchip/irq-gic-v3-its.c | 191
1 file changed, 129 insertions(+), 62 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d7842d312d3e..9084a7e5a4b2 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -23,6 +23,8 @@
 #include <linux/dma-iommu.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
 #include <linux/log2.h>
 #include <linux/mm.h>
 #include <linux/msi.h>
@@ -1421,112 +1423,177 @@ static struct irq_chip its_irq_chip = {
 	.irq_set_vcpu_affinity	= its_irq_set_vcpu_affinity,
 };
 
+
 /*
  * How we allocate LPIs:
  *
- * The GIC has id_bits bits for interrupt identifiers. From there, we
- * must subtract 8192 which are reserved for SGIs/PPIs/SPIs. Then, as
- * we allocate LPIs by chunks of 32, we can shift the whole thing by 5
- * bits to the right.
+ * lpi_range_list contains ranges of LPIs that are to available to
+ * allocate from. To allocate LPIs, just pick the first range that
+ * fits the required allocation, and reduce it by the required
+ * amount. Once empty, remove the range from the list.
+ *
+ * To free a range of LPIs, add a free range to the list, sort it and
+ * merge the result if the new range happens to be adjacent to an
+ * already free block.
  *
- * This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations.
+ * The consequence of the above is that allocation is cost is low, but
+ * freeing is expensive. We assumes that freeing rarely occurs.
+ */
+
+/*
+ * Compatibility defines until we fully refactor the allocator
  */
 #define IRQS_PER_CHUNK_SHIFT	5
 #define IRQS_PER_CHUNK		(1UL << IRQS_PER_CHUNK_SHIFT)
 #define ITS_MAX_LPI_NRBITS	16 /* 64K LPIs */
 
-static unsigned long *lpi_bitmap;
-static u32 lpi_chunks;
-static DEFINE_SPINLOCK(lpi_lock);
+static DEFINE_MUTEX(lpi_range_lock);
+static LIST_HEAD(lpi_range_list);
 
-static int its_lpi_to_chunk(int lpi)
+struct lpi_range {
+	struct list_head	entry;
+	u32			base_id;
+	u32			span;
+};
+
+static struct lpi_range *mk_lpi_range(u32 base, u32 span)
 {
-	return (lpi - 8192) >> IRQS_PER_CHUNK_SHIFT;
+	struct lpi_range *range;
+
+	range = kzalloc(sizeof(*range), GFP_KERNEL);
+	if (range) {
+		INIT_LIST_HEAD(&range->entry);
+		range->base_id = base;
+		range->span = span;
+	}
+
+	return range;
 }
 
-static int its_chunk_to_lpi(int chunk)
+static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b)
 {
-	return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192;
+	struct lpi_range *ra, *rb;
+
+	ra = container_of(a, struct lpi_range, entry);
+	rb = container_of(b, struct lpi_range, entry);
+
+	return rb->base_id - ra->base_id;
 }
 
-static int __init its_lpi_init(u32 id_bits)
+static void merge_lpi_ranges(void)
 {
-	lpi_chunks = its_lpi_to_chunk(1UL << id_bits);
+	struct lpi_range *range, *tmp;
 
-	lpi_bitmap = kcalloc(BITS_TO_LONGS(lpi_chunks), sizeof(long),
-			     GFP_KERNEL);
-	if (!lpi_bitmap) {
-		lpi_chunks = 0;
-		return -ENOMEM;
+	list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
+		if (!list_is_last(&range->entry, &lpi_range_list) &&
+		    (tmp->base_id == (range->base_id + range->span))) {
+			tmp->base_id = range->base_id;
+			tmp->span += range->span;
+			list_del(&range->entry);
+			kfree(range);
+		}
 	}
+}
 
-	pr_info("ITS: Allocated %d chunks for LPIs\n", (int)lpi_chunks);
-	return 0;
+static int alloc_lpi_range(u32 nr_lpis, u32 *base)
+{
+	struct lpi_range *range, *tmp;
+	int err = -ENOSPC;
+
+	mutex_lock(&lpi_range_lock);
+
+	list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
+		if (range->span >= nr_lpis) {
+			*base = range->base_id;
+			range->base_id += nr_lpis;
+			range->span -= nr_lpis;
+
+			if (range->span == 0) {
+				list_del(&range->entry);
+				kfree(range);
+			}
+
+			err = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&lpi_range_lock);
+
+	pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis);
+	return err;
 }
 
-static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids)
+static int free_lpi_range(u32 base, u32 nr_lpis)
 {
-	unsigned long *bitmap = NULL;
-	int chunk_id;
-	int nr_chunks;
-	int i;
+	struct lpi_range *new;
+	int err = 0;
 
-	nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK);
+	mutex_lock(&lpi_range_lock);
 
-	spin_lock(&lpi_lock);
+	new = mk_lpi_range(base, nr_lpis);
+	if (!new) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	list_add(&new->entry, &lpi_range_list);
+	list_sort(NULL, &lpi_range_list, lpi_range_cmp);
+	merge_lpi_ranges();
+out:
+	mutex_unlock(&lpi_range_lock);
+	return err;
+}
+
+static int __init its_lpi_init(u32 id_bits)
+{
+	u32 lpis = (1UL << id_bits) - 8192;
+	int err;
+
+	/*
+	 * Initializing the allocator is just the same as freeing the
+	 * full range of LPIs.
+	 */
+	err = free_lpi_range(8192, lpis);
+	pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis);
+	return err;
+}
+
+static unsigned long *its_lpi_alloc_chunks(int nr_irqs, u32 *base, int *nr_ids)
+{
+	unsigned long *bitmap = NULL;
+	int err = 0;
+	int nr_lpis;
+
+	nr_lpis = round_up(nr_irqs, IRQS_PER_CHUNK);
 
 	do {
-		chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks,
-						      0, nr_chunks, 0);
-		if (chunk_id < lpi_chunks)
+		err = alloc_lpi_range(nr_lpis, base);
+		if (!err)
 			break;
 
-		nr_chunks--;
-	} while (nr_chunks > 0);
+		nr_lpis -= IRQS_PER_CHUNK;
+	} while (nr_lpis > 0);
 
-	if (!nr_chunks)
+	if (err)
 		goto out;
 
-	bitmap = kcalloc(BITS_TO_LONGS(nr_chunks * IRQS_PER_CHUNK),
-			 sizeof(long),
-			 GFP_ATOMIC);
+	bitmap = kcalloc(BITS_TO_LONGS(nr_lpis), sizeof (long), GFP_ATOMIC);
 	if (!bitmap)
 		goto out;
 
-	for (i = 0; i < nr_chunks; i++)
-		set_bit(chunk_id + i, lpi_bitmap);
-
-	*base = its_chunk_to_lpi(chunk_id);
-	*nr_ids = nr_chunks * IRQS_PER_CHUNK;
+	*nr_ids = nr_lpis;
 
 out:
-	spin_unlock(&lpi_lock);
-
 	if (!bitmap)
 		*base = *nr_ids = 0;
 
 	return bitmap;
 }
 
-static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids)
+static void its_lpi_free_chunks(unsigned long *bitmap, u32 base, u32 nr_ids)
 {
-	int lpi;
-
-	spin_lock(&lpi_lock);
-
-	for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) {
-		int chunk = its_lpi_to_chunk(lpi);
-
-		BUG_ON(chunk > lpi_chunks);
-		if (test_bit(chunk, lpi_bitmap)) {
-			clear_bit(chunk, lpi_bitmap);
-		} else {
-			pr_err("Bad LPI chunk %d\n", chunk);
-		}
-	}
-
-	spin_unlock(&lpi_lock);
-
+	WARN_ON(free_lpi_range(base, nr_ids));
 	kfree(bitmap);
 }
 