author		Marc Zyngier <marc.zyngier@arm.com>	2018-05-27 11:14:15 -0400
committer	Marc Zyngier <marc.zyngier@arm.com>	2018-07-16 09:22:19 -0400
commit		880cb3cddd164128522146d1c087fbd1811ed0cc (patch)
tree		c9c7e86419af439fe180bab9da21d917ab4d2e15
parent		9d3cce1e8b8561fed5f383d22a4d6949db4eadbe (diff)
irqchip/gic-v3-its: Refactor LPI allocator
Our current LPI allocator relies on a bitmap, each bit representing a chunk of 32 LPIs, meaning that each device gets allocated LPIs in multiples of 32. It has served us well so far, but new use cases now require much finer-grained allocations, down to the individual LPI.

Given the size of the IntID space (up to 32 bits), it isn't practical to continue using a bitmap, so let's use a different data structure altogether.

We switch to a list, where each element represents a contiguous range of LPIs. On allocation, we simply grab the first group big enough to satisfy the allocation, and subtract what we need from it. If the group becomes empty, we just remove it. On freeing interrupts, we insert a new group of interrupts into the list, sort it and fuse adjacent groups. This makes freeing interrupts much more expensive than allocating them (an unusual behaviour), which is fine as long as freeing interrupts remains an extremely rare event.

We still allocate interrupts in blocks of 32 for the time being, but subsequent patches will relax this.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
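To make the range-list scheme concrete, here is a minimal, self-contained userspace sketch of the same idea. It is not the kernel code: the names (struct free_range, range_alloc(), range_free()) and the plain singly linked list are invented for this illustration, and there is no locking. The patch below implements the real thing with struct lpi_range, alloc_lpi_range() and free_lpi_range() on a list_head sorted by list_sort() under a mutex.

/*
 * Minimal sketch of the range-list allocator described above.
 * All names here are invented for illustration only.
 */
#include <stdio.h>
#include <stdlib.h>

struct free_range {
	unsigned int base;		/* first free ID in this range */
	unsigned int span;		/* number of free IDs */
	struct free_range *next;	/* next range, kept sorted by base */
};

static struct free_range *free_list;

/* Allocation: take from the first range big enough, drop it once empty. */
static int range_alloc(unsigned int nr, unsigned int *base)
{
	struct free_range **pp, *r;

	for (pp = &free_list; (r = *pp); pp = &r->next) {
		if (r->span < nr)
			continue;
		*base = r->base;
		r->base += nr;
		r->span -= nr;
		if (!r->span) {
			*pp = r->next;
			free(r);
		}
		return 0;
	}
	return -1;	/* no contiguous range large enough */
}

/*
 * Freeing: insert a range at its sorted position, then fuse it with its
 * neighbours if they are adjacent.  This is the expensive path, which is
 * acceptable as long as freeing is rare.
 */
static int range_free(unsigned int base, unsigned int nr)
{
	struct free_range *prev = NULL, *r = free_list;
	struct free_range *new = malloc(sizeof(*new));

	if (!new)
		return -1;
	new->base = base;
	new->span = nr;

	while (r && r->base < base) {
		prev = r;
		r = r->next;
	}
	new->next = r;
	if (prev)
		prev->next = new;
	else
		free_list = new;

	if (r && new->base + new->span == r->base) {	/* fuse with next */
		new->span += r->span;
		new->next = r->next;
		free(r);
	}
	if (prev && prev->base + prev->span == new->base) {	/* fuse with prev */
		prev->span += new->span;
		prev->next = new->next;
		free(new);
	}
	return 0;
}

int main(void)
{
	unsigned int base;

	/* Initialising the allocator == freeing the whole ID space:
	 * 2^16 LPIs, minus the 8192 IDs reserved for SGIs/PPIs/SPIs. */
	range_free(8192, 65536 - 8192);

	if (!range_alloc(32, &base))
		printf("allocated 32 IDs at %u\n", base);	/* 8192 */

	/* Freeing merges the block back into a single range. */
	range_free(base, 32);
	printf("free list: %u..%u\n",
	       free_list->base, free_list->base + free_list->span - 1);
	return 0;
}

Running the sketch allocates 32 IDs starting at 8192 and, after freeing them, the list collapses back to a single range covering the whole ID space; the sort-and-merge cost lands entirely on the free path, which the commit message argues is rare. Note that, just as in the patch, initialising the allocator is nothing more than freeing the full ID range.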
-rw-r--r--	drivers/irqchip/irq-gic-v3-its.c | 191
1 file changed, 129 insertions(+), 62 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d7842d312d3e..9084a7e5a4b2 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -23,6 +23,8 @@
 #include <linux/dma-iommu.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
 #include <linux/log2.h>
 #include <linux/mm.h>
 #include <linux/msi.h>
@@ -1421,112 +1423,177 @@ static struct irq_chip its_irq_chip = {
 	.irq_set_vcpu_affinity	= its_irq_set_vcpu_affinity,
 };
 
+
 /*
  * How we allocate LPIs:
  *
- * The GIC has id_bits bits for interrupt identifiers. From there, we
- * must subtract 8192 which are reserved for SGIs/PPIs/SPIs. Then, as
- * we allocate LPIs by chunks of 32, we can shift the whole thing by 5
- * bits to the right.
+ * lpi_range_list contains ranges of LPIs that are to available to
+ * allocate from. To allocate LPIs, just pick the first range that
+ * fits the required allocation, and reduce it by the required
+ * amount. Once empty, remove the range from the list.
+ *
+ * To free a range of LPIs, add a free range to the list, sort it and
+ * merge the result if the new range happens to be adjacent to an
+ * already free block.
  *
- * This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations.
+ * The consequence of the above is that allocation is cost is low, but
+ * freeing is expensive. We assumes that freeing rarely occurs.
+ */
+
+/*
+ * Compatibility defines until we fully refactor the allocator
  */
 #define IRQS_PER_CHUNK_SHIFT	5
 #define IRQS_PER_CHUNK		(1UL << IRQS_PER_CHUNK_SHIFT)
 #define ITS_MAX_LPI_NRBITS	16 /* 64K LPIs */
 
-static unsigned long *lpi_bitmap;
-static u32 lpi_chunks;
-static DEFINE_SPINLOCK(lpi_lock);
+static DEFINE_MUTEX(lpi_range_lock);
+static LIST_HEAD(lpi_range_list);
 
-static int its_lpi_to_chunk(int lpi)
+struct lpi_range {
+	struct list_head	entry;
+	u32			base_id;
+	u32			span;
+};
+
+static struct lpi_range *mk_lpi_range(u32 base, u32 span)
 {
-	return (lpi - 8192) >> IRQS_PER_CHUNK_SHIFT;
+	struct lpi_range *range;
+
+	range = kzalloc(sizeof(*range), GFP_KERNEL);
+	if (range) {
+		INIT_LIST_HEAD(&range->entry);
+		range->base_id = base;
+		range->span = span;
+	}
+
+	return range;
 }
 
-static int its_chunk_to_lpi(int chunk)
+static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b)
 {
-	return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192;
+	struct lpi_range *ra, *rb;
+
+	ra = container_of(a, struct lpi_range, entry);
+	rb = container_of(b, struct lpi_range, entry);
+
+	return rb->base_id - ra->base_id;
 }
 
-static int __init its_lpi_init(u32 id_bits)
+static void merge_lpi_ranges(void)
 {
-	lpi_chunks = its_lpi_to_chunk(1UL << id_bits);
+	struct lpi_range *range, *tmp;
 
-	lpi_bitmap = kcalloc(BITS_TO_LONGS(lpi_chunks), sizeof(long),
-			     GFP_KERNEL);
-	if (!lpi_bitmap) {
-		lpi_chunks = 0;
-		return -ENOMEM;
+	list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
+		if (!list_is_last(&range->entry, &lpi_range_list) &&
+		    (tmp->base_id == (range->base_id + range->span))) {
+			tmp->base_id = range->base_id;
+			tmp->span += range->span;
+			list_del(&range->entry);
+			kfree(range);
+		}
 	}
+}
 
-	pr_info("ITS: Allocated %d chunks for LPIs\n", (int)lpi_chunks);
-	return 0;
+static int alloc_lpi_range(u32 nr_lpis, u32 *base)
+{
+	struct lpi_range *range, *tmp;
+	int err = -ENOSPC;
+
+	mutex_lock(&lpi_range_lock);
+
+	list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
+		if (range->span >= nr_lpis) {
+			*base = range->base_id;
+			range->base_id += nr_lpis;
+			range->span -= nr_lpis;
+
+			if (range->span == 0) {
+				list_del(&range->entry);
+				kfree(range);
+			}
+
+			err = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&lpi_range_lock);
+
+	pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis);
+	return err;
 }
 
-static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids)
+static int free_lpi_range(u32 base, u32 nr_lpis)
 {
-	unsigned long *bitmap = NULL;
-	int chunk_id;
-	int nr_chunks;
-	int i;
+	struct lpi_range *new;
+	int err = 0;
 
-	nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK);
+	mutex_lock(&lpi_range_lock);
 
-	spin_lock(&lpi_lock);
+	new = mk_lpi_range(base, nr_lpis);
+	if (!new) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	list_add(&new->entry, &lpi_range_list);
+	list_sort(NULL, &lpi_range_list, lpi_range_cmp);
+	merge_lpi_ranges();
+out:
+	mutex_unlock(&lpi_range_lock);
+	return err;
+}
+
+static int __init its_lpi_init(u32 id_bits)
+{
+	u32 lpis = (1UL << id_bits) - 8192;
+	int err;
+
+	/*
+	 * Initializing the allocator is just the same as freeing the
+	 * full range of LPIs.
+	 */
+	err = free_lpi_range(8192, lpis);
+	pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis);
+	return err;
+}
+
+static unsigned long *its_lpi_alloc_chunks(int nr_irqs, u32 *base, int *nr_ids)
+{
+	unsigned long *bitmap = NULL;
+	int err = 0;
+	int nr_lpis;
+
+	nr_lpis = round_up(nr_irqs, IRQS_PER_CHUNK);
 
 	do {
-		chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks,
-						      0, nr_chunks, 0);
-		if (chunk_id < lpi_chunks)
+		err = alloc_lpi_range(nr_lpis, base);
+		if (!err)
 			break;
 
-		nr_chunks--;
-	} while (nr_chunks > 0);
+		nr_lpis -= IRQS_PER_CHUNK;
+	} while (nr_lpis > 0);
 
-	if (!nr_chunks)
+	if (err)
 		goto out;
 
-	bitmap = kcalloc(BITS_TO_LONGS(nr_chunks * IRQS_PER_CHUNK),
-			 sizeof(long),
-			 GFP_ATOMIC);
+	bitmap = kcalloc(BITS_TO_LONGS(nr_lpis), sizeof (long), GFP_ATOMIC);
 	if (!bitmap)
 		goto out;
 
-	for (i = 0; i < nr_chunks; i++)
-		set_bit(chunk_id + i, lpi_bitmap);
-
-	*base = its_chunk_to_lpi(chunk_id);
-	*nr_ids = nr_chunks * IRQS_PER_CHUNK;
+	*nr_ids = nr_lpis;
 
 out:
-	spin_unlock(&lpi_lock);
-
 	if (!bitmap)
 		*base = *nr_ids = 0;
 
 	return bitmap;
 }
 
-static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids)
+static void its_lpi_free_chunks(unsigned long *bitmap, u32 base, u32 nr_ids)
 {
-	int lpi;
-
-	spin_lock(&lpi_lock);
-
-	for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) {
-		int chunk = its_lpi_to_chunk(lpi);
-
-		BUG_ON(chunk > lpi_chunks);
-		if (test_bit(chunk, lpi_bitmap)) {
-			clear_bit(chunk, lpi_bitmap);
-		} else {
-			pr_err("Bad LPI chunk %d\n", chunk);
-		}
-	}
-
-	spin_unlock(&lpi_lock);
-
+	WARN_ON(free_lpi_range(base, nr_ids));
 	kfree(bitmap);
 }
 