TILER: Make tiler nv12 support a configuration option

NV12 support in the TILER driver is now a kernel configuration option (TILER_ENABLE_NV12). If enabled, NV12 support is compiled into the driver and its APIs are available; if disabled, the NV12 code is left out of the build.

Signed-off-by: Andy Gross <andy.gross@ti.com>
author: Andy Gross <andy.gross@ti.com> 2011-06-07 23:15:55 -0400
committer: Paolo Pisati <paolo.pisati@canonical.com> 2012-08-17 04:19:05 -0400
commit: 2f33160580154c63f94cb96d1891391bc0fdeb63 (patch)
tree: 7693a7d0cbf9464b8ed993dd8c387ad799696b74
parent: 5b461ddccf87ad46a710885a92ee85b79a3d45b7 (diff)
7 files changed, 460 insertions, 399 deletions
diff --git a/drivers/media/video/tiler/Kconfig b/drivers/media/video/tiler/Kconfig
index 8ff8ede9164..a22746ed152 100644
--- a/drivers/media/video/tiler/Kconfig
+++ b/drivers/media/video/tiler/Kconfig
@@ -124,3 +124,13 @@ config TILER_EXPOSE_SSPTR
           You can use this flag to see if the userspace is relying on
           having access to the SSPtr.
+config TILER_ENABLE_NV12
+        bool "Enable NV12 support"
+        default y
+        depends on TI_TILER
+        help
+            This option enables NV12 functionality in the TILER driver.
+            If set, nv12 support will be compiled into the driver and APIs
+            will be enabled.
diff --git a/drivers/media/video/tiler/Makefile b/drivers/media/video/tiler/Makefile
index b3276440304..ad2dfa22ae7 100644
--- a/drivers/media/video/tiler/Makefile
+++ b/drivers/media/video/tiler/Makefile
@@ -3,6 +3,9 @@ obj-$(CONFIG_TI_TILER) += tcm/
 obj-$(CONFIG_TI_TILER) += tiler.o
 tiler-objs = tiler-geom.o tiler-main.o tiler-iface.o tiler-reserve.o tmm-pat.o
+ifdef CONFIG_TILER_ENABLE_NV12
+tiler-objs += tiler-nv12.o
+endif
 obj-$(CONFIG_TI_TILER) += tiler_dmm.o
 tiler_dmm-objs = dmm.o
diff --git a/drivers/media/video/tiler/_tiler.h b/drivers/media/video/tiler/_tiler.h
index 41740b4cce7..375cdbae6fa 100644
--- a/drivers/media/video/tiler/_tiler.h
+++ b/drivers/media/video/tiler/_tiler.h
@@ -105,8 +105,10 @@ struct tiler_ops {
        s32 (*lay_2d) (enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band,
                        u16 align, u16 offs, struct gid_info *gi,
                        struct list_head *pos);
+#ifdef CONFIG_TILER_ENABLE_NV12
        s32 (*lay_nv12) (int n, u16 w, u16 w1, u16 h, struct gid_info *gi,
-                                                                        u8 *p);
+                         u8 *p);
+#endif
        /* group operations */
        struct gid_info * (*get_gi) (struct process_info *pi, u32 gid);
        void (*release_gi) (struct gid_info *gi);
@@ -131,8 +133,9 @@ struct tiler_ops {
        /* additional info */
        const struct file_operations *fops;
+#ifdef CONFIG_TILER_ENABLE_NV12
        bool nv12_packed;       /* whether NV12 is packed into same container */
+#endif
        u32 page;               /* page size */
        u32 width;              /* container width */
        u32 height;             /* container height */
@@ -141,6 +144,8 @@ struct tiler_ops {
 void tiler_iface_init(struct tiler_ops *tiler);
 void tiler_geom_init(struct tiler_ops *tiler);
 void tiler_reserve_init(struct tiler_ops *tiler);
+void tiler_nv12_init(struct tiler_ops *tiler);
+u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area);
 struct process_info *__get_pi(pid_t pid, bool kernel);
diff --git a/drivers/media/video/tiler/tiler-iface.c b/drivers/media/video/tiler/tiler-iface.c
index 3e20599a9e9..534fb49c536 100644
--- a/drivers/media/video/tiler/tiler-iface.c
+++ b/drivers/media/video/tiler/tiler-iface.c
@@ -505,12 +505,16 @@ static long tiler_ioctl(struct file *filp, u32 cmd, unsigned long arg)
                        return -EFAULT;
                if (block_info.fmt == TILFMT_8AND16)
+#ifdef CONFIG_TILER_ENABLE_NV12
                        ops->reserve_nv12(block_info.key,
                                          block_info.dim.area.width,
                                          block_info.dim.area.height,
                                          block_info.align,
                                          block_info.offs,
                                          block_info.group_id, pi);
+#else
+                        return -EINVAL;
+#endif
                else
                        ops->reserve(block_info.key,
                                     block_info.fmt,
@@ -672,6 +676,7 @@ void tiler_reserve(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
 }
 EXPORT_SYMBOL(tiler_reserve);
+#ifdef CONFIG_TILER_ENABLE_NV12
 void tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
                        u32 gid, pid_t pid)
 {
@@ -687,6 +692,7 @@ void tiler_reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs)
        tiler_reservex_nv12(n, width, height, align, offs, 0, current->tgid);
 }
 EXPORT_SYMBOL(tiler_reserve_nv12);
+#endif
 s32 tiler_allocx(struct tiler_block_t *blk, enum tiler_fmt fmt,
                                u32 align, u32 offs, u32 gid, pid_t pid)
diff --git a/drivers/media/video/tiler/tiler-main.c b/drivers/media/video/tiler/tiler-main.c
index bffd8cc82c2..23d130f897f 100644
--- a/drivers/media/video/tiler/tiler-main.c
+++ b/drivers/media/video/tiler/tiler-main.c
@@ -513,6 +513,7 @@ static s32 lay_2d(enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band,
        return n;
 }
+#ifdef CONFIG_TILER_ENABLE_NV12
 /* layout reserved nv12 blocks in a larger area */
 /* NOTE: area w(idth), w1 (8-bit block width), h(eight) are in slots */
 /* p is a pointer to a packing description, which is a list of offsets in
@@ -558,6 +559,7 @@ static s32 lay_nv12(int n, u16 w, u16 w1, u16 h, struct gid_info *gi, u8 *p)
        mutex_unlock(&mtx);
        return n;
 }
+#endif
 static void _m_unpin(struct mem_info *mi)
 {
@@ -1221,7 +1223,9 @@ static s32 __init tiler_init(void)
        tiler.lock = find_n_lock;
        tiler.unlock_free = unlock_n_free;
        tiler.lay_2d = lay_2d;
+#ifdef CONFIG_TILER_ENABLE_NV12
        tiler.lay_nv12 = lay_nv12;
+#endif
        tiler.destroy_group = destroy_group;
        tiler.lock_by_ssptr = find_block_by_ssptr;
        tiler.describe = fill_block_info;
@@ -1233,6 +1237,9 @@ static s32 __init tiler_init(void)
        tiler_geom_init(&tiler);
        tiler_reserve_init(&tiler);
        tiler_iface_init(&tiler);
+#ifdef CONFIG_TILER_ENABLE_NV12
+        tiler_nv12_init(&tiler);
+#endif
        /* check module parameters for correctness */
        if (default_align > PAGE_SIZE ||
@@ -1272,7 +1279,9 @@ static s32 __init tiler_init(void)
        area.y1 = tiler.height - 1;
        tmm_unpin(tmm_pat, area);
+#ifdef CONFIG_TILER_ENABLE_NV12
        tiler.nv12_packed = tcm[TILFMT_8BIT] == tcm[TILFMT_16BIT];
+#endif
        tiler_device = kmalloc(sizeof(*tiler_device), GFP_KERNEL);
        if (!tiler_device || !sita || !tmm_pat) {
diff --git a/drivers/media/video/tiler/tiler-nv12.c b/drivers/media/video/tiler/tiler-nv12.c
new file mode 100644
index 00000000000..c16a14015ae
--- /dev/null
+++ b/drivers/media/video/tiler/tiler-nv12.c
@@ -0,0 +1,423 @@
+/*
+ * tiler-nv12.c
+ *
+ * TILER driver NV12 area reservation functions for TI TILER hardware block.
+ *
+ * Author: Lajos Molnar <molnar@ti.com>
+ *
+ * Copyright (C) 2009-2010 Texas Instruments, Inc.
+ *
+ * This package is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+#include "_tiler.h"
+static struct tiler_ops *ops;   /* shared methods and variables */
+static int band_8;              /* size of 8-bit band in slots */
+static int band_16;             /* size of 16-bit band in slots */
+/*
+ * NV12 Reservation Functions
+ *
+ * TILER is designed so that a (w * h) * 8bit area is twice as wide as a
+ * (w/2 * h/2) * 16bit area.  Since having pairs of such 8-bit and 16-bit
+ * blocks is a common usecase for TILER, we optimize packing these into a
+ * TILER area.
+ *
+ * During reservation we want to find the most effective packing (most used area
+ * in the smallest overall area)
+ *
+ * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit
+ * blocks into separate container areas, or pack them together into same area.
+ */
+/**
+ * Calculate effectiveness of packing. We weight total area much higher than
+ * packing efficiency to get the smallest overall container use.
+ *
+ * Both n and area are used as divisors, so a packing that holds no buffers
+ * (n == 0) or spans no area (area == 0) is not evaluated; it is given the
+ * lowest score (0) so that it never wins a comparison against a real packing.
+ *
+ * @param w             width of one (8-bit) block
+ * @param n             buffers in a packing
+ * @param area          width of packing area
+ * @param n_total       total number of buffers to be packed
+ * @return effectiveness, the higher the better; 0 for an empty packing
+ */
+static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total)
+{
+        /* an empty packing cannot be scored: avoid dividing by zero below */
+        if (!n || !area)
+                return 0;
+        return 0x10000000 -
+                /* weigh against total area needed (for all buffers) */
+                /* 64-slots = -2048 */
+                DIV_ROUND_UP(n_total, n) * area * 32 +
+                /* packing efficiency (0 - 1024) */
+                1024 * n * ((w * 3 + 1) >> 1) / area;
+}
+/**
+ * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate
+ * areas.
+ *
+ * tiler_best2pack() is called once for the 8-bit blocks (band_8) and once
+ * for the 16-bit blocks (band_16), the latter with the offset and alignment
+ * halved and the width halved rounding up.  Both calls share the same n and
+ * area storage, so the second call starts from whatever the first left in n
+ * and its area result is the one that gets scaled.
+ * NOTE(review): presumably tiler_best2pack() lowers *n to the count that
+ * fits, and the factor 3 accounts for an 8-bit area twice as wide as the
+ * 16-bit one - confirm against tiler_best2pack().
+ *
+ * @author a0194118 (7/16/2010)
+ *
+ * @param o     desired offset (<a)
+ * @param a     desired alignment (>=2)
+ * @param w     block width (>0)
+ * @param n     number of blocks desired
+ * @param area  pointer to store total area needed
+ *
+ * @return number of blocks that can be allocated
+ */
+static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area)
+{
+        tiler_best2pack(o, a, band_8, w, &n, area);     /* 8-bit blocks */
+        tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area); /* 16-bit blocks */
+        *area *= 3;     /* scale the area left by the 16-bit call */
+        return n;
+}
+/*
+ * Specialized NV12 Reservation Algorithms
+ *
+ * We use 4 packing methods that pack nv12 blocks into the same area.  Together
+ * these 4 methods give the optimal result for most possible input parameters.
+ *
+ * For now we pack into a 64-slot area, so that we don't have to worry about
+ * stride issues (all blocks get 4K stride). For some of the algorithms this
+ * could be true even if the area was 128.
+ */
+/**
+ * Packing types are marked using a letter sequence, capital letters denoting
+ * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks.
+ *
+ * All methods have the following parameters. They also define the maximum
+ * number of coordinates that could potentially be packed.
+ *
+ * @param o, a, w, n offset, alignment, width, # of blocks as usual
+ * @param area          pointer to store area needed for packing
+ * @param p             pointer to store packing coordinates
+ * @return              number of blocks that can be packed
+ */
+/*
+ * Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area
+ *
+ * Starting at x = o, 8-bit blocks are placed at successive positions
+ * (each advanced with ALIGN() by a, keeping offset o) for as long as they
+ * end at or below the bound u = (area + x) / 2.  The search then restarts
+ * from the position derived from l, the 16-bit offset computed after the
+ * last block placed, and repeats with a new, tighter bound.
+ *
+ * For every block packed two bytes are written through p: the 8-bit block
+ * offset followed by the matching 16-bit block offset.  Both are u16 values
+ * narrowed to u8 when stored.  *area is always set to band_8.
+ *
+ * Returns the number of blocks packed (at most n).  BUG_ON() fires instead
+ * of storing a further block once MAX_A - 1 blocks have been packed.
+ */
+#define MAX_A 21
+static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+        u16 x = o, u, l, m = 0;         /* m counts blocks packed so far */
+        *area = band_8;
+        while (x + w < *area && m < n) {
+                /* current 8bit upper bound (a) is next 8bit lower bound (B) */
+                l = u = (*area + x) >> 1;
+                /* pack until upper bound */
+                while (x + w <= u && m < n) {
+                        /* save packing */
+                        BUG_ON(m + 1 >= MAX_A);
+                        *p++ = x;       /* 8-bit block offset */
+                        *p++ = l;       /* 16-bit block offset */
+                        l = (*area + x + w + 1) >> 1;   /* 16-bit offset for the next block */
+                        x = ALIGN(x + w - o, a) + o;    /* next position past this block */
+                        m++;
+                }
+                x = ALIGN(l - o, a) + o;        /* set new lower bound */
+        }
+        return m;
+}
+/*
+ * Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area
+ *
+ * Runs nv12_A() with the offset replaced by (a - (o + w) % a) % a and then
+ * mirrors every stored coordinate about the area: an 8-bit offset x becomes
+ * area - x - w, a 16-bit offset becomes area - offset - (w + 1) / 2.
+ *
+ * p receives the mirrored (8-bit, 16-bit) offset pairs and *area is the
+ * value set by nv12_A().  Returns the number of blocks packed.
+ */
+static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+        u16 m;
+        /* this is a mirrored packing of method A */
+        n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p);
+        /* reverse packing */
+        for (m = 0; m < n; m++) {
+                *p = *area - *p - w;                    /* mirror 8-bit offset */
+                p++;
+                *p = *area - *p - ((w + 1) >> 1);       /* mirror 16-bit offset */
+                p++;
+        }
+        return n;
+}
+/*
+ * Method B: simple layout: aAbcBdeCfgDhEFGH
+ *
+ * Places 8-bit blocks at o, o + a, o + 2a, ... for as long as they end
+ * within the area (band_8), pairing each with a 16-bit block at half of its
+ * offset.  Nothing is packed unless the offset checks in the if () below
+ * hold; in that case 0 is returned and p is left untouched.
+ *
+ * p receives one (8-bit offset, 16-bit offset) byte pair per block.
+ * Returns the number of blocks packed (at most n).  BUG_ON() fires instead
+ * of storing a further block once MAX_B - 1 blocks have been packed.
+ */
+#define MAX_B 8
+static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+        u16 e  = (o + w) % a;   /* end offset */
+        u16 o1 = (o >> 1) % a;                  /* half offset */
+        u16 e1 = ((o + w + 1) >> 1) % a;        /* half end offset */
+        u16 o2 = o1 + (a >> 2);                 /* 2nd half offset */
+        u16 e2 = e1 + (a >> 2);                 /* 2nd half end offset */
+        u16 m = 0;                              /* blocks packed so far */
+        *area = band_8;
+        /* ensure 16-bit blocks don't overlap 8-bit blocks */
+        /* width cannot wrap around alignment, half block must be before block,
+           2nd half can be before or after */
+        if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e))
+                while (o + w <= *area && m < n) {
+                        BUG_ON(m + 1 >= MAX_B);
+                        *p++ = o;       /* 8-bit block offset */
+                        *p++ = o >> 1;  /* 16-bit block offset */
+                        m++;
+                        o += a;         /* next block one alignment step on */
+                }
+        return m;
+}
+/*
+ * Method C: butterfly layout: AAbbaaBB
+ *
+ * Blocks are placed alternately from the start of the area (band_8) going
+ * up and from o2 going down, in steps of e = ALIGN(w, a).  A block placed
+ * from the start gets its 16-bit offset at (offset + area) / 2, a block
+ * placed from the end gets it at half of its own offset.  m bounds the
+ * number of loop iterations; if it comes out as zero or negative nothing is
+ * packed.
+ *
+ * p receives one (8-bit offset, 16-bit offset) byte pair per block.
+ * Returns the number of blocks packed.  BUG_ON() guards the MAX_C limit at
+ * the top of every iteration.
+ */
+#define MAX_C 20
+static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+        int m = 0;
+        u16 o2, e = ALIGN(w, a), i = 0, j = 0;  /* i: iteration, j: blocks packed */
+        *area = band_8;
+        o2 = *area - (a - (o + w) % a) % a;     /* end of last possible block */
+        m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1;
+        for (i = j = 0; i < m && j < n; i++, j++) {
+                BUG_ON(j + 1 >= MAX_C);
+                *p++ = o + i * e;                       /* block from the start: 8-bit */
+                *p++ = (o + i * e + *area) >> 1;        /* ... and its 16-bit offset */
+                if (++j < n) {
+                        *p++ = o2 - i * e - w;          /* block from the end: 8-bit */
+                        *p++ = (o2 - i * e - w) >> 1;   /* ... and its 16-bit offset */
+                }
+        }
+        return j;
+}
+/*
+ * Method D: for large allocation: aA or Aa
+ *
+ * Packs at most one block.  *area is set to o + w rounded up with ALIGN()
+ * to band_8.  For each shift d = 0, a, 2a, ... that keeps the 8-bit block
+ * inside the area, the 16-bit block is first tried before the 8-bit block
+ * and then after it; the first arrangement that fits is stored through p as
+ * an (8-bit offset, 16-bit offset) byte pair.
+ *
+ * Returns 1 if a block could be placed, 0 otherwise (including n == 0).
+ *
+ * NOTE(review): the "after" branch stores o while the "before" branch
+ * stores o + d; the two only agree for d == 0.  The visible caller
+ * (nv12_together) discards these coordinates - confirm whether o + d was
+ * intended.
+ */
+#define MAX_D 1
+static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+        u16 o1, w1 = (w + 1) >> 1, d;   /* w1: 16-bit block width, d: shift tried */
+        *area = ALIGN(o + w, band_8);
+        for (d = 0; n > 0 && d + o + w <= *area; d += a) {
+                /* try to fit 16-bit before 8-bit */
+                o1 = ((o + d) % band_8) >> 1;
+                if (o1 + w1 <= o + d) {
+                        *p++ = o + d;
+                        *p++ = o1;
+                        return 1;
+                }
+                /* try to fit 16-bit after 8-bit */
+                o1 += ALIGN(d + o + w - o1, band_16);
+                if (o1 + w1 <= *area) {
+                        *p++ = o;
+                        *p++ = o1;
+                        return 1;
+                }
+        }
+        return 0;
+}
+/**
+ * Umbrella nv12 packing method. This selects the best packings from the above
+ * methods.  It also contains hardcoded packings for parameter combinations
+ * that have more efficient packings. This method is guaranteed to provide
+ * the optimal packing if 2 <= a <= 64 and w <= 64 and n is large.
+ *
+ * Methods A, -A, B and C are tried in that order, each only if the best
+ * result so far packs fewer than n blocks.  The hardcoded table is consulted
+ * next, and method D is used only if nothing else packed any block.
+ *
+ * @param o, a, w, n    offset, alignment, width, # of blocks as usual
+ * @param area          pointer to store area needed for the chosen packing
+ * @param packing       pointer to store the chosen (8-bit, 16-bit) offset
+ *                      pairs, 2 bytes per block; not written if only method D
+ *                      succeeded or if no block could be packed
+ * @return              number of blocks that can be packed
+ */
+#define MAX_ANY 21      /* must be MAX(method-MAX-s, hardcoded n-s) */
+static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing)
+{
+        u16 n_best, a_best, n2, a_, o_, w_;
+        /* algo results (packings) */
+        u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2];
+        u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2];
+        u8 pack_D[MAX_D * 2];
+        /*
+         * Hardcoded packings.  They are sorted by increasing area, and then by
+         * decreasing n.  We may not get the best efficiency if less than n
+         * blocks are needed as packings are not necessarily sorted in
+         * increasing order.  However, for those n-s one of the other 4 methods
+         * may return the optimal packing.
+         *
+         * Each entry is: n, o, w, a, area, followed by n coordinate pairs.
+         * The table is read-only, so it is kept static const rather than
+         * being rebuilt on the stack on every call.
+         */
+        static const u8 packings[] = {
+                /* n=9, o=2, w=4, a=4, area=64 */
+                9, 2, 4, 4, 64,
+                        /* 8-bit, 16-bit block coordinate pairs */
+                        2, 33,  6, 35,  10, 37, 14, 39, 18, 41,
+                        46, 23, 50, 25, 54, 27, 58, 29,
+                /* o=0, w=12, a=4, n=3 */
+                3, 0, 12, 4, 64,
+                        0, 32,  12, 38, 48, 24,
+                /* end */
+                0
+        };
+        const u8 *p = packings, *p_best = NULL;
+        /* see which method gives the best packing */
+        /* start with smallest area algorithms A, B & C, stop if we can
+           pack all buffers */
+        n_best = nv12_A(o, a, w, n, area, pack_A);
+        p_best = pack_A;
+        if (n_best < n) {
+                n2 = nv12_revA(o, a, w, n, &a_best, pack_rA);
+                if (n2 > n_best) {
+                        n_best = n2;
+                        p_best = pack_rA;
+                        *area = a_best;
+                }
+        }
+        if (n_best < n) {
+                n2 = nv12_B(o, a, w, n, &a_best, pack_B);
+                if (n2 > n_best) {
+                        n_best = n2;
+                        p_best = pack_B;
+                        *area = a_best;
+                }
+        }
+        if (n_best < n) {
+                n2 = nv12_C(o, a, w, n, &a_best, pack_C);
+                if (n2 > n_best) {
+                        n_best = n2;
+                        p_best = pack_C;
+                        *area = a_best;
+                }
+        }
+        /* traverse any special packings */
+        while (*p) {
+                /* entry header: n, o, w, a (area follows) */
+                n2 = *p++;
+                o_ = *p++;
+                w_ = *p++;
+                a_ = *p++;
+                /* stop if we already have a better packing */
+                if (n2 < n_best)
+                        break;
+                /* check if this packing is satisfactory */
+                if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) {
+                        *area = *p++;
+                        n_best = min(n2, n);
+                        p_best = p;     /* now points at the coordinate pairs */
+                        break;
+                }
+                /* skip to next packing */
+                p += 1 + n2 * 2;
+        }
+        /*
+         * If so far unsuccessful, check whether 8 and 16 bit blocks can be
+         * co-packed.  This will actually be done in the end by the normal
+         * allocation, but we need to reserve a big-enough area.
+         */
+        if (!n_best) {
+                n_best = nv12_D(o, a, w, n, area, pack_D);
+                p_best = NULL;  /* method D coordinates are not reported */
+        }
+        /* store best packing */
+        if (p_best && n_best) {
+                BUG_ON(n_best > MAX_ANY);
+                memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A));
+        }
+        return n_best;
+}
+/*
+ * reserve nv12 blocks
+ *
+ * Reserves n NV12 buffers (an 8-bit block plus a half-width 16-bit block
+ * each) for group gid of process pi.  In every round the separate and the
+ * together packings are compared with nv12_eff() and the better one is laid
+ * out; if the separate layout fails, the together layout is still tried.
+ *
+ * The function returns without reserving anything if width, height or n is
+ * zero, if offs is odd or not below align, if align is not below PAGE_SIZE,
+ * if n exceeds half of the container's slots, or if the dimensions cannot be
+ * converted or the group cannot be obtained.  Reservation stops at the first
+ * round that fails or that reserves no block at all.
+ *
+ * @param n             number of NV12 buffers to reserve
+ * @param width, height dimensions of one buffer
+ * @param align, offs   requested alignment and offset
+ * @param gid           group ID the reserved blocks belong to
+ * @param pi            process info of the owner
+ */
+static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
+                                        u32 gid, struct process_info *pi)
+{
+        u16 w, h, band, a = align, o = offs;
+        struct gid_info *gi;
+        int res = 0, res2, i;
+        u16 n_t, n_s, area_t, area_s;
+        u8 packing[2 * MAX_ANY];
+        struct list_head reserved = LIST_HEAD_INIT(reserved);
+        /* adjust alignment to the largest slot width (128 bytes) */
+        a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a);
+        /* Check input parameters for correctness, and support */
+        if (!width || !height || !n ||
+            offs >= align || offs & 1 ||
+            align >= PAGE_SIZE ||
+            n > ops->width * ops->height / 2)
+                return;
+        /* calculate dimensions, band, offs and alignment in slots */
+        if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o,
+                                                                        NULL))
+                return;
+        /* get group context */
+        gi = ops->get_gi(pi, gid);
+        if (!gi)
+                return;
+        /* reserve in groups until failed or all is reserved */
+        for (i = 0; i < n && res >= 0; i += res) {
+                /* check packing separately vs together */
+                n_s = nv12_separate(o, a, w, n - i, &area_s);
+                if (ops->nv12_packed)
+                        n_t = nv12_together(o, a, w, n - i, &area_t, packing);
+                else
+                        n_t = 0;
+                /* pack based on better efficiency */
+                res = -1;
+                if (!ops->nv12_packed ||
+                        nv12_eff(w, n_s, area_s, n - i) >
+                        nv12_eff(w, n_t, area_t, n - i)) {
+                        /*
+                         * Reserve blocks separately into a temporary list, so
+                         * that we can free them if unsuccessful. We need to be
+                         * able to reserve both 8- and 16-bit blocks as the
+                         * offsets of them must match.
+                         */
+                        res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o,
+                                                gi, &reserved);
+                        res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h,
+                                band_16, a >> 1, o >> 1, gi, &reserved);
+                        if (res2 < 0 || res < 0 || res != res2) {
+                                /* clean up */
+                                ops->release(&reserved);
+                                res = -1;
+                        } else {
+                                /* add list to reserved */
+                                ops->add_reserved(&reserved, gi);
+                        }
+                }
+                /* if separate packing failed, still try to pack together */
+                if (res < 0 && ops->nv12_packed && n_t) {
+                        /* pack together */
+                        res = ops->lay_nv12(n_t, area_t, w, h, gi, packing);
+                }
+                /*
+                 * A round that reserved nothing leaves i unchanged, so the
+                 * next round would repeat the same request: stop instead of
+                 * looping forever.
+                 */
+                if (!res)
+                        break;
+        }
+        ops->release_gi(gi);
+}
+/*
+ * initialize shared method pointers and global static variables
+ *
+ * Keeps the given tiler_ops instance for use by this file, installs
+ * reserve_nv12() as its reserve_nv12 method, and derives band_8 and band_16
+ * from the 8-bit and 16-bit geometry as PAGE_SIZE / slot_w / bpp.
+ */
+void tiler_nv12_init(struct tiler_ops *tiler)
+{
+        ops = tiler;
+        ops->reserve_nv12 = reserve_nv12;
+        band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w
+                / ops->geom(TILFMT_8BIT)->bpp;
+        band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w
+                / ops->geom(TILFMT_16BIT)->bpp;
+}
diff --git a/drivers/media/video/tiler/tiler-reserve.c b/drivers/media/video/tiler/tiler-reserve.c
index 6715d3ddd6a..770fb07c5bb 100644
--- a/drivers/media/video/tiler/tiler-reserve.c
+++ b/drivers/media/video/tiler/tiler-reserve.c
@@ -19,8 +19,6 @@
 #include "_tiler.h"
 static struct tiler_ops *ops;   /* shared methods and variables */
-static int band_8;              /* size of 8-bit band in slots */
-static int band_16;             /* size of 16-bit band in slots */
 /**
 * Calculate the maximum number buffers that can be packed next to each other,
@@ -38,7 +36,7 @@ static int band_16;		/* size of 16-bit band in slots */
 *
 * @return packing efficiency (0-1024)
 */
-static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area)
+u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area)
 {
        u16 m = 0, max_n = *n;          /* m is mostly n - 1 */
        u16 e = ALIGN(w, a);            /* effective width of one block */
@@ -71,393 +69,6 @@ static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area)
        return best_eff;
 }
-/*
- * NV12 Reservation Functions
- *
- * TILER is designed so that a (w * h) * 8bit area is twice as wide as a
- * (w/2 * h/2) * 16bit area.  Since having pairs of such 8-bit and 16-bit
- * blocks is a common usecase for TILER, we optimize packing these into a
- * TILER area.
- *
- * During reservation we want to find the most effective packing (most used area
- * in the smallest overall area)
- *
- * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit
- * blocks into separate container areas, or pack them together into same area.
- */
-/**
- * Calculate effectiveness of packing. We weight total area much higher than
- * packing efficiency to get the smallest overall container use.
- *
- * @param w             width of one (8-bit) block
- * @param n             buffers in a packing
- * @param area          width of packing area
- * @param n_total       total number of buffers to be packed
- * @return effectiveness, the higher the better
- */
-static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total)
-{
-        return 0x10000000 -
-                /* weigh against total area needed (for all buffers) */
-                /* 64-slots = -2048 */
-                DIV_ROUND_UP(n_total, n) * area * 32 +
-                /* packing efficiency (0 - 1024) */
-                1024 * n * ((w * 3 + 1) >> 1) / area;
-}
-/**
- * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate
- * areas.
- *
- * @author a0194118 (7/16/2010)
- *
- * @param o     desired offset (<a)
- * @param a     desired alignment (>=2)
- * @param w     block width (>0)
- * @param n     number of blocks desired
- * @param area  pointer to store total area needed
- *
- * @return number of blocks that can be allocated
- */
-static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area)
-{
-        tiler_best2pack(o, a, band_8, w, &n, area);
-        tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area);
-        *area *= 3;
-        return n;
-}
-/*
- * Specialized NV12 Reservation Algorithms
- *
- * We use 4 packing methods that pack nv12 blocks into the same area.  Together
- * these 4 methods give the optimal result for most possible input parameters.
- *
- * For now we pack into a 64-slot area, so that we don't have to worry about
- * stride issues (all blocks get 4K stride). For some of the algorithms this
- * could be true even if the area was 128.
- */
-/**
- * Packing types are marked using a letter sequence, capital letters denoting
- * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks.
- *
- * All methods have the following parameters. They also define the maximum
- * number of coordinates that could potentially be packed.
- *
- * @param o, a, w, n offset, alignment, width, # of blocks as usual
- * @param area          pointer to store area needed for packing
- * @param p             pointer to store packing coordinates
- * @return              number of blocks that can be packed
- */
-/* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */
-#define MAX_A 21
-static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
-{
-        u16 x = o, u, l, m = 0;
-        *area = band_8;
-        while (x + w < *area && m < n) {
-                /* current 8bit upper bound (a) is next 8bit lower bound (B) */
-                l = u = (*area + x) >> 1;
-                /* pack until upper bound */
-                while (x + w <= u && m < n) {
-                        /* save packing */
-                        BUG_ON(m + 1 >= MAX_A);
-                        *p++ = x;
-                        *p++ = l;
-                        l = (*area + x + w + 1) >> 1;
-                        x = ALIGN(x + w - o, a) + o;
-                        m++;
-                }
-                x = ALIGN(l - o, a) + o;        /* set new lower bound */
-        }
-        return m;
-}
-/* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */
-static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
-{
-        u16 m;
-        /* this is a mirrored packing of method A */
-        n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p);
-        /* reverse packing */
-        for (m = 0; m < n; m++) {
-                *p = *area - *p - w;
-                p++;
-                *p = *area - *p - ((w + 1) >> 1);
-                p++;
-        }
-        return n;
-}
-/* Method B: simple layout: aAbcBdeCfgDhEFGH */
-#define MAX_B 8
-static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
-{
-        u16 e  = (o + w) % a;   /* end offset */
-        u16 o1 = (o >> 1) % a;                  /* half offset */
-        u16 e1 = ((o + w + 1) >> 1) % a;        /* half end offset */
-        u16 o2 = o1 + (a >> 2);                 /* 2nd half offset */
-        u16 e2 = e1 + (a >> 2);                 /* 2nd half end offset */
-        u16 m = 0;
-        *area = band_8;
-        /* ensure 16-bit blocks don't overlap 8-bit blocks */
-        /* width cannot wrap around alignment, half block must be before block,
-           2nd half can be before or after */
-        if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e))
-                while (o + w <= *area && m < n) {
-                        BUG_ON(m + 1 >= MAX_B);
-                        *p++ = o;
-                        *p++ = o >> 1;
-                        m++;
-                        o += a;
-                }
-        return m;
-}
-/* Method C: butterfly layout: AAbbaaBB */
-#define MAX_C 20
-static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
-{
-        int m = 0;
-        u16 o2, e = ALIGN(w, a), i = 0, j = 0;
-        *area = band_8;
-        o2 = *area - (a - (o + w) % a) % a;     /* end of last possible block */
-        m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1;
-        for (i = j = 0; i < m && j < n; i++, j++) {
-                BUG_ON(j + 1 >= MAX_C);
-                *p++ = o + i * e;
-                *p++ = (o + i * e + *area) >> 1;
-                if (++j < n) {
-                        *p++ = o2 - i * e - w;
-                        *p++ = (o2 - i * e - w) >> 1;
-                }
-        }
-        return j;
-}
-/* Method D: for large allocation: aA or Aa */
-#define MAX_D 1
-static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
-{
-        u16 o1, w1 = (w + 1) >> 1, d;
-        *area = ALIGN(o + w, band_8);
-        for (d = 0; n > 0 && d + o + w <= *area; d += a) {
-                /* try to fit 16-bit before 8-bit */
-                o1 = ((o + d) % band_8) >> 1;
-                if (o1 + w1 <= o + d) {
-                        *p++ = o + d;
-                        *p++ = o1;
-                        return 1;
-                }
-                /* try to fit 16-bit after 8-bit */
-                o1 += ALIGN(d + o + w - o1, band_16);
-                if (o1 + w1 <= *area) {
-                        *p++ = o;
-                        *p++ = o1;
-                        return 1;
-                }
-        }
-        return 0;
-}
-/**
- * Umbrella nv12 packing method. This selects the best packings from the above
- * methods.  It also contains hardcoded packings for parameter combinations
- * that have more efficient packings. This method provides is guaranteed to
- * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large.
- */
-#define MAX_ANY 21      /* must be MAX(method-MAX-s, hardcoded n-s) */
-static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing)
-{
-        u16 n_best, a_best, n2, a_, o_, w_;
-        /* algo results (packings) */
-        u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2];
-        u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2];
-        u8 pack_D[MAX_D * 2];
-        /*
-         * Hardcoded packings.  They are sorted by increasing area, and then by
-         * decreasing n.  We may not get the best efficiency if less than n
-         * blocks are needed as packings are not necessarily sorted in
-         * increasing order.  However, for those n-s one of the other 4 methods
-         * may return the optimal packing.
-         */
-        u8 packings[] = {
-                /* n=9, o=2, w=4, a=4, area=64 */
-                9, 2, 4, 4, 64,
-                        /* 8-bit, 16-bit block coordinate pairs */
-                        2, 33,  6, 35,  10, 37, 14, 39, 18, 41,
-                        46, 23, 50, 25, 54, 27, 58, 29,
-                /* o=0, w=12, a=4, n=3 */
-                3, 0, 12, 4, 64,
-                        0, 32,  12, 38, 48, 24,
-                /* end */
-                0
-        }, *p = packings, *p_best = NULL, *p_end;
-        p_end = packings + sizeof(packings) - 1;
-        /* see which method gives the best packing */
-        /* start with smallest area algorithms A, B & C, stop if we can
-           pack all buffers */
-        n_best = nv12_A(o, a, w, n, area, pack_A);
-        p_best = pack_A;
-        if (n_best < n) {
-                n2 = nv12_revA(o, a, w, n, &a_best, pack_rA);
-                if (n2 > n_best) {
-                        n_best = n2;
-                        p_best = pack_rA;
-                        *area = a_best;
-                }
-        }
-        if (n_best < n) {
-                n2 = nv12_B(o, a, w, n, &a_best, pack_B);
-                if (n2 > n_best) {
-                        n_best = n2;
-                        p_best = pack_B;
-                        *area = a_best;
-                }
-        }
-        if (n_best < n) {
-                n2 = nv12_C(o, a, w, n, &a_best, pack_C);
-                if (n2 > n_best) {
-                        n_best = n2;
-                        p_best = pack_C;
-                        *area = a_best;
-                }
-        }
-        /* traverse any special packings */
-        while (*p) {
-                n2 = *p++;
-                o_ = *p++;
-                w_ = *p++;
-                a_ = *p++;
-                /* stop if we already have a better packing */
-                if (n2 < n_best)
-                        break;
-                /* check if this packing is satisfactory */
-                if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) {
-                        *area = *p++;
-                        n_best = min(n2, n);
-                        p_best = p;
-                        break;
-                }
-                /* skip to next packing */
-                p += 1 + n2 * 2;
-        }
-        /*
-         * If so far unsuccessful, check whether 8 and 16 bit blocks can be
-         * co-packed.  This will actually be done in the end by the normal
-         * allocation, but we need to reserve a big-enough area.
-         */
-        if (!n_best) {
-                n_best = nv12_D(o, a, w, n, area, pack_D);
-                p_best = NULL;
-        }
-        /* store best packing */
-        if (p_best && n_best) {
-                BUG_ON(n_best > MAX_ANY);
-                memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A));
-        }
-        return n_best;
-}
-/* reserve nv12 blocks */
-static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
-                                        u32 gid, struct process_info *pi)
-{
-        u16 w, h, band, a = align, o = offs;
-        struct gid_info *gi;
-        int res = 0, res2, i;
-        u16 n_t, n_s, area_t, area_s;
-        u8 packing[2 * MAX_ANY];
-        struct list_head reserved = LIST_HEAD_INIT(reserved);
-        /* adjust alignment to the largest slot width (128 bytes) */
-        a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a);
-        /* Check input parameters for correctness, and support */
-        if (!width || !height || !n ||
-            offs >= align || offs & 1 ||
-            align >= PAGE_SIZE ||
-            n > ops->width * ops->height / 2)
-                return;
-        /* calculate dimensions, band, offs and alignment in slots */
-        if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o,
-                                                                        NULL))
-                return;
-        /* get group context */
-        gi = ops->get_gi(pi, gid);
-        if (!gi)
-                return;
-        /* reserve in groups until failed or all is reserved */
-        for (i = 0; i < n && res >= 0; i += res) {
-                /* check packing separately vs together */
-                n_s = nv12_separate(o, a, w, n - i, &area_s);
-                if (ops->nv12_packed)
-                        n_t = nv12_together(o, a, w, n - i, &area_t, packing);
-                else
-                        n_t = 0;
-                /* pack based on better efficiency */
-                res = -1;
-                if (!ops->nv12_packed ||
-                        nv12_eff(w, n_s, area_s, n - i) >
-                        nv12_eff(w, n_t, area_t, n - i)) {
-                        /*
-                         * Reserve blocks separately into a temporary list, so
-                         * that we can free them if unsuccessful. We need to be
-                         * able to reserve both 8- and 16-bit blocks as the
-                         * offsets of them must match.
-                         */
-                        res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o,
-                                                gi, &reserved);
-                        res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h,
-                                band_16, a >> 1, o >> 1, gi, &reserved);
-                        if (res2 < 0 || res < 0 || res != res2) {
-                                /* clean up */
-                                ops->release(&reserved);
-                                res = -1;
-                        } else {
-                                /* add list to reserved */
-                                ops->add_reserved(&reserved, gi);
-                        }
-                }
-                /* if separate packing failed, still try to pack together */
-                if (res < 0 && ops->nv12_packed && n_t) {
-                        /* pack together */
-                        res = ops->lay_nv12(n_t, area_t, w, h, gi, packing);
-                }
-        }
-        ops->release_gi(gi);
-}
 /**
 * We also optimize packing regular 2D areas as the auto-packing may result in
 * sub-optimal efficiency. This is most pronounced if the area is wider than
@@ -539,12 +150,6 @@ void tiler_reserve_init(struct tiler_ops *tiler)
 {
        ops = tiler;
-        ops->reserve_nv12 = reserve_nv12;
        ops->reserve = reserve_blocks;
        ops->unreserve = unreserve_blocks;
-        band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w
-                / ops->geom(TILFMT_8BIT)->bpp;
-        band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w
-                / ops->geom(TILFMT_16BIT)->bpp;
 }
author	Andy Gross <andy.gross@ti.com>	2011-06-07 23:15:55 -0400
committer	Paolo Pisati <paolo.pisati@canonical.com>	2012-08-17 04:19:05 -0400
commit	2f33160580154c63f94cb96d1891391bc0fdeb63 (patch)
tree	7693a7d0cbf9464b8ed993dd8c387ad799696b74
parent	5b461ddccf87ad46a710885a92ee85b79a3d45b7 (diff)