aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Gross <andy.gross@ti.com>2011-06-07 23:15:55 -0400
committerPaolo Pisati <paolo.pisati@canonical.com>2012-08-17 04:19:05 -0400
commit2f33160580154c63f94cb96d1891391bc0fdeb63 (patch)
tree7693a7d0cbf9464b8ed993dd8c387ad799696b74
parent5b461ddccf87ad46a710885a92ee85b79a3d45b7 (diff)
TILER: Make tiler nv12 support a configuration option
The tiler driver now allows for configuring the nv12 support as a kernel configuration option. If enabled, nv12 support will be compiled into the driver. Signed-off-by: Andy Gross <andy.gross@ti.com>
-rw-r--r--drivers/media/video/tiler/Kconfig10
-rw-r--r--drivers/media/video/tiler/Makefile5
-rw-r--r--drivers/media/video/tiler/_tiler.h9
-rw-r--r--drivers/media/video/tiler/tiler-iface.c6
-rw-r--r--drivers/media/video/tiler/tiler-main.c9
-rw-r--r--drivers/media/video/tiler/tiler-nv12.c423
-rw-r--r--drivers/media/video/tiler/tiler-reserve.c397
7 files changed, 460 insertions, 399 deletions
diff --git a/drivers/media/video/tiler/Kconfig b/drivers/media/video/tiler/Kconfig
index 8ff8ede9164..a22746ed152 100644
--- a/drivers/media/video/tiler/Kconfig
+++ b/drivers/media/video/tiler/Kconfig
@@ -124,3 +124,13 @@ config TILER_EXPOSE_SSPTR
124 124
125 You can use this flag to see if the userspace is relying on 125 You can use this flag to see if the userspace is relying on
126 having access to the SSPtr. 126 having access to the SSPtr.
127
128config TILER_ENABLE_NV12
129 bool "Enable NV12 support"
130 default y
131 depends on TI_TILER
132 help
133 This option enables NV12 functionality in the TILER driver.
134
135 If set, nv12 support will be compiled into the driver and APIs
136 will be enabled.
diff --git a/drivers/media/video/tiler/Makefile b/drivers/media/video/tiler/Makefile
index b3276440304..ad2dfa22ae7 100644
--- a/drivers/media/video/tiler/Makefile
+++ b/drivers/media/video/tiler/Makefile
@@ -3,6 +3,9 @@ obj-$(CONFIG_TI_TILER) += tcm/
3obj-$(CONFIG_TI_TILER) += tiler.o 3obj-$(CONFIG_TI_TILER) += tiler.o
4tiler-objs = tiler-geom.o tiler-main.o tiler-iface.o tiler-reserve.o tmm-pat.o 4tiler-objs = tiler-geom.o tiler-main.o tiler-iface.o tiler-reserve.o tmm-pat.o
5 5
6ifdef CONFIG_TILER_ENABLE_NV12
7tiler-objs += tiler-nv12.o
8endif
9
6obj-$(CONFIG_TI_TILER) += tiler_dmm.o 10obj-$(CONFIG_TI_TILER) += tiler_dmm.o
7tiler_dmm-objs = dmm.o 11tiler_dmm-objs = dmm.o
8
diff --git a/drivers/media/video/tiler/_tiler.h b/drivers/media/video/tiler/_tiler.h
index 41740b4cce7..375cdbae6fa 100644
--- a/drivers/media/video/tiler/_tiler.h
+++ b/drivers/media/video/tiler/_tiler.h
@@ -105,8 +105,10 @@ struct tiler_ops {
105 s32 (*lay_2d) (enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band, 105 s32 (*lay_2d) (enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band,
106 u16 align, u16 offs, struct gid_info *gi, 106 u16 align, u16 offs, struct gid_info *gi,
107 struct list_head *pos); 107 struct list_head *pos);
108#ifdef CONFIG_TILER_ENABLE_NV12
108 s32 (*lay_nv12) (int n, u16 w, u16 w1, u16 h, struct gid_info *gi, 109 s32 (*lay_nv12) (int n, u16 w, u16 w1, u16 h, struct gid_info *gi,
109 u8 *p); 110 u8 *p);
111#endif
110 /* group operations */ 112 /* group operations */
111 struct gid_info * (*get_gi) (struct process_info *pi, u32 gid); 113 struct gid_info * (*get_gi) (struct process_info *pi, u32 gid);
112 void (*release_gi) (struct gid_info *gi); 114 void (*release_gi) (struct gid_info *gi);
@@ -131,8 +133,9 @@ struct tiler_ops {
131 133
132 /* additional info */ 134 /* additional info */
133 const struct file_operations *fops; 135 const struct file_operations *fops;
134 136#ifdef CONFIG_TILER_ENABLE_NV12
135 bool nv12_packed; /* whether NV12 is packed into same container */ 137 bool nv12_packed; /* whether NV12 is packed into same container */
138#endif
136 u32 page; /* page size */ 139 u32 page; /* page size */
137 u32 width; /* container width */ 140 u32 width; /* container width */
138 u32 height; /* container height */ 141 u32 height; /* container height */
@@ -141,6 +144,8 @@ struct tiler_ops {
141void tiler_iface_init(struct tiler_ops *tiler); 144void tiler_iface_init(struct tiler_ops *tiler);
142void tiler_geom_init(struct tiler_ops *tiler); 145void tiler_geom_init(struct tiler_ops *tiler);
143void tiler_reserve_init(struct tiler_ops *tiler); 146void tiler_reserve_init(struct tiler_ops *tiler);
147void tiler_nv12_init(struct tiler_ops *tiler);
148u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area);
144 149
145struct process_info *__get_pi(pid_t pid, bool kernel); 150struct process_info *__get_pi(pid_t pid, bool kernel);
146 151
diff --git a/drivers/media/video/tiler/tiler-iface.c b/drivers/media/video/tiler/tiler-iface.c
index 3e20599a9e9..534fb49c536 100644
--- a/drivers/media/video/tiler/tiler-iface.c
+++ b/drivers/media/video/tiler/tiler-iface.c
@@ -505,12 +505,16 @@ static long tiler_ioctl(struct file *filp, u32 cmd, unsigned long arg)
505 return -EFAULT; 505 return -EFAULT;
506 506
507 if (block_info.fmt == TILFMT_8AND16) 507 if (block_info.fmt == TILFMT_8AND16)
508#ifdef CONFIG_TILER_ENABLE_NV12
508 ops->reserve_nv12(block_info.key, 509 ops->reserve_nv12(block_info.key,
509 block_info.dim.area.width, 510 block_info.dim.area.width,
510 block_info.dim.area.height, 511 block_info.dim.area.height,
511 block_info.align, 512 block_info.align,
512 block_info.offs, 513 block_info.offs,
513 block_info.group_id, pi); 514 block_info.group_id, pi);
515#else
516 return -EINVAL;
517#endif
514 else 518 else
515 ops->reserve(block_info.key, 519 ops->reserve(block_info.key,
516 block_info.fmt, 520 block_info.fmt,
@@ -672,6 +676,7 @@ void tiler_reserve(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
672} 676}
673EXPORT_SYMBOL(tiler_reserve); 677EXPORT_SYMBOL(tiler_reserve);
674 678
679#ifdef CONFIG_TILER_ENABLE_NV12
675void tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, 680void tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
676 u32 gid, pid_t pid) 681 u32 gid, pid_t pid)
677{ 682{
@@ -687,6 +692,7 @@ void tiler_reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs)
687 tiler_reservex_nv12(n, width, height, align, offs, 0, current->tgid); 692 tiler_reservex_nv12(n, width, height, align, offs, 0, current->tgid);
688} 693}
689EXPORT_SYMBOL(tiler_reserve_nv12); 694EXPORT_SYMBOL(tiler_reserve_nv12);
695#endif
690 696
691s32 tiler_allocx(struct tiler_block_t *blk, enum tiler_fmt fmt, 697s32 tiler_allocx(struct tiler_block_t *blk, enum tiler_fmt fmt,
692 u32 align, u32 offs, u32 gid, pid_t pid) 698 u32 align, u32 offs, u32 gid, pid_t pid)
diff --git a/drivers/media/video/tiler/tiler-main.c b/drivers/media/video/tiler/tiler-main.c
index bffd8cc82c2..23d130f897f 100644
--- a/drivers/media/video/tiler/tiler-main.c
+++ b/drivers/media/video/tiler/tiler-main.c
@@ -513,6 +513,7 @@ static s32 lay_2d(enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band,
513 return n; 513 return n;
514} 514}
515 515
516#ifdef CONFIG_TILER_ENABLE_NV12
516/* layout reserved nv12 blocks in a larger area */ 517/* layout reserved nv12 blocks in a larger area */
517/* NOTE: area w(idth), w1 (8-bit block width), h(eight) are in slots */ 518/* NOTE: area w(idth), w1 (8-bit block width), h(eight) are in slots */
518/* p is a pointer to a packing description, which is a list of offsets in 519/* p is a pointer to a packing description, which is a list of offsets in
@@ -558,6 +559,7 @@ static s32 lay_nv12(int n, u16 w, u16 w1, u16 h, struct gid_info *gi, u8 *p)
558 mutex_unlock(&mtx); 559 mutex_unlock(&mtx);
559 return n; 560 return n;
560} 561}
562#endif
561 563
562static void _m_unpin(struct mem_info *mi) 564static void _m_unpin(struct mem_info *mi)
563{ 565{
@@ -1221,7 +1223,9 @@ static s32 __init tiler_init(void)
1221 tiler.lock = find_n_lock; 1223 tiler.lock = find_n_lock;
1222 tiler.unlock_free = unlock_n_free; 1224 tiler.unlock_free = unlock_n_free;
1223 tiler.lay_2d = lay_2d; 1225 tiler.lay_2d = lay_2d;
1226#ifdef CONFIG_TILER_ENABLE_NV12
1224 tiler.lay_nv12 = lay_nv12; 1227 tiler.lay_nv12 = lay_nv12;
1228#endif
1225 tiler.destroy_group = destroy_group; 1229 tiler.destroy_group = destroy_group;
1226 tiler.lock_by_ssptr = find_block_by_ssptr; 1230 tiler.lock_by_ssptr = find_block_by_ssptr;
1227 tiler.describe = fill_block_info; 1231 tiler.describe = fill_block_info;
@@ -1233,6 +1237,9 @@ static s32 __init tiler_init(void)
1233 tiler_geom_init(&tiler); 1237 tiler_geom_init(&tiler);
1234 tiler_reserve_init(&tiler); 1238 tiler_reserve_init(&tiler);
1235 tiler_iface_init(&tiler); 1239 tiler_iface_init(&tiler);
1240#ifdef CONFIG_TILER_ENABLE_NV12
1241 tiler_nv12_init(&tiler);
1242#endif
1236 1243
1237 /* check module parameters for correctness */ 1244 /* check module parameters for correctness */
1238 if (default_align > PAGE_SIZE || 1245 if (default_align > PAGE_SIZE ||
@@ -1272,7 +1279,9 @@ static s32 __init tiler_init(void)
1272 area.y1 = tiler.height - 1; 1279 area.y1 = tiler.height - 1;
1273 tmm_unpin(tmm_pat, area); 1280 tmm_unpin(tmm_pat, area);
1274 1281
1282#ifdef CONFIG_TILER_ENABLE_NV12
1275 tiler.nv12_packed = tcm[TILFMT_8BIT] == tcm[TILFMT_16BIT]; 1283 tiler.nv12_packed = tcm[TILFMT_8BIT] == tcm[TILFMT_16BIT];
1284#endif
1276 1285
1277 tiler_device = kmalloc(sizeof(*tiler_device), GFP_KERNEL); 1286 tiler_device = kmalloc(sizeof(*tiler_device), GFP_KERNEL);
1278 if (!tiler_device || !sita || !tmm_pat) { 1287 if (!tiler_device || !sita || !tmm_pat) {
diff --git a/drivers/media/video/tiler/tiler-nv12.c b/drivers/media/video/tiler/tiler-nv12.c
new file mode 100644
index 00000000000..c16a14015ae
--- /dev/null
+++ b/drivers/media/video/tiler/tiler-nv12.c
@@ -0,0 +1,423 @@
1/*
2 * tiler-nv12.c
3 *
4 * TILER driver NV12 area reservation functions for TI TILER hardware block.
5 *
6 * Author: Lajos Molnar <molnar@ti.com>
7 *
8 * Copyright (C) 2009-2010 Texas Instruments, Inc.
9 *
10 * This package is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
16 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
17 */
18
19#include "_tiler.h"
20
21static struct tiler_ops *ops; /* shared methods and variables */
22static int band_8;
23static int band_16;
24
25/*
26 * NV12 Reservation Functions
27 *
28 * TILER is designed so that a (w * h) * 8bit area is twice as wide as a
29 * (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit
30 * blocks is a common usecase for TILER, we optimize packing these into a
31 * TILER area.
32 *
33 * During reservation we want to find the most effective packing (most used area
34 * in the smallest overall area)
35 *
36 * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit
37 * blocks into separate container areas, or pack them together into same area.
38 */
39
40/**
41 * Calculate effectiveness of packing. We weight total area much higher than
42 * packing efficiency to get the smallest overall container use.
43 *
44 * @param w width of one (8-bit) block
45 * @param n buffers in a packing
46 * @param area width of packing area
47 * @param n_total total number of buffers to be packed
48 * @return effectiveness, the higher the better
49 */
50static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total)
51{
52 return 0x10000000 -
53 /* weigh against total area needed (for all buffers) */
54 /* 64-slots = -2048 */
55 DIV_ROUND_UP(n_total, n) * area * 32 +
56 /* packing efficiency (0 - 1024) */
57 1024 * n * ((w * 3 + 1) >> 1) / area;
58}
59
60/**
61 * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate
62 * areas.
63 *
64 * @author a0194118 (7/16/2010)
65 *
66 * @param o desired offset (<a)
67 * @param a desired alignment (>=2)
68 * @param w block width (>0)
69 * @param n number of blocks desired
70 * @param area pointer to store total area needed
71 *
72 * @return number of blocks that can be allocated
73 */
74static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area)
75{
76 tiler_best2pack(o, a, band_8, w, &n, area);
77 tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area);
78 *area *= 3;
79 return n;
80}
81
82/*
83 * Specialized NV12 Reservation Algorithms
84 *
85 * We use 4 packing methods that pack nv12 blocks into the same area. Together
86 * these 4 methods give the optimal result for most possible input parameters.
87 *
88 * For now we pack into a 64-slot area, so that we don't have to worry about
89 * stride issues (all blocks get 4K stride). For some of the algorithms this
90 * could be true even if the area was 128.
91 */
92
93/**
94 * Packing types are marked using a letter sequence, capital letters denoting
95 * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks.
96 *
97 * All methods have the following parameters. They also define the maximum
98 * number of coordinates that could potentially be packed.
99 *
100 * @param o, a, w, n offset, alignment, width, # of blocks as usual
101 * @param area pointer to store area needed for packing
102 * @param p pointer to store packing coordinates
103 * @return number of blocks that can be packed
104 */
105
106/* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */
107#define MAX_A 21
108static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
109{
110 u16 x = o, u, l, m = 0;
111 *area = band_8;
112
113 while (x + w < *area && m < n) {
114 /* current 8bit upper bound (a) is next 8bit lower bound (B) */
115 l = u = (*area + x) >> 1;
116
117 /* pack until upper bound */
118 while (x + w <= u && m < n) {
119 /* save packing */
120 BUG_ON(m + 1 >= MAX_A);
121 *p++ = x;
122 *p++ = l;
123 l = (*area + x + w + 1) >> 1;
124 x = ALIGN(x + w - o, a) + o;
125 m++;
126 }
127 x = ALIGN(l - o, a) + o; /* set new lower bound */
128 }
129 return m;
130}
131
132/* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */
133static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
134{
135 u16 m;
136
137 /* this is a mirrored packing of method A */
138 n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p);
139
140 /* reverse packing */
141 for (m = 0; m < n; m++) {
142 *p = *area - *p - w;
143 p++;
144 *p = *area - *p - ((w + 1) >> 1);
145 p++;
146 }
147 return n;
148}
149
150/* Method B: simple layout: aAbcBdeCfgDhEFGH */
151#define MAX_B 8
152static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
153{
154 u16 e = (o + w) % a; /* end offset */
155 u16 o1 = (o >> 1) % a; /* half offset */
156 u16 e1 = ((o + w + 1) >> 1) % a; /* half end offset */
157 u16 o2 = o1 + (a >> 2); /* 2nd half offset */
158 u16 e2 = e1 + (a >> 2); /* 2nd half end offset */
159 u16 m = 0;
160 *area = band_8;
161
162 /* ensure 16-bit blocks don't overlap 8-bit blocks */
163
164 /* width cannot wrap around alignment, half block must be before block,
165 2nd half can be before or after */
166 if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e))
167 while (o + w <= *area && m < n) {
168 BUG_ON(m + 1 >= MAX_B);
169 *p++ = o;
170 *p++ = o >> 1;
171 m++;
172 o += a;
173 }
174 return m;
175}
176
177/* Method C: butterfly layout: AAbbaaBB */
178#define MAX_C 20
179static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
180{
181 int m = 0;
182 u16 o2, e = ALIGN(w, a), i = 0, j = 0;
183 *area = band_8;
184 o2 = *area - (a - (o + w) % a) % a; /* end of last possible block */
185
186 m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1;
187 for (i = j = 0; i < m && j < n; i++, j++) {
188 BUG_ON(j + 1 >= MAX_C);
189 *p++ = o + i * e;
190 *p++ = (o + i * e + *area) >> 1;
191 if (++j < n) {
192 *p++ = o2 - i * e - w;
193 *p++ = (o2 - i * e - w) >> 1;
194 }
195 }
196 return j;
197}
198
199/* Method D: for large allocation: aA or Aa */
200#define MAX_D 1
201static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
202{
203 u16 o1, w1 = (w + 1) >> 1, d;
204 *area = ALIGN(o + w, band_8);
205
206 for (d = 0; n > 0 && d + o + w <= *area; d += a) {
207 /* try to fit 16-bit before 8-bit */
208 o1 = ((o + d) % band_8) >> 1;
209 if (o1 + w1 <= o + d) {
210 *p++ = o + d;
211 *p++ = o1;
212 return 1;
213 }
214
215 /* try to fit 16-bit after 8-bit */
216 o1 += ALIGN(d + o + w - o1, band_16);
217 if (o1 + w1 <= *area) {
218 *p++ = o;
219 *p++ = o1;
220 return 1;
221 }
222 }
223 return 0;
224}
225
226/**
227 * Umbrella nv12 packing method. This selects the best packings from the above
228 * methods. It also contains hardcoded packings for parameter combinations
229 * that have more efficient packings. This method provides is guaranteed to
230 * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large.
231 */
232#define MAX_ANY 21 /* must be MAX(method-MAX-s, hardcoded n-s) */
233static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing)
234{
235 u16 n_best, a_best, n2, a_, o_, w_;
236
237 /* algo results (packings) */
238 u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2];
239 u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2];
240 u8 pack_D[MAX_D * 2];
241
242 /*
243 * Hardcoded packings. They are sorted by increasing area, and then by
244 * decreasing n. We may not get the best efficiency if less than n
245 * blocks are needed as packings are not necessarily sorted in
246 * increasing order. However, for those n-s one of the other 4 methods
247 * may return the optimal packing.
248 */
249 u8 packings[] = {
250 /* n=9, o=2, w=4, a=4, area=64 */
251 9, 2, 4, 4, 64,
252 /* 8-bit, 16-bit block coordinate pairs */
253 2, 33, 6, 35, 10, 37, 14, 39, 18, 41,
254 46, 23, 50, 25, 54, 27, 58, 29,
255 /* o=0, w=12, a=4, n=3 */
256 3, 0, 12, 4, 64,
257 0, 32, 12, 38, 48, 24,
258 /* end */
259 0
260 }, *p = packings, *p_best = NULL, *p_end;
261 p_end = packings + sizeof(packings) - 1;
262
263 /* see which method gives the best packing */
264
265 /* start with smallest area algorithms A, B & C, stop if we can
266 pack all buffers */
267 n_best = nv12_A(o, a, w, n, area, pack_A);
268 p_best = pack_A;
269 if (n_best < n) {
270 n2 = nv12_revA(o, a, w, n, &a_best, pack_rA);
271 if (n2 > n_best) {
272 n_best = n2;
273 p_best = pack_rA;
274 *area = a_best;
275 }
276 }
277 if (n_best < n) {
278 n2 = nv12_B(o, a, w, n, &a_best, pack_B);
279 if (n2 > n_best) {
280 n_best = n2;
281 p_best = pack_B;
282 *area = a_best;
283 }
284 }
285 if (n_best < n) {
286 n2 = nv12_C(o, a, w, n, &a_best, pack_C);
287 if (n2 > n_best) {
288 n_best = n2;
289 p_best = pack_C;
290 *area = a_best;
291 }
292 }
293
294 /* traverse any special packings */
295 while (*p) {
296 n2 = *p++;
297 o_ = *p++;
298 w_ = *p++;
299 a_ = *p++;
300 /* stop if we already have a better packing */
301 if (n2 < n_best)
302 break;
303
304 /* check if this packing is satisfactory */
305 if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) {
306 *area = *p++;
307 n_best = min(n2, n);
308 p_best = p;
309 break;
310 }
311
312 /* skip to next packing */
313 p += 1 + n2 * 2;
314 }
315
316 /*
317 * If so far unsuccessful, check whether 8 and 16 bit blocks can be
318 * co-packed. This will actually be done in the end by the normal
319 * allocation, but we need to reserve a big-enough area.
320 */
321 if (!n_best) {
322 n_best = nv12_D(o, a, w, n, area, pack_D);
323 p_best = NULL;
324 }
325
326 /* store best packing */
327 if (p_best && n_best) {
328 BUG_ON(n_best > MAX_ANY);
329 memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A));
330 }
331
332 return n_best;
333}
334
335/* reserve nv12 blocks */
336static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
337 u32 gid, struct process_info *pi)
338{
339 u16 w, h, band, a = align, o = offs;
340 struct gid_info *gi;
341 int res = 0, res2, i;
342 u16 n_t, n_s, area_t, area_s;
343 u8 packing[2 * MAX_ANY];
344 struct list_head reserved = LIST_HEAD_INIT(reserved);
345
346 /* adjust alignment to the largest slot width (128 bytes) */
347 a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a);
348
349 /* Check input parameters for correctness, and support */
350 if (!width || !height || !n ||
351 offs >= align || offs & 1 ||
352 align >= PAGE_SIZE ||
353 n > ops->width * ops->height / 2)
354 return;
355
356 /* calculate dimensions, band, offs and alignment in slots */
357 if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o,
358 NULL))
359 return;
360
361 /* get group context */
362 gi = ops->get_gi(pi, gid);
363 if (!gi)
364 return;
365
366 /* reserve in groups until failed or all is reserved */
367 for (i = 0; i < n && res >= 0; i += res) {
368 /* check packing separately vs together */
369 n_s = nv12_separate(o, a, w, n - i, &area_s);
370 if (ops->nv12_packed)
371 n_t = nv12_together(o, a, w, n - i, &area_t, packing);
372 else
373 n_t = 0;
374
375 /* pack based on better efficiency */
376 res = -1;
377 if (!ops->nv12_packed ||
378 nv12_eff(w, n_s, area_s, n - i) >
379 nv12_eff(w, n_t, area_t, n - i)) {
380
381 /*
382 * Reserve blocks separately into a temporary list, so
383 * that we can free them if unsuccessful. We need to be
384 * able to reserve both 8- and 16-bit blocks as the
385 * offsets of them must match.
386 */
387 res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o,
388 gi, &reserved);
389 res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h,
390 band_16, a >> 1, o >> 1, gi, &reserved);
391
392 if (res2 < 0 || res < 0 || res != res2) {
393 /* clean up */
394 ops->release(&reserved);
395 res = -1;
396 } else {
397 /* add list to reserved */
398 ops->add_reserved(&reserved, gi);
399 }
400 }
401
402 /* if separate packing failed, still try to pack together */
403 if (res < 0 && ops->nv12_packed && n_t) {
404 /* pack together */
405 res = ops->lay_nv12(n_t, area_t, w, h, gi, packing);
406 }
407 }
408
409 ops->release_gi(gi);
410}
411
412/* initialize shared method pointers and global static variables */
413void tiler_nv12_init(struct tiler_ops *tiler)
414{
415 ops = tiler;
416
417 ops->reserve_nv12 = reserve_nv12;
418
419 band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w
420 / ops->geom(TILFMT_8BIT)->bpp;
421 band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w
422 / ops->geom(TILFMT_16BIT)->bpp;
423}
diff --git a/drivers/media/video/tiler/tiler-reserve.c b/drivers/media/video/tiler/tiler-reserve.c
index 6715d3ddd6a..770fb07c5bb 100644
--- a/drivers/media/video/tiler/tiler-reserve.c
+++ b/drivers/media/video/tiler/tiler-reserve.c
@@ -19,8 +19,6 @@
19#include "_tiler.h" 19#include "_tiler.h"
20 20
21static struct tiler_ops *ops; /* shared methods and variables */ 21static struct tiler_ops *ops; /* shared methods and variables */
22static int band_8; /* size of 8-bit band in slots */
23static int band_16; /* size of 16-bit band in slots */
24 22
25/** 23/**
26 * Calculate the maximum number buffers that can be packed next to each other, 24 * Calculate the maximum number buffers that can be packed next to each other,
@@ -38,7 +36,7 @@ static int band_16; /* size of 16-bit band in slots */
38 * 36 *
39 * @return packing efficiency (0-1024) 37 * @return packing efficiency (0-1024)
40 */ 38 */
41static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area) 39u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area)
42{ 40{
43 u16 m = 0, max_n = *n; /* m is mostly n - 1 */ 41 u16 m = 0, max_n = *n; /* m is mostly n - 1 */
44 u16 e = ALIGN(w, a); /* effective width of one block */ 42 u16 e = ALIGN(w, a); /* effective width of one block */
@@ -71,393 +69,6 @@ static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area)
71 return best_eff; 69 return best_eff;
72} 70}
73 71
74/*
75 * NV12 Reservation Functions
76 *
77 * TILER is designed so that a (w * h) * 8bit area is twice as wide as a
78 * (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit
79 * blocks is a common usecase for TILER, we optimize packing these into a
80 * TILER area.
81 *
82 * During reservation we want to find the most effective packing (most used area
83 * in the smallest overall area)
84 *
85 * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit
86 * blocks into separate container areas, or pack them together into same area.
87 */
88
89/**
90 * Calculate effectiveness of packing. We weight total area much higher than
91 * packing efficiency to get the smallest overall container use.
92 *
93 * @param w width of one (8-bit) block
94 * @param n buffers in a packing
95 * @param area width of packing area
96 * @param n_total total number of buffers to be packed
97 * @return effectiveness, the higher the better
98 */
99static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total)
100{
101 return 0x10000000 -
102 /* weigh against total area needed (for all buffers) */
103 /* 64-slots = -2048 */
104 DIV_ROUND_UP(n_total, n) * area * 32 +
105 /* packing efficiency (0 - 1024) */
106 1024 * n * ((w * 3 + 1) >> 1) / area;
107}
108
109/**
110 * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate
111 * areas.
112 *
113 * @author a0194118 (7/16/2010)
114 *
115 * @param o desired offset (<a)
116 * @param a desired alignment (>=2)
117 * @param w block width (>0)
118 * @param n number of blocks desired
119 * @param area pointer to store total area needed
120 *
121 * @return number of blocks that can be allocated
122 */
123static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area)
124{
125 tiler_best2pack(o, a, band_8, w, &n, area);
126 tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area);
127 *area *= 3;
128 return n;
129}
130
131/*
132 * Specialized NV12 Reservation Algorithms
133 *
134 * We use 4 packing methods that pack nv12 blocks into the same area. Together
135 * these 4 methods give the optimal result for most possible input parameters.
136 *
137 * For now we pack into a 64-slot area, so that we don't have to worry about
138 * stride issues (all blocks get 4K stride). For some of the algorithms this
139 * could be true even if the area was 128.
140 */
141
142/**
143 * Packing types are marked using a letter sequence, capital letters denoting
144 * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks.
145 *
146 * All methods have the following parameters. They also define the maximum
147 * number of coordinates that could potentially be packed.
148 *
149 * @param o, a, w, n offset, alignment, width, # of blocks as usual
150 * @param area pointer to store area needed for packing
151 * @param p pointer to store packing coordinates
152 * @return number of blocks that can be packed
153 */
154
155/* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */
156#define MAX_A 21
157static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
158{
159 u16 x = o, u, l, m = 0;
160 *area = band_8;
161
162 while (x + w < *area && m < n) {
163 /* current 8bit upper bound (a) is next 8bit lower bound (B) */
164 l = u = (*area + x) >> 1;
165
166 /* pack until upper bound */
167 while (x + w <= u && m < n) {
168 /* save packing */
169 BUG_ON(m + 1 >= MAX_A);
170 *p++ = x;
171 *p++ = l;
172 l = (*area + x + w + 1) >> 1;
173 x = ALIGN(x + w - o, a) + o;
174 m++;
175 }
176 x = ALIGN(l - o, a) + o; /* set new lower bound */
177 }
178 return m;
179}
180
181/* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */
182static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
183{
184 u16 m;
185
186 /* this is a mirrored packing of method A */
187 n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p);
188
189 /* reverse packing */
190 for (m = 0; m < n; m++) {
191 *p = *area - *p - w;
192 p++;
193 *p = *area - *p - ((w + 1) >> 1);
194 p++;
195 }
196 return n;
197}
198
199/* Method B: simple layout: aAbcBdeCfgDhEFGH */
200#define MAX_B 8
201static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
202{
203 u16 e = (o + w) % a; /* end offset */
204 u16 o1 = (o >> 1) % a; /* half offset */
205 u16 e1 = ((o + w + 1) >> 1) % a; /* half end offset */
206 u16 o2 = o1 + (a >> 2); /* 2nd half offset */
207 u16 e2 = e1 + (a >> 2); /* 2nd half end offset */
208 u16 m = 0;
209 *area = band_8;
210
211 /* ensure 16-bit blocks don't overlap 8-bit blocks */
212
213 /* width cannot wrap around alignment, half block must be before block,
214 2nd half can be before or after */
215 if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e))
216 while (o + w <= *area && m < n) {
217 BUG_ON(m + 1 >= MAX_B);
218 *p++ = o;
219 *p++ = o >> 1;
220 m++;
221 o += a;
222 }
223 return m;
224}
225
226/* Method C: butterfly layout: AAbbaaBB */
227#define MAX_C 20
228static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
229{
230 int m = 0;
231 u16 o2, e = ALIGN(w, a), i = 0, j = 0;
232 *area = band_8;
233 o2 = *area - (a - (o + w) % a) % a; /* end of last possible block */
234
235 m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1;
236 for (i = j = 0; i < m && j < n; i++, j++) {
237 BUG_ON(j + 1 >= MAX_C);
238 *p++ = o + i * e;
239 *p++ = (o + i * e + *area) >> 1;
240 if (++j < n) {
241 *p++ = o2 - i * e - w;
242 *p++ = (o2 - i * e - w) >> 1;
243 }
244 }
245 return j;
246}
247
248/* Method D: for large allocation: aA or Aa */
249#define MAX_D 1
250static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
251{
252 u16 o1, w1 = (w + 1) >> 1, d;
253 *area = ALIGN(o + w, band_8);
254
255 for (d = 0; n > 0 && d + o + w <= *area; d += a) {
256 /* try to fit 16-bit before 8-bit */
257 o1 = ((o + d) % band_8) >> 1;
258 if (o1 + w1 <= o + d) {
259 *p++ = o + d;
260 *p++ = o1;
261 return 1;
262 }
263
264 /* try to fit 16-bit after 8-bit */
265 o1 += ALIGN(d + o + w - o1, band_16);
266 if (o1 + w1 <= *area) {
267 *p++ = o;
268 *p++ = o1;
269 return 1;
270 }
271 }
272 return 0;
273}
274
275/**
276 * Umbrella nv12 packing method. This selects the best packings from the above
277 * methods. It also contains hardcoded packings for parameter combinations
278 * that have more efficient packings. This method provides is guaranteed to
279 * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large.
280 */
281#define MAX_ANY 21 /* must be MAX(method-MAX-s, hardcoded n-s) */
282static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing)
283{
284 u16 n_best, a_best, n2, a_, o_, w_;
285
286 /* algo results (packings) */
287 u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2];
288 u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2];
289 u8 pack_D[MAX_D * 2];
290
291 /*
292 * Hardcoded packings. They are sorted by increasing area, and then by
293 * decreasing n. We may not get the best efficiency if less than n
294 * blocks are needed as packings are not necessarily sorted in
295 * increasing order. However, for those n-s one of the other 4 methods
296 * may return the optimal packing.
297 */
298 u8 packings[] = {
299 /* n=9, o=2, w=4, a=4, area=64 */
300 9, 2, 4, 4, 64,
301 /* 8-bit, 16-bit block coordinate pairs */
302 2, 33, 6, 35, 10, 37, 14, 39, 18, 41,
303 46, 23, 50, 25, 54, 27, 58, 29,
304 /* o=0, w=12, a=4, n=3 */
305 3, 0, 12, 4, 64,
306 0, 32, 12, 38, 48, 24,
307 /* end */
308 0
309 }, *p = packings, *p_best = NULL, *p_end;
310 p_end = packings + sizeof(packings) - 1;
311
312 /* see which method gives the best packing */
313
314 /* start with smallest area algorithms A, B & C, stop if we can
315 pack all buffers */
316 n_best = nv12_A(o, a, w, n, area, pack_A);
317 p_best = pack_A;
318 if (n_best < n) {
319 n2 = nv12_revA(o, a, w, n, &a_best, pack_rA);
320 if (n2 > n_best) {
321 n_best = n2;
322 p_best = pack_rA;
323 *area = a_best;
324 }
325 }
326 if (n_best < n) {
327 n2 = nv12_B(o, a, w, n, &a_best, pack_B);
328 if (n2 > n_best) {
329 n_best = n2;
330 p_best = pack_B;
331 *area = a_best;
332 }
333 }
334 if (n_best < n) {
335 n2 = nv12_C(o, a, w, n, &a_best, pack_C);
336 if (n2 > n_best) {
337 n_best = n2;
338 p_best = pack_C;
339 *area = a_best;
340 }
341 }
342
343 /* traverse any special packings */
344 while (*p) {
345 n2 = *p++;
346 o_ = *p++;
347 w_ = *p++;
348 a_ = *p++;
349 /* stop if we already have a better packing */
350 if (n2 < n_best)
351 break;
352
353 /* check if this packing is satisfactory */
354 if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) {
355 *area = *p++;
356 n_best = min(n2, n);
357 p_best = p;
358 break;
359 }
360
361 /* skip to next packing */
362 p += 1 + n2 * 2;
363 }
364
365 /*
366 * If so far unsuccessful, check whether 8 and 16 bit blocks can be
367 * co-packed. This will actually be done in the end by the normal
368 * allocation, but we need to reserve a big-enough area.
369 */
370 if (!n_best) {
371 n_best = nv12_D(o, a, w, n, area, pack_D);
372 p_best = NULL;
373 }
374
375 /* store best packing */
376 if (p_best && n_best) {
377 BUG_ON(n_best > MAX_ANY);
378 memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A));
379 }
380
381 return n_best;
382}
383
384/* reserve nv12 blocks */
385static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
386 u32 gid, struct process_info *pi)
387{
388 u16 w, h, band, a = align, o = offs;
389 struct gid_info *gi;
390 int res = 0, res2, i;
391 u16 n_t, n_s, area_t, area_s;
392 u8 packing[2 * MAX_ANY];
393 struct list_head reserved = LIST_HEAD_INIT(reserved);
394
395 /* adjust alignment to the largest slot width (128 bytes) */
396 a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a);
397
398 /* Check input parameters for correctness, and support */
399 if (!width || !height || !n ||
400 offs >= align || offs & 1 ||
401 align >= PAGE_SIZE ||
402 n > ops->width * ops->height / 2)
403 return;
404
405 /* calculate dimensions, band, offs and alignment in slots */
406 if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o,
407 NULL))
408 return;
409
410 /* get group context */
411 gi = ops->get_gi(pi, gid);
412 if (!gi)
413 return;
414
415 /* reserve in groups until failed or all is reserved */
416 for (i = 0; i < n && res >= 0; i += res) {
417 /* check packing separately vs together */
418 n_s = nv12_separate(o, a, w, n - i, &area_s);
419 if (ops->nv12_packed)
420 n_t = nv12_together(o, a, w, n - i, &area_t, packing);
421 else
422 n_t = 0;
423
424 /* pack based on better efficiency */
425 res = -1;
426 if (!ops->nv12_packed ||
427 nv12_eff(w, n_s, area_s, n - i) >
428 nv12_eff(w, n_t, area_t, n - i)) {
429
430 /*
431 * Reserve blocks separately into a temporary list, so
432 * that we can free them if unsuccessful. We need to be
433 * able to reserve both 8- and 16-bit blocks as the
434 * offsets of them must match.
435 */
436 res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o,
437 gi, &reserved);
438 res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h,
439 band_16, a >> 1, o >> 1, gi, &reserved);
440
441 if (res2 < 0 || res < 0 || res != res2) {
442 /* clean up */
443 ops->release(&reserved);
444 res = -1;
445 } else {
446 /* add list to reserved */
447 ops->add_reserved(&reserved, gi);
448 }
449 }
450
451 /* if separate packing failed, still try to pack together */
452 if (res < 0 && ops->nv12_packed && n_t) {
453 /* pack together */
454 res = ops->lay_nv12(n_t, area_t, w, h, gi, packing);
455 }
456 }
457
458 ops->release_gi(gi);
459}
460
461/** 72/**
462 * We also optimize packing regular 2D areas as the auto-packing may result in 73 * We also optimize packing regular 2D areas as the auto-packing may result in
463 * sub-optimal efficiency. This is most pronounced if the area is wider than 74 * sub-optimal efficiency. This is most pronounced if the area is wider than
@@ -539,12 +150,6 @@ void tiler_reserve_init(struct tiler_ops *tiler)
539{ 150{
540 ops = tiler; 151 ops = tiler;
541 152
542 ops->reserve_nv12 = reserve_nv12;
543 ops->reserve = reserve_blocks; 153 ops->reserve = reserve_blocks;
544 ops->unreserve = unreserve_blocks; 154 ops->unreserve = unreserve_blocks;
545
546 band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w
547 / ops->geom(TILFMT_8BIT)->bpp;
548 band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w
549 / ops->geom(TILFMT_16BIT)->bpp;
550} 155}