diff options
author | Andy Gross <andy.gross@ti.com> | 2011-06-07 23:15:55 -0400 |
---|---|---|
committer | Paolo Pisati <paolo.pisati@canonical.com> | 2012-08-17 04:19:05 -0400 |
commit | 2f33160580154c63f94cb96d1891391bc0fdeb63 (patch) | |
tree | 7693a7d0cbf9464b8ed993dd8c387ad799696b74 | |
parent | 5b461ddccf87ad46a710885a92ee85b79a3d45b7 (diff) |
TILER: Make tiler nv12 support a configuration option
The tiler driver now allows for configuring the nv12
support as a kernel configuration option. If enabled,
nv12 support will be compiled into the driver.
Signed-off-by: Andy Gross <andy.gross@ti.com>
-rw-r--r-- | drivers/media/video/tiler/Kconfig | 10 | ||||
-rw-r--r-- | drivers/media/video/tiler/Makefile | 5 | ||||
-rw-r--r-- | drivers/media/video/tiler/_tiler.h | 9 | ||||
-rw-r--r-- | drivers/media/video/tiler/tiler-iface.c | 6 | ||||
-rw-r--r-- | drivers/media/video/tiler/tiler-main.c | 9 | ||||
-rw-r--r-- | drivers/media/video/tiler/tiler-nv12.c | 423 | ||||
-rw-r--r-- | drivers/media/video/tiler/tiler-reserve.c | 397 |
7 files changed, 460 insertions, 399 deletions
diff --git a/drivers/media/video/tiler/Kconfig b/drivers/media/video/tiler/Kconfig index 8ff8ede9164..a22746ed152 100644 --- a/drivers/media/video/tiler/Kconfig +++ b/drivers/media/video/tiler/Kconfig | |||
@@ -124,3 +124,13 @@ config TILER_EXPOSE_SSPTR | |||
124 | 124 | ||
125 | You can use this flag to see if the userspace is relying on | 125 | You can use this flag to see if the userspace is relying on |
126 | having access to the SSPtr. | 126 | having access to the SSPtr. |
127 | |||
128 | config TILER_ENABLE_NV12 | ||
129 | bool "Enable NV12 support" | ||
130 | default y | ||
131 | depends on TI_TILER | ||
132 | help | ||
133 | This option enables NV12 functionality in the TILER driver. | ||
134 | |||
135 | If set, nv12 support will be compiled into the driver and APIs | ||
136 | will be enabled. | ||
diff --git a/drivers/media/video/tiler/Makefile b/drivers/media/video/tiler/Makefile index b3276440304..ad2dfa22ae7 100644 --- a/drivers/media/video/tiler/Makefile +++ b/drivers/media/video/tiler/Makefile | |||
@@ -3,6 +3,9 @@ obj-$(CONFIG_TI_TILER) += tcm/ | |||
3 | obj-$(CONFIG_TI_TILER) += tiler.o | 3 | obj-$(CONFIG_TI_TILER) += tiler.o |
4 | tiler-objs = tiler-geom.o tiler-main.o tiler-iface.o tiler-reserve.o tmm-pat.o | 4 | tiler-objs = tiler-geom.o tiler-main.o tiler-iface.o tiler-reserve.o tmm-pat.o |
5 | 5 | ||
6 | ifdef CONFIG_TILER_ENABLE_NV12 | ||
7 | tiler-objs += tiler-nv12.o | ||
8 | endif | ||
9 | |||
6 | obj-$(CONFIG_TI_TILER) += tiler_dmm.o | 10 | obj-$(CONFIG_TI_TILER) += tiler_dmm.o |
7 | tiler_dmm-objs = dmm.o | 11 | tiler_dmm-objs = dmm.o |
8 | |||
diff --git a/drivers/media/video/tiler/_tiler.h b/drivers/media/video/tiler/_tiler.h index 41740b4cce7..375cdbae6fa 100644 --- a/drivers/media/video/tiler/_tiler.h +++ b/drivers/media/video/tiler/_tiler.h | |||
@@ -105,8 +105,10 @@ struct tiler_ops { | |||
105 | s32 (*lay_2d) (enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band, | 105 | s32 (*lay_2d) (enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band, |
106 | u16 align, u16 offs, struct gid_info *gi, | 106 | u16 align, u16 offs, struct gid_info *gi, |
107 | struct list_head *pos); | 107 | struct list_head *pos); |
108 | #ifdef CONFIG_TILER_ENABLE_NV12 | ||
108 | s32 (*lay_nv12) (int n, u16 w, u16 w1, u16 h, struct gid_info *gi, | 109 | s32 (*lay_nv12) (int n, u16 w, u16 w1, u16 h, struct gid_info *gi, |
109 | u8 *p); | 110 | u8 *p); |
111 | #endif | ||
110 | /* group operations */ | 112 | /* group operations */ |
111 | struct gid_info * (*get_gi) (struct process_info *pi, u32 gid); | 113 | struct gid_info * (*get_gi) (struct process_info *pi, u32 gid); |
112 | void (*release_gi) (struct gid_info *gi); | 114 | void (*release_gi) (struct gid_info *gi); |
@@ -131,8 +133,9 @@ struct tiler_ops { | |||
131 | 133 | ||
132 | /* additional info */ | 134 | /* additional info */ |
133 | const struct file_operations *fops; | 135 | const struct file_operations *fops; |
134 | 136 | #ifdef CONFIG_TILER_ENABLE_NV12 | |
135 | bool nv12_packed; /* whether NV12 is packed into same container */ | 137 | bool nv12_packed; /* whether NV12 is packed into same container */ |
138 | #endif | ||
136 | u32 page; /* page size */ | 139 | u32 page; /* page size */ |
137 | u32 width; /* container width */ | 140 | u32 width; /* container width */ |
138 | u32 height; /* container height */ | 141 | u32 height; /* container height */ |
@@ -141,6 +144,8 @@ struct tiler_ops { | |||
141 | void tiler_iface_init(struct tiler_ops *tiler); | 144 | void tiler_iface_init(struct tiler_ops *tiler); |
142 | void tiler_geom_init(struct tiler_ops *tiler); | 145 | void tiler_geom_init(struct tiler_ops *tiler); |
143 | void tiler_reserve_init(struct tiler_ops *tiler); | 146 | void tiler_reserve_init(struct tiler_ops *tiler); |
147 | void tiler_nv12_init(struct tiler_ops *tiler); | ||
148 | u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area); | ||
144 | 149 | ||
145 | struct process_info *__get_pi(pid_t pid, bool kernel); | 150 | struct process_info *__get_pi(pid_t pid, bool kernel); |
146 | 151 | ||
diff --git a/drivers/media/video/tiler/tiler-iface.c b/drivers/media/video/tiler/tiler-iface.c index 3e20599a9e9..534fb49c536 100644 --- a/drivers/media/video/tiler/tiler-iface.c +++ b/drivers/media/video/tiler/tiler-iface.c | |||
@@ -505,12 +505,16 @@ static long tiler_ioctl(struct file *filp, u32 cmd, unsigned long arg) | |||
505 | return -EFAULT; | 505 | return -EFAULT; |
506 | 506 | ||
507 | if (block_info.fmt == TILFMT_8AND16) | 507 | if (block_info.fmt == TILFMT_8AND16) |
508 | #ifdef CONFIG_TILER_ENABLE_NV12 | ||
508 | ops->reserve_nv12(block_info.key, | 509 | ops->reserve_nv12(block_info.key, |
509 | block_info.dim.area.width, | 510 | block_info.dim.area.width, |
510 | block_info.dim.area.height, | 511 | block_info.dim.area.height, |
511 | block_info.align, | 512 | block_info.align, |
512 | block_info.offs, | 513 | block_info.offs, |
513 | block_info.group_id, pi); | 514 | block_info.group_id, pi); |
515 | #else | ||
516 | return -EINVAL; | ||
517 | #endif | ||
514 | else | 518 | else |
515 | ops->reserve(block_info.key, | 519 | ops->reserve(block_info.key, |
516 | block_info.fmt, | 520 | block_info.fmt, |
@@ -672,6 +676,7 @@ void tiler_reserve(u32 n, enum tiler_fmt fmt, u32 width, u32 height, | |||
672 | } | 676 | } |
673 | EXPORT_SYMBOL(tiler_reserve); | 677 | EXPORT_SYMBOL(tiler_reserve); |
674 | 678 | ||
679 | #ifdef CONFIG_TILER_ENABLE_NV12 | ||
675 | void tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, | 680 | void tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, |
676 | u32 gid, pid_t pid) | 681 | u32 gid, pid_t pid) |
677 | { | 682 | { |
@@ -687,6 +692,7 @@ void tiler_reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs) | |||
687 | tiler_reservex_nv12(n, width, height, align, offs, 0, current->tgid); | 692 | tiler_reservex_nv12(n, width, height, align, offs, 0, current->tgid); |
688 | } | 693 | } |
689 | EXPORT_SYMBOL(tiler_reserve_nv12); | 694 | EXPORT_SYMBOL(tiler_reserve_nv12); |
695 | #endif | ||
690 | 696 | ||
691 | s32 tiler_allocx(struct tiler_block_t *blk, enum tiler_fmt fmt, | 697 | s32 tiler_allocx(struct tiler_block_t *blk, enum tiler_fmt fmt, |
692 | u32 align, u32 offs, u32 gid, pid_t pid) | 698 | u32 align, u32 offs, u32 gid, pid_t pid) |
diff --git a/drivers/media/video/tiler/tiler-main.c b/drivers/media/video/tiler/tiler-main.c index bffd8cc82c2..23d130f897f 100644 --- a/drivers/media/video/tiler/tiler-main.c +++ b/drivers/media/video/tiler/tiler-main.c | |||
@@ -513,6 +513,7 @@ static s32 lay_2d(enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band, | |||
513 | return n; | 513 | return n; |
514 | } | 514 | } |
515 | 515 | ||
516 | #ifdef CONFIG_TILER_ENABLE_NV12 | ||
516 | /* layout reserved nv12 blocks in a larger area */ | 517 | /* layout reserved nv12 blocks in a larger area */ |
517 | /* NOTE: area w(idth), w1 (8-bit block width), h(eight) are in slots */ | 518 | /* NOTE: area w(idth), w1 (8-bit block width), h(eight) are in slots */ |
518 | /* p is a pointer to a packing description, which is a list of offsets in | 519 | /* p is a pointer to a packing description, which is a list of offsets in |
@@ -558,6 +559,7 @@ static s32 lay_nv12(int n, u16 w, u16 w1, u16 h, struct gid_info *gi, u8 *p) | |||
558 | mutex_unlock(&mtx); | 559 | mutex_unlock(&mtx); |
559 | return n; | 560 | return n; |
560 | } | 561 | } |
562 | #endif | ||
561 | 563 | ||
562 | static void _m_unpin(struct mem_info *mi) | 564 | static void _m_unpin(struct mem_info *mi) |
563 | { | 565 | { |
@@ -1221,7 +1223,9 @@ static s32 __init tiler_init(void) | |||
1221 | tiler.lock = find_n_lock; | 1223 | tiler.lock = find_n_lock; |
1222 | tiler.unlock_free = unlock_n_free; | 1224 | tiler.unlock_free = unlock_n_free; |
1223 | tiler.lay_2d = lay_2d; | 1225 | tiler.lay_2d = lay_2d; |
1226 | #ifdef CONFIG_TILER_ENABLE_NV12 | ||
1224 | tiler.lay_nv12 = lay_nv12; | 1227 | tiler.lay_nv12 = lay_nv12; |
1228 | #endif | ||
1225 | tiler.destroy_group = destroy_group; | 1229 | tiler.destroy_group = destroy_group; |
1226 | tiler.lock_by_ssptr = find_block_by_ssptr; | 1230 | tiler.lock_by_ssptr = find_block_by_ssptr; |
1227 | tiler.describe = fill_block_info; | 1231 | tiler.describe = fill_block_info; |
@@ -1233,6 +1237,9 @@ static s32 __init tiler_init(void) | |||
1233 | tiler_geom_init(&tiler); | 1237 | tiler_geom_init(&tiler); |
1234 | tiler_reserve_init(&tiler); | 1238 | tiler_reserve_init(&tiler); |
1235 | tiler_iface_init(&tiler); | 1239 | tiler_iface_init(&tiler); |
1240 | #ifdef CONFIG_TILER_ENABLE_NV12 | ||
1241 | tiler_nv12_init(&tiler); | ||
1242 | #endif | ||
1236 | 1243 | ||
1237 | /* check module parameters for correctness */ | 1244 | /* check module parameters for correctness */ |
1238 | if (default_align > PAGE_SIZE || | 1245 | if (default_align > PAGE_SIZE || |
@@ -1272,7 +1279,9 @@ static s32 __init tiler_init(void) | |||
1272 | area.y1 = tiler.height - 1; | 1279 | area.y1 = tiler.height - 1; |
1273 | tmm_unpin(tmm_pat, area); | 1280 | tmm_unpin(tmm_pat, area); |
1274 | 1281 | ||
1282 | #ifdef CONFIG_TILER_ENABLE_NV12 | ||
1275 | tiler.nv12_packed = tcm[TILFMT_8BIT] == tcm[TILFMT_16BIT]; | 1283 | tiler.nv12_packed = tcm[TILFMT_8BIT] == tcm[TILFMT_16BIT]; |
1284 | #endif | ||
1276 | 1285 | ||
1277 | tiler_device = kmalloc(sizeof(*tiler_device), GFP_KERNEL); | 1286 | tiler_device = kmalloc(sizeof(*tiler_device), GFP_KERNEL); |
1278 | if (!tiler_device || !sita || !tmm_pat) { | 1287 | if (!tiler_device || !sita || !tmm_pat) { |
diff --git a/drivers/media/video/tiler/tiler-nv12.c b/drivers/media/video/tiler/tiler-nv12.c new file mode 100644 index 00000000000..c16a14015ae --- /dev/null +++ b/drivers/media/video/tiler/tiler-nv12.c | |||
@@ -0,0 +1,423 @@ | |||
1 | /* | ||
2 | * tiler-nv12.c | ||
3 | * | ||
4 | * TILER driver NV12 area reservation functions for TI TILER hardware block. | ||
5 | * | ||
6 | * Author: Lajos Molnar <molnar@ti.com> | ||
7 | * | ||
8 | * Copyright (C) 2009-2010 Texas Instruments, Inc. | ||
9 | * | ||
10 | * This package is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | * | ||
14 | * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | ||
15 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | ||
16 | * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | ||
17 | */ | ||
18 | |||
19 | #include "_tiler.h" | ||
20 | |||
/* shared methods and variables (set by tiler_nv12_init) */
static struct tiler_ops *ops;

/* band sizes used by the NV12 packing methods */
static int band_8;	/* size of 8-bit band in slots */
static int band_16;	/* size of 16-bit band in slots */
24 | |||
25 | /* | ||
26 | * NV12 Reservation Functions | ||
27 | * | ||
28 | * TILER is designed so that a (w * h) * 8bit area is twice as wide as a | ||
29 | * (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit | ||
30 | * blocks is a common usecase for TILER, we optimize packing these into a | ||
31 | * TILER area. | ||
32 | * | ||
33 | * During reservation we want to find the most effective packing (most used area | ||
34 | * in the smallest overall area) | ||
35 | * | ||
36 | * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit | ||
37 | * blocks into separate container areas, or pack them together into same area. | ||
38 | */ | ||
39 | |||
40 | /** | ||
41 | * Calculate effectiveness of packing. We weight total area much higher than | ||
42 | * packing efficiency to get the smallest overall container use. | ||
43 | * | ||
44 | * @param w width of one (8-bit) block | ||
45 | * @param n buffers in a packing | ||
46 | * @param area width of packing area | ||
47 | * @param n_total total number of buffers to be packed | ||
48 | * @return effectiveness, the higher the better | ||
49 | */ | ||
50 | static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total) | ||
51 | { | ||
52 | return 0x10000000 - | ||
53 | /* weigh against total area needed (for all buffers) */ | ||
54 | /* 64-slots = -2048 */ | ||
55 | DIV_ROUND_UP(n_total, n) * area * 32 + | ||
56 | /* packing efficiency (0 - 1024) */ | ||
57 | 1024 * n * ((w * 3 + 1) >> 1) / area; | ||
58 | } | ||
59 | |||
60 | /** | ||
61 | * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate | ||
62 | * areas. | ||
63 | * | ||
64 | * @author a0194118 (7/16/2010) | ||
65 | * | ||
66 | * @param o desired offset (<a) | ||
67 | * @param a desired alignment (>=2) | ||
68 | * @param w block width (>0) | ||
69 | * @param n number of blocks desired | ||
70 | * @param area pointer to store total area needed | ||
71 | * | ||
72 | * @return number of blocks that can be allocated | ||
73 | */ | ||
74 | static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area) | ||
75 | { | ||
76 | tiler_best2pack(o, a, band_8, w, &n, area); | ||
77 | tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area); | ||
78 | *area *= 3; | ||
79 | return n; | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Specialized NV12 Reservation Algorithms | ||
84 | * | ||
85 | * We use 4 packing methods that pack nv12 blocks into the same area. Together | ||
86 | * these 4 methods give the optimal result for most possible input parameters. | ||
87 | * | ||
88 | * For now we pack into a 64-slot area, so that we don't have to worry about | ||
89 | * stride issues (all blocks get 4K stride). For some of the algorithms this | ||
90 | * could be true even if the area was 128. | ||
91 | */ | ||
92 | |||
93 | /** | ||
94 | * Packing types are marked using a letter sequence, capital letters denoting | ||
95 | * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks. | ||
96 | * | ||
97 | * All methods have the following parameters. They also define the maximum | ||
98 | * number of coordinates that could potentially be packed. | ||
99 | * | ||
100 | * @param o, a, w, n offset, alignment, width, # of blocks as usual | ||
101 | * @param area pointer to store area needed for packing | ||
102 | * @param p pointer to store packing coordinates | ||
103 | * @return number of blocks that can be packed | ||
104 | */ | ||
105 | |||
106 | /* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */ | ||
107 | #define MAX_A 21 | ||
108 | static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
109 | { | ||
110 | u16 x = o, u, l, m = 0; | ||
111 | *area = band_8; | ||
112 | |||
113 | while (x + w < *area && m < n) { | ||
114 | /* current 8bit upper bound (a) is next 8bit lower bound (B) */ | ||
115 | l = u = (*area + x) >> 1; | ||
116 | |||
117 | /* pack until upper bound */ | ||
118 | while (x + w <= u && m < n) { | ||
119 | /* save packing */ | ||
120 | BUG_ON(m + 1 >= MAX_A); | ||
121 | *p++ = x; | ||
122 | *p++ = l; | ||
123 | l = (*area + x + w + 1) >> 1; | ||
124 | x = ALIGN(x + w - o, a) + o; | ||
125 | m++; | ||
126 | } | ||
127 | x = ALIGN(l - o, a) + o; /* set new lower bound */ | ||
128 | } | ||
129 | return m; | ||
130 | } | ||
131 | |||
132 | /* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */ | ||
133 | static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
134 | { | ||
135 | u16 m; | ||
136 | |||
137 | /* this is a mirrored packing of method A */ | ||
138 | n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p); | ||
139 | |||
140 | /* reverse packing */ | ||
141 | for (m = 0; m < n; m++) { | ||
142 | *p = *area - *p - w; | ||
143 | p++; | ||
144 | *p = *area - *p - ((w + 1) >> 1); | ||
145 | p++; | ||
146 | } | ||
147 | return n; | ||
148 | } | ||
149 | |||
150 | /* Method B: simple layout: aAbcBdeCfgDhEFGH */ | ||
151 | #define MAX_B 8 | ||
152 | static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
153 | { | ||
154 | u16 e = (o + w) % a; /* end offset */ | ||
155 | u16 o1 = (o >> 1) % a; /* half offset */ | ||
156 | u16 e1 = ((o + w + 1) >> 1) % a; /* half end offset */ | ||
157 | u16 o2 = o1 + (a >> 2); /* 2nd half offset */ | ||
158 | u16 e2 = e1 + (a >> 2); /* 2nd half end offset */ | ||
159 | u16 m = 0; | ||
160 | *area = band_8; | ||
161 | |||
162 | /* ensure 16-bit blocks don't overlap 8-bit blocks */ | ||
163 | |||
164 | /* width cannot wrap around alignment, half block must be before block, | ||
165 | 2nd half can be before or after */ | ||
166 | if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e)) | ||
167 | while (o + w <= *area && m < n) { | ||
168 | BUG_ON(m + 1 >= MAX_B); | ||
169 | *p++ = o; | ||
170 | *p++ = o >> 1; | ||
171 | m++; | ||
172 | o += a; | ||
173 | } | ||
174 | return m; | ||
175 | } | ||
176 | |||
177 | /* Method C: butterfly layout: AAbbaaBB */ | ||
178 | #define MAX_C 20 | ||
179 | static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
180 | { | ||
181 | int m = 0; | ||
182 | u16 o2, e = ALIGN(w, a), i = 0, j = 0; | ||
183 | *area = band_8; | ||
184 | o2 = *area - (a - (o + w) % a) % a; /* end of last possible block */ | ||
185 | |||
186 | m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1; | ||
187 | for (i = j = 0; i < m && j < n; i++, j++) { | ||
188 | BUG_ON(j + 1 >= MAX_C); | ||
189 | *p++ = o + i * e; | ||
190 | *p++ = (o + i * e + *area) >> 1; | ||
191 | if (++j < n) { | ||
192 | *p++ = o2 - i * e - w; | ||
193 | *p++ = (o2 - i * e - w) >> 1; | ||
194 | } | ||
195 | } | ||
196 | return j; | ||
197 | } | ||
198 | |||
199 | /* Method D: for large allocation: aA or Aa */ | ||
200 | #define MAX_D 1 | ||
201 | static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
202 | { | ||
203 | u16 o1, w1 = (w + 1) >> 1, d; | ||
204 | *area = ALIGN(o + w, band_8); | ||
205 | |||
206 | for (d = 0; n > 0 && d + o + w <= *area; d += a) { | ||
207 | /* try to fit 16-bit before 8-bit */ | ||
208 | o1 = ((o + d) % band_8) >> 1; | ||
209 | if (o1 + w1 <= o + d) { | ||
210 | *p++ = o + d; | ||
211 | *p++ = o1; | ||
212 | return 1; | ||
213 | } | ||
214 | |||
215 | /* try to fit 16-bit after 8-bit */ | ||
216 | o1 += ALIGN(d + o + w - o1, band_16); | ||
217 | if (o1 + w1 <= *area) { | ||
218 | *p++ = o; | ||
219 | *p++ = o1; | ||
220 | return 1; | ||
221 | } | ||
222 | } | ||
223 | return 0; | ||
224 | } | ||
225 | |||
226 | /** | ||
227 | * Umbrella nv12 packing method. This selects the best packings from the above | ||
228 | * methods. It also contains hardcoded packings for parameter combinations | ||
229 | * that have more efficient packings. This method provides is guaranteed to | ||
230 | * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large. | ||
231 | */ | ||
232 | #define MAX_ANY 21 /* must be MAX(method-MAX-s, hardcoded n-s) */ | ||
233 | static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing) | ||
234 | { | ||
235 | u16 n_best, a_best, n2, a_, o_, w_; | ||
236 | |||
237 | /* algo results (packings) */ | ||
238 | u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2]; | ||
239 | u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2]; | ||
240 | u8 pack_D[MAX_D * 2]; | ||
241 | |||
242 | /* | ||
243 | * Hardcoded packings. They are sorted by increasing area, and then by | ||
244 | * decreasing n. We may not get the best efficiency if less than n | ||
245 | * blocks are needed as packings are not necessarily sorted in | ||
246 | * increasing order. However, for those n-s one of the other 4 methods | ||
247 | * may return the optimal packing. | ||
248 | */ | ||
249 | u8 packings[] = { | ||
250 | /* n=9, o=2, w=4, a=4, area=64 */ | ||
251 | 9, 2, 4, 4, 64, | ||
252 | /* 8-bit, 16-bit block coordinate pairs */ | ||
253 | 2, 33, 6, 35, 10, 37, 14, 39, 18, 41, | ||
254 | 46, 23, 50, 25, 54, 27, 58, 29, | ||
255 | /* o=0, w=12, a=4, n=3 */ | ||
256 | 3, 0, 12, 4, 64, | ||
257 | 0, 32, 12, 38, 48, 24, | ||
258 | /* end */ | ||
259 | 0 | ||
260 | }, *p = packings, *p_best = NULL, *p_end; | ||
261 | p_end = packings + sizeof(packings) - 1; | ||
262 | |||
263 | /* see which method gives the best packing */ | ||
264 | |||
265 | /* start with smallest area algorithms A, B & C, stop if we can | ||
266 | pack all buffers */ | ||
267 | n_best = nv12_A(o, a, w, n, area, pack_A); | ||
268 | p_best = pack_A; | ||
269 | if (n_best < n) { | ||
270 | n2 = nv12_revA(o, a, w, n, &a_best, pack_rA); | ||
271 | if (n2 > n_best) { | ||
272 | n_best = n2; | ||
273 | p_best = pack_rA; | ||
274 | *area = a_best; | ||
275 | } | ||
276 | } | ||
277 | if (n_best < n) { | ||
278 | n2 = nv12_B(o, a, w, n, &a_best, pack_B); | ||
279 | if (n2 > n_best) { | ||
280 | n_best = n2; | ||
281 | p_best = pack_B; | ||
282 | *area = a_best; | ||
283 | } | ||
284 | } | ||
285 | if (n_best < n) { | ||
286 | n2 = nv12_C(o, a, w, n, &a_best, pack_C); | ||
287 | if (n2 > n_best) { | ||
288 | n_best = n2; | ||
289 | p_best = pack_C; | ||
290 | *area = a_best; | ||
291 | } | ||
292 | } | ||
293 | |||
294 | /* traverse any special packings */ | ||
295 | while (*p) { | ||
296 | n2 = *p++; | ||
297 | o_ = *p++; | ||
298 | w_ = *p++; | ||
299 | a_ = *p++; | ||
300 | /* stop if we already have a better packing */ | ||
301 | if (n2 < n_best) | ||
302 | break; | ||
303 | |||
304 | /* check if this packing is satisfactory */ | ||
305 | if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) { | ||
306 | *area = *p++; | ||
307 | n_best = min(n2, n); | ||
308 | p_best = p; | ||
309 | break; | ||
310 | } | ||
311 | |||
312 | /* skip to next packing */ | ||
313 | p += 1 + n2 * 2; | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * If so far unsuccessful, check whether 8 and 16 bit blocks can be | ||
318 | * co-packed. This will actually be done in the end by the normal | ||
319 | * allocation, but we need to reserve a big-enough area. | ||
320 | */ | ||
321 | if (!n_best) { | ||
322 | n_best = nv12_D(o, a, w, n, area, pack_D); | ||
323 | p_best = NULL; | ||
324 | } | ||
325 | |||
326 | /* store best packing */ | ||
327 | if (p_best && n_best) { | ||
328 | BUG_ON(n_best > MAX_ANY); | ||
329 | memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A)); | ||
330 | } | ||
331 | |||
332 | return n_best; | ||
333 | } | ||
334 | |||
335 | /* reserve nv12 blocks */ | ||
336 | static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, | ||
337 | u32 gid, struct process_info *pi) | ||
338 | { | ||
339 | u16 w, h, band, a = align, o = offs; | ||
340 | struct gid_info *gi; | ||
341 | int res = 0, res2, i; | ||
342 | u16 n_t, n_s, area_t, area_s; | ||
343 | u8 packing[2 * MAX_ANY]; | ||
344 | struct list_head reserved = LIST_HEAD_INIT(reserved); | ||
345 | |||
346 | /* adjust alignment to the largest slot width (128 bytes) */ | ||
347 | a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a); | ||
348 | |||
349 | /* Check input parameters for correctness, and support */ | ||
350 | if (!width || !height || !n || | ||
351 | offs >= align || offs & 1 || | ||
352 | align >= PAGE_SIZE || | ||
353 | n > ops->width * ops->height / 2) | ||
354 | return; | ||
355 | |||
356 | /* calculate dimensions, band, offs and alignment in slots */ | ||
357 | if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o, | ||
358 | NULL)) | ||
359 | return; | ||
360 | |||
361 | /* get group context */ | ||
362 | gi = ops->get_gi(pi, gid); | ||
363 | if (!gi) | ||
364 | return; | ||
365 | |||
366 | /* reserve in groups until failed or all is reserved */ | ||
367 | for (i = 0; i < n && res >= 0; i += res) { | ||
368 | /* check packing separately vs together */ | ||
369 | n_s = nv12_separate(o, a, w, n - i, &area_s); | ||
370 | if (ops->nv12_packed) | ||
371 | n_t = nv12_together(o, a, w, n - i, &area_t, packing); | ||
372 | else | ||
373 | n_t = 0; | ||
374 | |||
375 | /* pack based on better efficiency */ | ||
376 | res = -1; | ||
377 | if (!ops->nv12_packed || | ||
378 | nv12_eff(w, n_s, area_s, n - i) > | ||
379 | nv12_eff(w, n_t, area_t, n - i)) { | ||
380 | |||
381 | /* | ||
382 | * Reserve blocks separately into a temporary list, so | ||
383 | * that we can free them if unsuccessful. We need to be | ||
384 | * able to reserve both 8- and 16-bit blocks as the | ||
385 | * offsets of them must match. | ||
386 | */ | ||
387 | res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o, | ||
388 | gi, &reserved); | ||
389 | res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h, | ||
390 | band_16, a >> 1, o >> 1, gi, &reserved); | ||
391 | |||
392 | if (res2 < 0 || res < 0 || res != res2) { | ||
393 | /* clean up */ | ||
394 | ops->release(&reserved); | ||
395 | res = -1; | ||
396 | } else { | ||
397 | /* add list to reserved */ | ||
398 | ops->add_reserved(&reserved, gi); | ||
399 | } | ||
400 | } | ||
401 | |||
402 | /* if separate packing failed, still try to pack together */ | ||
403 | if (res < 0 && ops->nv12_packed && n_t) { | ||
404 | /* pack together */ | ||
405 | res = ops->lay_nv12(n_t, area_t, w, h, gi, packing); | ||
406 | } | ||
407 | } | ||
408 | |||
409 | ops->release_gi(gi); | ||
410 | } | ||
411 | |||
412 | /* initialize shared method pointers and global static variables */ | ||
413 | void tiler_nv12_init(struct tiler_ops *tiler) | ||
414 | { | ||
415 | ops = tiler; | ||
416 | |||
417 | ops->reserve_nv12 = reserve_nv12; | ||
418 | |||
419 | band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w | ||
420 | / ops->geom(TILFMT_8BIT)->bpp; | ||
421 | band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w | ||
422 | / ops->geom(TILFMT_16BIT)->bpp; | ||
423 | } | ||
diff --git a/drivers/media/video/tiler/tiler-reserve.c b/drivers/media/video/tiler/tiler-reserve.c index 6715d3ddd6a..770fb07c5bb 100644 --- a/drivers/media/video/tiler/tiler-reserve.c +++ b/drivers/media/video/tiler/tiler-reserve.c | |||
@@ -19,8 +19,6 @@ | |||
19 | #include "_tiler.h" | 19 | #include "_tiler.h" |
20 | 20 | ||
21 | static struct tiler_ops *ops; /* shared methods and variables */ | 21 | static struct tiler_ops *ops; /* shared methods and variables */ |
22 | static int band_8; /* size of 8-bit band in slots */ | ||
23 | static int band_16; /* size of 16-bit band in slots */ | ||
24 | 22 | ||
25 | /** | 23 | /** |
26 | * Calculate the maximum number buffers that can be packed next to each other, | 24 | * Calculate the maximum number buffers that can be packed next to each other, |
@@ -38,7 +36,7 @@ static int band_16; /* size of 16-bit band in slots */ | |||
38 | * | 36 | * |
39 | * @return packing efficiency (0-1024) | 37 | * @return packing efficiency (0-1024) |
40 | */ | 38 | */ |
41 | static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area) | 39 | u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area) |
42 | { | 40 | { |
43 | u16 m = 0, max_n = *n; /* m is mostly n - 1 */ | 41 | u16 m = 0, max_n = *n; /* m is mostly n - 1 */ |
44 | u16 e = ALIGN(w, a); /* effective width of one block */ | 42 | u16 e = ALIGN(w, a); /* effective width of one block */ |
@@ -71,393 +69,6 @@ static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area) | |||
71 | return best_eff; | 69 | return best_eff; |
72 | } | 70 | } |
73 | 71 | ||
74 | /* | ||
75 | * NV12 Reservation Functions | ||
76 | * | ||
77 | * TILER is designed so that a (w * h) * 8bit area is twice as wide as a | ||
78 | * (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit | ||
79 | * blocks is a common usecase for TILER, we optimize packing these into a | ||
80 | * TILER area. | ||
81 | * | ||
82 | * During reservation we want to find the most effective packing (most used area | ||
83 | * in the smallest overall area) | ||
84 | * | ||
85 | * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit | ||
86 | * blocks into separate container areas, or pack them together into same area. | ||
87 | */ | ||
88 | |||
89 | /** | ||
90 | * Calculate effectiveness of packing. We weight total area much higher than | ||
91 | * packing efficiency to get the smallest overall container use. | ||
92 | * | ||
93 | * @param w width of one (8-bit) block | ||
94 | * @param n buffers in a packing | ||
95 | * @param area width of packing area | ||
96 | * @param n_total total number of buffers to be packed | ||
97 | * @return effectiveness, the higher the better | ||
98 | */ | ||
99 | static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total) | ||
100 | { | ||
101 | return 0x10000000 - | ||
102 | /* weigh against total area needed (for all buffers) */ | ||
103 | /* 64-slots = -2048 */ | ||
104 | DIV_ROUND_UP(n_total, n) * area * 32 + | ||
105 | /* packing efficiency (0 - 1024) */ | ||
106 | 1024 * n * ((w * 3 + 1) >> 1) / area; | ||
107 | } | ||
108 | |||
109 | /** | ||
110 | * Fallback nv12 packing algorithm: pack 8- and 16-bit blocks into separate | ||
111 | * areas. | ||
112 | * | ||
113 | * @author a0194118 (7/16/2010) | ||
114 | * | ||
115 | * @param o desired offset (<a) | ||
116 | * @param a desired alignment (>=2) | ||
117 | * @param w block width (>0) | ||
118 | * @param n number of blocks desired | ||
119 | * @param area pointer to store total area needed | ||
120 | * | ||
121 | * @return number of blocks that can be allocated | ||
122 | */ | ||
123 | static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area) | ||
124 | { | ||
125 | tiler_best2pack(o, a, band_8, w, &n, area); | ||
126 | tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area); | ||
127 | *area *= 3; | ||
128 | return n; | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * Specialized NV12 Reservation Algorithms | ||
133 | * | ||
134 | * We use 4 packing methods that pack nv12 blocks into the same area. Together | ||
135 | * these 4 methods give the optimal result for most possible input parameters. | ||
136 | * | ||
137 | * For now we pack into a 64-slot area, so that we don't have to worry about | ||
138 | * stride issues (all blocks get 4K stride). For some of the algorithms this | ||
139 | * could be true even if the area was 128. | ||
140 | */ | ||
141 | |||
142 | /** | ||
143 | * Packing types are marked using a letter sequence, capital letters denoting | ||
144 | * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks. | ||
145 | * | ||
146 | * All methods have the following parameters. They also define the maximum | ||
147 | * number of coordinates that could potentially be packed. | ||
148 | * | ||
149 | * @param o, a, w, n offset, alignment, width, # of blocks as usual | ||
150 | * @param area pointer to store area needed for packing | ||
151 | * @param p pointer to store packing coordinates | ||
152 | * @return number of blocks that can be packed | ||
153 | */ | ||
154 | |||
155 | /* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */ | ||
156 | #define MAX_A 21 | ||
157 | static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
158 | { | ||
159 | u16 x = o, u, l, m = 0; | ||
160 | *area = band_8; | ||
161 | |||
162 | while (x + w < *area && m < n) { | ||
163 | /* current 8bit upper bound (a) is next 8bit lower bound (B) */ | ||
164 | l = u = (*area + x) >> 1; | ||
165 | |||
166 | /* pack until upper bound */ | ||
167 | while (x + w <= u && m < n) { | ||
168 | /* save packing */ | ||
169 | BUG_ON(m + 1 >= MAX_A); | ||
170 | *p++ = x; | ||
171 | *p++ = l; | ||
172 | l = (*area + x + w + 1) >> 1; | ||
173 | x = ALIGN(x + w - o, a) + o; | ||
174 | m++; | ||
175 | } | ||
176 | x = ALIGN(l - o, a) + o; /* set new lower bound */ | ||
177 | } | ||
178 | return m; | ||
179 | } | ||
180 | |||
181 | /* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */ | ||
182 | static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
183 | { | ||
184 | u16 m; | ||
185 | |||
186 | /* this is a mirrored packing of method A */ | ||
187 | n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p); | ||
188 | |||
189 | /* reverse packing */ | ||
190 | for (m = 0; m < n; m++) { | ||
191 | *p = *area - *p - w; | ||
192 | p++; | ||
193 | *p = *area - *p - ((w + 1) >> 1); | ||
194 | p++; | ||
195 | } | ||
196 | return n; | ||
197 | } | ||
198 | |||
199 | /* Method B: simple layout: aAbcBdeCfgDhEFGH */ | ||
200 | #define MAX_B 8 | ||
201 | static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
202 | { | ||
203 | u16 e = (o + w) % a; /* end offset */ | ||
204 | u16 o1 = (o >> 1) % a; /* half offset */ | ||
205 | u16 e1 = ((o + w + 1) >> 1) % a; /* half end offset */ | ||
206 | u16 o2 = o1 + (a >> 2); /* 2nd half offset */ | ||
207 | u16 e2 = e1 + (a >> 2); /* 2nd half end offset */ | ||
208 | u16 m = 0; | ||
209 | *area = band_8; | ||
210 | |||
211 | /* ensure 16-bit blocks don't overlap 8-bit blocks */ | ||
212 | |||
213 | /* width cannot wrap around alignment, half block must be before block, | ||
214 | 2nd half can be before or after */ | ||
215 | if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e)) | ||
216 | while (o + w <= *area && m < n) { | ||
217 | BUG_ON(m + 1 >= MAX_B); | ||
218 | *p++ = o; | ||
219 | *p++ = o >> 1; | ||
220 | m++; | ||
221 | o += a; | ||
222 | } | ||
223 | return m; | ||
224 | } | ||
225 | |||
226 | /* Method C: butterfly layout: AAbbaaBB */ | ||
227 | #define MAX_C 20 | ||
228 | static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
229 | { | ||
230 | int m = 0; | ||
231 | u16 o2, e = ALIGN(w, a), i = 0, j = 0; | ||
232 | *area = band_8; | ||
233 | o2 = *area - (a - (o + w) % a) % a; /* end of last possible block */ | ||
234 | |||
235 | m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1; | ||
236 | for (i = j = 0; i < m && j < n; i++, j++) { | ||
237 | BUG_ON(j + 1 >= MAX_C); | ||
238 | *p++ = o + i * e; | ||
239 | *p++ = (o + i * e + *area) >> 1; | ||
240 | if (++j < n) { | ||
241 | *p++ = o2 - i * e - w; | ||
242 | *p++ = (o2 - i * e - w) >> 1; | ||
243 | } | ||
244 | } | ||
245 | return j; | ||
246 | } | ||
247 | |||
248 | /* Method D: for large allocation: aA or Aa */ | ||
249 | #define MAX_D 1 | ||
250 | static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) | ||
251 | { | ||
252 | u16 o1, w1 = (w + 1) >> 1, d; | ||
253 | *area = ALIGN(o + w, band_8); | ||
254 | |||
255 | for (d = 0; n > 0 && d + o + w <= *area; d += a) { | ||
256 | /* try to fit 16-bit before 8-bit */ | ||
257 | o1 = ((o + d) % band_8) >> 1; | ||
258 | if (o1 + w1 <= o + d) { | ||
259 | *p++ = o + d; | ||
260 | *p++ = o1; | ||
261 | return 1; | ||
262 | } | ||
263 | |||
264 | /* try to fit 16-bit after 8-bit */ | ||
265 | o1 += ALIGN(d + o + w - o1, band_16); | ||
266 | if (o1 + w1 <= *area) { | ||
267 | *p++ = o; | ||
268 | *p++ = o1; | ||
269 | return 1; | ||
270 | } | ||
271 | } | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | /** | ||
276 | * Umbrella nv12 packing method. This selects the best packings from the above | ||
277 | * methods. It also contains hardcoded packings for parameter combinations | ||
278 | * that have more efficient packings. This method is guaranteed to | ||
279 | * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large. | ||
280 | */ | ||
281 | #define MAX_ANY 21 /* must be MAX(method-MAX-s, hardcoded n-s) */ | ||
282 | static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing) | ||
283 | { | ||
284 | u16 n_best, a_best, n2, a_, o_, w_; | ||
285 | |||
286 | /* algo results (packings) */ | ||
287 | u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2]; | ||
288 | u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2]; | ||
289 | u8 pack_D[MAX_D * 2]; | ||
290 | |||
291 | /* | ||
292 | * Hardcoded packings. They are sorted by increasing area, and then by | ||
293 | * decreasing n. We may not get the best efficiency if less than n | ||
294 | * blocks are needed as packings are not necessarily sorted in | ||
295 | * increasing order. However, for those n-s one of the other 4 methods | ||
296 | * may return the optimal packing. | ||
297 | */ | ||
298 | u8 packings[] = { | ||
299 | /* n=9, o=2, w=4, a=4, area=64 */ | ||
300 | 9, 2, 4, 4, 64, | ||
301 | /* 8-bit, 16-bit block coordinate pairs */ | ||
302 | 2, 33, 6, 35, 10, 37, 14, 39, 18, 41, | ||
303 | 46, 23, 50, 25, 54, 27, 58, 29, | ||
304 | /* o=0, w=12, a=4, n=3 */ | ||
305 | 3, 0, 12, 4, 64, | ||
306 | 0, 32, 12, 38, 48, 24, | ||
307 | /* end */ | ||
308 | 0 | ||
309 | }, *p = packings, *p_best = NULL, *p_end; | ||
310 | p_end = packings + sizeof(packings) - 1; | ||
311 | |||
312 | /* see which method gives the best packing */ | ||
313 | |||
314 | /* start with smallest area algorithms A, B & C, stop if we can | ||
315 | pack all buffers */ | ||
316 | n_best = nv12_A(o, a, w, n, area, pack_A); | ||
317 | p_best = pack_A; | ||
318 | if (n_best < n) { | ||
319 | n2 = nv12_revA(o, a, w, n, &a_best, pack_rA); | ||
320 | if (n2 > n_best) { | ||
321 | n_best = n2; | ||
322 | p_best = pack_rA; | ||
323 | *area = a_best; | ||
324 | } | ||
325 | } | ||
326 | if (n_best < n) { | ||
327 | n2 = nv12_B(o, a, w, n, &a_best, pack_B); | ||
328 | if (n2 > n_best) { | ||
329 | n_best = n2; | ||
330 | p_best = pack_B; | ||
331 | *area = a_best; | ||
332 | } | ||
333 | } | ||
334 | if (n_best < n) { | ||
335 | n2 = nv12_C(o, a, w, n, &a_best, pack_C); | ||
336 | if (n2 > n_best) { | ||
337 | n_best = n2; | ||
338 | p_best = pack_C; | ||
339 | *area = a_best; | ||
340 | } | ||
341 | } | ||
342 | |||
343 | /* traverse any special packings */ | ||
344 | while (*p) { | ||
345 | n2 = *p++; | ||
346 | o_ = *p++; | ||
347 | w_ = *p++; | ||
348 | a_ = *p++; | ||
349 | /* stop if we already have a better packing */ | ||
350 | if (n2 < n_best) | ||
351 | break; | ||
352 | |||
353 | /* check if this packing is satisfactory */ | ||
354 | if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) { | ||
355 | *area = *p++; | ||
356 | n_best = min(n2, n); | ||
357 | p_best = p; | ||
358 | break; | ||
359 | } | ||
360 | |||
361 | /* skip to next packing */ | ||
362 | p += 1 + n2 * 2; | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * If so far unsuccessful, check whether 8 and 16 bit blocks can be | ||
367 | * co-packed. This will actually be done in the end by the normal | ||
368 | * allocation, but we need to reserve a big-enough area. | ||
369 | */ | ||
370 | if (!n_best) { | ||
371 | n_best = nv12_D(o, a, w, n, area, pack_D); | ||
372 | p_best = NULL; | ||
373 | } | ||
374 | |||
375 | /* store best packing */ | ||
376 | if (p_best && n_best) { | ||
377 | BUG_ON(n_best > MAX_ANY); | ||
378 | memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A)); | ||
379 | } | ||
380 | |||
381 | return n_best; | ||
382 | } | ||
383 | |||
384 | /* reserve nv12 blocks */ | ||
385 | static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, | ||
386 | u32 gid, struct process_info *pi) | ||
387 | { | ||
388 | u16 w, h, band, a = align, o = offs; | ||
389 | struct gid_info *gi; | ||
390 | int res = 0, res2, i; | ||
391 | u16 n_t, n_s, area_t, area_s; | ||
392 | u8 packing[2 * MAX_ANY]; | ||
393 | struct list_head reserved = LIST_HEAD_INIT(reserved); | ||
394 | |||
395 | /* adjust alignment to the largest slot width (128 bytes) */ | ||
396 | a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a); | ||
397 | |||
398 | /* Check input parameters for correctness, and support */ | ||
399 | if (!width || !height || !n || | ||
400 | offs >= align || offs & 1 || | ||
401 | align >= PAGE_SIZE || | ||
402 | n > ops->width * ops->height / 2) | ||
403 | return; | ||
404 | |||
405 | /* calculate dimensions, band, offs and alignment in slots */ | ||
406 | if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o, | ||
407 | NULL)) | ||
408 | return; | ||
409 | |||
410 | /* get group context */ | ||
411 | gi = ops->get_gi(pi, gid); | ||
412 | if (!gi) | ||
413 | return; | ||
414 | |||
415 | /* reserve in groups until failed or all is reserved */ | ||
416 | for (i = 0; i < n && res >= 0; i += res) { | ||
417 | /* check packing separately vs together */ | ||
418 | n_s = nv12_separate(o, a, w, n - i, &area_s); | ||
419 | if (ops->nv12_packed) | ||
420 | n_t = nv12_together(o, a, w, n - i, &area_t, packing); | ||
421 | else | ||
422 | n_t = 0; | ||
423 | |||
424 | /* pack based on better efficiency */ | ||
425 | res = -1; | ||
426 | if (!ops->nv12_packed || | ||
427 | nv12_eff(w, n_s, area_s, n - i) > | ||
428 | nv12_eff(w, n_t, area_t, n - i)) { | ||
429 | |||
430 | /* | ||
431 | * Reserve blocks separately into a temporary list, so | ||
432 | * that we can free them if unsuccessful. We need to be | ||
433 | * able to reserve both 8- and 16-bit blocks as the | ||
434 | * offsets of them must match. | ||
435 | */ | ||
436 | res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o, | ||
437 | gi, &reserved); | ||
438 | res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h, | ||
439 | band_16, a >> 1, o >> 1, gi, &reserved); | ||
440 | |||
441 | if (res2 < 0 || res < 0 || res != res2) { | ||
442 | /* clean up */ | ||
443 | ops->release(&reserved); | ||
444 | res = -1; | ||
445 | } else { | ||
446 | /* add list to reserved */ | ||
447 | ops->add_reserved(&reserved, gi); | ||
448 | } | ||
449 | } | ||
450 | |||
451 | /* if separate packing failed, still try to pack together */ | ||
452 | if (res < 0 && ops->nv12_packed && n_t) { | ||
453 | /* pack together */ | ||
454 | res = ops->lay_nv12(n_t, area_t, w, h, gi, packing); | ||
455 | } | ||
456 | } | ||
457 | |||
458 | ops->release_gi(gi); | ||
459 | } | ||
460 | |||
461 | /** | 72 | /** |
462 | * We also optimize packing regular 2D areas as the auto-packing may result in | 73 | * We also optimize packing regular 2D areas as the auto-packing may result in |
463 | * sub-optimal efficiency. This is most pronounced if the area is wider than | 74 | * sub-optimal efficiency. This is most pronounced if the area is wider than |
@@ -539,12 +150,6 @@ void tiler_reserve_init(struct tiler_ops *tiler) | |||
539 | { | 150 | { |
540 | ops = tiler; | 151 | ops = tiler; |
541 | 152 | ||
542 | ops->reserve_nv12 = reserve_nv12; | ||
543 | ops->reserve = reserve_blocks; | 153 | ops->reserve = reserve_blocks; |
544 | ops->unreserve = unreserve_blocks; | 154 | ops->unreserve = unreserve_blocks; |
545 | |||
546 | band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w | ||
547 | / ops->geom(TILFMT_8BIT)->bpp; | ||
548 | band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w | ||
549 | / ops->geom(TILFMT_16BIT)->bpp; | ||
550 | } | 155 | } |