diff options
Diffstat (limited to 'drivers/video/c2p.c')
-rw-r--r-- | drivers/video/c2p.c | 181 |
1 files changed, 96 insertions, 85 deletions
diff --git a/drivers/video/c2p.c b/drivers/video/c2p.c index f102b578ce10..c170fff0d35e 100644 --- a/drivers/video/c2p.c +++ b/drivers/video/c2p.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Fast C2P (Chunky-to-Planar) Conversion | 2 | * Fast C2P (Chunky-to-Planar) Conversion |
3 | * | 3 | * |
4 | * Copyright (C) 2003 Geert Uytterhoeven | 4 | * Copyright (C) 2003-2008 Geert Uytterhoeven |
5 | * | 5 | * |
6 | * NOTES: | 6 | * NOTES: |
7 | * - This code was inspired by Scout's C2P tutorial | 7 | * - This code was inspired by Scout's C2P tutorial |
@@ -14,6 +14,9 @@ | |||
14 | 14 | ||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
17 | |||
18 | #include <asm/unaligned.h> | ||
19 | |||
17 | #include "c2p.h" | 20 | #include "c2p.h" |
18 | 21 | ||
19 | 22 | ||
@@ -21,97 +24,100 @@ | |||
21 | * Basic transpose step | 24 | * Basic transpose step |
22 | */ | 25 | */ |
23 | 26 | ||
24 | #define _transp(d, i1, i2, shift, mask) \ | 27 | static inline void _transp(u32 d[], unsigned int i1, unsigned int i2, |
25 | do { \ | 28 | unsigned int shift, u32 mask) |
26 | u32 t = (d[i1] ^ (d[i2] >> shift)) & mask; \ | 29 | { |
27 | d[i1] ^= t; \ | 30 | u32 t = (d[i1] ^ (d[i2] >> shift)) & mask; |
28 | d[i2] ^= t << shift; \ | 31 | |
29 | } while (0) | 32 | d[i1] ^= t; |
33 | d[i2] ^= t << shift; | ||
34 | } | ||
30 | 35 | ||
31 | static inline u32 get_mask(int n) | 36 | extern void c2p_unsupported(void); |
37 | |||
38 | static inline u32 get_mask(unsigned int n) | ||
32 | { | 39 | { |
33 | switch (n) { | 40 | switch (n) { |
34 | case 1: | 41 | case 1: |
35 | return 0x55555555; | 42 | return 0x55555555; |
36 | break; | ||
37 | 43 | ||
38 | case 2: | 44 | case 2: |
39 | return 0x33333333; | 45 | return 0x33333333; |
40 | break; | ||
41 | 46 | ||
42 | case 4: | 47 | case 4: |
43 | return 0x0f0f0f0f; | 48 | return 0x0f0f0f0f; |
44 | break; | ||
45 | 49 | ||
46 | case 8: | 50 | case 8: |
47 | return 0x00ff00ff; | 51 | return 0x00ff00ff; |
48 | break; | ||
49 | 52 | ||
50 | case 16: | 53 | case 16: |
51 | return 0x0000ffff; | 54 | return 0x0000ffff; |
52 | break; | ||
53 | } | 55 | } |
56 | |||
57 | c2p_unsupported(); | ||
54 | return 0; | 58 | return 0; |
55 | } | 59 | } |
56 | 60 | ||
57 | #define transp_nx1(d, n) \ | 61 | static inline void transp8(u32 d[], unsigned int n, unsigned int m) |
58 | do { \ | 62 | { |
59 | u32 mask = get_mask(n); \ | 63 | u32 mask = get_mask(n); |
60 | /* First block */ \ | 64 | |
61 | _transp(d, 0, 1, n, mask); \ | 65 | switch (m) { |
62 | /* Second block */ \ | 66 | case 1: |
63 | _transp(d, 2, 3, n, mask); \ | 67 | /* First n x 1 block */ |
64 | /* Third block */ \ | 68 | _transp(d, 0, 1, n, mask); |
65 | _transp(d, 4, 5, n, mask); \ | 69 | /* Second n x 1 block */ |
66 | /* Fourth block */ \ | 70 | _transp(d, 2, 3, n, mask); |
67 | _transp(d, 6, 7, n, mask); \ | 71 | /* Third n x 1 block */ |
68 | } while (0) | 72 | _transp(d, 4, 5, n, mask); |
69 | 73 | /* Fourth n x 1 block */ | |
70 | #define transp_nx2(d, n) \ | 74 | _transp(d, 6, 7, n, mask); |
71 | do { \ | 75 | return; |
72 | u32 mask = get_mask(n); \ | 76 | |
73 | /* First block */ \ | 77 | case 2: |
74 | _transp(d, 0, 2, n, mask); \ | 78 | /* First n x 2 block */ |
75 | _transp(d, 1, 3, n, mask); \ | 79 | _transp(d, 0, 2, n, mask); |
76 | /* Second block */ \ | 80 | _transp(d, 1, 3, n, mask); |
77 | _transp(d, 4, 6, n, mask); \ | 81 | /* Second n x 2 block */ |
78 | _transp(d, 5, 7, n, mask); \ | 82 | _transp(d, 4, 6, n, mask); |
79 | } while (0) | 83 | _transp(d, 5, 7, n, mask); |
80 | 84 | return; | |
81 | #define transp_nx4(d, n) \ | 85 | |
82 | do { \ | 86 | case 4: |
83 | u32 mask = get_mask(n); \ | 87 | /* Single n x 4 block */ |
84 | _transp(d, 0, 4, n, mask); \ | 88 | _transp(d, 0, 4, n, mask); |
85 | _transp(d, 1, 5, n, mask); \ | 89 | _transp(d, 1, 5, n, mask); |
86 | _transp(d, 2, 6, n, mask); \ | 90 | _transp(d, 2, 6, n, mask); |
87 | _transp(d, 3, 7, n, mask); \ | 91 | _transp(d, 3, 7, n, mask); |
88 | } while (0) | 92 | return; |
89 | 93 | } | |
90 | #define transp(d, n, m) transp_nx ## m(d, n) | 94 | |
95 | c2p_unsupported(); | ||
96 | } | ||
91 | 97 | ||
92 | 98 | ||
93 | /* | 99 | /* |
94 | * Perform a full C2P step on 32 8-bit pixels, stored in 8 32-bit words | 100 | * Perform a full C2P step on 32 8-bit pixels, stored in 8 32-bit words |
95 | * containing | 101 | * containing |
96 | * - 32 8-bit chunky pixels on input | 102 | * - 32 8-bit chunky pixels on input |
97 | * - permuted planar data on output | 103 | * - permutated planar data (1 plane per 32-bit word) on output |
98 | */ | 104 | */ |
99 | 105 | ||
100 | static void c2p_8bpp(u32 d[8]) | 106 | static void c2p_32x8(u32 d[8]) |
101 | { | 107 | { |
102 | transp(d, 16, 4); | 108 | transp8(d, 16, 4); |
103 | transp(d, 8, 2); | 109 | transp8(d, 8, 2); |
104 | transp(d, 4, 1); | 110 | transp8(d, 4, 1); |
105 | transp(d, 2, 4); | 111 | transp8(d, 2, 4); |
106 | transp(d, 1, 2); | 112 | transp8(d, 1, 2); |
107 | } | 113 | } |
108 | 114 | ||
109 | 115 | ||
110 | /* | 116 | /* |
111 | * Array containing the permution indices of the planar data after c2p | 117 | * Array containing the permutation indices of the planar data after c2p |
112 | */ | 118 | */ |
113 | 119 | ||
114 | static const int perm_c2p_8bpp[8] = { 7, 5, 3, 1, 6, 4, 2, 0 }; | 120 | static const int perm_c2p_32x8[8] = { 7, 5, 3, 1, 6, 4, 2, 0 }; |
115 | 121 | ||
116 | 122 | ||
117 | /* | 123 | /* |
@@ -119,8 +125,7 @@ static const int perm_c2p_8bpp[8] = { 7, 5, 3, 1, 6, 4, 2, 0 }; | |||
119 | * This is equivalent to (a & mask) | (b & ~mask) | 125 | * This is equivalent to (a & mask) | (b & ~mask) |
120 | */ | 126 | */ |
121 | 127 | ||
122 | static inline unsigned long comp(unsigned long a, unsigned long b, | 128 | static inline u32 comp(u32 a, u32 b, u32 mask) |
123 | unsigned long mask) | ||
124 | { | 129 | { |
125 | return ((a ^ b) & mask) ^ b; | 130 | return ((a ^ b) & mask) ^ b; |
126 | } | 131 | } |
@@ -130,12 +135,12 @@ static inline unsigned long comp(unsigned long a, unsigned long b, | |||
130 | * Store a full block of planar data after c2p conversion | 135 | * Store a full block of planar data after c2p conversion |
131 | */ | 136 | */ |
132 | 137 | ||
133 | static inline void store_planar(char *dst, u32 dst_inc, u32 bpp, u32 d[8]) | 138 | static inline void store_planar(void *dst, u32 dst_inc, u32 bpp, u32 d[8]) |
134 | { | 139 | { |
135 | int i; | 140 | int i; |
136 | 141 | ||
137 | for (i = 0; i < bpp; i++, dst += dst_inc) | 142 | for (i = 0; i < bpp; i++, dst += dst_inc) |
138 | *(u32 *)dst = d[perm_c2p_8bpp[i]]; | 143 | put_unaligned_be32(d[perm_c2p_32x8[i]], dst); |
139 | } | 144 | } |
140 | 145 | ||
141 | 146 | ||
@@ -143,13 +148,15 @@ static inline void store_planar(char *dst, u32 dst_inc, u32 bpp, u32 d[8]) | |||
143 | * Store a partial block of planar data after c2p conversion | 148 | * Store a partial block of planar data after c2p conversion |
144 | */ | 149 | */ |
145 | 150 | ||
146 | static inline void store_planar_masked(char *dst, u32 dst_inc, u32 bpp, | 151 | static inline void store_planar_masked(void *dst, u32 dst_inc, u32 bpp, |
147 | u32 d[8], u32 mask) | 152 | u32 d[8], u32 mask) |
148 | { | 153 | { |
149 | int i; | 154 | int i; |
150 | 155 | ||
151 | for (i = 0; i < bpp; i++, dst += dst_inc) | 156 | for (i = 0; i < bpp; i++, dst += dst_inc) |
152 | *(u32 *)dst = comp(d[perm_c2p_8bpp[i]], *(u32 *)dst, mask); | 157 | put_unaligned_be32(comp(d[perm_c2p_32x8[i]], |
158 | get_unaligned_be32(dst), mask), | ||
159 | dst); | ||
153 | } | 160 | } |
154 | 161 | ||
155 | 162 | ||
@@ -166,18 +173,21 @@ static inline void store_planar_masked(char *dst, u32 dst_inc, u32 bpp, | |||
166 | * @bpp: Bits per pixel of the planar frame buffer (1-8) | 173 | * @bpp: Bits per pixel of the planar frame buffer (1-8) |
167 | */ | 174 | */ |
168 | 175 | ||
169 | void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height, | 176 | void c2p(void *dst, const void *src, u32 dx, u32 dy, u32 width, u32 height, |
170 | u32 dst_nextline, u32 dst_nextplane, u32 src_nextline, u32 bpp) | 177 | u32 dst_nextline, u32 dst_nextplane, u32 src_nextline, u32 bpp) |
171 | { | 178 | { |
172 | int dst_idx; | 179 | union { |
173 | u32 d[8], first, last, w; | 180 | u8 pixels[32]; |
181 | u32 words[8]; | ||
182 | } d; | ||
183 | u32 dst_idx, first, last, w; | ||
174 | const u8 *c; | 184 | const u8 *c; |
175 | u8 *p; | 185 | void *p; |
176 | 186 | ||
177 | dst += dy*dst_nextline+(dx & ~31); | 187 | dst += dy*dst_nextline+(dx & ~31); |
178 | dst_idx = dx % 32; | 188 | dst_idx = dx % 32; |
179 | first = ~0UL >> dst_idx; | 189 | first = 0xffffffffU >> dst_idx; |
180 | last = ~(~0UL >> ((dst_idx+width) % 32)); | 190 | last = ~(0xffffffffU >> ((dst_idx+width) % 32)); |
181 | while (height--) { | 191 | while (height--) { |
182 | c = src; | 192 | c = src; |
183 | p = dst; | 193 | p = dst; |
@@ -185,11 +195,12 @@ void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height, | |||
185 | if (dst_idx+width <= 32) { | 195 | if (dst_idx+width <= 32) { |
186 | /* Single destination word */ | 196 | /* Single destination word */ |
187 | first &= last; | 197 | first &= last; |
188 | memset(d, 0, sizeof(d)); | 198 | memset(d.pixels, 0, sizeof(d)); |
189 | memcpy((u8 *)d+dst_idx, c, width); | 199 | memcpy(d.pixels+dst_idx, c, width); |
190 | c += width; | 200 | c += width; |
191 | c2p_8bpp(d); | 201 | c2p_32x8(d.words); |
192 | store_planar_masked(p, dst_nextplane, bpp, d, first); | 202 | store_planar_masked(p, dst_nextplane, bpp, d.words, |
203 | first); | ||
193 | p += 4; | 204 | p += 4; |
194 | } else { | 205 | } else { |
195 | /* Multiple destination words */ | 206 | /* Multiple destination words */ |
@@ -197,32 +208,32 @@ void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height, | |||
197 | /* Leading bits */ | 208 | /* Leading bits */ |
198 | if (dst_idx) { | 209 | if (dst_idx) { |
199 | w = 32 - dst_idx; | 210 | w = 32 - dst_idx; |
200 | memset(d, 0, dst_idx); | 211 | memset(d.pixels, 0, dst_idx); |
201 | memcpy((u8 *)d+dst_idx, c, w); | 212 | memcpy(d.pixels+dst_idx, c, w); |
202 | c += w; | 213 | c += w; |
203 | c2p_8bpp(d); | 214 | c2p_32x8(d.words); |
204 | store_planar_masked(p, dst_nextplane, bpp, d, | 215 | store_planar_masked(p, dst_nextplane, bpp, |
205 | first); | 216 | d.words, first); |
206 | p += 4; | 217 | p += 4; |
207 | w = width-w; | 218 | w = width-w; |
208 | } | 219 | } |
209 | /* Main chunk */ | 220 | /* Main chunk */ |
210 | while (w >= 32) { | 221 | while (w >= 32) { |
211 | memcpy(d, c, 32); | 222 | memcpy(d.pixels, c, 32); |
212 | c += 32; | 223 | c += 32; |
213 | c2p_8bpp(d); | 224 | c2p_32x8(d.words); |
214 | store_planar(p, dst_nextplane, bpp, d); | 225 | store_planar(p, dst_nextplane, bpp, d.words); |
215 | p += 4; | 226 | p += 4; |
216 | w -= 32; | 227 | w -= 32; |
217 | } | 228 | } |
218 | /* Trailing bits */ | 229 | /* Trailing bits */ |
219 | w %= 32; | 230 | w %= 32; |
220 | if (w > 0) { | 231 | if (w > 0) { |
221 | memcpy(d, c, w); | 232 | memcpy(d.pixels, c, w); |
222 | memset((u8 *)d+w, 0, 32-w); | 233 | memset(d.pixels+w, 0, 32-w); |
223 | c2p_8bpp(d); | 234 | c2p_32x8(d.words); |
224 | store_planar_masked(p, dst_nextplane, bpp, d, | 235 | store_planar_masked(p, dst_nextplane, bpp, |
225 | last); | 236 | d.words, last); |
226 | } | 237 | } |
227 | } | 238 | } |
228 | src += src_nextline; | 239 | src += src_nextline; |