1 files changed, 96 insertions, 85 deletions
diff --git a/drivers/video/c2p.c b/drivers/video/c2p.c
index f102b578ce10..c170fff0d35e 100644
--- a/drivers/video/c2p.c
+++ b/drivers/video/c2p.c
@@ -1,7 +1,7 @@
 /*
 *  Fast C2P (Chunky-to-Planar) Conversion
 *
- *  Copyright (C) 2003 Geert Uytterhoeven
+ *  Copyright (C) 2003-2008 Geert Uytterhoeven
 *
 *  NOTES:
 *    - This code was inspired by Scout's C2P tutorial
@@ -14,6 +14,9 @@
 #include <linux/module.h>
 #include <linux/string.h>
+#include <asm/unaligned.h>
 #include "c2p.h"
@@ -21,97 +24,100 @@
     *  Basic transpose step
     */
-#define _transp(d, i1, i2, shift, mask)                         \
+static inline void _transp(u32 d[], unsigned int i1, unsigned int i2,
-        do {                                                    \
+                           unsigned int shift, u32 mask)
-                u32 t = (d[i1] ^ (d[i2] >> shift)) & mask;      \
+{
-                d[i1] ^= t;                                     \
+        u32 t = (d[i1] ^ (d[i2] >> shift)) & mask;
-                d[i2] ^= t << shift;                            \
-        } while (0)
+        d[i1] ^= t;
+        d[i2] ^= t << shift;
+}
-static inline u32 get_mask(int n)
+extern void c2p_unsupported(void);
+static inline u32 get_mask(unsigned int n)
 {
        switch (n) {
        case 1:
                return 0x55555555;
-                break;
        case 2:
                return 0x33333333;
-                break;
        case 4:
                return 0x0f0f0f0f;
-                break;
        case 8:
                return 0x00ff00ff;
-                break;
        case 16:
                return 0x0000ffff;
-                break;
        }
+        c2p_unsupported();
        return 0;
 }
-#define transp_nx1(d, n)                                        \
+static inline void transp8(u32 d[], unsigned int n, unsigned int m)
-        do {                                                    \
+{
-                u32 mask = get_mask(n);                         \
+        u32 mask = get_mask(n);
-                /* First block */                               \
-                _transp(d, 0, 1, n, mask);                      \
+        switch (m) {
-                /* Second block */                              \
+        case 1:
-                _transp(d, 2, 3, n, mask);                      \
+                /* First n x 1 block */
-                /* Third block */                               \
+                _transp(d, 0, 1, n, mask);
-                _transp(d, 4, 5, n, mask);                      \
+                /* Second n x 1 block */
-                /* Fourth block */                              \
+                _transp(d, 2, 3, n, mask);
-                _transp(d, 6, 7, n, mask);                      \
+                /* Third n x 1 block */
-        } while (0)
+                _transp(d, 4, 5, n, mask);
+                /* Fourth n x 1 block */
-#define transp_nx2(d, n)                                        \
+                _transp(d, 6, 7, n, mask);
-        do {                                                    \
+                return;
-                u32 mask = get_mask(n);                         \
-                /* First block */                               \
+        case 2:
-                _transp(d, 0, 2, n, mask);                      \
+                /* First n x 2 block */
-                _transp(d, 1, 3, n, mask);                      \
+                _transp(d, 0, 2, n, mask);
-                /* Second block */                              \
+                _transp(d, 1, 3, n, mask);
-                _transp(d, 4, 6, n, mask);                      \
+                /* Second n x 2 block */
-                _transp(d, 5, 7, n, mask);                      \
+                _transp(d, 4, 6, n, mask);
-        } while (0)
+                _transp(d, 5, 7, n, mask);
+                return;
-#define transp_nx4(d, n)                                        \
-        do {                                                    \
+        case 4:
-                u32 mask = get_mask(n);                         \
+                /* Single n x 4 block */
-                _transp(d, 0, 4, n, mask);                      \
+                _transp(d, 0, 4, n, mask);
-                _transp(d, 1, 5, n, mask);                      \
+                _transp(d, 1, 5, n, mask);
-                _transp(d, 2, 6, n, mask);                      \
+                _transp(d, 2, 6, n, mask);
-                _transp(d, 3, 7, n, mask);                      \
+                _transp(d, 3, 7, n, mask);
-        } while (0)
+                return;
+        }
-#define transp(d, n, m) transp_nx ## m(d, n)
+        c2p_unsupported();
+}
    /*
     *  Perform a full C2P step on 32 8-bit pixels, stored in 8 32-bit words
     *  containing
     *    - 32 8-bit chunky pixels on input
-     *    - permuted planar data on output
+     *    - permutated planar data (1 plane per 32-bit word) on output
     */
-static void c2p_8bpp(u32 d[8])
+static void c2p_32x8(u32 d[8])
 {
-        transp(d, 16, 4);
+        transp8(d, 16, 4);
-        transp(d, 8, 2);
+        transp8(d, 8, 2);
-        transp(d, 4, 1);
+        transp8(d, 4, 1);
-        transp(d, 2, 4);
+        transp8(d, 2, 4);
-        transp(d, 1, 2);
+        transp8(d, 1, 2);
 }
    /*
-     *  Array containing the permution indices of the planar data after c2p
+     *  Array containing the permutation indices of the planar data after c2p
     */
-static const int perm_c2p_8bpp[8] = { 7, 5, 3, 1, 6, 4, 2, 0 };
+static const int perm_c2p_32x8[8] = { 7, 5, 3, 1, 6, 4, 2, 0 };
    /*
@@ -119,8 +125,7 @@ static const int perm_c2p_8bpp[8] = { 7, 5, 3, 1, 6, 4, 2, 0 };
     *  This is equivalent to (a & mask) | (b & ~mask)
     */
-static inline unsigned long comp(unsigned long a, unsigned long b,
+static inline u32 comp(u32 a, u32 b, u32 mask)
-                                 unsigned long mask)
 {
        return ((a ^ b) & mask) ^ b;
 }
@@ -130,12 +135,12 @@ static inline unsigned long comp(unsigned long a, unsigned long b,
     *  Store a full block of planar data after c2p conversion
     */
-static inline void store_planar(char *dst, u32 dst_inc, u32 bpp, u32 d[8])
+static inline void store_planar(void *dst, u32 dst_inc, u32 bpp, u32 d[8])
 {
        int i;
        for (i = 0; i < bpp; i++, dst += dst_inc)
-                *(u32 *)dst = d[perm_c2p_8bpp[i]];
+                put_unaligned_be32(d[perm_c2p_32x8[i]], dst);
 }
@@ -143,13 +148,15 @@ static inline void store_planar(char *dst, u32 dst_inc, u32 bpp, u32 d[8])
     *  Store a partial block of planar data after c2p conversion
     */
-static inline void store_planar_masked(char *dst, u32 dst_inc, u32 bpp,
+static inline void store_planar_masked(void *dst, u32 dst_inc, u32 bpp,
                                       u32 d[8], u32 mask)
 {
        int i;
        for (i = 0; i < bpp; i++, dst += dst_inc)
-                *(u32 *)dst = comp(d[perm_c2p_8bpp[i]], *(u32 *)dst, mask);
+                put_unaligned_be32(comp(d[perm_c2p_32x8[i]],
+                                        get_unaligned_be32(dst), mask),
+                                   dst);
 }
@@ -166,18 +173,21 @@ static inline void store_planar_masked(char *dst, u32 dst_inc, u32 bpp,
     *  @bpp: Bits per pixel of the planar frame buffer (1-8)
     */
-void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height,
+void c2p(void *dst, const void *src, u32 dx, u32 dy, u32 width, u32 height,
         u32 dst_nextline, u32 dst_nextplane, u32 src_nextline, u32 bpp)
 {
-        int dst_idx;
+        union {
-        u32 d[8], first, last, w;
+                u8 pixels[32];
+                u32 words[8];
+        } d;
+        u32 dst_idx, first, last, w;
        const u8 *c;
-        u8 *p;
+        void *p;
        dst += dy*dst_nextline+(dx & ~31);
        dst_idx = dx % 32;
-        first = ~0UL >> dst_idx;
+        first = 0xffffffffU >> dst_idx;
-        last = ~(~0UL >> ((dst_idx+width) % 32));
+        last = ~(0xffffffffU >> ((dst_idx+width) % 32));
        while (height--) {
                c = src;
                p = dst;
@@ -185,11 +195,12 @@ void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height,
                if (dst_idx+width <= 32) {
                        /* Single destination word */
                        first &= last;
-                        memset(d, 0, sizeof(d));
+                        memset(d.pixels, 0, sizeof(d));
-                        memcpy((u8 *)d+dst_idx, c, width);
+                        memcpy(d.pixels+dst_idx, c, width);
                        c += width;
-                        c2p_8bpp(d);
+                        c2p_32x8(d.words);
-                        store_planar_masked(p, dst_nextplane, bpp, d, first);
+                        store_planar_masked(p, dst_nextplane, bpp, d.words,
+                                            first);
                        p += 4;
                } else {
                        /* Multiple destination words */
@@ -197,32 +208,32 @@ void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height,
                        /* Leading bits */
                        if (dst_idx) {
                                w = 32 - dst_idx;
-                                memset(d, 0, dst_idx);
+                                memset(d.pixels, 0, dst_idx);
-                                memcpy((u8 *)d+dst_idx, c, w);
+                                memcpy(d.pixels+dst_idx, c, w);
                                c += w;
-                                c2p_8bpp(d);
+                                c2p_32x8(d.words);
-                                store_planar_masked(p, dst_nextplane, bpp, d,
+                                store_planar_masked(p, dst_nextplane, bpp,
-                                                    first);
+                                                    d.words, first);
                                p += 4;
                                w = width-w;
                        }
                        /* Main chunk */
                        while (w >= 32) {
-                                memcpy(d, c, 32);
+                                memcpy(d.pixels, c, 32);
                                c += 32;
-                                c2p_8bpp(d);
+                                c2p_32x8(d.words);
-                                store_planar(p, dst_nextplane, bpp, d);
+                                store_planar(p, dst_nextplane, bpp, d.words);
                                p += 4;
                                w -= 32;
                        }
                        /* Trailing bits */
                        w %= 32;
                        if (w > 0) {
-                                memcpy(d, c, w);
+                                memcpy(d.pixels, c, w);
-                                memset((u8 *)d+w, 0, 32-w);
+                                memset(d.pixels+w, 0, 32-w);
-                                c2p_8bpp(d);
+                                c2p_32x8(d.words);
-                                store_planar_masked(p, dst_nextplane, bpp, d,
+                                store_planar_masked(p, dst_nextplane, bpp,
-                                                    last);
+                                                    d.words, last);
                        }
                }
                src += src_nextline;

diff --git a/drivers/video/c2p.c b/drivers/video/c2p.c index f102b578ce10..c170fff0d35e 100644 --- a/drivers/video/c2p.c +++ b/drivers/video/c2p.c
@@ -1,7 +1,7 @@
1	/*	1	/*
2	* Fast C2P (Chunky-to-Planar) Conversion	2	* Fast C2P (Chunky-to-Planar) Conversion
3	*	3	*
4	* Copyright (C) 2003 Geert Uytterhoeven	4	* Copyright (C) 2003-2008 Geert Uytterhoeven
5	*	5	*
6	* NOTES:	6	* NOTES:
7	* - This code was inspired by Scout's C2P tutorial	7	* - This code was inspired by Scout's C2P tutorial
@@ -14,6 +14,9 @@
14		14
15	#include <linux/module.h>	15	#include <linux/module.h>
16	#include <linux/string.h>	16	#include <linux/string.h>
		17
		18	#include <asm/unaligned.h>
		19
17	#include "c2p.h"	20	#include "c2p.h"
18		21
19		22
@@ -21,97 +24,100 @@
21	* Basic transpose step	24	* Basic transpose step
22	*/	25	*/
23		26
24	#define _transp(d, i1, i2, shift, mask) \	27	static inline void _transp(u32 d[], unsigned int i1, unsigned int i2,
25	do { \	28	unsigned int shift, u32 mask)
26	u32 t = (d[i1] ^ (d[i2] >> shift)) & mask; \	29	{
27	d[i1] ^= t; \	30	u32 t = (d[i1] ^ (d[i2] >> shift)) & mask;
28	d[i2] ^= t << shift; \	31
29	} while (0)	32	d[i1] ^= t;
		33	d[i2] ^= t << shift;
		34	}
30		35
31	static inline u32 get_mask(int n)	36	extern void c2p_unsupported(void);
		37
		38	static inline u32 get_mask(unsigned int n)
32	{	39	{
33	switch (n) {	40	switch (n) {
34	case 1:	41	case 1:
35	return 0x55555555;	42	return 0x55555555;
36	break;
37		43
38	case 2:	44	case 2:
39	return 0x33333333;	45	return 0x33333333;
40	break;
41		46
42	case 4:	47	case 4:
43	return 0x0f0f0f0f;	48	return 0x0f0f0f0f;
44	break;
45		49
46	case 8:	50	case 8:
47	return 0x00ff00ff;	51	return 0x00ff00ff;
48	break;
49		52
50	case 16:	53	case 16:
51	return 0x0000ffff;	54	return 0x0000ffff;
52	break;
53	}	55	}
		56
		57	c2p_unsupported();
54	return 0;	58	return 0;
55	}	59	}
56		60
57	#define transp_nx1(d, n) \	61	static inline void transp8(u32 d[], unsigned int n, unsigned int m)
58	do { \	62	{
59	u32 mask = get_mask(n); \	63	u32 mask = get_mask(n);
60	/* First block */ \	64
61	_transp(d, 0, 1, n, mask); \	65	switch (m) {
62	/* Second block */ \	66	case 1:
63	_transp(d, 2, 3, n, mask); \	67	/* First n x 1 block */
64	/* Third block */ \	68	_transp(d, 0, 1, n, mask);
65	_transp(d, 4, 5, n, mask); \	69	/* Second n x 1 block */
66	/* Fourth block */ \	70	_transp(d, 2, 3, n, mask);
67	_transp(d, 6, 7, n, mask); \	71	/* Third n x 1 block */
68	} while (0)	72	_transp(d, 4, 5, n, mask);
69		73	/* Fourth n x 1 block */
70	#define transp_nx2(d, n) \	74	_transp(d, 6, 7, n, mask);
71	do { \	75	return;
72	u32 mask = get_mask(n); \	76
73	/* First block */ \	77	case 2:
74	_transp(d, 0, 2, n, mask); \	78	/* First n x 2 block */
75	_transp(d, 1, 3, n, mask); \	79	_transp(d, 0, 2, n, mask);
76	/* Second block */ \	80	_transp(d, 1, 3, n, mask);
77	_transp(d, 4, 6, n, mask); \	81	/* Second n x 2 block */
78	_transp(d, 5, 7, n, mask); \	82	_transp(d, 4, 6, n, mask);
79	} while (0)	83	_transp(d, 5, 7, n, mask);
80		84	return;
81	#define transp_nx4(d, n) \	85
82	do { \	86	case 4:
83	u32 mask = get_mask(n); \	87	/* Single n x 4 block */
84	_transp(d, 0, 4, n, mask); \	88	_transp(d, 0, 4, n, mask);
85	_transp(d, 1, 5, n, mask); \	89	_transp(d, 1, 5, n, mask);
86	_transp(d, 2, 6, n, mask); \	90	_transp(d, 2, 6, n, mask);
87	_transp(d, 3, 7, n, mask); \	91	_transp(d, 3, 7, n, mask);
88	} while (0)	92	return;
89		93	}
90	#define transp(d, n, m) transp_nx ## m(d, n)	94
		95	c2p_unsupported();
		96	}
91		97
92		98
93	/*	99	/*
94	* Perform a full C2P step on 32 8-bit pixels, stored in 8 32-bit words	100	* Perform a full C2P step on 32 8-bit pixels, stored in 8 32-bit words
95	* containing	101	* containing
96	* - 32 8-bit chunky pixels on input	102	* - 32 8-bit chunky pixels on input
97	* - permuted planar data on output	103	* - permutated planar data (1 plane per 32-bit word) on output
98	*/	104	*/
99		105
100	static void c2p_8bpp(u32 d[8])	106	static void c2p_32x8(u32 d[8])
101	{	107	{
102	transp(d, 16, 4);	108	transp8(d, 16, 4);
103	transp(d, 8, 2);	109	transp8(d, 8, 2);
104	transp(d, 4, 1);	110	transp8(d, 4, 1);
105	transp(d, 2, 4);	111	transp8(d, 2, 4);
106	transp(d, 1, 2);	112	transp8(d, 1, 2);
107	}	113	}
108		114
109		115
110	/*	116	/*
111	* Array containing the permution indices of the planar data after c2p	117	* Array containing the permutation indices of the planar data after c2p
112	*/	118	*/
113		119
114	static const int perm_c2p_8bpp[8] = { 7, 5, 3, 1, 6, 4, 2, 0 };	120	static const int perm_c2p_32x8[8] = { 7, 5, 3, 1, 6, 4, 2, 0 };
115		121
116		122
117	/*	123	/*
@@ -119,8 +125,7 @@ static const int perm_c2p_8bpp[8] = { 7, 5, 3, 1, 6, 4, 2, 0 };
119	* This is equivalent to (a & mask) | (b & ~mask)	125	* This is equivalent to (a & mask) | (b & ~mask)
120	*/	126	*/
121		127
122	static inline unsigned long comp(unsigned long a, unsigned long b,	128	static inline u32 comp(u32 a, u32 b, u32 mask)
123	unsigned long mask)
124	{	129	{
125	return ((a ^ b) & mask) ^ b;	130	return ((a ^ b) & mask) ^ b;
126	}	131	}
@@ -130,12 +135,12 @@ static inline unsigned long comp(unsigned long a, unsigned long b,
130	* Store a full block of planar data after c2p conversion	135	* Store a full block of planar data after c2p conversion
131	*/	136	*/
132		137
133	static inline void store_planar(char *dst, u32 dst_inc, u32 bpp, u32 d[8])	138	static inline void store_planar(void *dst, u32 dst_inc, u32 bpp, u32 d[8])
134	{	139	{
135	int i;	140	int i;
136		141
137	for (i = 0; i < bpp; i++, dst += dst_inc)	142	for (i = 0; i < bpp; i++, dst += dst_inc)
138	*(u32 *)dst = d[perm_c2p_8bpp[i]];	143	put_unaligned_be32(d[perm_c2p_32x8[i]], dst);
139	}	144	}
140		145
141		146
@@ -143,13 +148,15 @@ static inline void store_planar(char *dst, u32 dst_inc, u32 bpp, u32 d[8])
143	* Store a partial block of planar data after c2p conversion	148	* Store a partial block of planar data after c2p conversion
144	*/	149	*/
145		150
146	static inline void store_planar_masked(char *dst, u32 dst_inc, u32 bpp,	151	static inline void store_planar_masked(void *dst, u32 dst_inc, u32 bpp,
147	u32 d[8], u32 mask)	152	u32 d[8], u32 mask)
148	{	153	{
149	int i;	154	int i;
150		155
151	for (i = 0; i < bpp; i++, dst += dst_inc)	156	for (i = 0; i < bpp; i++, dst += dst_inc)
152	*(u32 *)dst = comp(d[perm_c2p_8bpp[i]], *(u32 *)dst, mask);	157	put_unaligned_be32(comp(d[perm_c2p_32x8[i]],
		158	get_unaligned_be32(dst), mask),
		159	dst);
153	}	160	}
154		161
155		162
@@ -166,18 +173,21 @@ static inline void store_planar_masked(char *dst, u32 dst_inc, u32 bpp,
166	* @bpp: Bits per pixel of the planar frame buffer (1-8)	173	* @bpp: Bits per pixel of the planar frame buffer (1-8)
167	*/	174	*/
168		175
169	void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height,	176	void c2p(void *dst, const void *src, u32 dx, u32 dy, u32 width, u32 height,
170	u32 dst_nextline, u32 dst_nextplane, u32 src_nextline, u32 bpp)	177	u32 dst_nextline, u32 dst_nextplane, u32 src_nextline, u32 bpp)
171	{	178	{
172	int dst_idx;	179	union {
173	u32 d[8], first, last, w;	180	u8 pixels[32];
		181	u32 words[8];
		182	} d;
		183	u32 dst_idx, first, last, w;
174	const u8 *c;	184	const u8 *c;
175	u8 *p;	185	void *p;
176		186
177	dst += dy*dst_nextline+(dx & ~31);	187	dst += dy*dst_nextline+(dx & ~31);
178	dst_idx = dx % 32;	188	dst_idx = dx % 32;
179	first = ~0UL >> dst_idx;	189	first = 0xffffffffU >> dst_idx;
180	last = ~(~0UL >> ((dst_idx+width) % 32));	190	last = ~(0xffffffffU >> ((dst_idx+width) % 32));
181	while (height--) {	191	while (height--) {
182	c = src;	192	c = src;
183	p = dst;	193	p = dst;
@@ -185,11 +195,12 @@ void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height,
185	if (dst_idx+width <= 32) {	195	if (dst_idx+width <= 32) {
186	/* Single destination word */	196	/* Single destination word */
187	first &= last;	197	first &= last;
188	memset(d, 0, sizeof(d));	198	memset(d.pixels, 0, sizeof(d));
189	memcpy((u8 *)d+dst_idx, c, width);	199	memcpy(d.pixels+dst_idx, c, width);
190	c += width;	200	c += width;
191	c2p_8bpp(d);	201	c2p_32x8(d.words);
192	store_planar_masked(p, dst_nextplane, bpp, d, first);	202	store_planar_masked(p, dst_nextplane, bpp, d.words,
		203	first);
193	p += 4;	204	p += 4;
194	} else {	205	} else {
195	/* Multiple destination words */	206	/* Multiple destination words */
@@ -197,32 +208,32 @@ void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height,
197	/* Leading bits */	208	/* Leading bits */
198	if (dst_idx) {	209	if (dst_idx) {
199	w = 32 - dst_idx;	210	w = 32 - dst_idx;
200	memset(d, 0, dst_idx);	211	memset(d.pixels, 0, dst_idx);
201	memcpy((u8 *)d+dst_idx, c, w);	212	memcpy(d.pixels+dst_idx, c, w);
202	c += w;	213	c += w;
203	c2p_8bpp(d);	214	c2p_32x8(d.words);
204	store_planar_masked(p, dst_nextplane, bpp, d,	215	store_planar_masked(p, dst_nextplane, bpp,
205	first);	216	d.words, first);
206	p += 4;	217	p += 4;
207	w = width-w;	218	w = width-w;
208	}	219	}
209	/* Main chunk */	220	/* Main chunk */
210	while (w >= 32) {	221	while (w >= 32) {
211	memcpy(d, c, 32);	222	memcpy(d.pixels, c, 32);
212	c += 32;	223	c += 32;
213	c2p_8bpp(d);	224	c2p_32x8(d.words);
214	store_planar(p, dst_nextplane, bpp, d);	225	store_planar(p, dst_nextplane, bpp, d.words);
215	p += 4;	226	p += 4;
216	w -= 32;	227	w -= 32;
217	}	228	}
218	/* Trailing bits */	229	/* Trailing bits */
219	w %= 32;	230	w %= 32;
220	if (w > 0) {	231	if (w > 0) {
221	memcpy(d, c, w);	232	memcpy(d.pixels, c, w);
222	memset((u8 *)d+w, 0, 32-w);	233	memset(d.pixels+w, 0, 32-w);
223	c2p_8bpp(d);	234	c2p_32x8(d.words);
224	store_planar_masked(p, dst_nextplane, bpp, d,	235	store_planar_masked(p, dst_nextplane, bpp,
225	last);	236	d.words, last);
226	}	237	}
227	}	238	}
228	src += src_nextline;	239	src += src_nextline;