aboutsummaryrefslogblamecommitdiffstats
path: root/drivers/video/kyro/STG4000OverlayDevice.c
blob: 0aeeaa10708b2e70f9266ad45e7f969632e6eff3 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419















                                                                             
                             

















































































































































































































































































































































































































                                                                                                                                     
                                                                             




















































































































































































                                                                                                               
/*
 *  linux/drivers/video/kyro/STG4000OverlayDevice.c
 *
 *  Copyright (C) 2000 Imagination Technologies Ltd
 *  Copyright (C) 2002 STMicroelectronics
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details.
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>

#include "STG4000Reg.h"
#include "STG4000Interface.h"

/* HW Defines */

#define STG4000_NO_SCALING    0x800
#define STG4000_NO_DECIMATION 0xFFFFFFFF

/* Primary surface */
#define STG4000_PRIM_NUM_PIX   5
#define STG4000_PRIM_ALIGN     4
#define STG4000_PRIM_ADDR_BITS 20

#define STG4000_PRIM_MIN_WIDTH  640
#define STG4000_PRIM_MAX_WIDTH  1600
#define STG4000_PRIM_MIN_HEIGHT 480
#define STG4000_PRIM_MAX_HEIGHT 1200

/* Overlay surface */
#define STG4000_OVRL_NUM_PIX   4
#define STG4000_OVRL_ALIGN     2
#define STG4000_OVRL_ADDR_BITS 20
#define STG4000_OVRL_NUM_MODES 5

#define STG4000_OVRL_MIN_WIDTH  0
#define STG4000_OVRL_MAX_WIDTH  720
#define STG4000_OVRL_MIN_HEIGHT 0
#define STG4000_OVRL_MAX_HEIGHT 576

/* Decimation and Scaling */
static u32 adwDecim8[33] = {
	    0xffffffff, 0xfffeffff, 0xffdffbff, 0xfefefeff, 0xfdf7efbf,
	    0xfbdf7bdf, 0xf7bbddef, 0xeeeeeeef, 0xeeddbb77, 0xedb76db7,
	    0xdb6db6db, 0xdb5b5b5b, 0xdab5ad6b, 0xd5ab55ab, 0xd555aaab,
	    0xaaaaaaab, 0xaaaa5555, 0xaa952a55, 0xa94a5295, 0xa5252525,
	    0xa4924925, 0x92491249, 0x91224489, 0x91111111, 0x90884211,
	    0x88410821, 0x88102041, 0x81010101, 0x80800801, 0x80010001,
	    0x80000001, 0x00000001, 0x00000000
};

typedef struct _OVRL_SRC_DEST {
	/*clipped on-screen pixel position of overlay */
	u32 ulDstX1;
	u32 ulDstY1;
	u32 ulDstX2;
	u32 ulDstY2;

	/*clipped pixel pos of source data within buffer thses need to be 128 bit word aligned */
	u32 ulSrcX1;
	u32 ulSrcY1;
	u32 ulSrcX2;
	u32 ulSrcY2;

	/* on-screen pixel position of overlay */
	s32 lDstX1;
	s32 lDstY1;
	s32 lDstX2;
	s32 lDstY2;
} OVRL_SRC_DEST;

static u32 ovlWidth, ovlHeight, ovlStride;
static int ovlLinear;

void ResetOverlayRegisters(volatile STG4000REG __iomem *pSTGReg)
{
	u32 tmp;

	/* Set Overlay address to default */
	tmp = STG_READ_REG(DACOverlayAddr);
	CLEAR_BITS_FRM_TO(0, 20);
	CLEAR_BIT(31);
	STG_WRITE_REG(DACOverlayAddr, tmp);

	/* Set Overlay U address */
	tmp = STG_READ_REG(DACOverlayUAddr);
	CLEAR_BITS_FRM_TO(0, 20);
	STG_WRITE_REG(DACOverlayUAddr, tmp);

	/* Set Overlay V address */
	tmp = STG_READ_REG(DACOverlayVAddr);
	CLEAR_BITS_FRM_TO(0, 20);
	STG_WRITE_REG(DACOverlayVAddr, tmp);

	/* Set Overlay Size */
	tmp = STG_READ_REG(DACOverlaySize);
	CLEAR_BITS_FRM_TO(0, 10);
	CLEAR_BITS_FRM_TO(12, 31);
	STG_WRITE_REG(DACOverlaySize, tmp);

	/* Set Overlay Vt Decimation */
	tmp = STG4000_NO_DECIMATION;
	STG_WRITE_REG(DACOverlayVtDec, tmp);

	/* Set Overlay format to default value */
	tmp = STG_READ_REG(DACPixelFormat);
	CLEAR_BITS_FRM_TO(4, 7);
	CLEAR_BITS_FRM_TO(16, 22);
	STG_WRITE_REG(DACPixelFormat, tmp);

	/* Set Vertical scaling to default */
	tmp = STG_READ_REG(DACVerticalScal);
	CLEAR_BITS_FRM_TO(0, 11);
	CLEAR_BITS_FRM_TO(16, 22);
	tmp |= STG4000_NO_SCALING;	/* Set to no scaling */
	STG_WRITE_REG(DACVerticalScal, tmp);

	/* Set Horizontal Scaling to default */
	tmp = STG_READ_REG(DACHorizontalScal);
	CLEAR_BITS_FRM_TO(0, 11);
	CLEAR_BITS_FRM_TO(16, 17);
	tmp |= STG4000_NO_SCALING;	/* Set to no scaling */
	STG_WRITE_REG(DACHorizontalScal, tmp);

	/* Set Blend mode to Alpha Blend */
	/* ????? SG 08/11/2001 Surely this isn't the alpha blend mode,
	   hopefully its overwrite
	 */
	tmp = STG_READ_REG(DACBlendCtrl);
	CLEAR_BITS_FRM_TO(0, 30);
	tmp = (GRAPHICS_MODE << 28);
	STG_WRITE_REG(DACBlendCtrl, tmp);

}

int CreateOverlaySurface(volatile STG4000REG __iomem *pSTGReg,
			 u32 inWidth,
			 u32 inHeight,
			 int bLinear,
			 u32 ulOverlayOffset,
			 u32 * retStride, u32 * retUVStride)
{
	u32 tmp;
	u32 ulStride;

	if (inWidth > STG4000_OVRL_MAX_WIDTH ||
	    inHeight > STG4000_OVRL_MAX_HEIGHT) {
		return -EINVAL;
	}

	/* Stride in 16 byte words - 16Bpp */
	if (bLinear) {
		/* Format is 16bits so num 16 byte words is width/8 */
		if ((inWidth & 0x7) == 0) {	/* inWidth % 8 */
			ulStride = (inWidth / 8);
		} else {
			/* Round up to next 16byte boundary */
			ulStride = ((inWidth + 8) / 8);
		}
	} else {
		/* Y component is 8bits so num 16 byte words is width/16 */
		if ((inWidth & 0xf) == 0) {	/* inWidth % 16 */
			ulStride = (inWidth / 16);
		} else {
			/* Round up to next 16byte boundary */
			ulStride = ((inWidth + 16) / 16);
		}
	}


	/* Set Overlay address and Format mode */
	tmp = STG_READ_REG(DACOverlayAddr);
	CLEAR_BITS_FRM_TO(0, 20);
	if (bLinear) {
		CLEAR_BIT(31);	/* Overlay format to Linear */
	} else {
		tmp |= SET_BIT(31);	/* Overlay format to Planer */
	}

	/* Only bits 24:4 of the Overlay address */
	tmp |= (ulOverlayOffset >> 4);
	STG_WRITE_REG(DACOverlayAddr, tmp);

	if (!bLinear) {
		u32 uvSize =
		    (inWidth & 0x1) ? (inWidth + 1 / 2) : (inWidth / 2);
		u32 uvStride;
		u32 ulOffset;
		/* Y component is 8bits so num 32 byte words is width/32 */
		if ((uvSize & 0xf) == 0) {	/* inWidth % 16 */
			uvStride = (uvSize / 16);
		} else {
			/* Round up to next 32byte boundary */
			uvStride = ((uvSize + 16) / 16);
		}

		ulOffset = ulOverlayOffset + (inHeight * (ulStride * 16));
		/* Align U,V data to 32byte boundary */
		if ((ulOffset & 0x1f) != 0)
			ulOffset = (ulOffset + 32L) & 0xffffffE0L;

		tmp = STG_READ_REG(DACOverlayUAddr);
		CLEAR_BITS_FRM_TO(0, 20);
		tmp |= (ulOffset >> 4);
		STG_WRITE_REG(DACOverlayUAddr, tmp);

		ulOffset += (inHeight / 2) * (uvStride * 16);
		/* Align U,V data to 32byte boundary */
		if ((ulOffset & 0x1f) != 0)
			ulOffset = (ulOffset + 32L) & 0xffffffE0L;

		tmp = STG_READ_REG(DACOverlayVAddr);
		CLEAR_BITS_FRM_TO(0, 20);
		tmp |= (ulOffset >> 4);
		STG_WRITE_REG(DACOverlayVAddr, tmp);

		*retUVStride = uvStride * 16;
	}


	/* Set Overlay YUV pixel format
	 * Make sure that LUT not used - ??????
	 */
	tmp = STG_READ_REG(DACPixelFormat);
	/* Only support Planer or UYVY linear formats */
	CLEAR_BITS_FRM_TO(4, 9);
	STG_WRITE_REG(DACPixelFormat, tmp);

	ovlWidth = inWidth;
	ovlHeight = inHeight;
	ovlStride = ulStride;
	ovlLinear = bLinear;
	*retStride = ulStride << 4;	/* In bytes */

	return 0;
}

int SetOverlayBlendMode(volatile STG4000REG __iomem *pSTGReg,
			OVRL_BLEND_MODE mode,
			u32 ulAlpha, u32 ulColorKey)
{
	u32 tmp;

	tmp = STG_READ_REG(DACBlendCtrl);
	CLEAR_BITS_FRM_TO(28, 30);
	tmp |= (mode << 28);

	switch (mode) {
	case COLOR_KEY:
		CLEAR_BITS_FRM_TO(0, 23);
		tmp |= (ulColorKey & 0x00FFFFFF);
		break;

	case GLOBAL_ALPHA:
		CLEAR_BITS_FRM_TO(24, 27);
		tmp |= ((ulAlpha & 0xF) << 24);
		break;

	case CK_PIXEL_ALPHA:
		CLEAR_BITS_FRM_TO(0, 23);
		tmp |= (ulColorKey & 0x00FFFFFF);
		break;

	case CK_GLOBAL_ALPHA:
		CLEAR_BITS_FRM_TO(0, 23);
		tmp |= (ulColorKey & 0x00FFFFFF);
		CLEAR_BITS_FRM_TO(24, 27);
		tmp |= ((ulAlpha & 0xF) << 24);
		break;

	case GRAPHICS_MODE:
	case PER_PIXEL_ALPHA:
		break;

	default:
		return -EINVAL;
	}

	STG_WRITE_REG(DACBlendCtrl, tmp);

	return 0;
}

void EnableOverlayPlane(volatile STG4000REG __iomem *pSTGReg)
{
	u32 tmp;
	/* Enable Overlay */
	tmp = STG_READ_REG(DACPixelFormat);
	tmp |= SET_BIT(7);
	STG_WRITE_REG(DACPixelFormat, tmp);

	/* Set video stream control */
	tmp = STG_READ_REG(DACStreamCtrl);
	tmp |= SET_BIT(1);	/* video stream */
	STG_WRITE_REG(DACStreamCtrl, tmp);
}

static u32 Overlap(u32 ulBits, u32 ulPattern)
{
	u32 ulCount = 0;

	while (ulBits) {
		if (!(ulPattern & 1))
			ulCount++;
		ulBits--;
		ulPattern = ulPattern >> 1;
	}

	return ulCount;

}

int SetOverlayViewPort(volatile STG4000REG __iomem *pSTGReg,
		       u32 left, u32 top,
		       u32 right, u32 bottom)
{
	OVRL_SRC_DEST srcDest;

	u32 ulSrcTop, ulSrcBottom;
	u32 ulSrc, ulDest;
	u32 ulFxScale, ulFxOffset;
	u32 ulHeight, ulWidth;
	u32 ulPattern;
	u32 ulDecimate, ulDecimated;
	u32 ulApplied;
	u32 ulDacXScale, ulDacYScale;
	u32 ulScale;
	u32 ulLeft, ulRight;
	u32 ulSrcLeft, ulSrcRight;
	u32 ulScaleLeft, ulScaleRight;
	u32 ulhDecim;
	u32 ulsVal;
	u32 ulVertDecFactor;
	int bResult;
	u32 ulClipOff = 0;
	u32 ulBits = 0;
	u32 ulsAdd = 0;
	u32 tmp, ulStride;
	u32 ulExcessPixels, ulClip, ulExtraLines;


	srcDest.ulSrcX1 = 0;
	srcDest.ulSrcY1 = 0;
	srcDest.ulSrcX2 = ovlWidth - 1;
	srcDest.ulSrcY2 = ovlHeight - 1;

	srcDest.ulDstX1 = left;
	srcDest.ulDstY1 = top;
	srcDest.ulDstX2 = right;
	srcDest.ulDstY2 = bottom;

	srcDest.lDstX1 = srcDest.ulDstX1;
	srcDest.lDstY1 = srcDest.ulDstY1;
	srcDest.lDstX2 = srcDest.ulDstX2;
	srcDest.lDstY2 = srcDest.ulDstY2;

    /************* Vertical decimation/scaling ******************/

	/* Get Src Top and Bottom */
	ulSrcTop = srcDest.ulSrcY1;
	ulSrcBottom = srcDest.ulSrcY2;

	ulSrc = ulSrcBottom - ulSrcTop;
	ulDest = srcDest.lDstY2 - srcDest.lDstY1;	/* on-screen overlay */

	if (ulSrc <= 1)
		return -EINVAL;

	/* First work out the position we are to display as offset from the
	 * source of the buffer
	 */
	ulFxScale = (ulDest << 11) / ulSrc;	/* fixed point scale factor */
	ulFxOffset = (srcDest.lDstY2 - srcDest.ulDstY2) << 11;

	ulSrcBottom = ulSrcBottom - (ulFxOffset / ulFxScale);
	ulSrc = ulSrcBottom - ulSrcTop;
	ulHeight = ulSrc;

	ulDest = srcDest.ulDstY2 - (srcDest.ulDstY1 - 1);
	ulPattern = adwDecim8[ulBits];

	/* At this point ulSrc represents the input decimator */
	if (ulSrc > ulDest) {
		ulDecimate = ulSrc - ulDest;
		ulBits = 0;
		ulApplied = ulSrc / 32;

		while (((ulBits * ulApplied) +
			Overlap((ulSrc % 32),
				adwDecim8[ulBits])) < ulDecimate)
			ulBits++;

		ulPattern = adwDecim8[ulBits];
		ulDecimated =
		    (ulBits * ulApplied) + Overlap((ulSrc % 32),
						   ulPattern);
		ulSrc = ulSrc - ulDecimated;	/* the number number of lines that will go into the scaler */
	}

	if (ulBits && (ulBits != 32)) {
		ulVertDecFactor = (63 - ulBits) / (32 - ulBits);	/* vertical decimation factor scaled up to nearest integer */
	} else {
		ulVertDecFactor = 1;
	}

	ulDacYScale = ((ulSrc - 1) * 2048) / (ulDest + 1);

	tmp = STG_READ_REG(DACOverlayVtDec);	/* Decimation */
	CLEAR_BITS_FRM_TO(0, 31);
	tmp = ulPattern;
	STG_WRITE_REG(DACOverlayVtDec, tmp);

	/***************** Horizontal decimation/scaling ***************************/

	/*
	 * Now we handle the horizontal case, this is a simplified version of
	 * the vertical case in that we decimate by factors of 2.  as we are
	 * working in words we should always be able to decimate by these
	 * factors.  as we always have to have a buffer which is aligned to a
	 * whole number of 128 bit words, we must align the left side to the
	 * lowest to the next lowest 128 bit boundary, and the right hand edge
	 * to the next largets boundary, (in a similar way to how we didi it in
	 * PMX1) as the left and right hand edges are aligned to these
	 * boundaries normally this only becomes an issue when we are chopping
	 * of one of the sides We shall work out vertical stuff first
	 */
	ulSrc = srcDest.ulSrcX2 - srcDest.ulSrcX1;
	ulDest = srcDest.lDstX2 - srcDest.lDstX1;
#ifdef _OLDCODE
	ulLeft = srcDest.ulDstX1;
	ulRight = srcDest.ulDstX2;
#else
	if (srcDest.ulDstX1 > 2) {
		ulLeft = srcDest.ulDstX1 + 2;
		ulRight = srcDest.ulDstX2 + 1;
	} else {
		ulLeft = srcDest.ulDstX1;
		ulRight = srcDest.ulDstX2 + 1;
	}
#endif
	/* first work out the position we are to display as offset from the source of the buffer */
	bResult = 1;

	do {
		if (ulDest == 0)
			return -EINVAL;

		/* source pixels per dest pixel <<11 */
		ulFxScale = ((ulSrc - 1) << 11) / (ulDest);

		/* then number of destination pixels out we are */
		ulFxOffset = ulFxScale * ((srcDest.ulDstX1 - srcDest.lDstX1) + ulClipOff);
		ulFxOffset >>= 11;

		/* this replaces the code which was making a decision as to use either ulFxOffset or ulSrcX1 */
		ulSrcLeft = srcDest.ulSrcX1 + ulFxOffset;

		/* then number of destination pixels out we are */
		ulFxOffset = ulFxScale * (srcDest.lDstX2 - srcDest.ulDstX2);
		ulFxOffset >>= 11;

		ulSrcRight = srcDest.ulSrcX2 - ulFxOffset;

		/*
		 * we must align these to our 128 bit boundaries. we shall
		 * round down the pixel pos to the nearest 8 pixels.
		 */
		ulScaleLeft = ulSrcLeft;
		ulScaleRight = ulSrcRight;

		/* shift fxscale until it is in the range of the scaler */
		ulhDecim = 0;
		ulScale = (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / (ulRight - ulLeft + 2);

		while (ulScale > 0x800) {
			ulhDecim++;
			ulScale = (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / (ulRight - ulLeft + 2);
		}

		/*
		 * to try and get the best values We first try and use
		 * src/dwdest for the scale factor, then we move onto src-1
		 *
		 * we want to check to see if we will need to clip data, if so
		 * then we should clip our source so that we don't need to
		 */
		if (!ovlLinear) {
			ulSrcLeft &= ~0x1f;

			/*
			 * we must align the right hand edge to the next 32
			 * pixel` boundary, must be on a 256 boundary so u, and
			 * v are 128 bit aligned
			 */
			ulSrcRight = (ulSrcRight + 0x1f) & ~0x1f;
		} else {
			ulSrcLeft &= ~0x7;

			/*
			 * we must align the right hand edge to the next
			 * 8pixel` boundary
			 */
			ulSrcRight = (ulSrcRight + 0x7) & ~0x7;
		}

		/* this is the input size line store needs to cope with */
		ulWidth = ulSrcRight - ulSrcLeft;

		/*
		 * use unclipped value to work out scale factror this is the
		 * scale factor we want we shall now work out the horizonal
		 * decimation and scaling
		 */
		ulsVal = ((ulWidth / 8) >> ulhDecim);

		if ((ulWidth != (ulsVal << ulhDecim) * 8))
			ulsAdd = 1;

		/* input pixels to scaler; */
		ulSrc = ulWidth >> ulhDecim;

		if (ulSrc <= 2)
			return -EINVAL;

		ulExcessPixels = ((((ulScaleLeft - ulSrcLeft)) << (11 - ulhDecim)) / ulScale);

		ulClip = (ulSrc << 11) / ulScale;
		ulClip -= (ulRight - ulLeft);
		ulClip += ulExcessPixels;

		if (ulClip)
			ulClip--;

		/* We may need to do more here if we really have a HW rev < 5 */
	} while (!bResult);

	ulExtraLines = (1 << ulhDecim) * ulVertDecFactor;
	ulExtraLines += 64;
	ulHeight += ulExtraLines;

	ulDacXScale = ulScale;


	tmp = STG_READ_REG(DACVerticalScal);
	CLEAR_BITS_FRM_TO(0, 11);
	CLEAR_BITS_FRM_TO(16, 22);	/* Vertical Scaling */

	/* Calculate new output line stride, this is always the number of 422
	   words in the line buffer, so it doesn't matter if the
	   mode is 420. Then set the vertical scale register.
	 */
	ulStride = (ulWidth >> (ulhDecim + 3)) + ulsAdd;
	tmp |= ((ulStride << 16) | (ulDacYScale));	/* DAC_LS_CTRL = stride */
	STG_WRITE_REG(DACVerticalScal, tmp);

	/* Now set up the overlay size using the modified width and height
	   from decimate and scaling calculations
	 */
	tmp = STG_READ_REG(DACOverlaySize);
	CLEAR_BITS_FRM_TO(0, 10);
	CLEAR_BITS_FRM_TO(12, 31);

	if (ovlLinear) {
		tmp |=
		    (ovlStride | ((ulHeight + 1) << 12) |
		     (((ulWidth / 8) - 1) << 23));
	} else {
		tmp |=
		    (ovlStride | ((ulHeight + 1) << 12) |
		     (((ulWidth / 32) - 1) << 23));
	}

	STG_WRITE_REG(DACOverlaySize, tmp);

	/* Set Video Window Start */
	tmp = ((ulLeft << 16)) | (srcDest.ulDstY1);
	STG_WRITE_REG(DACVidWinStart, tmp);

	/* Set Video Window End */
	tmp = ((ulRight) << 16) | (srcDest.ulDstY2);
	STG_WRITE_REG(DACVidWinEnd, tmp);

	/* Finally set up the rest of the overlay regs in the order
	   done in the IMG driver
	 */
	tmp = STG_READ_REG(DACPixelFormat);
	tmp = ((ulExcessPixels << 16) | tmp) & 0x7fffffff;
	STG_WRITE_REG(DACPixelFormat, tmp);

	tmp = STG_READ_REG(DACHorizontalScal);
	CLEAR_BITS_FRM_TO(0, 11);
	CLEAR_BITS_FRM_TO(16, 17);
	tmp |= ((ulhDecim << 16) | (ulDacXScale));
	STG_WRITE_REG(DACHorizontalScal, tmp);

	return 0;
}