aboutsummaryrefslogtreecommitdiffstats
path: root/arch/m68k/fpsp040/stan.S
blob: b5c2a196e617fec38e16e33eb10411a5ab9f2499 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
|
|	stan.sa 3.3 7/29/91
|
|	The entry point stan computes the tangent of
|	an input argument;
|	stand does the same except for denormalized input.
|
|	Input: Double-extended number X in location pointed to
|		by address register a0.
|
|	Output: The value tan(X) returned in floating-point register Fp0.
|
|	Accuracy and Monotonicity: The returned result is within 3 ulp in
|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
|		result is subsequently rounded to double precision. The
|		result is provably monotonic in double precision.
|
|	Speed: The program sTAN takes approximately 170 cycles for
|		input argument X such that |X| < 15Pi, which is the usual
|		situation.
|
|	Algorithm:
|
|	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
|
|	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
|		k = N mod 2, so in particular, k = 0 or 1.
|
|	3. If k is odd, go to 5.
|
|	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
|		rational function U/V where
|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
|		Exit.
|
|	4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
|		rational function U/V where
|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
|		-Cot(r) = -V/U. Exit.
|
|	6. If |X| > 1, go to 8.
|
|	7. (|X|<2**(-40)) Tan(X) = X. Exit.
|
|	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
|

|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
|	The copyright notice above does not evidence any
|	actual or intended publication of such source code.

|STAN	idnt	2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

BOUNDS1:	.long 0x3FD78000,0x4004BC7E
TWOBYPI:	.long 0x3FE45F30,0x6DC9C883

TANQ4:	.long 0x3EA0B759,0xF50F8688
TANP3:	.long 0xBEF2BAA5,0xA8924F04

TANQ3:	.long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000

TANP2:	.long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000

TANQ2:	.long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000

TANP1:	.long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000

TANQ1:	.long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000

INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000

TWOPI1:	.long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
TWOPI2:	.long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000

|--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
|--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
|--MOST 69 BITS LONG.
	.global	PITBL
PITBL:
  .long  0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
  .long  0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
  .long  0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
  .long  0xC0040000,0xB6365E22,0xEE46F000,0x21480000
  .long  0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
  .long  0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
  .long  0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
  .long  0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
  .long  0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
  .long  0xC0040000,0x90836524,0x88034B96,0x20B00000
  .long  0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
  .long  0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
  .long  0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
  .long  0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
  .long  0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
  .long  0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
  .long  0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
  .long  0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
  .long  0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
  .long  0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
  .long  0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
  .long  0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
  .long  0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
  .long  0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
  .long  0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
  .long  0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
  .long  0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
  .long  0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
  .long  0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
  .long  0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
  .long  0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
  .long  0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
  .long  0x00000000,0x00000000,0x00000000,0x00000000
  .long  0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
  .long  0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
  .long  0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
  .long  0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
  .long  0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
  .long  0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
  .long  0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
  .long  0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
  .long  0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
  .long  0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
  .long  0x40030000,0x8A3AE64F,0x76F80584,0x21080000
  .long  0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
  .long  0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
  .long  0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
  .long  0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
  .long  0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
  .long  0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
  .long  0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
  .long  0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
  .long  0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
  .long  0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
  .long  0x40040000,0x8A3AE64F,0x76F80584,0x21880000
  .long  0x40040000,0x90836524,0x88034B96,0xA0B00000
  .long  0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
  .long  0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
  .long  0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
  .long  0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
  .long  0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
  .long  0x40040000,0xB6365E22,0xEE46F000,0xA1480000
  .long  0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
  .long  0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
  .long  0x40040000,0xC90FDAA2,0x2168C235,0xA1800000

	.set	INARG,FP_SCR4

	.set	TWOTO63,L_SCR1
	.set	ENDFLAG,L_SCR2
	.set	N,L_SCR3

	| xref	t_frcinx
	|xref	t_extdnrm

	.global	stand
stand:
|--TAN(X) = X FOR DENORMALIZED X

	bra		t_extdnrm

	.global	stan
stan:
	fmovex		(%a0),%fp0	| ...LOAD INPUT

	movel		(%a0),%d0
	movew		4(%a0),%d0
	andil		#0x7FFFFFFF,%d0

	cmpil		#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
	bges		TANOK1
	bra		TANSM
TANOK1:
	cmpil		#0x4004BC7E,%d0		| ...|X| < 15 PI?
	blts		TANMAIN
	bra		REDUCEX


TANMAIN:
|--THIS IS THE USUAL CASE, |X| <= 15 PI.
|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	fmovex		%fp0,%fp1
	fmuld		TWOBYPI,%fp1	| ...X*2/PI

|--HIDE THE NEXT TWO INSTRUCTIONS
	leal		PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32

|--FP1 IS NOW READY
	fmovel		%fp1,%d0		| ...CONVERT TO INTEGER

	asll		#4,%d0
	addal		%d0,%a1		| ...ADDRESS N*PIBY2 IN Y1, Y2

	fsubx		(%a1)+,%fp0	| ...X-Y1
|--HIDE THE NEXT ONE

	fsubs		(%a1),%fp0	| ...FP0 IS R = (X-Y1)-Y2

	rorl		#5,%d0
	andil		#0x80000000,%d0	| ...D0 WAS ODD IFF D0 < 0

TANCONT:

	cmpil		#0,%d0
	blt		NODD

	fmovex		%fp0,%fp1
	fmulx		%fp1,%fp1		| ...S = R*R

	fmoved		TANQ4,%fp3
	fmoved		TANP3,%fp2

	fmulx		%fp1,%fp3		| ...SQ4
	fmulx		%fp1,%fp2		| ...SP3

	faddd		TANQ3,%fp3	| ...Q3+SQ4
	faddx		TANP2,%fp2	| ...P2+SP3

	fmulx		%fp1,%fp3		| ...S(Q3+SQ4)
	fmulx		%fp1,%fp2		| ...S(P2+SP3)

	faddx		TANQ2,%fp3	| ...Q2+S(Q3+SQ4)
	faddx		TANP1,%fp2	| ...P1+S(P2+SP3)

	fmulx		%fp1,%fp3		| ...S(Q2+S(Q3+SQ4))
	fmulx		%fp1,%fp2		| ...S(P1+S(P2+SP3))

	faddx		TANQ1,%fp3	| ...Q1+S(Q2+S(Q3+SQ4))
	fmulx		%fp0,%fp2		| ...RS(P1+S(P2+SP3))

	fmulx		%fp3,%fp1		| ...S(Q1+S(Q2+S(Q3+SQ4)))


	faddx		%fp2,%fp0		| ...R+RS(P1+S(P2+SP3))


	fadds		#0x3F800000,%fp1	| ...1+S(Q1+...)

	fmovel		%d1,%fpcr		|restore users exceptions
	fdivx		%fp1,%fp0		|last inst - possible exception set

	bra		t_frcinx

NODD:
	fmovex		%fp0,%fp1
	fmulx		%fp0,%fp0		| ...S = R*R

	fmoved		TANQ4,%fp3
	fmoved		TANP3,%fp2

	fmulx		%fp0,%fp3		| ...SQ4
	fmulx		%fp0,%fp2		| ...SP3

	faddd		TANQ3,%fp3	| ...Q3+SQ4
	faddx		TANP2,%fp2	| ...P2+SP3

	fmulx		%fp0,%fp3		| ...S(Q3+SQ4)
	fmulx		%fp0,%fp2		| ...S(P2+SP3)

	faddx		TANQ2,%fp3	| ...Q2+S(Q3+SQ4)
	faddx		TANP1,%fp2	| ...P1+S(P2+SP3)

	fmulx		%fp0,%fp3		| ...S(Q2+S(Q3+SQ4))
	fmulx		%fp0,%fp2		| ...S(P1+S(P2+SP3))

	faddx		TANQ1,%fp3	| ...Q1+S(Q2+S(Q3+SQ4))
	fmulx		%fp1,%fp2		| ...RS(P1+S(P2+SP3))

	fmulx		%fp3,%fp0		| ...S(Q1+S(Q2+S(Q3+SQ4)))


	faddx		%fp2,%fp1		| ...R+RS(P1+S(P2+SP3))
	fadds		#0x3F800000,%fp0	| ...1+S(Q1+...)


	fmovex		%fp1,-(%sp)
	eoril		#0x80000000,(%sp)

	fmovel		%d1,%fpcr		|restore users exceptions
	fdivx		(%sp)+,%fp0	|last inst - possible exception set

	bra		t_frcinx

TANBORS:
|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
|--IF |X| < 2**(-40), RETURN X OR 1.
	cmpil		#0x3FFF8000,%d0
	bgts		REDUCEX

TANSM:

	fmovex		%fp0,-(%sp)
	fmovel		%d1,%fpcr		 |restore users exceptions
	fmovex		(%sp)+,%fp0	|last inst - possible exception set

	bra		t_frcinx


REDUCEX:
|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.

	fmovemx	%fp2-%fp5,-(%a7)	| ...save FP2 through FP5
	movel		%d2,-(%a7)
        fmoves         #0x00000000,%fp1

|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
|--there is a danger of unwanted overflow in first LOOP iteration.  In this
|--case, reduce argument by one remainder step to make subsequent reduction
|--safe.
	cmpil	#0x7ffeffff,%d0		|is argument dangerously large?
	bnes	LOOP
	movel	#0x7ffe0000,FP_SCR2(%a6)	|yes
|					;create 2**16383*PI/2
	movel	#0xc90fdaa2,FP_SCR2+4(%a6)
	clrl	FP_SCR2+8(%a6)
	ftstx	%fp0			|test sign of argument
	movel	#0x7fdc0000,FP_SCR3(%a6)	|create low half of 2**16383*
|					;PI/2 at FP_SCR3
	movel	#0x85a308d3,FP_SCR3+4(%a6)
	clrl   FP_SCR3+8(%a6)
	fblt	red_neg
	orw	#0x8000,FP_SCR2(%a6)	|positive arg
	orw	#0x8000,FP_SCR3(%a6)
red_neg:
	faddx  FP_SCR2(%a6),%fp0		|high part of reduction is exact
	fmovex  %fp0,%fp1		|save high result in fp1
	faddx  FP_SCR3(%a6),%fp0		|low part of reduction
	fsubx  %fp0,%fp1			|determine low component of result
	faddx  FP_SCR3(%a6),%fp1		|fp0/fp1 are reduced argument.

|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
|--integer quotient will be stored in N
|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)

LOOP:
	fmovex		%fp0,INARG(%a6)	| ...+-2**K * F, 1 <= F < 2
	movew		INARG(%a6),%d0
        movel          %d0,%a1		| ...save a copy of D0
	andil		#0x00007FFF,%d0
	subil		#0x00003FFF,%d0	| ...D0 IS K
	cmpil		#28,%d0
	bles		LASTLOOP
CONTLOOP:
	subil		#27,%d0	 | ...D0 IS L := K-27
	movel		#0,ENDFLAG(%a6)
	bras		WORK
LASTLOOP:
	clrl		%d0		| ...D0 IS L := 0
	movel		#1,ENDFLAG(%a6)

WORK:
|--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
|--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.

|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
|--2**L * (PIby2_1), 2**L * (PIby2_2)

	movel		#0x00003FFE,%d2	| ...BIASED EXPO OF 2/PI
	subl		%d0,%d2		| ...BIASED EXPO OF 2**(-L)*(2/PI)

	movel		#0xA2F9836E,FP_SCR1+4(%a6)
	movel		#0x4E44152A,FP_SCR1+8(%a6)
	movew		%d2,FP_SCR1(%a6)	| ...FP_SCR1 is 2**(-L)*(2/PI)

	fmovex		%fp0,%fp2
	fmulx		FP_SCR1(%a6),%fp2
|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
|--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
|--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
|--US THE DESIRED VALUE IN FLOATING POINT.

|--HIDE SIX CYCLES OF INSTRUCTION
        movel		%a1,%d2
        swap		%d2
	andil		#0x80000000,%d2
	oril		#0x5F000000,%d2	| ...D2 IS SIGN(INARG)*2**63 IN SGL
	movel		%d2,TWOTO63(%a6)

	movel		%d0,%d2
	addil		#0x00003FFF,%d2	| ...BIASED EXPO OF 2**L * (PI/2)

|--FP2 IS READY
	fadds		TWOTO63(%a6),%fp2	| ...THE FRACTIONAL PART OF FP1 IS ROUNDED

|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
        movew		%d2,FP_SCR2(%a6)
	clrw           FP_SCR2+2(%a6)
	movel		#0xC90FDAA2,FP_SCR2+4(%a6)
	clrl		FP_SCR2+8(%a6)		| ...FP_SCR2 is  2**(L) * Piby2_1

|--FP2 IS READY
	fsubs		TWOTO63(%a6),%fp2		| ...FP2 is N

	addil		#0x00003FDD,%d0
        movew		%d0,FP_SCR3(%a6)
	clrw           FP_SCR3+2(%a6)
	movel		#0x85A308D3,FP_SCR3+4(%a6)
	clrl		FP_SCR3+8(%a6)		| ...FP_SCR3 is 2**(L) * Piby2_2

	movel		ENDFLAG(%a6),%d0

|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
|--P2 = 2**(L) * Piby2_2
	fmovex		%fp2,%fp4
	fmulx		FP_SCR2(%a6),%fp4		| ...W = N*P1
	fmovex		%fp2,%fp5
	fmulx		FP_SCR3(%a6),%fp5		| ...w = N*P2
	fmovex		%fp4,%fp3
|--we want P+p = W+w  but  |p| <= half ulp of P
|--Then, we need to compute  A := R-P   and  a := r-p
	faddx		%fp5,%fp3			| ...FP3 is P
	fsubx		%fp3,%fp4			| ...W-P

	fsubx		%fp3,%fp0			| ...FP0 is A := R - P
        faddx		%fp5,%fp4			| ...FP4 is p = (W-P)+w

	fmovex		%fp0,%fp3			| ...FP3 A
	fsubx		%fp4,%fp1			| ...FP1 is a := r - p

|--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
|--|r| <= half ulp of R.
	faddx		%fp1,%fp0			| ...FP0 is R := A+a
|--No need to calculate r if this is the last loop
	cmpil		#0,%d0
	bgt		RESTORE

|--Need to calculate r
	fsubx		%fp0,%fp3			| ...A-R
	faddx		%fp3,%fp1			| ...FP1 is r := (A-R)+a
	bra		LOOP

RESTORE:
        fmovel		%fp2,N(%a6)
	movel		(%a7)+,%d2
	fmovemx	(%a7)+,%fp2-%fp5


	movel		N(%a6),%d0
        rorl		#1,%d0


	bra		TANCONT

	|end
' href='#n1392'>1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481
/*
 * drivers/ata/sata_fsl.c
 *
 * Freescale 3.0Gbps SATA device driver
 *
 * Author: Ashish Kalra <ashish.kalra@freescale.com>
 * Li Yang <leoli@freescale.com>
 *
 * Copyright (c) 2006-2007, 2011 Freescale Semiconductor, Inc.
 *
 * This program is free software; you can redistribute  it and/or modify it
 * under  the terms of  the GNU General  Public License as published by the
 * Free Software Foundation;  either version 2 of the  License, or (at your
 * option) any later version.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <linux/libata.h>
#include <asm/io.h>
#include <linux/of_platform.h>

/* Controller information */
enum {
	SATA_FSL_QUEUE_DEPTH	= 16,
	SATA_FSL_MAX_PRD	= 63,
	SATA_FSL_MAX_PRD_USABLE	= SATA_FSL_MAX_PRD - 1,
	SATA_FSL_MAX_PRD_DIRECT	= 16,	/* Direct PRDT entries */

	SATA_FSL_HOST_FLAGS	= (ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
				ATA_FLAG_PMP | ATA_FLAG_NCQ | ATA_FLAG_AN),

	SATA_FSL_MAX_CMDS	= SATA_FSL_QUEUE_DEPTH,
	SATA_FSL_CMD_HDR_SIZE	= 16,	/* 4 DWORDS */
	SATA_FSL_CMD_SLOT_SIZE  = (SATA_FSL_MAX_CMDS * SATA_FSL_CMD_HDR_SIZE),

	/*
	 * SATA-FSL host controller supports a max. of (15+1) direct PRDEs, and
	 * chained indirect PRDEs up to a max count of 63.
	 * We are allocating an array of 63 PRDEs contiguously, but PRDE#15 will
	 * be setup as an indirect descriptor, pointing to it's next
	 * (contiguous) PRDE. Though chained indirect PRDE arrays are
	 * supported,it will be more efficient to use a direct PRDT and
	 * a single chain/link to indirect PRDE array/PRDT.
	 */

	SATA_FSL_CMD_DESC_CFIS_SZ	= 32,
	SATA_FSL_CMD_DESC_SFIS_SZ	= 32,
	SATA_FSL_CMD_DESC_ACMD_SZ	= 16,
	SATA_FSL_CMD_DESC_RSRVD		= 16,

	SATA_FSL_CMD_DESC_SIZE	= (SATA_FSL_CMD_DESC_CFIS_SZ +
				 SATA_FSL_CMD_DESC_SFIS_SZ +
				 SATA_FSL_CMD_DESC_ACMD_SZ +
				 SATA_FSL_CMD_DESC_RSRVD +
				 SATA_FSL_MAX_PRD * 16),

	SATA_FSL_CMD_DESC_OFFSET_TO_PRDT	=
				(SATA_FSL_CMD_DESC_CFIS_SZ +
				 SATA_FSL_CMD_DESC_SFIS_SZ +
				 SATA_FSL_CMD_DESC_ACMD_SZ +
				 SATA_FSL_CMD_DESC_RSRVD),

	SATA_FSL_CMD_DESC_AR_SZ	= (SATA_FSL_CMD_DESC_SIZE * SATA_FSL_MAX_CMDS),
	SATA_FSL_PORT_PRIV_DMA_SZ = (SATA_FSL_CMD_SLOT_SIZE +
					SATA_FSL_CMD_DESC_AR_SZ),

	/*
	 * MPC8315 has two SATA controllers, SATA1 & SATA2
	 * (one port per controller)
	 * MPC837x has 2/4 controllers, one port per controller
	 */

	SATA_FSL_MAX_PORTS	= 1,

	SATA_FSL_IRQ_FLAG	= IRQF_SHARED,
};

/*
* Host Controller command register set - per port
*/
enum {
	CQ = 0,
	CA = 8,
	CC = 0x10,
	CE = 0x18,
	DE = 0x20,
	CHBA = 0x24,
	HSTATUS = 0x28,
	HCONTROL = 0x2C,
	CQPMP = 0x30,
	SIGNATURE = 0x34,
	ICC = 0x38,

	/*
	 * Host Status Register (HStatus) bitdefs
	 */
	ONLINE = (1 << 31),
	GOING_OFFLINE = (1 << 30),
	BIST_ERR = (1 << 29),

	FATAL_ERR_HC_MASTER_ERR = (1 << 18),
	FATAL_ERR_PARITY_ERR_TX = (1 << 17),
	FATAL_ERR_PARITY_ERR_RX = (1 << 16),
	FATAL_ERR_DATA_UNDERRUN = (1 << 13),
	FATAL_ERR_DATA_OVERRUN = (1 << 12),
	FATAL_ERR_CRC_ERR_TX = (1 << 11),
	FATAL_ERR_CRC_ERR_RX = (1 << 10),
	FATAL_ERR_FIFO_OVRFL_TX = (1 << 9),
	FATAL_ERR_FIFO_OVRFL_RX = (1 << 8),

	FATAL_ERROR_DECODE = FATAL_ERR_HC_MASTER_ERR |
	    FATAL_ERR_PARITY_ERR_TX |
	    FATAL_ERR_PARITY_ERR_RX |
	    FATAL_ERR_DATA_UNDERRUN |
	    FATAL_ERR_DATA_OVERRUN |
	    FATAL_ERR_CRC_ERR_TX |
	    FATAL_ERR_CRC_ERR_RX |
	    FATAL_ERR_FIFO_OVRFL_TX | FATAL_ERR_FIFO_OVRFL_RX,

	INT_ON_FATAL_ERR = (1 << 5),
	INT_ON_PHYRDY_CHG = (1 << 4),

	INT_ON_SIGNATURE_UPDATE = (1 << 3),
	INT_ON_SNOTIFY_UPDATE = (1 << 2),
	INT_ON_SINGL_DEVICE_ERR = (1 << 1),
	INT_ON_CMD_COMPLETE = 1,

	INT_ON_ERROR = INT_ON_FATAL_ERR | INT_ON_SNOTIFY_UPDATE |
	    INT_ON_PHYRDY_CHG | INT_ON_SINGL_DEVICE_ERR,

	/*
	 * Host Control Register (HControl) bitdefs
	 */
	HCONTROL_ONLINE_PHY_RST = (1 << 31),
	HCONTROL_FORCE_OFFLINE = (1 << 30),
	HCONTROL_PARITY_PROT_MOD = (1 << 14),
	HCONTROL_DPATH_PARITY = (1 << 12),
	HCONTROL_SNOOP_ENABLE = (1 << 10),
	HCONTROL_PMP_ATTACHED = (1 << 9),
	HCONTROL_COPYOUT_STATFIS = (1 << 8),
	IE_ON_FATAL_ERR = (1 << 5),
	IE_ON_PHYRDY_CHG = (1 << 4),
	IE_ON_SIGNATURE_UPDATE = (1 << 3),
	IE_ON_SNOTIFY_UPDATE = (1 << 2),
	IE_ON_SINGL_DEVICE_ERR = (1 << 1),
	IE_ON_CMD_COMPLETE = 1,

	DEFAULT_PORT_IRQ_ENABLE_MASK = IE_ON_FATAL_ERR | IE_ON_PHYRDY_CHG |
	    IE_ON_SIGNATURE_UPDATE | IE_ON_SNOTIFY_UPDATE |
	    IE_ON_SINGL_DEVICE_ERR | IE_ON_CMD_COMPLETE,

	EXT_INDIRECT_SEG_PRD_FLAG = (1 << 31),
	DATA_SNOOP_ENABLE_V1 = (1 << 22),
	DATA_SNOOP_ENABLE_V2 = (1 << 28),
};

/*
 * SATA Superset Registers
 */
enum {
	SSTATUS = 0,
	SERROR = 4,
	SCONTROL = 8,
	SNOTIFY = 0xC,
};

/*
 * Control Status Register Set
 */
enum {
	TRANSCFG = 0,
	TRANSSTATUS = 4,
	LINKCFG = 8,
	LINKCFG1 = 0xC,
	LINKCFG2 = 0x10,
	LINKSTATUS = 0x14,
	LINKSTATUS1 = 0x18,
	PHYCTRLCFG = 0x1C,
	COMMANDSTAT = 0x20,
};

/* TRANSCFG (transport-layer) configuration control */
enum {
	TRANSCFG_RX_WATER_MARK = (1 << 4),
};

/* PHY (link-layer) configuration control */
enum {
	PHY_BIST_ENABLE = 0x01,
};

/*
 * Command Header Table entry, i.e, command slot
 * 4 Dwords per command slot, command header size ==  64 Dwords.
 */
struct cmdhdr_tbl_entry {
	u32 cda;
	u32 prde_fis_len;
	u32 ttl;
	u32 desc_info;
};

/*
 * Description information bitdefs
 */
enum {
	CMD_DESC_RES = (1 << 11),
	VENDOR_SPECIFIC_BIST = (1 << 10),
	CMD_DESC_SNOOP_ENABLE = (1 << 9),
	FPDMA_QUEUED_CMD = (1 << 8),
	SRST_CMD = (1 << 7),
	BIST = (1 << 6),
	ATAPI_CMD = (1 << 5),
};

/*
 * Command Descriptor
 */
struct command_desc {
	u8 cfis[8 * 4];
	u8 sfis[8 * 4];
	u8 acmd[4 * 4];
	u8 fill[4 * 4];
	u32 prdt[SATA_FSL_MAX_PRD_DIRECT * 4];
	u32 prdt_indirect[(SATA_FSL_MAX_PRD - SATA_FSL_MAX_PRD_DIRECT) * 4];
};

/*
 * Physical region table descriptor(PRD)
 */

struct prde {
	u32 dba;
	u8 fill[2 * 4];
	u32 ddc_and_ext;
};

/*
 * ata_port private data
 * This is our per-port instance data.
 */
struct sata_fsl_port_priv {
	struct cmdhdr_tbl_entry *cmdslot;
	dma_addr_t cmdslot_paddr;
	struct command_desc *cmdentry;
	dma_addr_t cmdentry_paddr;
};

/*
 * ata_port->host_set private data
 */
struct sata_fsl_host_priv {
	void __iomem *hcr_base;
	void __iomem *ssr_base;
	void __iomem *csr_base;
	int irq;
	int data_snoop;
};

static inline unsigned int sata_fsl_tag(unsigned int tag,
					void __iomem *hcr_base)
{
	/* We let libATA core do actual (queue) tag allocation */

	/* all non NCQ/queued commands should have tag#0 */
	if (ata_tag_internal(tag)) {
		DPRINTK("mapping internal cmds to tag#0\n");
		return 0;
	}

	if (unlikely(tag >= SATA_FSL_QUEUE_DEPTH)) {
		DPRINTK("tag %d invalid : out of range\n", tag);
		return 0;
	}

	if (unlikely((ioread32(hcr_base + CQ)) & (1 << tag))) {
		DPRINTK("tag %d invalid : in use!!\n", tag);
		return 0;
	}

	return tag;
}

static void sata_fsl_setup_cmd_hdr_entry(struct sata_fsl_port_priv *pp,
					 unsigned int tag, u32 desc_info,
					 u32 data_xfer_len, u8 num_prde,
					 u8 fis_len)
{
	dma_addr_t cmd_descriptor_address;

	cmd_descriptor_address = pp->cmdentry_paddr +
	    tag * SATA_FSL_CMD_DESC_SIZE;

	/* NOTE: both data_xfer_len & fis_len are Dword counts */

	pp->cmdslot[tag].cda = cpu_to_le32(cmd_descriptor_address);
	pp->cmdslot[tag].prde_fis_len =
	    cpu_to_le32((num_prde << 16) | (fis_len << 2));
	pp->cmdslot[tag].ttl = cpu_to_le32(data_xfer_len & ~0x03);
	pp->cmdslot[tag].desc_info = cpu_to_le32(desc_info | (tag & 0x1F));

	VPRINTK("cda=0x%x, prde_fis_len=0x%x, ttl=0x%x, di=0x%x\n",
		pp->cmdslot[tag].cda,
		pp->cmdslot[tag].prde_fis_len,
		pp->cmdslot[tag].ttl, pp->cmdslot[tag].desc_info);

}

static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
				     u32 *ttl, dma_addr_t cmd_desc_paddr,
				     int data_snoop)
{
	struct scatterlist *sg;
	unsigned int num_prde = 0;
	u32 ttl_dwords = 0;

	/*
	 * NOTE : direct & indirect prdt's are contiguously allocated
	 */
	struct prde *prd = (struct prde *)&((struct command_desc *)
					    cmd_desc)->prdt;

	struct prde *prd_ptr_to_indirect_ext = NULL;
	unsigned indirect_ext_segment_sz = 0;
	dma_addr_t indirect_ext_segment_paddr;
	unsigned int si;

	VPRINTK("SATA FSL : cd = 0x%p, prd = 0x%p\n", cmd_desc, prd);

	indirect_ext_segment_paddr = cmd_desc_paddr +
	    SATA_FSL_CMD_DESC_OFFSET_TO_PRDT + SATA_FSL_MAX_PRD_DIRECT * 16;

	for_each_sg(qc->sg, sg, qc->n_elem, si) {
		dma_addr_t sg_addr = sg_dma_address(sg);
		u32 sg_len = sg_dma_len(sg);

		VPRINTK("SATA FSL : fill_sg, sg_addr = 0x%llx, sg_len = %d\n",
			(unsigned long long)sg_addr, sg_len);

		/* warn if each s/g element is not dword aligned */
		if (sg_addr & 0x03)
			ata_port_printk(qc->ap, KERN_ERR,
					"s/g addr unaligned : 0x%llx\n",
					(unsigned long long)sg_addr);
		if (sg_len & 0x03)
			ata_port_printk(qc->ap, KERN_ERR,
					"s/g len unaligned : 0x%x\n", sg_len);

		if (num_prde == (SATA_FSL_MAX_PRD_DIRECT - 1) &&
		    sg_next(sg) != NULL) {
			VPRINTK("setting indirect prde\n");
			prd_ptr_to_indirect_ext = prd;
			prd->dba = cpu_to_le32(indirect_ext_segment_paddr);
			indirect_ext_segment_sz = 0;
			++prd;
			++num_prde;
		}

		ttl_dwords += sg_len;
		prd->dba = cpu_to_le32(sg_addr);
		prd->ddc_and_ext = cpu_to_le32(data_snoop | (sg_len & ~0x03));

		VPRINTK("sg_fill, ttl=%d, dba=0x%x, ddc=0x%x\n",
			ttl_dwords, prd->dba, prd->ddc_and_ext);

		++num_prde;
		++prd;
		if (prd_ptr_to_indirect_ext)
			indirect_ext_segment_sz += sg_len;
	}

	if (prd_ptr_to_indirect_ext) {
		/* set indirect extension flag along with indirect ext. size */
		prd_ptr_to_indirect_ext->ddc_and_ext =
		    cpu_to_le32((EXT_INDIRECT_SEG_PRD_FLAG |
				 data_snoop |
				 (indirect_ext_segment_sz & ~0x03)));
	}

	*ttl = ttl_dwords;
	return num_prde;
}

static void sata_fsl_qc_prep(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct sata_fsl_port_priv *pp = ap->private_data;
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	unsigned int tag = sata_fsl_tag(qc->tag, hcr_base);
	struct command_desc *cd;
	u32 desc_info = CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE;
	u32 num_prde = 0;
	u32 ttl_dwords = 0;
	dma_addr_t cd_paddr;

	cd = (struct command_desc *)pp->cmdentry + tag;
	cd_paddr = pp->cmdentry_paddr + tag * SATA_FSL_CMD_DESC_SIZE;

	ata_tf_to_fis(&qc->tf, qc->dev->link->pmp, 1, (u8 *) &cd->cfis);

	VPRINTK("Dumping cfis : 0x%x, 0x%x, 0x%x\n",
		cd->cfis[0], cd->cfis[1], cd->cfis[2]);

	if (qc->tf.protocol == ATA_PROT_NCQ) {
		VPRINTK("FPDMA xfer,Sctor cnt[0:7],[8:15] = %d,%d\n",
			cd->cfis[3], cd->cfis[11]);
	}

	/* setup "ACMD - atapi command" in cmd. desc. if this is ATAPI cmd */
	if (ata_is_atapi(qc->tf.protocol)) {
		desc_info |= ATAPI_CMD;
		memset((void *)&cd->acmd, 0, 32);
		memcpy((void *)&cd->acmd, qc->cdb, qc->dev->cdb_len);
	}

	if (qc->flags & ATA_QCFLAG_DMAMAP)
		num_prde = sata_fsl_fill_sg(qc, (void *)cd,
					    &ttl_dwords, cd_paddr,
					    host_priv->data_snoop);

	if (qc->tf.protocol == ATA_PROT_NCQ)
		desc_info |= FPDMA_QUEUED_CMD;

	sata_fsl_setup_cmd_hdr_entry(pp, tag, desc_info, ttl_dwords,
				     num_prde, 5);

	VPRINTK("SATA FSL : xx_qc_prep, di = 0x%x, ttl = %d, num_prde = %d\n",
		desc_info, ttl_dwords, num_prde);
}

static unsigned int sata_fsl_qc_issue(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	unsigned int tag = sata_fsl_tag(qc->tag, hcr_base);

	VPRINTK("xx_qc_issue called,CQ=0x%x,CA=0x%x,CE=0x%x,CC=0x%x\n",
		ioread32(CQ + hcr_base),
		ioread32(CA + hcr_base),
		ioread32(CE + hcr_base), ioread32(CC + hcr_base));

	iowrite32(qc->dev->link->pmp, CQPMP + hcr_base);

	/* Simply queue command to the controller/device */
	iowrite32(1 << tag, CQ + hcr_base);

	VPRINTK("xx_qc_issue called, tag=%d, CQ=0x%x, CA=0x%x\n",
		tag, ioread32(CQ + hcr_base), ioread32(CA + hcr_base));

	VPRINTK("CE=0x%x, DE=0x%x, CC=0x%x, CmdStat = 0x%x\n",
		ioread32(CE + hcr_base),
		ioread32(DE + hcr_base),
		ioread32(CC + hcr_base),
		ioread32(COMMANDSTAT + host_priv->csr_base));

	return 0;
}

static bool sata_fsl_qc_fill_rtf(struct ata_queued_cmd *qc)
{
	struct sata_fsl_port_priv *pp = qc->ap->private_data;
	struct sata_fsl_host_priv *host_priv = qc->ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	unsigned int tag = sata_fsl_tag(qc->tag, hcr_base);
	struct command_desc *cd;

	cd = pp->cmdentry + tag;

	ata_tf_from_fis(cd->sfis, &qc->result_tf);
	return true;
}

static int sata_fsl_scr_write(struct ata_link *link,
			      unsigned int sc_reg_in, u32 val)
{
	struct sata_fsl_host_priv *host_priv = link->ap->host->private_data;
	void __iomem *ssr_base = host_priv->ssr_base;
	unsigned int sc_reg;

	switch (sc_reg_in) {
	case SCR_STATUS:
	case SCR_ERROR:
	case SCR_CONTROL:
	case SCR_ACTIVE:
		sc_reg = sc_reg_in;
		break;
	default:
		return -EINVAL;
	}

	VPRINTK("xx_scr_write, reg_in = %d\n", sc_reg);

	iowrite32(val, ssr_base + (sc_reg * 4));
	return 0;
}

static int sata_fsl_scr_read(struct ata_link *link,
			     unsigned int sc_reg_in, u32 *val)
{
	struct sata_fsl_host_priv *host_priv = link->ap->host->private_data;
	void __iomem *ssr_base = host_priv->ssr_base;
	unsigned int sc_reg;

	switch (sc_reg_in) {
	case SCR_STATUS:
	case SCR_ERROR:
	case SCR_CONTROL:
	case SCR_ACTIVE:
		sc_reg = sc_reg_in;
		break;
	default:
		return -EINVAL;
	}

	VPRINTK("xx_scr_read, reg_in = %d\n", sc_reg);

	*val = ioread32(ssr_base + (sc_reg * 4));
	return 0;
}

static void sata_fsl_freeze(struct ata_port *ap)
{
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 temp;

	VPRINTK("xx_freeze, CQ=0x%x, CA=0x%x, CE=0x%x, DE=0x%x\n",
		ioread32(CQ + hcr_base),
		ioread32(CA + hcr_base),
		ioread32(CE + hcr_base), ioread32(DE + hcr_base));
	VPRINTK("CmdStat = 0x%x\n",
		ioread32(host_priv->csr_base + COMMANDSTAT));

	/* disable interrupts on the controller/port */
	temp = ioread32(hcr_base + HCONTROL);
	iowrite32((temp & ~0x3F), hcr_base + HCONTROL);

	VPRINTK("in xx_freeze : HControl = 0x%x, HStatus = 0x%x\n",
		ioread32(hcr_base + HCONTROL), ioread32(hcr_base + HSTATUS));
}

static void sata_fsl_thaw(struct ata_port *ap)
{
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 temp;

	/* ack. any pending IRQs for this controller/port */
	temp = ioread32(hcr_base + HSTATUS);

	VPRINTK("xx_thaw, pending IRQs = 0x%x\n", (temp & 0x3F));

	if (temp & 0x3F)
		iowrite32((temp & 0x3F), hcr_base + HSTATUS);

	/* enable interrupts on the controller/port */
	temp = ioread32(hcr_base + HCONTROL);
	iowrite32((temp | DEFAULT_PORT_IRQ_ENABLE_MASK), hcr_base + HCONTROL);

	VPRINTK("xx_thaw : HControl = 0x%x, HStatus = 0x%x\n",
		ioread32(hcr_base + HCONTROL), ioread32(hcr_base + HSTATUS));
}

static void sata_fsl_pmp_attach(struct ata_port *ap)
{
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 temp;

	temp = ioread32(hcr_base + HCONTROL);
	iowrite32((temp | HCONTROL_PMP_ATTACHED), hcr_base + HCONTROL);
}

static void sata_fsl_pmp_detach(struct ata_port *ap)
{
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 temp;

	temp = ioread32(hcr_base + HCONTROL);
	temp &= ~HCONTROL_PMP_ATTACHED;
	iowrite32(temp, hcr_base + HCONTROL);

	/* enable interrupts on the controller/port */
	temp = ioread32(hcr_base + HCONTROL);
	iowrite32((temp | DEFAULT_PORT_IRQ_ENABLE_MASK), hcr_base + HCONTROL);

}

static int sata_fsl_port_start(struct ata_port *ap)
{
	struct device *dev = ap->host->dev;
	struct sata_fsl_port_priv *pp;
	void *mem;
	dma_addr_t mem_dma;
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 temp;

	pp = kzalloc(sizeof(*pp), GFP_KERNEL);
	if (!pp)
		return -ENOMEM;

	mem = dma_alloc_coherent(dev, SATA_FSL_PORT_PRIV_DMA_SZ, &mem_dma,
				 GFP_KERNEL);
	if (!mem) {
		kfree(pp);
		return -ENOMEM;
	}
	memset(mem, 0, SATA_FSL_PORT_PRIV_DMA_SZ);

	pp->cmdslot = mem;
	pp->cmdslot_paddr = mem_dma;

	mem += SATA_FSL_CMD_SLOT_SIZE;
	mem_dma += SATA_FSL_CMD_SLOT_SIZE;

	pp->cmdentry = mem;
	pp->cmdentry_paddr = mem_dma;

	ap->private_data = pp;

	VPRINTK("CHBA = 0x%x, cmdentry_phys = 0x%x\n",
		pp->cmdslot_paddr, pp->cmdentry_paddr);

	/* Now, update the CHBA register in host controller cmd register set */
	iowrite32(pp->cmdslot_paddr & 0xffffffff, hcr_base + CHBA);

	/*
	 * Now, we can bring the controller on-line & also initiate
	 * the COMINIT sequence, we simply return here and the boot-probing
	 * & device discovery process is re-initiated by libATA using a
	 * Softreset EH (dummy) session. Hence, boot probing and device
	 * discovey will be part of sata_fsl_softreset() callback.
	 */

	temp = ioread32(hcr_base + HCONTROL);
	iowrite32((temp | HCONTROL_ONLINE_PHY_RST), hcr_base + HCONTROL);

	VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
	VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
	VPRINTK("CHBA  = 0x%x\n", ioread32(hcr_base + CHBA));

#ifdef CONFIG_MPC8315_DS
	/*
	 * Workaround for 8315DS board 3gbps link-up issue,
	 * currently limit SATA port to GEN1 speed
	 */
	sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp);
	temp &= ~(0xF << 4);
	temp |= (0x1 << 4);
	sata_fsl_scr_write(&ap->link, SCR_CONTROL, temp);

	sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp);
	dev_printk(KERN_WARNING, dev, "scr_control, speed limited to %x\n",
			temp);
#endif

	return 0;
}

static void sata_fsl_port_stop(struct ata_port *ap)
{
	struct device *dev = ap->host->dev;
	struct sata_fsl_port_priv *pp = ap->private_data;
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 temp;

	/*
	 * Force host controller to go off-line, aborting current operations
	 */
	temp = ioread32(hcr_base + HCONTROL);
	temp &= ~HCONTROL_ONLINE_PHY_RST;
	temp |= HCONTROL_FORCE_OFFLINE;
	iowrite32(temp, hcr_base + HCONTROL);

	/* Poll for controller to go offline - should happen immediately */
	ata_wait_register(ap, hcr_base + HSTATUS, ONLINE, ONLINE, 1, 1);

	ap->private_data = NULL;
	dma_free_coherent(dev, SATA_FSL_PORT_PRIV_DMA_SZ,
			  pp->cmdslot, pp->cmdslot_paddr);

	kfree(pp);
}

static unsigned int sata_fsl_dev_classify(struct ata_port *ap)
{
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	struct ata_taskfile tf;
	u32 temp;

	temp = ioread32(hcr_base + SIGNATURE);

	VPRINTK("raw sig = 0x%x\n", temp);
	VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
	VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));

	tf.lbah = (temp >> 24) & 0xff;
	tf.lbam = (temp >> 16) & 0xff;
	tf.lbal = (temp >> 8) & 0xff;
	tf.nsect = temp & 0xff;

	return ata_dev_classify(&tf);
}

static int sata_fsl_hardreset(struct ata_link *link, unsigned int *class,
					unsigned long deadline)
{
	struct ata_port *ap = link->ap;
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 temp;
	int i = 0;
	unsigned long start_jiffies;

	DPRINTK("in xx_hardreset\n");

try_offline_again:
	/*
	 * Force host controller to go off-line, aborting current operations
	 */
	temp = ioread32(hcr_base + HCONTROL);
	temp &= ~HCONTROL_ONLINE_PHY_RST;
	iowrite32(temp, hcr_base + HCONTROL);

	/* Poll for controller to go offline */
	temp = ata_wait_register(ap, hcr_base + HSTATUS, ONLINE, ONLINE,
				 1, 500);

	if (temp & ONLINE) {
		ata_port_printk(ap, KERN_ERR,
				"Hardreset failed, not off-lined %d\n", i);

		/*
		 * Try to offline controller atleast twice
		 */
		i++;
		if (i == 2)
			goto err;
		else
			goto try_offline_again;
	}

	DPRINTK("hardreset, controller off-lined\n");
	VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
	VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));

	/*
	 * PHY reset should remain asserted for atleast 1ms
	 */
	ata_msleep(ap, 1);

	/*
	 * Now, bring the host controller online again, this can take time
	 * as PHY reset and communication establishment, 1st D2H FIS and
	 * device signature update is done, on safe side assume 500ms
	 * NOTE : Host online status may be indicated immediately!!
	 */

	temp = ioread32(hcr_base + HCONTROL);
	temp |= (HCONTROL_ONLINE_PHY_RST | HCONTROL_SNOOP_ENABLE);
	temp |= HCONTROL_PMP_ATTACHED;
	iowrite32(temp, hcr_base + HCONTROL);

	temp = ata_wait_register(ap, hcr_base + HSTATUS, ONLINE, 0, 1, 500);

	if (!(temp & ONLINE)) {
		ata_port_printk(ap, KERN_ERR,
				"Hardreset failed, not on-lined\n");
		goto err;
	}

	DPRINTK("hardreset, controller off-lined & on-lined\n");
	VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
	VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));

	/*
	 * First, wait for the PHYRDY change to occur before waiting for
	 * the signature, and also verify if SStatus indicates device
	 * presence
	 */

	temp = ata_wait_register(ap, hcr_base + HSTATUS, 0xFF, 0, 1, 500);
	if ((!(temp & 0x10)) || ata_link_offline(link)) {
		ata_port_printk(ap, KERN_WARNING,
				"No Device OR PHYRDY change,Hstatus = 0x%x\n",
				ioread32(hcr_base + HSTATUS));
		*class = ATA_DEV_NONE;
		return 0;
	}

	/*
	 * Wait for the first D2H from device,i.e,signature update notification
	 */
	start_jiffies = jiffies;
	temp = ata_wait_register(ap, hcr_base + HSTATUS, 0xFF, 0x10,
			500, jiffies_to_msecs(deadline - start_jiffies));

	if ((temp & 0xFF) != 0x18) {
		ata_port_printk(ap, KERN_WARNING, "No Signature Update\n");
		*class = ATA_DEV_NONE;
		goto do_followup_srst;
	} else {
		ata_port_printk(ap, KERN_INFO,
				"Signature Update detected @ %d msecs\n",
				jiffies_to_msecs(jiffies - start_jiffies));
		*class = sata_fsl_dev_classify(ap);
		return 0;
	}

do_followup_srst:
	/*
	 * request libATA to perform follow-up softreset
	 */
	return -EAGAIN;

err:
	return -EIO;
}

static int sata_fsl_softreset(struct ata_link *link, unsigned int *class,
					unsigned long deadline)
{
	struct ata_port *ap = link->ap;
	struct sata_fsl_port_priv *pp = ap->private_data;
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	int pmp = sata_srst_pmp(link);
	u32 temp;
	struct ata_taskfile tf;
	u8 *cfis;
	u32 Serror;

	DPRINTK("in xx_softreset\n");

	if (ata_link_offline(link)) {
		DPRINTK("PHY reports no device\n");
		*class = ATA_DEV_NONE;
		return 0;
	}

	/*
	 * Send a device reset (SRST) explicitly on command slot #0
	 * Check : will the command queue (reg) be cleared during offlining ??
	 * Also we will be online only if Phy commn. has been established
	 * and device presence has been detected, therefore if we have
	 * reached here, we can send a command to the target device
	 */

	DPRINTK("Sending SRST/device reset\n");

	ata_tf_init(link->device, &tf);
	cfis = (u8 *) &pp->cmdentry->cfis;

	/* device reset/SRST is a control register update FIS, uses tag0 */
	sata_fsl_setup_cmd_hdr_entry(pp, 0,
		SRST_CMD | CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE, 0, 0, 5);

	tf.ctl |= ATA_SRST;	/* setup SRST bit in taskfile control reg */
	ata_tf_to_fis(&tf, pmp, 0, cfis);

	DPRINTK("Dumping cfis : 0x%x, 0x%x, 0x%x, 0x%x\n",
		cfis[0], cfis[1], cfis[2], cfis[3]);

	/*
	 * Queue SRST command to the controller/device, ensure that no
	 * other commands are active on the controller/device
	 */

	DPRINTK("@Softreset, CQ = 0x%x, CA = 0x%x, CC = 0x%x\n",
		ioread32(CQ + hcr_base),
		ioread32(CA + hcr_base), ioread32(CC + hcr_base));

	iowrite32(0xFFFF, CC + hcr_base);
	if (pmp != SATA_PMP_CTRL_PORT)
		iowrite32(pmp, CQPMP + hcr_base);
	iowrite32(1, CQ + hcr_base);

	temp = ata_wait_register(ap, CQ + hcr_base, 0x1, 0x1, 1, 5000);
	if (temp & 0x1) {
		ata_port_printk(ap, KERN_WARNING, "ATA_SRST issue failed\n");

		DPRINTK("Softreset@5000,CQ=0x%x,CA=0x%x,CC=0x%x\n",
			ioread32(CQ + hcr_base),
			ioread32(CA + hcr_base), ioread32(CC + hcr_base));

		sata_fsl_scr_read(&ap->link, SCR_ERROR, &Serror);

		DPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
		DPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
		DPRINTK("Serror = 0x%x\n", Serror);
		goto err;
	}

	ata_msleep(ap, 1);

	/*
	 * SATA device enters reset state after receiving a Control register
	 * FIS with SRST bit asserted and it awaits another H2D Control reg.
	 * FIS with SRST bit cleared, then the device does internal diags &
	 * initialization, followed by indicating it's initialization status
	 * using ATA signature D2H register FIS to the host controller.
	 */

	sata_fsl_setup_cmd_hdr_entry(pp, 0, CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE,
				      0, 0, 5);

	tf.ctl &= ~ATA_SRST;	/* 2nd H2D Ctl. register FIS */
	ata_tf_to_fis(&tf, pmp, 0, cfis);

	if (pmp != SATA_PMP_CTRL_PORT)
		iowrite32(pmp, CQPMP + hcr_base);
	iowrite32(1, CQ + hcr_base);
	ata_msleep(ap, 150);		/* ?? */

	/*
	 * The above command would have signalled an interrupt on command
	 * complete, which needs special handling, by clearing the Nth
	 * command bit of the CCreg
	 */
	iowrite32(0x01, CC + hcr_base);	/* We know it will be cmd#0 always */

	DPRINTK("SATA FSL : Now checking device signature\n");

	*class = ATA_DEV_NONE;

	/* Verify if SStatus indicates device presence */
	if (ata_link_online(link)) {
		/*
		 * if we are here, device presence has been detected,
		 * 1st D2H FIS would have been received, but sfis in
		 * command desc. is not updated, but signature register
		 * would have been updated
		 */

		*class = sata_fsl_dev_classify(ap);

		DPRINTK("class = %d\n", *class);
		VPRINTK("ccreg = 0x%x\n", ioread32(hcr_base + CC));
		VPRINTK("cereg = 0x%x\n", ioread32(hcr_base + CE));
	}

	return 0;

err:
	return -EIO;
}

static void sata_fsl_error_handler(struct ata_port *ap)
{

	DPRINTK("in xx_error_handler\n");
	sata_pmp_error_handler(ap);

}

static void sata_fsl_post_internal_cmd(struct ata_queued_cmd *qc)
{
	if (qc->flags & ATA_QCFLAG_FAILED)
		qc->err_mask |= AC_ERR_OTHER;

	if (qc->err_mask) {
		/* make DMA engine forget about the failed command */

	}
}

static void sata_fsl_error_intr(struct ata_port *ap)
{
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 hstatus, dereg=0, cereg = 0, SError = 0;
	unsigned int err_mask = 0, action = 0;
	int freeze = 0, abort=0;
	struct ata_link *link = NULL;
	struct ata_queued_cmd *qc = NULL;
	struct ata_eh_info *ehi;

	hstatus = ioread32(hcr_base + HSTATUS);
	cereg = ioread32(hcr_base + CE);

	/* first, analyze and record host port events */
	link = &ap->link;
	ehi = &link->eh_info;
	ata_ehi_clear_desc(ehi);

	/*
	 * Handle & Clear SError
	 */

	sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError);
	if (unlikely(SError & 0xFFFF0000))
		sata_fsl_scr_write(&ap->link, SCR_ERROR, SError);

	DPRINTK("error_intr,hStat=0x%x,CE=0x%x,DE =0x%x,SErr=0x%x\n",
		hstatus, cereg, ioread32(hcr_base + DE), SError);

	/* handle fatal errors */
	if (hstatus & FATAL_ERROR_DECODE) {
		ehi->err_mask |= AC_ERR_ATA_BUS;
		ehi->action |= ATA_EH_SOFTRESET;

		freeze = 1;
	}

	/* Handle SDB FIS receive & notify update */
	if (hstatus & INT_ON_SNOTIFY_UPDATE)
		sata_async_notification(ap);

	/* Handle PHYRDY change notification */
	if (hstatus & INT_ON_PHYRDY_CHG) {
		DPRINTK("SATA FSL: PHYRDY change indication\n");

		/* Setup a soft-reset EH action */
		ata_ehi_hotplugged(ehi);
		ata_ehi_push_desc(ehi, "%s", "PHY RDY changed");
		freeze = 1;
	}

	/* handle single device errors */
	if (cereg) {
		/*
		 * clear the command error, also clears queue to the device
		 * in error, and we can (re)issue commands to this device.
		 * When a device is in error all commands queued into the
		 * host controller and at the device are considered aborted
		 * and the queue for that device is stopped. Now, after
		 * clearing the device error, we can issue commands to the
		 * device to interrogate it to find the source of the error.
		 */
		abort = 1;

		DPRINTK("single device error, CE=0x%x, DE=0x%x\n",
			ioread32(hcr_base + CE), ioread32(hcr_base + DE));

		/* find out the offending link and qc */
		if (ap->nr_pmp_links) {
			unsigned int dev_num;

			dereg = ioread32(hcr_base + DE);
			iowrite32(dereg, hcr_base + DE);
			iowrite32(cereg, hcr_base + CE);

			dev_num = ffs(dereg) - 1;
			if (dev_num < ap->nr_pmp_links && dereg != 0) {
				link = &ap->pmp_link[dev_num];
				ehi = &link->eh_info;
				qc = ata_qc_from_tag(ap, link->active_tag);
				/*
				 * We should consider this as non fatal error,
                                 * and TF must be updated as done below.
		                 */

				err_mask |= AC_ERR_DEV;

			} else {
				err_mask |= AC_ERR_HSM;
				action |= ATA_EH_HARDRESET;
				freeze = 1;
			}
		} else {
			dereg = ioread32(hcr_base + DE);
			iowrite32(dereg, hcr_base + DE);
			iowrite32(cereg, hcr_base + CE);

			qc = ata_qc_from_tag(ap, link->active_tag);
			/*
			 * We should consider this as non fatal error,
                         * and TF must be updated as done below.
	                */
			err_mask |= AC_ERR_DEV;
		}
	}

	/* record error info */
	if (qc)
		qc->err_mask |= err_mask;
	else
		ehi->err_mask |= err_mask;

	ehi->action |= action;

	/* freeze or abort */
	if (freeze)
		ata_port_freeze(ap);
	else if (abort) {
		if (qc)
			ata_link_abort(qc->dev->link);
		else
			ata_port_abort(ap);
	}
}

static void sata_fsl_host_intr(struct ata_port *ap)
{
	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 hstatus, done_mask = 0;
	struct ata_queued_cmd *qc;
	u32 SError;

	hstatus = ioread32(hcr_base + HSTATUS);

	sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError);

	if (unlikely(SError & 0xFFFF0000)) {
		DPRINTK("serror @host_intr : 0x%x\n", SError);
		sata_fsl_error_intr(ap);
	}

	if (unlikely(hstatus & INT_ON_ERROR)) {
		DPRINTK("error interrupt!!\n");
		sata_fsl_error_intr(ap);
		return;
	}

	/* Read command completed register */
	done_mask = ioread32(hcr_base + CC);

	VPRINTK("Status of all queues :\n");
	VPRINTK("done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x,CQ=0x%x,apqa=0x%x\n",
		done_mask,
		ioread32(hcr_base + CA),
		ioread32(hcr_base + CE),
		ioread32(hcr_base + CQ),
		ap->qc_active);

	if (done_mask & ap->qc_active) {
		int i;
		/* clear CC bit, this will also complete the interrupt */
		iowrite32(done_mask, hcr_base + CC);

		DPRINTK("Status of all queues :\n");
		DPRINTK("done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x\n",
			done_mask, ioread32(hcr_base + CA),
			ioread32(hcr_base + CE));

		for (i = 0; i < SATA_FSL_QUEUE_DEPTH; i++) {
			if (done_mask & (1 << i))
				DPRINTK
				    ("completing ncq cmd,tag=%d,CC=0x%x,CA=0x%x\n",
				     i, ioread32(hcr_base + CC),
				     ioread32(hcr_base + CA));
		}
		ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
		return;

	} else if ((ap->qc_active & (1 << ATA_TAG_INTERNAL))) {
		iowrite32(1, hcr_base + CC);
		qc = ata_qc_from_tag(ap, ATA_TAG_INTERNAL);

		DPRINTK("completing non-ncq cmd, CC=0x%x\n",
			 ioread32(hcr_base + CC));

		if (qc) {
			ata_qc_complete(qc);
		}
	} else {
		/* Spurious Interrupt!! */
		DPRINTK("spurious interrupt!!, CC = 0x%x\n",
			ioread32(hcr_base + CC));
		iowrite32(done_mask, hcr_base + CC);
		return;
	}
}

static irqreturn_t sata_fsl_interrupt(int irq, void *dev_instance)
{
	struct ata_host *host = dev_instance;
	struct sata_fsl_host_priv *host_priv = host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 interrupt_enables;
	unsigned handled = 0;
	struct ata_port *ap;

	/* ack. any pending IRQs for this controller/port */
	interrupt_enables = ioread32(hcr_base + HSTATUS);
	interrupt_enables &= 0x3F;

	DPRINTK("interrupt status 0x%x\n", interrupt_enables);

	if (!interrupt_enables)
		return IRQ_NONE;

	spin_lock(&host->lock);

	/* Assuming one port per host controller */

	ap = host->ports[0];
	if (ap) {
		sata_fsl_host_intr(ap);
	} else {
		dev_printk(KERN_WARNING, host->dev,
			   "interrupt on disabled port 0\n");
	}

	iowrite32(interrupt_enables, hcr_base + HSTATUS);
	handled = 1;

	spin_unlock(&host->lock);

	return IRQ_RETVAL(handled);
}

/*
 * Multiple ports are represented by multiple SATA controllers with
 * one port per controller
 */
static int sata_fsl_init_controller(struct ata_host *host)
{
	struct sata_fsl_host_priv *host_priv = host->private_data;
	void __iomem *hcr_base = host_priv->hcr_base;
	u32 temp;

	/*
	 * NOTE : We cannot bring the controller online before setting
	 * the CHBA, hence main controller initialization is done as
	 * part of the port_start() callback
	 */

	/* ack. any pending IRQs for this controller/port */
	temp = ioread32(hcr_base + HSTATUS);
	if (temp & 0x3F)
		iowrite32((temp & 0x3F), hcr_base + HSTATUS);

	/* Keep interrupts disabled on the controller */
	temp = ioread32(hcr_base + HCONTROL);
	iowrite32((temp & ~0x3F), hcr_base + HCONTROL);

	/* Disable interrupt coalescing control(icc), for the moment */
	DPRINTK("icc = 0x%x\n", ioread32(hcr_base + ICC));
	iowrite32(0x01000000, hcr_base + ICC);

	/* clear error registers, SError is cleared by libATA  */
	iowrite32(0x00000FFFF, hcr_base + CE);
	iowrite32(0x00000FFFF, hcr_base + DE);

	/*
	 * host controller will be brought on-line, during xx_port_start()
	 * callback, that should also initiate the OOB, COMINIT sequence
	 */

	DPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
	DPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));

	return 0;
}

/*
 * scsi mid-layer and libata interface structures
 */
static struct scsi_host_template sata_fsl_sht = {
	ATA_NCQ_SHT("sata_fsl"),
	.can_queue = SATA_FSL_QUEUE_DEPTH,
	.sg_tablesize = SATA_FSL_MAX_PRD_USABLE,
	.dma_boundary = ATA_DMA_BOUNDARY,
};

static struct ata_port_operations sata_fsl_ops = {
	.inherits		= &sata_pmp_port_ops,

	.qc_defer = ata_std_qc_defer,
	.qc_prep = sata_fsl_qc_prep,
	.qc_issue = sata_fsl_qc_issue,
	.qc_fill_rtf = sata_fsl_qc_fill_rtf,

	.scr_read = sata_fsl_scr_read,
	.scr_write = sata_fsl_scr_write,

	.freeze = sata_fsl_freeze,
	.thaw = sata_fsl_thaw,
	.softreset = sata_fsl_softreset,
	.hardreset = sata_fsl_hardreset,
	.pmp_softreset = sata_fsl_softreset,
	.error_handler = sata_fsl_error_handler,
	.post_internal_cmd = sata_fsl_post_internal_cmd,

	.port_start = sata_fsl_port_start,
	.port_stop = sata_fsl_port_stop,

	.pmp_attach = sata_fsl_pmp_attach,
	.pmp_detach = sata_fsl_pmp_detach,
};

static const struct ata_port_info sata_fsl_port_info[] = {
	{
	 .flags = SATA_FSL_HOST_FLAGS,
	 .pio_mask = ATA_PIO4,
	 .udma_mask = ATA_UDMA6,
	 .port_ops = &sata_fsl_ops,
	 },
};

static int sata_fsl_probe(struct platform_device *ofdev)
{
	int retval = -ENXIO;
	void __iomem *hcr_base = NULL;
	void __iomem *ssr_base = NULL;
	void __iomem *csr_base = NULL;
	struct sata_fsl_host_priv *host_priv = NULL;
	int irq;
	struct ata_host *host;
	u32 temp;

	struct ata_port_info pi = sata_fsl_port_info[0];
	const struct ata_port_info *ppi[] = { &pi, NULL };

	dev_printk(KERN_INFO, &ofdev->dev,
		   "Sata FSL Platform/CSB Driver init\n");

	hcr_base = of_iomap(ofdev->dev.of_node, 0);
	if (!hcr_base)
		goto error_exit_with_cleanup;

	ssr_base = hcr_base + 0x100;
	csr_base = hcr_base + 0x140;

	if (!of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc8315-sata")) {
		temp = ioread32(csr_base + TRANSCFG);
		temp = temp & 0xffffffe0;
		iowrite32(temp | TRANSCFG_RX_WATER_MARK, csr_base + TRANSCFG);
	}

	DPRINTK("@reset i/o = 0x%x\n", ioread32(csr_base + TRANSCFG));
	DPRINTK("sizeof(cmd_desc) = %d\n", sizeof(struct command_desc));
	DPRINTK("sizeof(#define cmd_desc) = %d\n", SATA_FSL_CMD_DESC_SIZE);

	host_priv = kzalloc(sizeof(struct sata_fsl_host_priv), GFP_KERNEL);
	if (!host_priv)
		goto error_exit_with_cleanup;

	host_priv->hcr_base = hcr_base;
	host_priv->ssr_base = ssr_base;
	host_priv->csr_base = csr_base;

	irq = irq_of_parse_and_map(ofdev->dev.of_node, 0);
	if (irq < 0) {
		dev_printk(KERN_ERR, &ofdev->dev, "invalid irq from platform\n");
		goto error_exit_with_cleanup;
	}
	host_priv->irq = irq;

	if (of_device_is_compatible(ofdev->dev.of_node, "fsl,pq-sata-v2"))
		host_priv->data_snoop = DATA_SNOOP_ENABLE_V2;
	else
		host_priv->data_snoop = DATA_SNOOP_ENABLE_V1;

	/* allocate host structure */
	host = ata_host_alloc_pinfo(&ofdev->dev, ppi, SATA_FSL_MAX_PORTS);

	/* host->iomap is not used currently */
	host->private_data = host_priv;

	/* initialize host controller */
	sata_fsl_init_controller(host);

	/*
	 * Now, register with libATA core, this will also initiate the
	 * device discovery process, invoking our port_start() handler &
	 * error_handler() to execute a dummy Softreset EH session
	 */
	ata_host_activate(host, irq, sata_fsl_interrupt, SATA_FSL_IRQ_FLAG,
			  &sata_fsl_sht);

	dev_set_drvdata(&ofdev->dev, host);

	return 0;

error_exit_with_cleanup:

	if (hcr_base)
		iounmap(hcr_base);
	if (host_priv)
		kfree(host_priv);

	return retval;
}

static int sata_fsl_remove(struct platform_device *ofdev)
{
	struct ata_host *host = dev_get_drvdata(&ofdev->dev);
	struct sata_fsl_host_priv *host_priv = host->private_data;

	ata_host_detach(host);

	dev_set_drvdata(&ofdev->dev, NULL);

	irq_dispose_mapping(host_priv->irq);
	iounmap(host_priv->hcr_base);
	kfree(host_priv);

	return 0;
}

#ifdef CONFIG_PM
static int sata_fsl_suspend(struct platform_device *op, pm_message_t state)
{
	struct ata_host *host = dev_get_drvdata(&op->dev);
	return ata_host_suspend(host, state);
}

static int sata_fsl_resume(struct platform_device *op)
{
	struct ata_host *host = dev_get_drvdata(&op->dev);
	struct sata_fsl_host_priv *host_priv = host->private_data;
	int ret;
	void __iomem *hcr_base = host_priv->hcr_base;
	struct ata_port *ap = host->ports[0];
	struct sata_fsl_port_priv *pp = ap->private_data;

	ret = sata_fsl_init_controller(host);
	if (ret) {
		dev_printk(KERN_ERR, &op->dev,
			"Error initialize hardware\n");
		return ret;
	}

	/* Recovery the CHBA register in host controller cmd register set */
	iowrite32(pp->cmdslot_paddr & 0xffffffff, hcr_base + CHBA);

	ata_host_resume(host);
	return 0;
}
#endif

static struct of_device_id fsl_sata_match[] = {
	{
		.compatible = "fsl,pq-sata",
	},
	{
		.compatible = "fsl,pq-sata-v2",
	},
	{},
};

MODULE_DEVICE_TABLE(of, fsl_sata_match);

static struct platform_driver fsl_sata_driver = {
	.driver = {
		.name = "fsl-sata",
		.owner = THIS_MODULE,
		.of_match_table = fsl_sata_match,
	},
	.probe		= sata_fsl_probe,
	.remove		= sata_fsl_remove,
#ifdef CONFIG_PM
	.suspend	= sata_fsl_suspend,
	.resume		= sata_fsl_resume,
#endif
};

static int __init sata_fsl_init(void)
{
	platform_driver_register(&fsl_sata_driver);
	return 0;
}

static void __exit sata_fsl_exit(void)
{
	platform_driver_unregister(&fsl_sata_driver);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ashish Kalra, Freescale Semiconductor");
MODULE_DESCRIPTION("Freescale 3.0Gbps SATA controller low level driver");
MODULE_VERSION("1.10");

module_init(sata_fsl_init);
module_exit(sata_fsl_exit);