score: Add support for Sunplus S+core architecture

This is the complete set of files for the new Score architecture port of Linux. The Score instruction set supports 16-bit, 32-bit and 64-bit instructions; Score SoCs have been used in game machines and LCD TVs. Signed-off-by: Chen Liqin <liqin.chen@sunplusct.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
author: Chen Liqin <liqin.chen@sunplusct.com> 2009-06-12 10:01:00 -0400
committer: Arnd Bergmann <arnd@arndb.de> 2009-06-19 05:38:47 -0400
commit: 6bc9a3966f0395419b09b2ec90f89f7f00341b37 (patch)
tree: 9c0d9d5376020266f5602501c8376d4a4f13142d /arch/score/lib/checksum.S
parent: 0732f87761dbe417cb6e084b712d07e879e876ef (diff)
1 files changed, 255 insertions, 0 deletions
diff --git a/arch/score/lib/checksum.S b/arch/score/lib/checksum.S
new file mode 100644
index 000000000000..706157edc7d5
--- /dev/null
+++ b/arch/score/lib/checksum.S
@@ -0,0 +1,255 @@
+/*
+ * arch/score/lib/checksum.S
+ *
+ * Score Processor version.
+ *
+ * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
+ *  Lennox Wu <lennox.wu@sunplusct.com>
+ *  Chen Liqin <liqin.chen@sunplusct.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/linkage.h>
+#define ADDC(sum,reg)                   /* sum += reg, adding 1 back when the add wraps; changes the condition flags */ \
+        add     sum, sum, reg;          /* plain add, a carry out is lost here */ \
+        cmp.c   reg, sum;               /* compare the addend with the new sum */ \
+        bleu    9f;                     /* reg <= sum (unsigned): the add did not wrap */ \
+        addi    sum, 0x1;               /* it wrapped: feed the lost carry back in */ \
+9:      /* local label, defined again by every expansion; "9f" picks the next one */
+/*
+ * CSUM_BIGCHUNK(src, offset, sum) - add the 32 bytes at [src + offset] to sum.
+ *
+ * The bytes are read as eight words with lw, four at a time, into r8-r11
+ * and each word is accumulated with ADDC (add with end-around carry).
+ * src is not advanced; the caller steps it after the last chunk.
+ * Clobbers r8, r9, r10, r11 and the condition flags.
+ *
+ * The last line of the body must not end in a backslash, otherwise the
+ * line that follows the macro becomes part of its body.
+ */
+#define CSUM_BIGCHUNK(src, offset, sum)         \
+        lw      r8, [src, offset + 0x00];       \
+        lw      r9, [src, offset + 0x04];       \
+        lw      r10, [src, offset + 0x08];      \
+        lw      r11, [src, offset + 0x0c];      \
+        ADDC(sum, r8);                          \
+        ADDC(sum, r9);                          \
+        ADDC(sum, r10);                         \
+        ADDC(sum, r11);                         \
+        lw      r8, [src, offset + 0x10];       \
+        lw      r9, [src, offset + 0x14];       \
+        lw      r10, [src, offset + 0x18];      \
+        lw      r11, [src, offset + 0x1c];      \
+        ADDC(sum, r8);                          \
+        ADDC(sum, r9);                          \
+        ADDC(sum, r10);                         \
+        ADDC(sum, r11);
+#define src r4                  /* address of the next byte of the buffer to sum */
+#define dest r5                 /* not referenced in the visible code; r5 itself is used as the byte count */
+#define sum r27                 /* running checksum accumulator */
+        .text
+/*
+ * small_csumcpy - sum the last few bytes of a buffer, fold, and return.
+ *
+ * In the visible code this is reached only from csum_partial: directly
+ * when the length is below 8, or after its word loops have left fewer
+ * than 4 bytes.  Unknown src alignment and < 8 bytes to go.
+ *
+ * In:   src (r4) = next byte to sum, r10 = bytes left, sum (r27) = running
+ *       sum, r25 = 1 if csum_partial already consumed a leading odd byte
+ *       (0 otherwise), r6 = partial checksum passed by the caller.
+ * Out:  r4 = sum folded to 16 bits (byte-swapped when r25 != 0) plus r6
+ *       with end-around carry; control leaves through "br r3".
+ * Uses: r5, r8, r9, r25, r26 and the condition flags.
+ */
+small_csumcpy:
+        mv      r5, r10         /* r5 = bytes left */
+        ldi     r9, 0x0
+        cmpi.c  r25, 0x1
+        beq pass_small_set_t7   /* r25 already set: reach the next beq with "equal" still true */
+        andri.c r25,r4 , 0x1    /* r25 = src & 1; "equal" when src is 2-byte aligned */
+pass_small_set_t7:
+        beq     aligned
+        cmpi.c  r5, 0x0         /* odd src: is there a byte to read at all? */
+        beq     fold
+        lbu     r9, [src]       /* leading byte at the odd address */
+        slli    r9,r9, 0x8      /* little endian: count it as the high byte of a 16-bit word */
+        ADDC(sum, r9)
+        addi    src, 0x1
+        subi.c  r5, 0x1
+        /* src is 2-byte aligned from here on; r5 is below 8 */
+aligned:
+        andri.c r8, r5, 0x4     /* bit 2 of the count: at least 4 bytes left? */
+        beq     len_less_4bytes
+        /* At least 4 bytes left: read them as one word or as two halfwords. */
+        andri.c r8, src, 0x3    /* src 4-byte aligned? then a single lw will do */
+        beq     four_byte_aligned
+        lhu     r9, [src]       /* only 2-byte aligned: first halfword */
+        addi    src, 2
+        ADDC(sum, r9)
+        lhu     r9, [src]       /* second halfword */
+        addi    src, 2
+        ADDC(sum, r9)
+        b len_less_4bytes
+four_byte_aligned:              /* at least 4 bytes left and src 4-byte aligned */
+        lw      r9, [src]
+        addi    src, 4
+        ADDC(sum, r9)
+len_less_4bytes:                /* src 2-byte aligned, only bits 1 and 0 of the count still matter */
+        andri.c r8, r5, 0x2
+        beq     len_less_2bytes
+        lhu     r9, [src]
+        addi    src, 0x2        /* src += 2 */
+        ADDC(sum, r9)
+len_less_2bytes:                /* at most one byte left */
+        andri.c r8, r5, 0x1
+        beq     fold            /* bit 0 clear: nothing left, go fold */
+        lbu     r9, [src]       /* trailing byte, added unshifted */
+fold_ADDC:                      /* not referenced in the visible code */
+        ADDC(sum, r9)
+fold:
+        /* fold checksum: add the low half of sum into its high half, keep 16 bits */
+        slli    r26, sum, 16    /* r26 = low half moved into the high half */
+        add     sum, sum, r26   /* high half of sum = high + low, may wrap */
+        cmp.c   r26, sum        /* wrapped if r26 > sum; srli below leaves the flags alone */
+        srli    sum, sum, 16    /* bring the 16-bit result down */
+        bleu    1f              /* r26 <= sum: no carry */
+        addi    sum, 0x1        /* r26 > sum: add the carry back */
+1:
+        /* odd buffer start? r25 was set in csum_partial or at the top of this routine */
+        cmpi.c  r25, 0x0
+        beq     1f
+        slli    r26, sum, 8     /* swap the two bytes of the 16-bit result */
+        srli    sum, sum, 8
+        or      sum, sum, r26
+        andi    sum, 0xffff
+1:
+        .set    optimize        /* NOTE(review): assembler mode switch, paired with ".set volatile" below; confirm its effect */
+        /* Add the passed partial csum. */
+        ADDC(sum, r6)
+        mv      r4, sum         /* result is handed back in r4 */
+        br      r3
+        .set    volatile
+/*
+ * csum_partial - sum a buffer for a one's-complement checksum.
+ *
+ * In:   r4 (src) = start of the buffer, any alignment
+ *       r5       = length in bytes
+ *       r6       = partial checksum to add to the result
+ * Out:  r4       = buffer sum folded to 16 bits, plus r6 with end-around
+ *                  carry; simply r6 when the length is zero
+ * Uses: r5, r8-r11, r25 (1 when the buffer starts on an odd address),
+ *       r26, r27 (sum) and the condition flags.  Returns with "br r3".
+ *
+ * A buffer of 8 bytes or more is first stepped to a 4-byte boundary.  If
+ * at least 56 bytes then remain, src is stepped further to a 32-byte
+ * boundary and the bulk is summed in 128-, 64- and 32-byte chunks.  Left
+ * over whole words are summed one at a time.  Every path with a non-zero
+ * length ends in small_csumcpy, which sums the last bytes, folds the sum
+ * and returns.
+ */
+        .align  5
+ENTRY(csum_partial)
+        ldi     sum, 0
+        ldi     r25, 0                  /* no odd leading byte seen yet */
+        mv      r10, r5                 /* small_csumcpy takes its byte count in r10 */
+        /*
+         * The zero test has to come before the "< 8" test, which would
+         * otherwise send a zero length to small_csumcpy and leave the
+         * "out" path unreachable.
+         */
+        cmpi.c  r5, 0x0
+        beq     out                     /* empty buffer: the result is r6 */
+        cmpi.c  r5, 0x8
+        blt     small_csumcpy           /* < 8 (signed) bytes to sum */
+        andri.c r25, src, 0x1           /* odd buffer? r25 = src & 1 */
+        beq     word_align
+hword_align:                            /* odd address: take 1 byte */
+        lbu     r8, [src]
+        subi    r5, 0x1
+        slli    r8, r8, 8               /* little endian: high byte of a 16-bit word */
+        ADDC(sum, r8)
+        addi    src, 0x1
+word_align:                             /* src is 2-byte aligned */
+        andri.c r8, src, 0x2            /* already 4-byte aligned? */
+        beq     dword_align
+        lhu     r8, [src]
+        subi    r5, 0x2
+        ADDC(sum, r8)
+        addi    src, 0x2
+dword_align:                            /* src is 4-byte aligned */
+        mv      r26, r5                 /* byte count for do_end_words on the short path */
+        ldi     r8, 56
+        cmp.c   r8, r5
+        bgtu    do_end_words            /* len < 56 (unsigned): word loop only */
+        andri.c r26, src, 0x4
+        beq     qword_align
+        lw      r8, [src]
+        subi    r5, 0x4
+        ADDC(sum, r8)
+        addi    src, 0x4
+qword_align:                            /* src is 8-byte aligned */
+        andri.c r26, src, 0x8
+        beq     oword_align
+        lw      r8, [src, 0x0]
+        lw      r9, [src, 0x4]
+        subi    r5, 0x8                 /* len -= 8 */
+        ADDC(sum, r8)
+        ADDC(sum, r9)
+        addi    src, 0x8
+oword_align:                            /* src is 16-byte aligned */
+        andri.c r26, src, 0x10
+        beq     begin_movement
+        lw      r10, [src, 0x08]
+        lw      r11, [src, 0x0c]
+        lw      r8, [src, 0x00]
+        lw      r9, [src, 0x04]
+        ADDC(sum, r10)
+        ADDC(sum, r11)
+        ADDC(sum, r8)
+        ADDC(sum, r9)
+        subi    r5, 0x10
+        addi    src, 0x10
+begin_movement:                         /* src is 32-byte aligned */
+        srli.c  r26, r5, 0x7            /* r26 = len / 128 */
+        beq     1f                      /* no full 128-byte chunk */
+        /* r26 counts the 128-byte chunks still to sum; r5 is left untouched */
+move_128bytes:
+        CSUM_BIGCHUNK(src, 0x00, sum)
+        CSUM_BIGCHUNK(src, 0x20, sum)
+        CSUM_BIGCHUNK(src, 0x40, sum)
+        CSUM_BIGCHUNK(src, 0x60, sum)
+        subi.c  r26, 0x01
+        addi    src, 0x80               /* does not touch the flags set by subi.c */
+        bne     move_128bytes
+1:      /* what is left is described by the low 7 bits of r5 */
+        andri.c r10, r5, 0x40           /* a 64-byte chunk? */
+        beq     1f
+move_64bytes:
+        CSUM_BIGCHUNK(src, 0x00, sum)
+        CSUM_BIGCHUNK(src, 0x20, sum)
+        addi    src, 0x40
+1:
+        andri   r26, r5, 0x1c           /* bytes in the remaining whole words (0..28) */
+        andri.c r10, r5, 0x20           /* a 32-byte chunk? */
+        beq     do_end_words            /* decided by the andri.c above */
+move_32bytes:
+        CSUM_BIGCHUNK(src, 0x00, sum)
+        andri   r26, r5, 0x1c
+        addri   src, src, 0x20
+do_end_words:
+        /* r26 = byte count: all of len from dword_align, or len & 0x1c */
+        cmpi.c  r26, 0x0
+        beq     maybe_end_cruft         /* no whole word left */
+        srli    r26, r26, 0x2           /* bytes -> words */
+end_words:
+        lw      r8, [src]
+        subi.c  r26, 0x1                /* one word done */
+        ADDC(sum, r8)
+        addi    src, 0x4
+        cmpi.c  r26, 0x0                /* ADDC changed the flags, so test again */
+        bne     end_words
+maybe_end_cruft:                        /* fewer than 4 bytes left */
+        andri   r10, r5, 0x3
+small_memcpy:
+        mv      r5, r10
+        j       small_csumcpy
+out:
+        mv      r4, r6                  /* nothing was summed: hand back the caller's partial sum */
+        br      r3
+END(csum_partial)
author	Chen Liqin <liqin.chen@sunplusct.com>	2009-06-12 10:01:00 -0400
committer	Arnd Bergmann <arnd@arndb.de>	2009-06-19 05:38:47 -0400
commit	6bc9a3966f0395419b09b2ec90f89f7f00341b37 (patch)
tree	9c0d9d5376020266f5602501c8376d4a4f13142d /arch/score/lib/checksum.S
parent	0732f87761dbe417cb6e084b712d07e879e876ef (diff)

diff --git a/arch/score/lib/checksum.S b/arch/score/lib/checksum.S new file mode 100644 index 000000000000..706157edc7d5 --- /dev/null +++ b/arch/score/lib/checksum.S
@@ -0,0 +1,255 @@
	1	/*
	2	* arch/score/lib/checksum.S
	3	*
	4	* Score Processor version.
	5	*
	6	* Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
	7	* Lennox Wu <lennox.wu@sunplusct.com>
	8	* Chen Liqin <liqin.chen@sunplusct.com>
	9	*
	10	* This program is free software; you can redistribute it and/or modify
	11	* it under the terms of the GNU General Public License as published by
	12	* the Free Software Foundation; either version 2 of the License, or
	13	* (at your option) any later version.
	14	*
	15	* This program is distributed in the hope that it will be useful,
	16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	* GNU General Public License for more details.
	19	*
	20	* You should have received a copy of the GNU General Public License
	21	* along with this program; if not, see the file COPYING, or write
	22	* to the Free Software Foundation, Inc.,
	23	* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
	24	*/
	25	#include <linux/linkage.h>
	26
	27	#define ADDC(sum,reg) \
	28	add sum, sum, reg; \
	29	cmp.c reg, sum; \
	30	bleu 9f; \
	31	addi sum, 0x1; \
	32	9:
	33
	34	#define CSUM_BIGCHUNK(src, offset, sum) \
	35	lw r8, [src, offset + 0x00]; \
	36	lw r9, [src, offset + 0x04]; \
	37	lw r10, [src, offset + 0x08]; \
	38	lw r11, [src, offset + 0x0c]; \
	39	ADDC(sum, r8); \
	40	ADDC(sum, r9); \
	41	ADDC(sum, r10); \
	42	ADDC(sum, r11); \
	43	lw r8, [src, offset + 0x10]; \
	44	lw r9, [src, offset + 0x14]; \
	45	lw r10, [src, offset + 0x18]; \
	46	lw r11, [src, offset + 0x1c]; \
	47	ADDC(sum, r8); \
	48	ADDC(sum, r9); \
	49	ADDC(sum, r10); \
	50	ADDC(sum, r11); \
	51
	52	#define src r4
	53	#define dest r5
	54	#define sum r27
	55
	56	.text
	57	/* unknown src alignment and < 8 bytes to go */
	58	small_csumcpy:
	59	mv r5, r10
	60	ldi r9, 0x0
	61	cmpi.c r25, 0x1
	62	beq pass_small_set_t7 /*already set, jump to pass_small_set_t7*/
	63	andri.c r25,r4 , 0x1 /*Is src 2 bytes aligned?*/
	64
	65	pass_small_set_t7:
	66	beq aligned
	67	cmpi.c r5, 0x0
	68	beq fold
	69	lbu r9, [src]
	70	slli r9,r9, 0x8 /*Little endian*/
	71	ADDC(sum, r9)
	72	addi src, 0x1
	73	subi.c r5, 0x1
	74
	75	/*len still a full word */
	76	aligned:
	77	andri.c r8, r5, 0x4 /*Len >= 4?*/
	78	beq len_less_4bytes
	79
	80	/* Still a full word (4byte) to go,and the src is word aligned.*/
	81	andri.c r8, src, 0x3 /*src is 4bytes aligned, so use LW!!*/
	82	beq four_byte_aligned
	83	lhu r9, [src]
	84	addi src, 2
	85	ADDC(sum, r9)
	86	lhu r9, [src]
	87	addi src, 2
	88	ADDC(sum, r9)
	89	b len_less_4bytes
	90
	91	four_byte_aligned: /* Len >=4 and four byte aligned */
	92	lw r9, [src]
	93	addi src, 4
	94	ADDC(sum, r9)
	95
	96	len_less_4bytes: /* 2 byte aligned and length<4B */
	97	andri.c r8, r5, 0x2
	98	beq len_less_2bytes
	99	lhu r9, [src]
	100	addi src, 0x2 /* src+=2 */
	101	ADDC(sum, r9)
	102
	103	len_less_2bytes: /* len = 1 */
	104	andri.c r8, r5, 0x1
	105	beq fold /* less than 2 and not equal 1--> len=0 -> fold */
	106	lbu r9, [src]
	107
	108	fold_ADDC:
	109	ADDC(sum, r9)
	110	fold:
	111	/* fold checksum */
	112	slli r26, sum, 16
	113	add sum, sum, r26
	114	cmp.c r26, sum
	115	srli sum, sum, 16
	116	bleu 1f /* if r26<=sum */
	117	addi sum, 0x1 /* r26>sum */
	118	1:
	119	/* odd buffer alignment? r25 was set in csum_partial */
	120	cmpi.c r25, 0x0
	121	beq 1f
	122	slli r26, sum, 8
	123	srli sum, sum, 8
	124	or sum, sum, r26
	125	andi sum, 0xffff
	126	1:
	127	.set optimize
	128	/* Add the passed partial csum. */
	129	ADDC(sum, r6)
	130	mv r4, sum
	131	br r3
	132	.set volatile
	133
	134	.align 5
	135	ENTRY(csum_partial)
	136	ldi sum, 0
	137	ldi r25, 0
	138	mv r10, r5
	139	cmpi.c r5, 0x8
	140	blt small_csumcpy /* < 8(signed) bytes to copy */
	141	cmpi.c r5, 0x0
	142	beq out
	143	andri.c r25, src, 0x1 /* odd buffer? */
	144
	145	beq word_align
	146	hword_align: /* 1 byte */
	147	lbu r8, [src]
	148	subi r5, 0x1
	149	slli r8, r8, 8
	150	ADDC(sum, r8)
	151	addi src, 0x1
	152
	153	word_align: /* 2 bytes */
	154	andri.c r8, src, 0x2 /* 4bytes(dword)_aligned? */
	155	beq dword_align /* not, maybe dword_align */
	156	lhu r8, [src]
	157	subi r5, 0x2
	158	ADDC(sum, r8)
	159	addi src, 0x2
	160
	161	dword_align: /* 4bytes */
	162	mv r26, r5 /* maybe useless when len >=56 */
	163	ldi r8, 56
	164	cmp.c r8, r5
	165	bgtu do_end_words /* if a1(len)<t0(56) ,unsigned */
	166	andri.c r26, src, 0x4
	167	beq qword_align
	168	lw r8, [src]
	169	subi r5, 0x4
	170	ADDC(sum, r8)
	171	addi src, 0x4
	172
	173	qword_align: /* 8 bytes */
	174	andri.c r26, src, 0x8
	175	beq oword_align
	176	lw r8, [src, 0x0]
	177	lw r9, [src, 0x4]
	178	subi r5, 0x8 /* len-=0x8 */
	179	ADDC(sum, r8)
	180	ADDC(sum, r9)
	181	addi src, 0x8
	182
	183	oword_align: /* 16bytes */
	184	andri.c r26, src, 0x10
	185	beq begin_movement
	186	lw r10, [src, 0x08]
	187	lw r11, [src, 0x0c]
	188	lw r8, [src, 0x00]
	189	lw r9, [src, 0x04]
	190	ADDC(sum, r10)
	191	ADDC(sum, r11)
	192	ADDC(sum, r8)
	193	ADDC(sum, r9)
	194	subi r5, 0x10
	195	addi src, 0x10
	196
	197	begin_movement:
	198	srli.c r26, r5, 0x7 /* len>=128? */
	199	beq 1f /* len<128 */
	200
	201	/* r26 is the result that computed in oword_align */
	202	move_128bytes:
	203	CSUM_BIGCHUNK(src, 0x00, sum)
	204	CSUM_BIGCHUNK(src, 0x20, sum)
	205	CSUM_BIGCHUNK(src, 0x40, sum)
	206	CSUM_BIGCHUNK(src, 0x60, sum)
	207	subi.c r26, 0x01 /* r26 equals len/128 */
	208	addi src, 0x80
	209	bne move_128bytes
	210
	211	1: /* len<128,we process 64byte here */
	212	andri.c r10, r5, 0x40
	213	beq 1f
	214
	215	move_64bytes:
	216	CSUM_BIGCHUNK(src, 0x00, sum)
	217	CSUM_BIGCHUNK(src, 0x20, sum)
	218	addi src, 0x40
	219
	220	1: /* len<64 */
	221	andri r26, r5, 0x1c /* 0x1c=28 */
	222	andri.c r10, r5, 0x20
	223	beq do_end_words /* decided by andri */
	224
	225	move_32bytes:
	226	CSUM_BIGCHUNK(src, 0x00, sum)
	227	andri r26, r5, 0x1c
	228	addri src, src, 0x20
	229
	230	do_end_words: /* len<32 */
	231	/* r26 was set already in dword_align */
	232	cmpi.c r26, 0x0
	233	beq maybe_end_cruft /* len<28 or len<56 */
	234	srli r26, r26, 0x2
	235
	236	end_words:
	237	lw r8, [src]
	238	subi.c r26, 0x1 /* unit is 4 byte */
	239	ADDC(sum, r8)
	240	addi src, 0x4
	241	cmpi.c r26, 0x0
	242	bne end_words /* r26!=0 */
	243
	244	maybe_end_cruft: /* len<4 */
	245	andri r10, r5, 0x3
	246
	247	small_memcpy:
	248	mv r5, r10
	249	j small_csumcpy
	250
	251	out:
	252	mv r4, sum
	253	br r3
	254
	255	END(csum_partial)