author     Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d   /arch/m32r/lib/checksum.S
tags       Linux-2.6.12-rc2, v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it.

Let it rip!
Diffstat (limited to 'arch/m32r/lib/checksum.S')
 -rw-r--r--  arch/m32r/lib/checksum.S | 322
 1 file changed, 322 insertions(+), 0 deletions(-)
diff --git a/arch/m32r/lib/checksum.S b/arch/m32r/lib/checksum.S
new file mode 100644
index 000000000000..f6fc1bdb87e4
--- /dev/null
+++ b/arch/m32r/lib/checksum.S
@@ -0,0 +1,322 @@
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *                           handling.
 *              Andi Kleen,  add zeroing on error
 *                           converted to pure assembler
 *              Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */
/* $Id$ */


#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */


#ifdef CONFIG_ISA_DUAL_ISSUE

        /*
         * Experiments with Ethernet and SLIP connections show that buff
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */

        .text
ENTRY(csum_partial)
        ; Function args
        ;  r0: unsigned char *buff
        ;  r1: int len
        ;  r2: unsigned int sum

        push    r2              ||  ldi     r2, #0
        and3    r7, r0, #1      ; Check alignment.
        beqz    r7, 1f          ; Jump if alignment is ok.
        ; 1-byte mis aligned
        ldub    r4, @r0         ||  addi    r0, #1
        ; clear c-bit || Alignment uses up bytes.
        cmp     r0, r0          ||  addi    r1, #-1
        ldi     r3, #0          ||  addx    r2, r4
        addx    r2, r3
        .fillinsn
1:
        and3    r4, r0, #2      ; Check alignment.
        beqz    r4, 2f          ; Jump if alignment is ok.
        ; clear c-bit || Alignment uses up two bytes.
        cmp     r0, r0          ||  addi    r1, #-2
        bgtz    r1, 1f          ; Jump if we had at least two bytes.
        bra     4f              ||  addi    r1, #2
        .fillinsn               ; len(r1) was < 2.  Deal with it.
1:
        ; 2-byte aligned
        lduh    r4, @r0         ||  ldi     r3, #0
        addx    r2, r4          ||  addi    r0, #2
        addx    r2, r3
        .fillinsn
2:
        ; 4-byte aligned
        cmp     r0, r0          ; clear c-bit
        srl3    r6, r1, #5
        beqz    r6, 2f
        .fillinsn

1:      ld      r3, @r0+
        ld      r4, @r0+        ; +4
        ld      r5, @r0+        ; +8
        ld      r3, @r0+        ||  addx    r2, r3  ; +12
        ld      r4, @r0+        ||  addx    r2, r4  ; +16
        ld      r5, @r0+        ||  addx    r2, r5  ; +20
        ld      r3, @r0+        ||  addx    r2, r3  ; +24
        ld      r4, @r0+        ||  addx    r2, r4  ; +28
        addx    r2, r5          ||  addi    r6, #-1
        addx    r2, r3
        addx    r2, r4
        bnez    r6, 1b

        addx    r2, r6          ; r6=0
        cmp     r0, r0          ; This clears c-bit
        .fillinsn
2:      and3    r6, r1, #0x1c   ; withdraw len
        beqz    r6, 4f
        srli    r6, #2
        .fillinsn

3:      ld      r4, @r0+        ||  addi    r6, #-1
        addx    r2, r4
        bnez    r6, 3b

        addx    r2, r6          ; r6=0
        cmp     r0, r0          ; This clears c-bit
        .fillinsn
4:      and3    r1, r1, #3
        beqz    r1, 7f          ; if len == 0 goto end
        and3    r6, r1, #2
        beqz    r6, 5f          ; if len < 2  goto 5f(1byte)
        lduh    r4, @r0         ||  addi    r0, #2
        addi    r1, #-2         ||  slli    r4, #16
        addx    r2, r4
        beqz    r1, 6f
        .fillinsn
5:      ldub    r4, @r0         ||  ldi     r1, #0
#ifndef __LITTLE_ENDIAN__
        slli    r4, #8
#endif
        addx    r2, r4
        .fillinsn
6:      addx    r2, r1
        .fillinsn
7:
        and3    r0, r2, #0xffff
        srli    r2, #16
        add     r0, r2
        srl3    r2, r0, #16
        beqz    r2, 1f
        addi    r0, #1
        and3    r0, r0, #0xffff
        .fillinsn
1:
        beqz    r7, 1f          ; swap the upper byte for the lower
        and3    r2, r0, #0xff
        srl3    r0, r0, #8
        slli    r2, #8
        or      r0, r2
        .fillinsn
1:
        pop     r2              ||  cmp     r0, r0
        addx    r0, r2          ||  ldi     r2, #0
        addx    r0, r2
        jmp     r14

#else /* not CONFIG_ISA_DUAL_ISSUE */

        /*
         * Experiments with Ethernet and SLIP connections show that buff
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */

        .text
ENTRY(csum_partial)
        ; Function args
        ;  r0: unsigned char *buff
        ;  r1: int len
        ;  r2: unsigned int sum

        push    r2
        ldi     r2, #0
        and3    r7, r0, #1      ; Check alignment.
        beqz    r7, 1f          ; Jump if alignment is ok.
        ; 1-byte mis aligned
        ldub    r4, @r0
        addi    r0, #1
        addi    r1, #-1         ; Alignment uses up bytes.
        cmp     r0, r0          ; clear c-bit
        ldi     r3, #0
        addx    r2, r4
        addx    r2, r3
        .fillinsn
1:
        and3    r4, r0, #2      ; Check alignment.
        beqz    r4, 2f          ; Jump if alignment is ok.
        addi    r1, #-2         ; Alignment uses up two bytes.
        cmp     r0, r0          ; clear c-bit
        bgtz    r1, 1f          ; Jump if we had at least two bytes.
        addi    r1, #2          ; len(r1) was < 2.  Deal with it.
        bra     4f
        .fillinsn
1:
        ; 2-byte aligned
        lduh    r4, @r0
        addi    r0, #2
        ldi     r3, #0
        addx    r2, r4
        addx    r2, r3
        .fillinsn
2:
        ; 4-byte aligned
        cmp     r0, r0          ; clear c-bit
        srl3    r6, r1, #5
        beqz    r6, 2f
        .fillinsn

1:      ld      r3, @r0+
        ld      r4, @r0+        ; +4
        ld      r5, @r0+        ; +8
        addx    r2, r3
        addx    r2, r4
        addx    r2, r5
        ld      r3, @r0+        ; +12
        ld      r4, @r0+        ; +16
        ld      r5, @r0+        ; +20
        addx    r2, r3
        addx    r2, r4
        addx    r2, r5
        ld      r3, @r0+        ; +24
        ld      r4, @r0+        ; +28
        addi    r6, #-1
        addx    r2, r3
        addx    r2, r4
        bnez    r6, 1b
        addx    r2, r6          ; r6=0
        cmp     r0, r0          ; This clears c-bit
        .fillinsn

2:      and3    r6, r1, #0x1c   ; withdraw len
        beqz    r6, 4f
        srli    r6, #2
        .fillinsn

3:      ld      r4, @r0+
        addi    r6, #-1
        addx    r2, r4
        bnez    r6, 3b
        addx    r2, r6          ; r6=0
        cmp     r0, r0          ; This clears c-bit
        .fillinsn

4:      and3    r1, r1, #3
        beqz    r1, 7f          ; if len == 0 goto end
        and3    r6, r1, #2
        beqz    r6, 5f          ; if len < 2  goto 5f(1byte)

        lduh    r4, @r0
        addi    r0, #2
        addi    r1, #-2
        slli    r4, #16
        addx    r2, r4
        beqz    r1, 6f
        .fillinsn
5:      ldub    r4, @r0
#ifndef __LITTLE_ENDIAN__
        slli    r4, #8
#endif
        addx    r2, r4
        .fillinsn
6:      ldi     r5, #0
        addx    r2, r5
        .fillinsn
7:
        and3    r0, r2, #0xffff
        srli    r2, #16
        add     r0, r2
        srl3    r2, r0, #16
        beqz    r2, 1f
        addi    r0, #1
        and3    r0, r0, #0xffff
        .fillinsn
1:
        beqz    r7, 1f
        mv      r2, r0
        srl3    r0, r2, #8
        and3    r2, r2, #0xff
        slli    r2, #8
        or      r0, r2
        .fillinsn
1:
        pop     r2
        cmp     r0, r0
        addx    r0, r2
        ldi     r2, #0
        addx    r0, r2
        jmp     r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
                                        int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 *        them all but there's no guarantee.
 */

ENTRY(csum_partial_copy_generic)
        nop
        nop
        nop
        nop
        jmp     r14
        nop
        nop
        nop

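
For orientation: csum_partial() above computes the standard Internet (RFC 1071) one's-complement checksum, folding the 32-bit accumulator to 16 bits before adding it back into the caller's running sum, while csum_partial_copy_generic() is only a placeholder in this initial import (nops followed by jmp r14). The portable C sketch below restates that arithmetic for a big-endian configuration; it is illustrative only, not the kernel's implementation, and the names csum_partial_sketch and csum_fold16 are invented for this example. The odd-start-address trick the assembly uses (sum from the aligned boundary, then byte-swap the folded result, the r7 path) is described in a comment but not reproduced.

    #include <stddef.h>
    #include <stdint.h>

    /* Fold a 32-bit accumulator to 16 bits with end-around carry;
     * this corresponds to the and3/srli/add sequence at label 7: above. */
    static uint32_t csum_fold16(uint32_t x)
    {
            x = (x & 0xffff) + (x >> 16);
            x = (x & 0xffff) + (x >> 16);
            return x;
    }

    /* Reference sketch of the arithmetic only (aligned buffer case). */
    uint32_t csum_partial_sketch(const unsigned char *buf, size_t len, uint32_t sum)
    {
            uint32_t acc = 0;
            size_t i;

            /* Sum the buffer as big-endian 16-bit words, with a final lone
             * byte placed in the high half when len is odd (the slli #8 in
             * the non-__LITTLE_ENDIAN__ path). */
            for (i = 0; i + 1 < len; i += 2)
                    acc += ((uint32_t)buf[i] << 8) | buf[i + 1];
            if (i < len)
                    acc += (uint32_t)buf[i] << 8;

            /* The assembly additionally handles a buffer starting on an odd
             * address by summing from the aligned boundary and byte-swapping
             * the folded result; that micro-optimization is omitted here. */
            acc = csum_fold16(acc);

            /* Accumulate into the caller's running sum with end-around carry,
             * mirroring the final pop/addx/addx sequence. */
            uint64_t t = (uint64_t)sum + acc;
            return (uint32_t)(t + (t >> 32));
    }

Because one's-complement addition is associative and commutative, summing 32-bit words with carry (the unrolled ld/addx loop) and folding once at the end gives the same 16-bit result as summing 16-bit words directly, which is why the assembly can mix byte, halfword, and word additions freely.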