author		Vineet Gupta <vgupta@synopsys.com>	2013-01-18 04:42:17 -0500
committer	Vineet Gupta <vgupta@synopsys.com>	2013-02-11 09:30:34 -0500
commit		ca15c8ecd588dda4377d18d6d27bc1e87b4177cb (patch)
tree		18dd349a93acd9d32bcf18f2c8c8eb93369fc44d
parent		64e69073c35439fa19c2ad2a4a18834e0314f071 (diff)
ARC: Checksum/byteorder/swab routines
TBD: do_csum still needs to be written in asm

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
-rw-r--r--	arch/arc/include/asm/byteorder.h	 18
-rw-r--r--	arch/arc/include/asm/checksum.h		101
-rw-r--r--	arch/arc/include/asm/swab.h		 98
3 files changed, 217 insertions(+), 0 deletions(-)
diff --git a/arch/arc/include/asm/byteorder.h b/arch/arc/include/asm/byteorder.h
new file mode 100644
index 000000000000..9da71d415c38
--- /dev/null
+++ b/arch/arc/include/asm/byteorder.h
@@ -0,0 +1,18 @@
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef __ASM_ARC_BYTEORDER_H
#define __ASM_ARC_BYTEORDER_H

#ifdef CONFIG_CPU_BIG_ENDIAN
#include <linux/byteorder/big_endian.h>
#else
#include <linux/byteorder/little_endian.h>
#endif

#endif /* __ASM_ARC_BYTEORDER_H */
diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
new file mode 100644
index 000000000000..10957298b7a3
--- /dev/null
+++ b/arch/arc/include/asm/checksum.h
@@ -0,0 +1,101 @@
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Joern Rennecke <joern.rennecke@embecosm.com>: Jan 2012
 *  -Insn scheduling improvements to csum core routines.
 *      = csum_fold( ) largely derived from ARM version.
 *      = ip_fast_csum( ) reworked for modulo scheduling
 *  -gcc 4.4.x broke networking. Alias analysis needed to be primed.
 *   Worked around by adding a memory clobber to ip_fast_csum( )
 *
 * vineetg: May 2010
 *  -Rewrote ip_fast_csum( ) and csum_fold( ) with fast inline asm
 */

#ifndef _ASM_ARC_CHECKSUM_H
#define _ASM_ARC_CHECKSUM_H

/*
 * Fold a partial checksum
 *
 * The two 16-bit half-words comprising the 32-bit sum are added,
 * any carry out of bit 15 is added back in, and the final 16-bit
 * result is inverted.
 */
static inline __sum16 csum_fold(__wsum s)
{
	unsigned r = s << 16 | s >> 16;	/* ror */
	s = ~s;
	s -= r;
	return s >> 16;
}
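
/*
 * Reference only, not part of the original patch (the name below is
 * hypothetical): a minimal portable sketch of the same fold. It makes
 * explicit what the rotate/invert/subtract trick above computes: add
 * the two halves, fold any carry back in, invert the low 16 bits.
 */
#if 0
static inline __sum16 csum_fold_ref(__wsum s)
{
	unsigned int sum = (__force unsigned int)s;

	sum = (sum & 0xffff) + (sum >> 16);	/* add low and high halves */
	sum += sum >> 16;			/* fold any carry back in */
	return (__force __sum16)~sum;		/* invert low 16 bits */
}
#endif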

/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksums on 4-octet boundaries.
 */
static inline __sum16
ip_fast_csum(const void *iph, unsigned int ihl)
{
	const void *ptr = iph;
	unsigned int tmp, tmp2, sum;

	__asm__(
	"	ld.ab  %0, [%3, 4]		\n" /* sum  = word 0 */
	"	ld.ab  %2, [%3, 4]		\n" /* tmp2 = word 1 */
	"	sub    %1, %4, 2		\n" /* words remaining */
	"	lsr.f  lp_count, %1, 1		\n" /* word pairs remaining */
	"	bcc    0f			\n" /* even count: skip pre-add */
	"	add.f  %0, %0, %2		\n" /* odd: fold one word early */
	"	ld.ab  %2, [%3, 4]		\n"
	"0:	lp     1f			\n" /* zero-overhead loop, 2 words/iter */
	"	ld.ab  %1, [%3, 4]		\n"
	"	adc.f  %0, %0, %2		\n"
	"	ld.ab  %2, [%3, 4]		\n"
	"	adc.f  %0, %0, %1		\n"
	"1:	adc.f  %0, %0, %2		\n" /* last word, with carry */
	"	add.cs %0, %0, 1		\n" /* absorb final carry */
	: "=&r"(sum), "=r"(tmp), "=&r"(tmp2), "+&r"(ptr)
	: "r"(ihl)
	: "cc", "lp_count", "memory");

	return csum_fold(sum);
}
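
/*
 * Reference only, not part of the original patch (the name below is
 * hypothetical): a plain-C model of what the loop above computes, i.e.
 * a 32-bit end-around-carry sum over 'ihl' 32-bit words of the header,
 * folded to 16 bits. A wide accumulator stands in for the adc chain.
 */
#if 0
static inline __sum16 ip_fast_csum_ref(const void *iph, unsigned int ihl)
{
	const unsigned int *p = iph;
	unsigned long long acc = 0;

	while (ihl--)
		acc += *p++;			/* sum 4-byte words */
	while (acc >> 32)			/* fold carries: 64 -> 32 bits */
		acc = (acc & 0xffffffffULL) + (acc >> 32);
	return csum_fold((__force __wsum)(unsigned int)acc);
}
#endif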

/*
 * TCP pseudo-header is 12 bytes:
 * SA [4], DA [4], zeroes [1], Proto [1], TCP Seg (hdr + data) Len [2]
 */
static inline __wsum
csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
		   unsigned short proto, __wsum sum)
{
	__asm__ __volatile__(
	"	add.f %0, %0, %1	\n"
	"	adc.f %0, %0, %2	\n"
	"	adc.f %0, %0, %3	\n"
	"	adc.f %0, %0, %4	\n"
	"	adc   %0, %0, 0		\n"
	: "+&r"(sum)
	: "r"(saddr), "r"(daddr),
#ifdef CONFIG_CPU_BIG_ENDIAN
	  "r"(len),
#else
	  "r"(len << 8),
#endif
	  "r"(htons(proto))
	: "cc");

	return sum;
}
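
/*
 * Usage sketch (reference only): csum_tcpudp_magic() from
 * <asm-generic/checksum.h> builds on the helper above, roughly:
 *
 *	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 *
 * The little-endian 'len << 8' operand works because ones'-complement
 * sums are taken mod 0xffff, where (x << 8) is congruent to the
 * byte-swapped x: 65536*hi + 256*lo == hi + 256*lo (mod 65535), so the
 * length is effectively added in network byte order like the other
 * pseudo-header fields.
 */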

#define csum_fold csum_fold
#define ip_fast_csum ip_fast_csum
#define csum_tcpudp_nofold csum_tcpudp_nofold

#include <asm-generic/checksum.h>

#endif /* _ASM_ARC_CHECKSUM_H */
diff --git a/arch/arc/include/asm/swab.h b/arch/arc/include/asm/swab.h
new file mode 100644
index 000000000000..095599a73195
--- /dev/null
+++ b/arch/arc/include/asm/swab.h
@@ -0,0 +1,98 @@
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * vineetg: May 2011
 *  -Support single cycle endian-swap insn in ARC700 4.10
 *
 * vineetg: June 2009
 *  -Better htonl implementation (5 instead of 9 ALU instructions)
 *  -Hardware assisted single cycle bswap (use case of ARC custom instruction)
 */

#ifndef __ASM_ARC_SWAB_H
#define __ASM_ARC_SWAB_H

#include <linux/types.h>

/* Native single cycle endian swap insn */
#ifdef CONFIG_ARC_HAS_SWAPE

#define __arch_swab32(x)		\
({					\
	unsigned int tmp = x;		\
	__asm__(			\
	"	swape %0, %1	\n"	\
	: "=r" (tmp)			\
	: "r" (tmp));			\
	tmp;				\
})
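
/*
 * Usage sketch (reference only; 'pkt' is a hypothetical struct): once
 * __arch_swab32 is defined, linux/swab.h wires it into __swab32() and
 * the byteorder helpers, so e.g.
 *
 *	u32 seq = be32_to_cpu(pkt->seq);
 *
 * compiles to a single SWAPE insn on little-endian ARC builds.
 */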

#else

/* Several ways of Endian-Swap emulation for ARC:
 * 0: kernel generic
 * 1: ARC optimised "C"
 * 2: ARC custom instruction
 */
#define ARC_BSWAP_TYPE	1

#if (ARC_BSWAP_TYPE == 1)		/******* Software only ********/

/* The kernel default implementation of htonl is
 * return x << 24 | x >> 24 |
 *	(x & (__u32)0x0000ff00UL) << 8 | (x & (__u32)0x00ff0000UL) >> 8;
 *
 * This generates 9 instructions on ARC (excluding the ld/st):
 *
 * 8051fd8c: ld   r3,[r7,20]	; Mem op : get the value to be swapped
 * 8051fd98: asl  r5,r3,24	; get 3rd byte
 * 8051fd9c: lsr  r2,r3,24	; get 0th byte
 * 8051fda0: and  r4,r3,0xff00
 * 8051fda8: asl  r4,r4,8	; get 1st byte
 * 8051fdac: and  r3,r3,0x00ff0000
 * 8051fdb4: or   r2,r2,r5	; combine 0th and 3rd bytes
 * 8051fdb8: lsr  r3,r3,8	; 2nd byte at correct place in dst reg
 * 8051fdbc: or   r2,r2,r4	; combine 0,3 bytes with 1st byte
 * 8051fdc0: or   r2,r2,r3	; combine 0,3,1 bytes with 2nd byte
 * 8051fdc4: st   r2,[r1,20]	; Mem op : save result back to mem
 *
 * Joern suggested a better "C" algorithm, which is great since
 * (1) it is portable to any architecture, and
 * (2) at the same time it takes advantage of the ARC ISA (rotate insns)
 */

#define __arch_swab32(x)					\
({	unsigned long __in = (x), __tmp;			\
	__tmp = __in << 8 | __in >> 24;	/* ror tmp,in,24 */	\
	__in = __in << 24 | __in >> 8;	/* ror in,in,8 */	\
	__tmp ^= __in;						\
	__tmp &= 0xff00ff;					\
	__tmp ^ __in;						\
})
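
/*
 * Worked example of the trick above (reference only), for
 * x = 0x12345678; the swapped result should be 0x78563412:
 *
 *	tmp = ror(x, 24) = 0x34567812	; byte lanes 2,0 already correct
 *	x   = ror(x, 8)  = 0x78123456	; byte lanes 3,1 already correct
 *	tmp ^= x	 = 0x4c444c44
 *	tmp &= 0x00ff00ff = 0x00440044	; keep lanes where ror(,24) wins
 *	tmp ^ x		 = 0x78563412	; merge the two rotations
 */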

#elif (ARC_BSWAP_TYPE == 2)	/* Custom single cycle bswap instruction */

#define __arch_swab32(x)						\
({									\
	unsigned int tmp = x;						\
	__asm__(							\
	"	.extInstruction bswap, 7, 0x00, SUFFIX_NONE, SYNTAX_2OP	\n"\
	"	bswap %0, %1						\n"\
	: "=r" (tmp)							\
	: "r" (tmp));							\
	tmp;								\
})

#endif /* ARC_BSWAP_TYPE=zzz */

#endif /* CONFIG_ARC_HAS_SWAPE */

#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
#define __SWAB_64_THRU_32__
#endif

#endif /* __ASM_ARC_SWAB_H */