author		Vineet Gupta <vgupta@synopsys.com>	2013-01-18 04:42:17 -0500
committer	Vineet Gupta <vgupta@synopsys.com>	2013-02-11 09:30:34 -0500
commit		ca15c8ecd588dda4377d18d6d27bc1e87b4177cb (patch)
tree		18dd349a93acd9d32bcf18f2c8c8eb93369fc44d
parent		64e69073c35439fa19c2ad2a4a18834e0314f071 (diff)
ARC: Checksum/byteorder/swab routines
TBD: do_csum still needs to be written in asm

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
-rw-r--r--	arch/arc/include/asm/byteorder.h	 18
-rw-r--r--	arch/arc/include/asm/checksum.h		101
-rw-r--r--	arch/arc/include/asm/swab.h		 98
3 files changed, 217 insertions(+), 0 deletions(-)
diff --git a/arch/arc/include/asm/byteorder.h b/arch/arc/include/asm/byteorder.h
new file mode 100644
index 000000000000..9da71d415c38
--- /dev/null
+++ b/arch/arc/include/asm/byteorder.h
@@ -0,0 +1,18 @@
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef __ASM_ARC_BYTEORDER_H
#define __ASM_ARC_BYTEORDER_H

#ifdef CONFIG_CPU_BIG_ENDIAN
#include <linux/byteorder/big_endian.h>
#else
#include <linux/byteorder/little_endian.h>
#endif

#endif /* __ASM_ARC_BYTEORDER_H */
diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
new file mode 100644
index 000000000000..10957298b7a3
--- /dev/null
+++ b/arch/arc/include/asm/checksum.h
@@ -0,0 +1,101 @@
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Joern Rennecke <joern.rennecke@embecosm.com>: Jan 2012
 *  -Insn scheduling improvements to csum core routines.
 *      = csum_fold( ) largely derived from ARM version.
 *      = ip_fast_csum( ) reworked for modulo scheduling
 *  -gcc 4.4.x broke networking. Alias analysis needed to be primed.
 *   Worked around by adding a memory clobber to ip_fast_csum( )
 *
 * vineetg: May 2010
 *  -Rewrote ip_fast_csum( ) and csum_fold( ) with fast inline asm
 */

#ifndef _ASM_ARC_CHECKSUM_H
#define _ASM_ARC_CHECKSUM_H

/*
 * Fold a partial checksum
 *
 * The two 16-bit half-words comprising the 32-bit sum are added,
 * any carry out of bit 15 is added back in, and the final 16-bit
 * result is inverted.
 */
static inline __sum16 csum_fold(__wsum s)
{
	unsigned r = s << 16 | s >> 16;	/* ror */
	s = ~s;
	s -= r;
	return s >> 16;
}
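
/*
 * Reference only, not part of the original patch (the name below is
 * hypothetical): a minimal portable sketch of the same fold. It makes
 * explicit what the rotate/invert/subtract trick above computes: add
 * the two halves, fold any carry back in, invert the low 16 bits.
 */
#if 0
static inline __sum16 csum_fold_ref(__wsum s)
{
	unsigned int sum = (__force unsigned int)s;

	sum = (sum & 0xffff) + (sum >> 16);	/* add low and high halves */
	sum += sum >> 16;			/* fold any carry back in */
	return (__force __sum16)~sum;		/* invert low 16 bits */
}
#endif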

/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksums on 4-octet boundaries.
 */
static inline __sum16
ip_fast_csum(const void *iph, unsigned int ihl)
{
	const void *ptr = iph;
	unsigned int tmp, tmp2, sum;

	__asm__(
	"	ld.ab  %0, [%3, 4]		\n" /* sum  = word 0 */
	"	ld.ab  %2, [%3, 4]		\n" /* tmp2 = word 1 */
	"	sub    %1, %4, 2		\n" /* words remaining */
	"	lsr.f  lp_count, %1, 1		\n" /* word pairs remaining */
	"	bcc    0f			\n" /* even count: skip pre-add */
	"	add.f  %0, %0, %2		\n" /* odd: fold one word early */
	"	ld.ab  %2, [%3, 4]		\n"
	"0:	lp     1f			\n" /* zero-overhead loop, 2 words/iter */
	"	ld.ab  %1, [%3, 4]		\n"
	"	adc.f  %0, %0, %2		\n"
	"	ld.ab  %2, [%3, 4]		\n"
	"	adc.f  %0, %0, %1		\n"
	"1:	adc.f  %0, %0, %2		\n" /* last word, with carry */
	"	add.cs %0, %0, 1		\n" /* absorb final carry */
	: "=&r"(sum), "=r"(tmp), "=&r"(tmp2), "+&r"(ptr)
	: "r"(ihl)
	: "cc", "lp_count", "memory");

	return csum_fold(sum);
}
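
/*
 * Reference only, not part of the original patch (the name below is
 * hypothetical): a plain-C model of what the loop above computes, i.e.
 * a 32-bit end-around-carry sum over 'ihl' 32-bit words of the header,
 * folded to 16 bits. A wide accumulator stands in for the adc chain.
 */
#if 0
static inline __sum16 ip_fast_csum_ref(const void *iph, unsigned int ihl)
{
	const unsigned int *p = iph;
	unsigned long long acc = 0;

	while (ihl--)
		acc += *p++;			/* sum 4-byte words */
	while (acc >> 32)			/* fold carries: 64 -> 32 bits */
		acc = (acc & 0xffffffffULL) + (acc >> 32);
	return csum_fold((__force __wsum)(unsigned int)acc);
}
#endif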

/*
 * TCP pseudo-header is 12 bytes:
 * SA [4], DA [4], zeroes [1], Proto [1], TCP Seg (hdr + data) Len [2]
 */
static inline __wsum
csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
		   unsigned short proto, __wsum sum)
{
	__asm__ __volatile__(
	"	add.f %0, %0, %1	\n"
	"	adc.f %0, %0, %2	\n"
	"	adc.f %0, %0, %3	\n"
	"	adc.f %0, %0, %4	\n"
	"	adc   %0, %0, 0		\n"
	: "+&r"(sum)
	: "r"(saddr), "r"(daddr),
#ifdef CONFIG_CPU_BIG_ENDIAN
	  "r"(len),
#else
	  "r"(len << 8),
#endif
	  "r"(htons(proto))
	: "cc");

	return sum;
}
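
/*
 * Usage sketch (reference only): csum_tcpudp_magic() from
 * <asm-generic/checksum.h> builds on the helper above, roughly:
 *
 *	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 *
 * The little-endian 'len << 8' operand works because ones'-complement
 * sums are taken mod 0xffff, where (x << 8) is congruent to the
 * byte-swapped x: 65536*hi + 256*lo == hi + 256*lo (mod 65535), so the
 * length is effectively added in network byte order like the other
 * pseudo-header fields.
 */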

#define csum_fold csum_fold
#define ip_fast_csum ip_fast_csum
#define csum_tcpudp_nofold csum_tcpudp_nofold

#include <asm-generic/checksum.h>

#endif /* _ASM_ARC_CHECKSUM_H */
diff --git a/arch/arc/include/asm/swab.h b/arch/arc/include/asm/swab.h
new file mode 100644
index 000000000000..095599a73195
--- /dev/null
+++ b/arch/arc/include/asm/swab.h
@@ -0,0 +1,98 @@
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * vineetg: May 2011
 *  -Support single cycle endian-swap insn in ARC700 4.10
 *
 * vineetg: June 2009
 *  -Better htonl implementation (5 instead of 9 ALU instructions)
 *  -Hardware assisted single cycle bswap (use case of ARC custom instruction)
 */

#ifndef __ASM_ARC_SWAB_H
#define __ASM_ARC_SWAB_H

#include <linux/types.h>

/* Native single cycle endian swap insn */
#ifdef CONFIG_ARC_HAS_SWAPE

#define __arch_swab32(x)		\
({					\
	unsigned int tmp = x;		\
	__asm__(			\
	"	swape %0, %1	\n"	\
	: "=r" (tmp)			\
	: "r" (tmp));			\
	tmp;				\
})
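
/*
 * Usage sketch (reference only; 'pkt' is a hypothetical struct): once
 * __arch_swab32 is defined, linux/swab.h wires it into __swab32() and
 * the byteorder helpers, so e.g.
 *
 *	u32 seq = be32_to_cpu(pkt->seq);
 *
 * compiles to a single SWAPE insn on little-endian ARC builds.
 */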

#else

/* Several ways of Endian-Swap emulation for ARC:
 * 0: kernel generic
 * 1: ARC optimised "C"
 * 2: ARC custom instruction
 */
#define ARC_BSWAP_TYPE	1

#if (ARC_BSWAP_TYPE == 1)		/******* Software only ********/

/* The kernel default implementation of htonl is
 * return x << 24 | x >> 24 |
 *	(x & (__u32)0x0000ff00UL) << 8 | (x & (__u32)0x00ff0000UL) >> 8;
 *
 * This generates 9 instructions on ARC (excluding the ld/st):
 *
 * 8051fd8c: ld   r3,[r7,20]	; Mem op : get the value to be swapped
 * 8051fd98: asl  r5,r3,24	; get 3rd byte
 * 8051fd9c: lsr  r2,r3,24	; get 0th byte
 * 8051fda0: and  r4,r3,0xff00
 * 8051fda8: asl  r4,r4,8	; get 1st byte
 * 8051fdac: and  r3,r3,0x00ff0000
 * 8051fdb4: or   r2,r2,r5	; combine 0th and 3rd bytes
 * 8051fdb8: lsr  r3,r3,8	; 2nd byte at correct place in dst reg
 * 8051fdbc: or   r2,r2,r4	; combine 0,3 bytes with 1st byte
 * 8051fdc0: or   r2,r2,r3	; combine 0,3,1 bytes with 2nd byte
 * 8051fdc4: st   r2,[r1,20]	; Mem op : save result back to mem
 *
 * Joern suggested a better "C" algorithm, which is great since
 * (1) it is portable to any architecture, and
 * (2) at the same time it takes advantage of the ARC ISA (rotate insns)
 */

#define __arch_swab32(x)					\
({	unsigned long __in = (x), __tmp;			\
	__tmp = __in << 8 | __in >> 24;	/* ror tmp,in,24 */	\
	__in = __in << 24 | __in >> 8;	/* ror in,in,8 */	\
	__tmp ^= __in;						\
	__tmp &= 0xff00ff;					\
	__tmp ^ __in;						\
})
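
/*
 * Worked example of the trick above (reference only), for
 * x = 0x12345678; the swapped result should be 0x78563412:
 *
 *	tmp = ror(x, 24) = 0x34567812	; byte lanes 2,0 already correct
 *	x   = ror(x, 8)  = 0x78123456	; byte lanes 3,1 already correct
 *	tmp ^= x	 = 0x4c444c44
 *	tmp &= 0x00ff00ff = 0x00440044	; keep lanes where ror(,24) wins
 *	tmp ^ x		 = 0x78563412	; merge the two rotations
 */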

#elif (ARC_BSWAP_TYPE == 2)	/* Custom single cycle bswap instruction */

#define __arch_swab32(x)						\
({									\
	unsigned int tmp = x;						\
	__asm__(							\
	"	.extInstruction bswap, 7, 0x00, SUFFIX_NONE, SYNTAX_2OP	\n"\
	"	bswap %0, %1						\n"\
	: "=r" (tmp)							\
	: "r" (tmp));							\
	tmp;								\
})

#endif /* ARC_BSWAP_TYPE=zzz */

#endif /* CONFIG_ARC_HAS_SWAPE */

#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
#define __SWAB_64_THRU_32__
#endif

#endif /* __ASM_ARC_SWAB_H */