C6X: library code

Original port to early 2.6 kernel using TI COFF toolchain. Brought up to date by Mark Salter <msalter@redhat.com> Signed-off-by: Aurelien Jacquiot <a-jacquiot@ti.com> Signed-off-by: Mark Salter <msalter@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de>
author: Aurelien Jacquiot <a-jacquiot@ti.com> 2011-10-04 11:15:51 -0400
committer: Mark Salter <msalter@redhat.com> 2011-10-06 19:48:23 -0400
commit: 09831ca73443bd819ad7993db5409b19c899ba33 (patch)
tree: 4558fb0a5e0e1fcd8582be2155cd9c7498e429db /arch/c6x/lib
parent: a7f626c1948ab6178d2338831c5ffea7385e9f7f (diff)
18 files changed, 1315 insertions, 0 deletions
diff --git a/arch/c6x/lib/checksum.c b/arch/c6x/lib/checksum.c
new file mode 100644
index 000000000000..67cc93b0b932
--- /dev/null
+++ b/arch/c6x/lib/checksum.c
@@ -0,0 +1,36 @@
+/*
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <net/checksum.h>
+#include <asm/byteorder.h>
+/*
+ * Copy len bytes from user space (src) into dst, then checksum dst.
+ * The value returned by __copy_from_user() is treated as the number of
+ * trailing bytes that were not copied: when it is non-zero that tail of
+ * dst is zero-filled and *csum_err is set to -EFAULT, otherwise *csum_err
+ * is set to 0.  The checksum of all len bytes of dst, combined with sum
+ * by csum_partial(), is returned in both cases.
+ */
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+                            __wsum sum, int *csum_err)
+{
+        int uncopied = __copy_from_user(dst, src, len);
+        if (!uncopied) {
+                *csum_err = 0;
+                return csum_partial(dst, len, sum);
+        }
+        /* partial copy: clear what was not written, then report the fault */
+        memset(dst + len - uncopied, 0, uncopied);
+        *csum_err = -EFAULT;
+        return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+/* The routines below are implemented in assembly in csum_64plus.S */
+EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy);
+EXPORT_SYMBOL(ip_compute_csum);
+EXPORT_SYMBOL(ip_fast_csum);
diff --git a/arch/c6x/lib/csum_64plus.S b/arch/c6x/lib/csum_64plus.S
new file mode 100644
index 000000000000..6d2589647227
--- /dev/null
+++ b/arch/c6x/lib/csum_64plus.S
@@ -0,0 +1,419 @@
+;
+;  linux/arch/c6x/lib/csum_64plus.s
+;
+;  Port on Texas Instruments TMS320C6x architecture
+;
+;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+;
+;  This program is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License version 2 as
+;  published by the Free Software Foundation.
+;
+#include <linux/linkage.h>
+; csum_partial_copy: copy len bytes from src to dst and checksum them
+;unsigned int csum_partial_copy(const char *src, char * dst,
+;                               int len, int sum)
+; The 16-bit halfword sum is folded to 16 bits, then the given sum is added.
+; A4:   src
+; B4:   dst
+; A6:   len
+; B6:   sum
+; return csum in A4
+; ILC is saved in B30 on entry and restored on return.
+        .text
+ENTRY(csum_partial_copy)
+        MVC     .S2     ILC,B30         ; save the caller ILC
+        MV      .D1X    B6,A31          ; given csum
+        ZERO    .D1     A9              ; csum (a side)
+||      ZERO    .D2     B9              ; csum (b side)
+||      SHRU    .S2X    A6,2,B5         ; len / 4
+        ;; Check alignment and size
+        AND     .S1     3,A4,A1         ; src & 3
+||      AND     .S2     3,B4,B0         ; dst & 3
+        OR      .L2X    B0,A1,B0        ; non aligned condition
+||      MVC     .S2     B5,ILC          ; loop count = number of words
+||      MVK     .D2     1,B2            ; multiplier 1 for the MPYU below
+||      MV      .D1X    B5,A1           ; words condition
+  [!A1] B       .S1     L8              ; no full word: go to the tail handling
+   [B0] BNOP    .S1     L6,5            ; src or dst unaligned: use the LDNW/STNW loop
+        SPLOOP          1
+        ;; Main loop for aligned words
+        LDW     .D1T1   *A4++,A7        ; load one word from src
+        NOP     4                       ; wait for the load
+        MV      .S2X    A7,B7           ; copy the word to the B side
+||      EXTU    .S1     A7,0,16,A16     ; A16 = upper halfword
+        STW     .D2T2   B7,*B4++        ; store the word to dst
+||      MPYU    .M2     B7,B2,B8        ; B8 = lower halfword (16 LSBs times 1)
+||      ADD     .L1     A16,A9,A9       ; a side accumulates the upper halfwords
+        NOP
+        SPKERNEL        8,0
+||      ADD     .L2     B8,B9,B9        ; b side accumulates the lower halfwords
+        ZERO    .D1     A1              ; words are done: makes the L6 loop be skipped
+||      ADD     .L1X    A9,B9,A9        ;  add csum from a and b sides
+L6:
+  [!A1] BNOP    .S1     L8,5            ; A1 == 0: nothing left for this loop
+        ;; Main loop for non-aligned words
+        SPLOOP          2
+ ||     MVK     .L1     1,A2            ; multiplier 1 for the MPYU below
+        LDNW    .D1T1   *A4++,A7        ; unaligned word load from src
+        NOP             3
+        NOP
+        MV      .S2X    A7,B7           ; copy the word to the B side
+ ||     EXTU    .S1     A7,0,16,A16     ; A16 = upper halfword
+ ||     MPYU    .M1     A7,A2,A8        ; A8 = lower halfword (16 LSBs times 1)
+        ADD     .L1     A16,A9,A9       ; csum += upper halfword
+        SPKERNEL        6,0
+ ||     STNW    .D2T2   B7,*B4++        ; unaligned word store to dst
+ ||     ADD     .L1     A8,A9,A9        ; csum += lower halfword
+L8:     AND     .S2X    2,A6,B5         ; len & 2
+        CMPGT   .L2     B5,0,B0
+  [!B0] BNOP    .S1     L82,4           ; no trailing halfword
+        ;; Copy and sum a trailing halfword, one byte at a time
+        ZERO    .L1     A7
+||      ZERO    .D1     A8
+#ifdef CONFIG_CPU_BIG_ENDIAN
+        LDBU    .D1T1   *A4++,A7        ; first byte
+        LDBU    .D1T1   *A4++,A8        ; second byte
+        NOP             3
+        SHL     .S1     A7,8,A0         ; first byte is the high byte
+        ADD     .S1     A8,A9,A9
+        STB     .D2T1   A7,*B4++
+||      ADD     .S1     A0,A9,A9
+        STB     .D2T1   A8,*B4++
+#else
+        LDBU    .D1T1   *A4++,A7        ; first byte
+        LDBU    .D1T1   *A4++,A8        ; second byte
+        NOP             3
+        ADD     .S1     A7,A9,A9
+        SHL     .S1     A8,8,A0         ; second byte is the high byte
+        STB     .D2T1   A7,*B4++
+||      ADD     .S1     A0,A9,A9
+        STB     .D2T1   A8,*B4++
+#endif
+        ;; Handle the last byte, if there is one
+L82:    AND     .S2X    1,A6,B0         ; len & 1
+  [!B0] BNOP    .S1     L9,5            ; even length: go and fold
+||      ZERO    .L1     A7
+L83:    LDBU    .D1T1   *A4++,A7        ; last byte
+        NOP             4
+        MV      .L2X    A7,B7
+#ifdef CONFIG_CPU_BIG_ENDIAN
+        STB     .D2T2   B7,*B4++
+||      SHL     .S1     A7,8,A7         ; last byte counts as a high byte
+        ADD     .S1     A7,A9,A9
+#else
+        STB     .D2T2   B7,*B4++
+||      ADD     .S1     A7,A9,A9        ; last byte counts as a low byte
+#endif
+        ;; Fold the csum
+L9:     SHRU    .S2X    A9,16,B0        ; anything above bit 15 ?
+  [!B0] BNOP    .S1     L10,5           ; no: already a 16-bit value
+L91:    SHRU    .S2X    A9,16,B4        ; high half
+||      EXTU    .S1     A9,16,16,A3     ; low half
+        ADD     .D1X    A3,B4,A9        ; csum = low + high
+        SHRU    .S1     A9,16,A0
+   [A0] BNOP    .S1     L91,5           ; repeat while a carry remains
+L10:    ADD     .D1     A31,A9,A9       ; add the given csum
+        MV      .D1     A9,A4           ; return value
+        BNOP    .S2     B3,4            ; return
+        MVC     .S2     B30,ILC         ; restore ILC in the last delay slot
+ENDPROC(csum_partial_copy)
+; ip_fast_csum: checksum of the ihl*4 bytes at iph; C model:
+;unsigned short
+;ip_fast_csum(unsigned char *iph, unsigned int ihl)
+;{
+;       unsigned int checksum = 0;
+;       unsigned short *tosum = (unsigned short *) iph;
+;       int len;
+;
+;       len = ihl*4;
+;
+;       if (len <= 0)
+;               return 0;
+;
+;       while(len) {
+;               len -= 2;
+;               checksum += *tosum++;
+;       }
+;       if (len & 1)
+;               checksum += *(unsigned char*) tosum;
+;
+;       while(checksum >> 16)
+;               checksum = (checksum & 0xffff) + (checksum >> 16);
+;
+;       return ~checksum;
+;}
+; The assembly has no odd-byte step; it masks the inverted sum to 16 bits.
+; A4:   iph
+; B4:   ihl
+; return checksum in A4
+; ILC is saved in B30 on entry and restored on return.
+        .text
+ENTRY(ip_fast_csum)
+        ZERO    .D1     A5              ; checksum = 0
+ ||     MVC     .S2     ILC,B30         ; save the caller ILC
+        SHL     .S2     B4,2,B0         ; len = ihl * 4
+        CMPGT   .L2     B0,0,B1         ; len > 0 ?
+  [!B1] BNOP    .S1     L15,4           ; len <= 0: return 0
+  [!B1] ZERO    .D1     A3              ; return value for that case
+  [!B0] B       .S1     L12
+        SHRU    .S2     B0,1,B0         ; number of halfwords
+        MVC     .S2     B0,ILC          ; loop count
+        NOP     3
+        SPLOOP  1
+        LDHU    .D1T1   *A4++,A3        ; load the next halfword
+        NOP     3
+        NOP
+        SPKERNEL        5,0
+ ||     ADD     .L1     A3,A5,A5        ; checksum += halfword
+L12:    SHRU    .S1     A5,16,A0        ; anything above bit 15 ?
+  [!A0] BNOP    .S1     L14,5           ; no: nothing to fold
+L13:    SHRU    .S2X    A5,16,B4        ; high half
+        EXTU    .S1     A5,16,16,A3     ; low half
+        ADD     .D1X    A3,B4,A5        ; checksum = low + high
+        SHRU    .S1     A5,16,A0
+  [A0]  BNOP    .S1     L13,5           ; repeat while a carry remains
+L14:    NOT     .D1     A5,A3           ; ~checksum
+        EXTU    .S1     A3,16,16,A3     ; keep the low 16 bits
+L15:    BNOP    .S2     B3,3            ; return
+        MVC     .S2     B30,ILC         ; restore ILC
+        MV      .D1     A3,A4           ; return value
+ENDPROC(ip_fast_csum)
+; do_csum: sum len bytes at buff and fold the sum to 16 bits; C model:
+;unsigned short
+;do_csum(unsigned char *buff, unsigned int len)
+;{
+;       int odd, count;
+;       unsigned int result = 0;
+;
+;       if (len <= 0)
+;               goto out;
+;       odd = 1 & (unsigned long) buff;
+;       if (odd) {
+;#ifdef __LITTLE_ENDIAN
+;               result += (*buff << 8);
+;#else
+;               result = *buff;
+;#endif
+;               len--;
+;               buff++;
+;       }
+;       count = len >> 1;               /* nr of 16-bit words.. */
+;       if (count) {
+;               if (2 & (unsigned long) buff) {
+;                       result += *(unsigned short *) buff;
+;                       count--;
+;                       len -= 2;
+;                       buff += 2;
+;               }
+;               count >>= 1;            /* nr of 32-bit words.. */
+;               if (count) {
+;                       unsigned int carry = 0;
+;                       do {
+;                               unsigned int w = *(unsigned int *) buff;
+;                               count--;
+;                               buff += 4;
+;                               result += carry;
+;                               result += w;
+;                               carry = (w > result);
+;                       } while (count);
+;                       result += carry;
+;                       result = (result & 0xffff) + (result >> 16);
+;               }
+;               if (len & 2) {
+;                       result += *(unsigned short *) buff;
+;                       buff += 2;
+;               }
+;       }
+;       if (len & 1)
+;#ifdef __LITTLE_ENDIAN
+;               result += *buff;
+;#else
+;               result += (*buff << 8);
+;#endif
+;       result = (result & 0xffff) + (result >> 16);
+;       /* add up carry.. */
+;       result = (result & 0xffff) + (result >> 16);
+;       if (odd)
+;               result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+;out:
+;       return result;
+;}
+; Register use: A5 = return address, B3 = remaining len, A3 = odd, A1 = result.
+; A4:   buff
+; B4:   len
+; return checksum in A4
+; NOTE(review): ILC is loaded for the word loop and never restored - confirm OK.
+ENTRY(do_csum)
+           CMPGT   .L2     B4,0,B0      ; len > 0 ?
+   [!B0]   BNOP    .S1     L26,3        ; len <= 0: return 0
+           EXTU    .S1     A4,31,31,A0  ; odd = buff & 1
+           MV      .L1     A0,A3        ; remember odd for the final byte swap
+||         MV      .S1X    B3,A5        ; keep the return address in A5
+||         MV      .L2     B4,B3        ; B3 = remaining len
+||         ZERO    .D1     A1           ; result = 0
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   [A0]    SUB     .L2     B3,1,B3      ; odd: len--
+|| [A0]    LDBU    .D1T1   *A4++,A1     ; odd: result = *buff++
+#else
+   [!A0]   BNOP    .S1     L21,5        ; even address: skip the leading byte
+|| [A0]    LDBU    .D1T1   *A4++,A0     ; odd: load the leading byte
+           SUB     .L2     B3,1,B3      ; len--
+||         SHL     .S1     A0,8,A1      ; result = byte << 8
+L21:
+#endif
+           SHR     .S2     B3,1,B0      ; count = len >> 1 (halfwords)
+   [!B0]   BNOP    .S1     L24,3        ; no halfword at all
+           MVK     .L1     2,A0
+           AND     .L1     A4,A0,A0     ; buff & 2
+   [!A0]   BNOP    .S1     L22,5        ; word aligned already
+|| [A0]    LDHU    .D1T1   *A4++,A0     ; else consume one halfword
+           SUB     .L2     B0,1,B0      ; count--
+||         SUB     .S2     B3,2,B3      ; len -= 2
+||         ADD     .L1     A0,A1,A1     ; result += halfword
+L22:
+           SHR     .S2     B0,1,B0      ; count = number of 32-bit words
+||         ZERO    .L1     A0           ; carry = 0
+   [!B0]   BNOP    .S1     L23,5        ; no full word
+|| [B0]    MVC     .S2     B0,ILC       ; loop count (previous ILC is not saved)
+           SPLOOP  3
+           SPMASK  L1
+||         MV      .L1     A1,A2        ; A2 = result so far
+||         LDW     .D1T1   *A4++,A1     ; w = next word
+           NOP     4
+           ADD     .L1     A0,A1,A0     ; carry + w
+           ADD     .L1     A2,A0,A2     ; result += carry + w
+           SPKERNEL 1,2
+||         CMPGTU  .L1     A1,A2,A0     ; carry = (w > result)
+           ADD     .L1     A0,A2,A6     ; result + last carry
+           EXTU    .S1     A6,16,16,A7  ; low 16 bits
+           SHRU    .S2X    A6,16,B0     ; high 16 bits
+           NOP             1
+           ADD     .L1X    A7,B0,A1     ; result = low + high
+L23:
+           MVK     .L2     2,B0
+           AND     .L2     B3,B0,B0     ; len & 2
+   [B0]    LDHU    .D1T1   *A4++,A0     ; trailing halfword
+           NOP     4
+   [B0]    ADD     .L1     A0,A1,A1     ; result += halfword
+L24:
+           EXTU    .S2     B3,31,31,B0  ; len & 1
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   [!B0]   BNOP    .S1     L25,4        ; no trailing byte
+|| [B0]    LDBU    .D1T1   *A4,A0       ; trailing byte
+           SHL     .S1     A0,8,A0      ; it counts as a high byte
+           ADD     .L1     A0,A1,A1
+L25:
+#else
+   [B0]    LDBU    .D1T1   *A4,A0       ; trailing byte
+           NOP     4
+   [B0]    ADD     .L1     A0,A1,A1     ; it counts as a low byte
+#endif
+           EXTU    .S1     A1,16,16,A0  ; low 16 bits
+           SHRU    .S2X    A1,16,B0     ; high 16 bits
+           NOP     1
+           ADD     .L1X    A0,B0,A0     ; first fold
+           SHRU    .S1     A0,16,A1     ; carry out of the first fold
+           ADD     .L1     A0,A1,A0     ; add up carry
+           EXTU    .S1     A0,16,16,A1  ; result, 16 bits
+           EXTU    .S1     A1,16,24,A2  ; A2 = (result >> 8) & 0xff
+           EXTU    .S1     A1,24,16,A0  ; A0 = (result & 0xff) << 8
+||         MV      .L2X    A3,B0        ; B0 = odd
+   [B0]    OR      .L1     A0,A2,A1     ; odd start address: swap the two bytes
+L26:
+           NOP     1
+           BNOP    .S2X    A5,4         ; return through the saved address
+           MV      .L1     A1,A4        ; return value
+ENDPROC(do_csum)
+;__wsum csum_partial(const void *buff, int len, __wsum wsum)
+;{
+;       unsigned int sum = (__force unsigned int)wsum;
+;       unsigned int result = do_csum(buff, len);
+;
+;       /* add in old sum, and carry.. */
+;       result += sum;
+;       if (sum > result)
+;               result += 1;
+;       return (__force __wsum)result;
+;}
+; A4: buff, B4: len, A6: wsum; result in A4.  A8/A9 must survive do_csum.
+ENTRY(csum_partial)
+           MV      .L1X    B3,A9        ; keep the return address across the call
+||         CALLP   .S2     do_csum,B3   ; A4 = do_csum(buff, len)
+||         MV      .S1     A6,A8        ; keep wsum across the call
+           BNOP    .S2X    A9,2         ; return; the three adds fill the delay slots
+           ADD     .L1     A8,A4,A1     ; result = do_csum value + sum
+           CMPGTU  .L1     A8,A1,A0     ; carry = (sum > result)
+           ADD     .L1     A1,A0,A4     ; return result + carry
+ENDPROC(csum_partial)
+;unsigned short
+;ip_compute_csum(unsigned char *buff, unsigned int len)
+; Returns the bitwise inverse of the do_csum value, limited to bits 0..15.
+; A4:   buff
+; B4:   len
+; return checksum in A4
+ENTRY(ip_compute_csum)
+           MV      .L1X    B3,A9        ; keep the return address across the call
+||         CALLP   .S2     do_csum,B3   ; A4 = do_csum(buff, len)
+           BNOP    .S2X    A9,3         ; return; NOT and CLR fill the delay slots
+           NOT     .S1     A4,A4        ; invert the sum
+           CLR     .S1     A4,16,31,A4  ; clear bits 16..31
+ENDPROC(ip_compute_csum)
diff --git a/arch/c6x/lib/divi.S b/arch/c6x/lib/divi.S
new file mode 100644
index 000000000000..4bde924f2a98
--- /dev/null
+++ b/arch/c6x/lib/divi.S
@@ -0,0 +1,53 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        ;; __c6xabi_divi (signed A4 / B4, quotient in A4): ABI considerations for the divide functions
+        ;; The following registers are call-used:
+        ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+        ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+        ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+        ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+        ;;
+        ;; In our implementation, divu and remu are leaf functions,
+        ;; while both divi and remi call into divu.
+        ;; A0 is not clobbered by any of the functions.
+        ;; divu does not clobber B2 either, which is taken advantage of
+        ;; in remi.
+        ;; divi uses B5 to hold the original return address during
+        ;; the call to divu.
+        ;; remi uses B2 and A5 to hold the input values during the
+        ;; call to divu.  It stores B3 on the stack.
+        .text
+ENTRY(__c6xabi_divi)
+        call    .s2     __c6xabi_divu   ; unsigned divide; operands are fixed up in the delay slots
+||      mv      .d2     B3, B5          ; keep the original return address in B5
+||      cmpgt   .l1     0, A4, A1       ; A1 = dividend is negative
+||      cmpgt   .l2     0, B4, B1       ; B1 = divisor is negative
+   [A1] neg     .l1     A4, A4          ; dividend = abs(dividend)
+|| [B1] neg     .l2     B4, B4          ; divisor = abs(divisor)
+||      xor     .s1x    A1, B1, A1      ; A1 = signs differ
+   [A1] addkpc  .s2     _divu_ret, B3, 4 ; signs differ: divu returns to _divu_ret, else to our caller
+_divu_ret:
+        neg     .l1     A4, A4          ; negate the quotient
+||      mv      .l2     B3,B5
+||      ret     .s2     B5              ; return through the address saved in B5
+        nop             5
+ENDPROC(__c6xabi_divi)
diff --git a/arch/c6x/lib/divremi.S b/arch/c6x/lib/divremi.S
new file mode 100644
index 000000000000..64bc5aa95ad3
--- /dev/null
+++ b/arch/c6x/lib/divremi.S
@@ -0,0 +1,46 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        .text                           ; __c6xabi_divremi: signed A4 / B4, quotient in A4, remainder in A5
+ENTRY(__c6xabi_divremi)
+        stw     .d2t2   B3, *B15--[2]   ; save the return address on the stack
+||      cmpgt   .l1     0, A4, A1       ; A1 = dividend is negative
+||      cmpgt   .l2     0, B4, B2       ; B2 = divisor is negative
+||      mv      .s1     A4, A5          ; A5 = original dividend
+||      call    .s2     __c6xabi_divu   ; unsigned divide; operands are fixed up in the delay slots
+   [A1] neg     .l1     A4, A4          ; dividend = abs(dividend)
+|| [B2] neg     .l2     B4, B4          ; divisor = abs(divisor)
+||      xor     .s2x    B2, A1, B0      ; B0 = signs differ
+||      mv      .d2     B4, B2          ; B2 = original divisor (old B4 is read)
+   [B0] addkpc  .s2     _divu_ret_1, B3, 1 ; signs differ: return via the negate
+  [!B0] addkpc  .s2     _divu_ret_2, B3, 1 ; same signs: skip the negate
+        nop     2
+_divu_ret_1:
+        neg     .l1     A4, A4          ; negate the quotient
+_divu_ret_2:
+        ldw     .d2t2   *++B15[2], B3   ; reload the return address
+        mpy32   .m1x    A4, B2, A6      ; A6 = quotient * divisor
+        nop             3
+        ret     .s2     B3
+        sub     .l1     A5, A6, A5      ; remainder = dividend - quotient * divisor
+        nop     4
+ENDPROC(__c6xabi_divremi)
diff --git a/arch/c6x/lib/divremu.S b/arch/c6x/lib/divremu.S
new file mode 100644
index 000000000000..caa9f23ee167
--- /dev/null
+++ b/arch/c6x/lib/divremu.S
@@ -0,0 +1,87 @@
+;;  Copyright 2011  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        .text                           ; __c6xabi_divremu: unsigned A4 / B4, quotient in A4, remainder in A5
+ENTRY(__c6xabi_divremu)
+        ;; We use a series of up to 31 subc instructions.  First, we find
+        ;; out how many leading zero bits there are in the divisor.  This
+        ;; gives us both a shift count for aligning (shifting) the divisor
+        ;; to the left, and the number of times we have to execute subc.
+        ;; At the end, we have both the remainder and most of the quotient
+        ;; in A4.  The top bit of the quotient is computed first and is
+        ;; placed in A2.
+        ;; Return immediately if the dividend is zero.  Setting B4 to 1
+        ;; is a trick to allow us to leave the following insns in the jump
+        ;; delay slot without affecting the result.
+        mv      .s2x    A4, B1          ; B1 = dividend, used as the zero test
+  [b1]  lmbd    .l2     1, B4, B1       ; B1 = leading zero count of the divisor
+||[!b1] b       .s2     B3      ; RETURN A
+||[!b1] mvk     .d2     1, B4
+||[!b1] zero    .s1     A5              ; zero dividend: remainder = 0
+        mv      .l1x    B1, A6          ; A6 = shift count
+||      shl     .s2     B4, B1, B4      ; left-align the divisor
+        ;; The loop performs a maximum of 28 steps, so we do the
+        ;; first 3 here.
+        cmpltu  .l1x    A4, B4, A2      ; A2 = dividend < aligned divisor
+  [!A2] sub     .l1x    A4, B4, A4      ; subtract when it fits
+||      shru    .s2     B4, 1, B4
+||      xor     .s1     1, A2, A2       ; A2 = top quotient bit
+        shl     .s1     A2, 31, A2      ; move it to bit 31
+|| [b1] subc    .l1x    A4,B4,A4        ; one divide step per subc
+|| [b1] add     .s2     -1, B1, B1      ; one step fewer to go
+   [b1] subc    .l1x    A4,B4,A4
+|| [b1] add     .s2     -1, B1, B1
+        ;; RETURN A may happen here (note: must happen before the next branch)
+__divremu0:
+        cmpgt   .l2     B1, 7, B0       ; more than 7 steps left: loop again
+|| [b1] subc    .l1x    A4,B4,A4
+|| [b1] add     .s2     -1, B1, B1
+   [b1] subc    .l1x    A4,B4,A4
+|| [b1] add     .s2     -1, B1, B1
+|| [b0] b       .s1     __divremu0      ; the next five packets are its delay slots
+   [b1] subc    .l1x    A4,B4,A4
+|| [b1] add     .s2     -1, B1, B1
+   [b1] subc    .l1x    A4,B4,A4
+|| [b1] add     .s2     -1, B1, B1
+   [b1] subc    .l1x    A4,B4,A4
+|| [b1] add     .s2     -1, B1, B1
+   [b1] subc    .l1x    A4,B4,A4
+|| [b1] add     .s2     -1, B1, B1
+   [b1] subc    .l1x    A4,B4,A4
+|| [b1] add     .s2     -1, B1, B1
+        ;; loop backwards branch happens here
+        ret     .s2     B3
+||      mvk     .s1     32, A1
+        sub     .l1     A1, A6, A6      ; A6 = 32 - shift count
+||      extu    .s1     A4, A6, A5      ; A5 = A4 >> shift count (the remainder part)
+        shl     .s1     A4, A6, A4      ; drop the remainder bits
+        shru    .s1     A4, 1, A4       ; make room for the top quotient bit
+||      sub     .l1     A6, 1, A6
+        or      .l1     A2, A4, A4      ; insert the top quotient bit
+        shru    .s1     A4, A6, A4      ; A4 = quotient
+        nop
+ENDPROC(__c6xabi_divremu)
diff --git a/arch/c6x/lib/divu.S b/arch/c6x/lib/divu.S
new file mode 100644
index 000000000000..64af3c006dd3
--- /dev/null
+++ b/arch/c6x/lib/divu.S
@@ -0,0 +1,98 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        ;; __c6xabi_divu (unsigned A4 / B4, quotient in A4): ABI considerations for the divide functions
+        ;; The following registers are call-used:
+        ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+        ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+        ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+        ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+        ;;
+        ;; In our implementation, divu and remu are leaf functions,
+        ;; while both divi and remi call into divu.
+        ;; A0 is not clobbered by any of the functions.
+        ;; divu does not clobber B2 either, which is taken advantage of
+        ;; in remi.
+        ;; divi uses B5 to hold the original return address during
+        ;; the call to divu.
+        ;; remi uses B2 and A5 to hold the input values during the
+        ;; call to divu.  It stores B3 on the stack.
+        .text
+ENTRY(__c6xabi_divu)
+        ;; We use a series of up to 31 subc instructions.  First, we find
+        ;; out how many leading zero bits there are in the divisor.  This
+        ;; gives us both a shift count for aligning (shifting) the divisor
+        ;; to the left, and the number of times we have to execute subc.
+        ;; At the end, we have both the remainder and most of the quotient
+        ;; in A4.  The top bit of the quotient is computed first and is
+        ;; placed in A2.
+        ;; Return immediately if the dividend is zero.
+         mv     .s2x    A4, B1          ; B1 = dividend, used as the zero test
+   [B1]  lmbd   .l2     1, B4, B1       ; B1 = leading zero count of the divisor
+|| [!B1] b      .s2     B3      ; RETURN A
+|| [!B1] mvk    .d2     1, B4
+         mv     .l1x    B1, A6          ; A6 = shift count
+||       shl    .s2     B4, B1, B4      ; left-align the divisor
+        ;; The loop performs a maximum of 28 steps, so we do the
+        ;; first 3 here.
+         cmpltu .l1x    A4, B4, A2      ; A2 = dividend < aligned divisor
+   [!A2] sub    .l1x    A4, B4, A4      ; subtract when it fits
+||       shru   .s2     B4, 1, B4
+||       xor    .s1     1, A2, A2       ; A2 = top quotient bit
+         shl    .s1     A2, 31, A2      ; move it to bit 31
+|| [B1]  subc   .l1x    A4,B4,A4        ; one divide step per subc
+|| [B1]  add    .s2     -1, B1, B1      ; one step fewer to go
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+        ;; RETURN A may happen here (note: must happen before the next branch)
+_divu_loop:
+         cmpgt  .l2     B1, 7, B0       ; more than 7 steps left: loop again
+|| [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+|| [B0]  b      .s1     _divu_loop      ; the next five packets are its delay slots
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+        ;; loop backwards branch happens here
+         ret    .s2     B3
+||       mvk    .s1     32, A1
+         sub    .l1     A1, A6, A6      ; A6 = 32 - shift count
+         shl    .s1     A4, A6, A4      ; drop the remainder bits
+         shru   .s1     A4, 1, A4       ; make room for the top quotient bit
+||       sub    .l1     A6, 1, A6
+         or     .l1     A2, A4, A4      ; insert the top quotient bit
+         shru   .s1     A4, A6, A4      ; A4 = quotient
+         nop
+ENDPROC(__c6xabi_divu)
diff --git a/arch/c6x/lib/llshl.S b/arch/c6x/lib/llshl.S
new file mode 100644
index 000000000000..7b105e2d1b78
--- /dev/null
+++ b/arch/c6x/lib/llshl.S
@@ -0,0 +1,37 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+;;  uint64_t __c6xabi_llshl(uint64_t val, uint shift): 64-bit left shift; val and result in A5:A4 (A5 high), shift in B4
+#include <linux/linkage.h>
+        .text
+ENTRY(__c6xabi_llshl)
+         mv     .l1x    B4,A1           ; A1 = shift count
+   [!A1] b      .s2     B3              ; just return if zero shift
+         mvk    .s1     32,A0
+         sub    .d1     A0,A1,A0        ; A0 = 32 - shift
+         cmplt  .l1     0,A0,A2         ; A2 = (shift < 32)
+   [A2]  shru   .s1     A4,A0,A0        ; bits moving from the low word into the high word
+   [!A2] neg    .l1     A0,A5           ; shift >= 32: A5 = shift - 32
+|| [A2]  shl    .s1     A5,A1,A5        ; shift < 32: high <<= shift
+   [!A2] shl    .s1     A4,A5,A5        ; shift >= 32: high = low << (shift - 32)
+|| [A2]  or     .d1     A5,A0,A5        ; shift < 32: high |= bits from the low word
+|| [!A2] mvk    .l1     0,A4            ; shift >= 32: low = 0
+   [A2]  shl    .s1     A4,A1,A4        ; shift < 32: low <<= shift
+         bnop   .s2     B3,5            ; return
+ENDPROC(__c6xabi_llshl)
diff --git a/arch/c6x/lib/llshr.S b/arch/c6x/lib/llshr.S
new file mode 100644
index 000000000000..fde1bec7cf5a
--- /dev/null
+++ b/arch/c6x/lib/llshr.S
@@ -0,0 +1,38 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+;;  uint64_t __c6xabi_llshr(uint64_t val, uint shift): 64-bit arithmetic right shift; val and result in A5:A4 (A5 high), shift in B4
+#include <linux/linkage.h>
+        .text
+ENTRY(__c6xabi_llshr)
+         mv     .l1x    B4,A1           ; A1 = shift count
+   [!A1] b      .s2     B3              ; return if zero shift count
+         mvk    .s1     32,A0
+         sub    .d1     A0,A1,A0        ; A0 = 32 - shift
+         cmplt  .l1     0,A0,A2         ; A2 = (shift < 32)
+   [A2]  shl    .s1     A5,A0,A0        ; bits moving from the high word into the low word
+         nop
+   [!A2] neg    .l1     A0,A4           ; shift >= 32: A4 = shift - 32
+|| [A2]  shru   .s1     A4,A1,A4        ; shift < 32: low >>= shift (logical)
+   [!A2] shr    .s1     A5,A4,A4        ; shift >= 32: low = high >> (shift - 32), sign filled
+|| [A2]  or     .d1     A4,A0,A4        ; shift < 32: low |= bits from the high word
+   [!A2] shr    .s1     A5,0x1f,A5      ; shift >= 32: high = copies of the sign bit
+   [A2]  shr    .s1     A5,A1,A5        ; shift < 32: high >>= shift (arithmetic)
+         bnop   .s2     B3,5            ; return
+ENDPROC(__c6xabi_llshr)
diff --git a/arch/c6x/lib/llshru.S b/arch/c6x/lib/llshru.S
new file mode 100644
index 000000000000..596ae3ff5c0f
--- /dev/null
+++ b/arch/c6x/lib/llshru.S
@@ -0,0 +1,38 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+;;  uint64_t __c6xabi_llshru(uint64_t val, uint shift): 64-bit logical right shift; val and result in A5:A4 (A5 high), shift in B4
+#include <linux/linkage.h>
+        .text
+ENTRY(__c6xabi_llshru)
+         mv     .l1x    B4,A1           ; A1 = shift count
+   [!A1] b      .s2     B3              ; return if zero shift count
+         mvk    .s1     32,A0
+         sub    .d1     A0,A1,A0        ; A0 = 32 - shift
+         cmplt  .l1     0,A0,A2         ; A2 = (shift < 32)
+   [A2]  shl    .s1     A5,A0,A0        ; bits moving from the high word into the low word
+         nop
+   [!A2] neg    .l1     A0,A4           ; shift >= 32: A4 = shift - 32
+|| [A2]  shru   .s1     A4,A1,A4        ; shift < 32: low >>= shift
+   [!A2] shru   .s1     A5,A4,A4        ; shift >= 32: low = high >> (shift - 32)
+|| [A2]  or     .d1     A4,A0,A4        ; shift < 32: low |= bits from the high word
+|| [!A2] mvk    .l1     0,A5            ; shift >= 32: high = 0
+   [A2]  shru   .s1     A5,A1,A5        ; shift < 32: high >>= shift
+         bnop   .s2     B3,5            ; return
+ENDPROC(__c6xabi_llshru)
diff --git a/arch/c6x/lib/memcpy_64plus.S b/arch/c6x/lib/memcpy_64plus.S
new file mode 100644
index 000000000000..0bbc2cbf9318
--- /dev/null
+++ b/arch/c6x/lib/memcpy_64plus.S
@@ -0,0 +1,46 @@
+;  Port on Texas Instruments TMS320C6x architecture
+;
+;  Copyright (C) 2006, 2009, 2010 Texas Instruments Incorporated
+;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+;
+;  This program is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License version 2 as
+;  published by the Free Software Foundation.
+;
+#include <linux/linkage.h>
+        .text                           ; memcpy: A4 = dst (returned unchanged), B4 = src, A6 = len
+ENTRY(memcpy)
+        AND     .L1     0x1,A6,A0       ; len & 1
+ ||     AND     .S1     0x2,A6,A1       ; len & 2
+ ||     AND     .L2X    0x4,A6,B0       ; len & 4
+ ||     MV      .D1     A4,A3           ; A3 = write pointer; A4 is left untouched
+ ||     MVC     .S2     ILC,B2          ; save the caller ILC
+   [A0] LDB     .D2T1   *B4++,A5        ; one byte when len & 1
+   [A1] LDB     .D2T1   *B4++,A7        ; two bytes when len & 2
+   [A1] LDB     .D2T1   *B4++,A8
+   [B0] LDNW    .D2T1   *B4++,A9        ; one unaligned word when len & 4
+ ||     SHRU    .S2X    A6,0x3,B1       ; number of 8-byte chunks
+  [!B1] BNOP    .S2     B3,1            ; no chunk: return once the stores below are issued
+   [A0] STB     .D1T1   A5,*A3++
+ ||[B1] MVC     .S2     B1,ILC          ; loop count = number of chunks
+   [A1] STB     .D1T1   A7,*A3++
+   [A1] STB     .D1T1   A8,*A3++
+   [B0] STNW    .D1T1   A9,*A3++        ; return when len < 8
+        SPLOOP  2
+        LDNDW   .D2T1   *B4++,A9:A8     ; unaligned 8-byte load
+        NOP     3
+        NOP
+        SPKERNEL        0,0
+ ||     STNDW   .D1T1   A9:A8,*A3++     ; unaligned 8-byte store
+        BNOP    .S2     B3,4            ; return
+        MVC     .S2     B2,ILC          ; restore ILC in the last delay slot
+ENDPROC(memcpy)
diff --git a/arch/c6x/lib/mpyll.S b/arch/c6x/lib/mpyll.S
new file mode 100644
index 000000000000..f1034418b4db
--- /dev/null
+++ b/arch/c6x/lib/mpyll.S
@@ -0,0 +1,49 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        ;; uint64_t __c6xabi_mpyll(uint64_t x, uint64_t y)
+        ;;
+        ;; 64x64 multiply
+        ;; First compute partial results using 32-bit parts of x and y:
+        ;;
+        ;;   b63         b32 b31          b0
+        ;;    -----------------------------
+        ;;    |      1      |      0      |
+        ;;    -----------------------------
+        ;;
+        ;;   P0 = X0*Y0
+        ;;   P1 = X0*Y1 + X1*Y0
+        ;;   P2 = X1*Y1
+        ;;
+        ;;   result = (P2 << 64) + (P1 << 32) + P0
+        ;;
+        ;; Since the result is also 64-bit, we can skip the P2 term.
+        .text
+ENTRY(__c6xabi_mpyll)                   ; x in A5:A4 (X1:X0), y in B5:B4 (Y1:Y0); 64-bit product returned in A5:A4
+        mpy32u  .m1x    A4,B4,A1:A0     ; X0*Y0
+        b       .s2     B3              ; return; the five packets below run in its delay slots
+ ||     mpy32u  .m2x    B5,A4,B1:B0     ; X0*Y1 (don't need upper 32-bits)
+ ||     mpy32u  .m1x    A5,B4,A3:A2     ; X1*Y0 (don't need upper 32-bits)
+        nop                             ; wait for the multiplier results
+        nop
+        mv      .s1     A0,A4           ; result low word = low 32 bits of P0
+        add     .l1x    A2,B0,A5        ; A5 = low 32 bits of P1 (X1*Y0 + X0*Y1)
+        add     .s1     A1,A5,A5        ; result high word = high 32 bits of P0 + low 32 bits of P1
+ENDPROC(__c6xabi_mpyll)
diff --git a/arch/c6x/lib/negll.S b/arch/c6x/lib/negll.S
new file mode 100644
index 000000000000..82f4bcec9afb
--- /dev/null
+++ b/arch/c6x/lib/negll.S
@@ -0,0 +1,31 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+;;  int64_t __c6xabi_negll(int64_t val)
+#include <linux/linkage.h>
+        .text
+ENTRY(__c6xabi_negll)                   ; 64-bit negate: val in A5:A4 (high:low), result returned in A5:A4
+        b       .s2     B3              ; return; the five packets below run in its delay slots
+        mvk     .l1     0,A0            ; A0 = 0
+        subu    .l1     A0,A4,A3:A2     ; A3:A2 = 0 - low word as a wide result; A3 carries the borrow
+        sub     .l1     A0,A5,A0        ; A0 = 0 - high word
+||      ext     .s1     A3,24,24,A5     ; A5 = sign-extended low byte of A3: -1 on borrow, else 0 (per ISA guide)
+        add     .l1     A5,A0,A5        ; result high word = -high - borrow
+        mv      .s1     A2,A4           ; result low word
+ENDPROC(__c6xabi_negll)
diff --git a/arch/c6x/lib/pop_rts.S b/arch/c6x/lib/pop_rts.S
new file mode 100644
index 000000000000..d7d96c70e9e7
--- /dev/null
+++ b/arch/c6x/lib/pop_rts.S
@@ -0,0 +1,32 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        .text
+ENTRY(__c6xabi_pop_rts)                 ; reload the registers stored by __c6xabi_push_rts, then return via the reloaded B3
+        lddw    .d2t2   *++B15, B3:B2   ; pre-increment B15 and pop in the reverse of the push order
+        lddw    .d2t1   *++B15, A11:A10
+        lddw    .d2t2   *++B15, B11:B10
+        lddw    .d2t1   *++B15, A13:A12
+        lddw    .d2t2   *++B15, B13:B12
+        lddw    .d2t1   *++B15, A15:A14
+||      b       .s2     B3              ; issued late enough that the B3 load above has completed
+        ldw     .d2t2   *++B15[2], B14  ; B14 was pushed as a single word in a two-word slot
+        nop     4                       ; remaining branch delay slots; also lets the loads finish
+ENDPROC(__c6xabi_pop_rts)
diff --git a/arch/c6x/lib/push_rts.S b/arch/c6x/lib/push_rts.S
new file mode 100644
index 000000000000..f6e3db3b6065
--- /dev/null
+++ b/arch/c6x/lib/push_rts.S
@@ -0,0 +1,31 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        .text
+ENTRY(__c6xabi_push_rts)                ; push B14, A15:A10, B13:B10 and B3:B2 on the B15 stack; returns through A3
+        stw     .d2t2   B14, *B15--[2]  ; single word, but B15 drops by two words
+        stdw    .d2t1   A15:A14, *B15--
+||      b       .s2x    A3              ; return address is taken from A3 (B3 is saved as data below)
+        stdw    .d2t2   B13:B12, *B15-- ; the five stores from here on run in the branch delay slots
+        stdw    .d2t1   A13:A12, *B15--
+        stdw    .d2t2   B11:B10, *B15--
+        stdw    .d2t1   A11:A10, *B15--
+        stdw    .d2t2   B3:B2, *B15--
+ENDPROC(__c6xabi_push_rts)
diff --git a/arch/c6x/lib/remi.S b/arch/c6x/lib/remi.S
new file mode 100644
index 000000000000..6f2ca18c3f98
--- /dev/null
+++ b/arch/c6x/lib/remi.S
@@ -0,0 +1,64 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        ;; ABI considerations for the divide functions
+        ;; The following registers are call-used:
+        ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+        ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+        ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+        ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+        ;;
+        ;; In our implementation, divu and remu are leaf functions,
+        ;; while both divi and remi call into divu.
+        ;; A0 is not clobbered by any of the functions.
+        ;; divu does not clobber B2 either, which is taken advantage of
+        ;; in remi.
+        ;; divi uses B5 to hold the original return address during
+        ;; the call to divu.
+        ;; remi uses B2 and A5 to hold the input values during the
+        ;; call to divu.  It stores B3 on the stack.
+        .text
+ENTRY(__c6xabi_remi)                    ; signed remainder: dividend in A4, divisor in B4, result in A4
+        stw     .d2t2   B3, *B15--[2]   ; save the return address on the stack
+||      cmpgt   .l1     0, A4, A1       ; A1 = dividend is negative
+||      cmpgt   .l2     0, B4, B2       ; B2 = divisor is negative
+||      mv      .s1     A4, A5          ; A5 = original dividend, kept across the divu call
+||      call    .s2     __c6xabi_divu   ; unsigned divide of the magnitudes; packets below fill the delay slots
+   [A1] neg     .l1     A4, A4          ; A4 = magnitude of the dividend
+|| [B2] neg     .l2     B4, B4          ; B4 = magnitude of the divisor
+||      xor     .s2x    B2, A1, B0      ; B0 = signs differ, so the quotient must be negated
+||      mv      .d2     B4, B2          ; B2 = divisor as passed in (read before the parallel neg writes B4)
+   [B0] addkpc  .s2     _divu_ret_1, B3, 1      ; divu returns to the negate step
+  [!B0] addkpc  .s2     _divu_ret_2, B3, 1      ; divu returns past the negate step
+        nop     2
+_divu_ret_1:
+        neg     .l1     A4, A4          ; quotient = -quotient
+_divu_ret_2:
+        ldw     .d2t2   *++B15[2], B3   ; restore the saved return address
+        mpy32   .m1x    A4, B2, A6      ; A6 = quotient * original divisor
+        nop             3               ; wait for the multiply (and the B3 load)
+        ret     .s2     B3
+        sub     .l1     A5, A6, A4      ; remainder = original dividend - quotient * divisor
+        nop     4                       ; remaining branch delay slots
+ENDPROC(__c6xabi_remi)
diff --git a/arch/c6x/lib/remu.S b/arch/c6x/lib/remu.S
new file mode 100644
index 000000000000..3fae719185ab
--- /dev/null
+++ b/arch/c6x/lib/remu.S
@@ -0,0 +1,82 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        ;; ABI considerations for the divide functions
+        ;; The following registers are call-used:
+        ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+        ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+        ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+        ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+        ;;
+        ;; In our implementation, divu and remu are leaf functions,
+        ;; while both divi and remi call into divu.
+        ;; A0 is not clobbered by any of the functions.
+        ;; divu does not clobber B2 either, which is taken advantage of
+        ;; in remi.
+        ;; divi uses B5 to hold the original return address during
+        ;; the call to divu.
+        ;; remi uses B2 and A5 to hold the input values during the
+        ;; call to divu.  It stores B3 on the stack.
+        .text
+ENTRY(__c6xabi_remu)                    ; unsigned remainder: dividend in A4, divisor in B4, result in A4
+        ;; The ABI seems designed to prevent these functions calling each other,
+        ;; so we duplicate most of the divsi3 code here.
+         mv     .s2x    A4, B1          ; B1 = dividend, used only as the zero test below
+         lmbd   .l2     1, B4, B1       ; B1 = position of the leftmost 1 in the divisor (normalizing shift)
+|| [!B1] b      .s2     B3      ; RETURN A
+|| [!B1] mvk    .d2     1, B4
+         mv     .l1x    B1, A7          ; A7 = shift count, needed for the final extract
+||       shl    .s2     B4, B1, B4      ; normalize the divisor
+         cmpltu .l1x    A4, B4, A1      ; A1 = dividend < normalized divisor
+   [!A1] sub    .l1x    A4, B4, A4      ; first subtract step, done by hand
+         shru   .s2     B4, 1, B4       ; divisor >>= 1 for the subc steps
+_remu_loop:
+         cmpgt  .l2     B1, 7, B0       ; B0 = more than 7 steps left, so take another pass
+|| [B1]  subc   .l1x    A4,B4,A4        ; one conditional-subtract divide step per packet while B1 != 0
+|| [B1]  add    .s2     -1, B1, B1      ; B1 = steps remaining
+        ;; RETURN A may happen here (note: must happen before the next branch)
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+|| [B0]  b      .s1     _remu_loop
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+        ;; loop backwards branch happens here
+         ret    .s2     B3              ; last steps and the extract run in the delay slots
+   [B1]  subc   .l1x    A4,B4,A4
+|| [B1]  add    .s2     -1, B1, B1
+   [B1]  subc   .l1x    A4,B4,A4
+         extu   .s1     A4, A7, A4      ; field extract controlled by A7; presumably A4 >> shift count, leaving the remainder
+         nop    2
+ENDPROC(__c6xabi_remu)
diff --git a/arch/c6x/lib/strasgi.S b/arch/c6x/lib/strasgi.S
new file mode 100644
index 000000000000..de2740765536
--- /dev/null
+++ b/arch/c6x/lib/strasgi.S
@@ -0,0 +1,89 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        .text
+ENTRY(__c6xabi_strasgi)                 ; structure copy: A4 = dst, B4 = src, A6 = size in bytes (as used below)
+        ;; This is essentially memcpy, with alignment known to be at least
+        ;; 4, and the size a multiple of 4 greater than or equal to 28.
+         ldw    .d2t1   *B4++, A0       ; preload the first six words into A0, A1, A5, A7, A8, A9
+||       mvk    .s2     16, B1          ; B1 = 16, compare constant used below
+         ldw    .d2t1   *B4++, A1
+||       mvk    .s2     20, B2          ; B2 = 20, compare constant used below
+||       sub    .d1     A6, 24, A6      ; bytes left after the six preloaded words
+         ldw    .d2t1   *B4++, A5
+         ldw    .d2t1   *B4++, A7
+||       mv     .l2x    A6, B7          ; B7 = bytes still to be loaded
+         ldw    .d2t1   *B4++, A8
+         ldw    .d2t1   *B4++, A9
+||       mv     .s2x    A0, B5          ; B5 stages each word for the B-side store
+||       cmpltu .l2     B2, B7, B0      ; B0 = more than 20 bytes left, i.e. another full pass
+_strasgi_loop:
+         stw    .d1t2   B5, *A4++       ; each packet stores the staged word ...
+|| [B0]  ldw    .d2t1   *B4++, A0       ; ... and refills a register if enough bytes remain
+||       mv     .s2x    A1, B5
+||       mv     .l2     B7, B6          ; B6 = bytes remaining at the start of this pass
+   [B0]  sub    .d2     B6, 24, B7      ; B7 = bytes remaining for the next pass
+|| [B0]  b      .s2     _strasgi_loop   ; the five packets below run in the delay slots
+||       cmpltu .l2     B1, B6, B0      ; B0 = more than 16 bytes left
+   [B0]  ldw    .d2t1   *B4++, A1
+||       stw    .d1t2   B5, *A4++
+||       mv     .s2x    A5, B5
+||       cmpltu .l2     12, B6, B0      ; B0 = more than 12 bytes left
+   [B0]  ldw    .d2t1   *B4++, A5
+||       stw    .d1t2   B5, *A4++
+||       mv     .s2x    A7, B5
+||       cmpltu .l2     8, B6, B0       ; B0 = more than 8 bytes left
+   [B0]  ldw    .d2t1   *B4++, A7
+||       stw    .d1t2   B5, *A4++
+||       mv     .s2x    A8, B5
+||       cmpltu .l2     4, B6, B0       ; B0 = more than 4 bytes left
+   [B0]  ldw    .d2t1   *B4++, A8
+||       stw    .d1t2   B5, *A4++
+||       mv     .s2x    A9, B5
+||       cmpltu .l2     0, B6, B0       ; B0 = any bytes left
+   [B0]  ldw    .d2t1   *B4++, A9
+||       stw    .d1t2   B5, *A4++
+||       mv     .s2x    A0, B5          ; stage A0 for the first store of the next pass
+||       cmpltu .l2     B2, B7, B0      ; loop condition for the next pass
+        ;; loop back branch happens here
+         cmpltu .l2     B1, B6, B0      ; tail: same tests as the conditional loads of the last pass
+||       ret    .s2     b3              ; the tail stores run in the delay slots
+   [B0]  stw    .d1t1   A1, *A4++       ; store only the words that were actually loaded
+||       cmpltu .l2     12, B6, B0
+   [B0]  stw    .d1t1   A5, *A4++
+||       cmpltu .l2     8, B6, B0
+   [B0]  stw    .d1t1   A7, *A4++
+||       cmpltu .l2     4, B6, B0
+   [B0]  stw    .d1t1   A8, *A4++
+||       cmpltu .l2     0, B6, B0
+   [B0]  stw    .d1t1   A9, *A4++
+        ;; return happens here
+ENDPROC(__c6xabi_strasgi)
diff --git a/arch/c6x/lib/strasgi_64plus.S b/arch/c6x/lib/strasgi_64plus.S
new file mode 100644
index 000000000000..c9fd159b5fa2
--- /dev/null
+++ b/arch/c6x/lib/strasgi_64plus.S
@@ -0,0 +1,39 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/linkage.h>
+        .text
+ENTRY(__c6xabi_strasgi_64plus)          ; word-by-word copy: a4 = dst, b4 = src, a6 = size in bytes (as used below)
+        shru    .s2x    a6, 2, b31      ; b31 = size / 4 = number of words
+||      mv      .s1     a4, a30         ; a30 = running dst pointer
+||      mv      .d2     b4, b30         ; b30 = running src pointer
+        add     .s2     -4, b31, b31    ; NOTE(review): presumably offsets the sploopd minimum iteration count - confirm
+        sploopd         1               ; software-pipelined loop, operand 1
+||      mvc     .s2     b31, ilc        ; loop count
+        ldw     .d2t2   *b30++, b31     ; load one word (b31 is reused as the data register)
+        nop     4                       ; wait for the load result
+        mv      .s1x    b31,a31         ; move to the A side for the store
+        spkernel        6, 0
+||      stw     .d1t1   a31, *a30++     ; store one word
+        ret     .s2     b3
+        nop 5                           ; branch delay slots
+ENDPROC(__c6xabi_strasgi_64plus)
author	Aurelien Jacquiot <a-jacquiot@ti.com>	2011-10-04 11:15:51 -0400
committer	Mark Salter <msalter@redhat.com>	2011-10-06 19:48:23 -0400
commit	09831ca73443bd819ad7993db5409b19c899ba33 (patch)
tree	4558fb0a5e0e1fcd8582be2155cd9c7498e429db /arch/c6x/lib
parent	a7f626c1948ab6178d2338831c5ffea7385e9f7f (diff)