Merge branch 'mpi-master' into wip-k-fmlp

Conflicts: litmus/sched_cedf.c
author: Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
committer: Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
commit: c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree: ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/microblaze/lib
parent: ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent: 6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
15 files changed, 671 insertions, 29 deletions
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile
index 4dfe47d3cd91..10c320aa908b 100644
--- a/arch/microblaze/lib/Makefile
+++ b/arch/microblaze/lib/Makefile
@@ -2,6 +2,12 @@
 # Makefile
 #
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_ashldi3.o = -pg
+CFLAGS_REMOVE_ashrdi3.o = -pg
+CFLAGS_REMOVE_lshrdi3.o = -pg
+endif
 lib-y :=  memset.o
 ifeq ($(CONFIG_OPT_LIB_ASM),y)
@@ -11,3 +17,13 @@ lib-y += memcpy.o memmove.o
 endif
 lib-y += uaccess_old.o
+lib-y += ashldi3.o
+lib-y += ashrdi3.o
+lib-y += divsi3.o
+lib-y += lshrdi3.o
+lib-y += modsi3.o
+lib-y += muldi3.o
+lib-y += mulsi3.o
+lib-y += udivsi3.o
+lib-y += umodsi3.o
diff --git a/arch/microblaze/lib/ashldi3.c b/arch/microblaze/lib/ashldi3.c
new file mode 100644
index 000000000000..beb80f316095
--- /dev/null
+++ b/arch/microblaze/lib/ashldi3.c
@@ -0,0 +1,29 @@
+#include <linux/module.h>
+#include "libgcc.h"
+long long __ashldi3(long long u, word_type b) /* returns u shifted left by b bits */
+{
+        DWunion uu, w; /* uu: input viewed as two 32-bit halves; w: result being built */
+        word_type bm; /* 32 - b; its sign selects the case below */
+        if (b == 0) /* nothing to shift */
+                return u;
+        uu.ll = u;
+        bm = 32 - b;
+        if (bm <= 0) { /* b >= 32: the low word empties and its bits land in the high word */
+                w.s.low = 0;
+                w.s.high = (unsigned int) uu.s.low << -bm; /* -bm == b - 32 */
+        } else { /* b < 32 (b == 0 was handled above): bits cross the word boundary */
+                const unsigned int carries = (unsigned int) uu.s.low >> bm; /* top b bits of the low word */
+                w.s.low = (unsigned int) uu.s.low << b;
+                w.s.high = ((unsigned int) uu.s.high << b) | carries;
+        }
+        return w.ll;
+}
+EXPORT_SYMBOL(__ashldi3);
diff --git a/arch/microblaze/lib/ashrdi3.c b/arch/microblaze/lib/ashrdi3.c
new file mode 100644
index 000000000000..c884a912b660
--- /dev/null
+++ b/arch/microblaze/lib/ashrdi3.c
@@ -0,0 +1,31 @@
+#include <linux/module.h>
+#include "libgcc.h"
+long long __ashrdi3(long long u, word_type b) /* returns u shifted right by b bits; the high half is shifted as a signed int */
+{
+        DWunion uu, w; /* uu: input viewed as two 32-bit halves; w: result being built */
+        word_type bm; /* 32 - b; its sign selects the case below */
+        if (b == 0) /* nothing to shift */
+                return u;
+        uu.ll = u;
+        bm = 32 - b;
+        if (bm <= 0) { /* b >= 32: the whole low word comes from the old high word */
+                /*
+                 * w.s.high = 1..1 or 0..0
+                 * NOTE(review): this relies on >> of the signed high word
+                 * replicating the sign bit - confirm for the compiler in use.
+                 */
+                w.s.high =
+                    uu.s.high >> 31;
+                w.s.low = uu.s.high >> -bm; /* -bm == b - 32 */
+        } else { /* b < 32 (b == 0 was handled above): bits cross the word boundary */
+                const unsigned int carries = (unsigned int) uu.s.high << bm; /* low b bits of the high word */
+                w.s.high = uu.s.high >> b; /* signed shift of the high word */
+                w.s.low = ((unsigned int) uu.s.low >> b) | carries;
+        }
+        return w.ll;
+}
+EXPORT_SYMBOL(__ashrdi3);
diff --git a/arch/microblaze/lib/divsi3.S b/arch/microblaze/lib/divsi3.S
new file mode 100644
index 000000000000..595b02d6e86b
--- /dev/null
+++ b/arch/microblaze/lib/divsi3.S
@@ -0,0 +1,73 @@
+#include <linux/linkage.h>
+/*
+* Divide operation for 32 bit integers.
+*       Input : Dividend in Reg r5
+*               Divisor in Reg r6
+*       Output: Result in Reg r3
+*
+*       Signed shift-and-subtract division. The sign of the result is
+*       taken from r5 xor r6, negative operands are negated, and the
+*       quotient is built one bit per loop iteration. A zero divisor or
+*       a zero dividend returns 0. r28-r31 are saved on the stack and
+*       reloaded before returning; r5 and r6 are overwritten.
+*/
+        .text
+        .globl  __divsi3
+        .type __divsi3, @function
+        .ent __divsi3
+__divsi3:
+        .frame  r1, 0, r15
+        addik   r1, r1, -16 /* make room for r28-r31 */
+        swi     r28, r1, 0
+        swi     r29, r1, 4
+        swi     r30, r1, 8
+        swi     r31, r1, 12
+        beqi    r6, div_by_zero /* div_by_zero - division error */
+        beqi    r5, result_is_zero /* result is zero */
+        bgeid   r5, r5_pos /* the xor in the delay slot runs on both paths */
+        xor     r28, r5, r6 /* get the sign of the result */
+        rsubi   r5, r5, 0 /* make r5 positive */
+r5_pos:
+        bgei    r6, r6_pos
+        rsubi   r6, r6, 0 /* make r6 positive */
+r6_pos:
+        addik   r30, r0, 0 /* clear mod */
+        addik   r3, r0, 0 /* clear div */
+        addik   r29, r0, 32 /* initialize the loop count */
+        /* first part try to find the first '1' in the r5 */
+div0:
+        blti    r5, div2 /* this traps r5 == 0x80000000 */
+div1:
+        add     r5, r5, r5 /* left shift logical r5 */
+        bgtid   r5, div1 /* keep shifting while r5 is still positive */
+        addik   r29, r29, -1 /* delay slot: one bit fewer left to process */
+div2:
+        /* left shift logical r5 get the '1' into the carry */
+        add     r5, r5, r5
+        addc    r30, r30, r30 /* move that bit into the mod register */
+        rsub    r31, r6, r30 /* try to subtract (r30 - r6) */
+        blti    r31, mod_too_small
+        /* move the r31 to mod since the result was positive */
+        or      r30, r0, r31
+        addik   r3, r3, 1
+mod_too_small:
+        addik   r29, r29, -1
+        beqi    r29, loop_end /* every remaining bit has been handled */
+        add     r3, r3, r3 /* shift in the '1' into div */
+        bri     div2 /* div2 */
+loop_end:
+        bgei    r28, return_here /* sign word is non-negative: keep r3 as is */
+        brid    return_here
+        rsubi   r3, r3, 0 /* negate the result */
+div_by_zero:
+result_is_zero:
+        or      r3, r0, r0 /* set result to 0 */
+return_here:
+/* reload the saved registers r28-r31 and return */
+        lwi     r28, r1, 0
+        lwi     r29, r1, 4
+        lwi     r30, r1, 8
+        lwi     r31, r1, 12
+        rtsd    r15, 8
+        addik   r1, r1, 16 /* delay slot: release the 16-byte save area */
+.size __divsi3, . - __divsi3
+.end __divsi3
diff --git a/arch/microblaze/lib/fastcopy.S b/arch/microblaze/lib/fastcopy.S
index fdc48bb065d8..62021d7e249e 100644
--- a/arch/microblaze/lib/fastcopy.S
+++ b/arch/microblaze/lib/fastcopy.S
@@ -29,6 +29,10 @@
 *      between mem locations with size of xfer spec'd in bytes
 */
+#ifdef __MICROBLAZEEL__
+#error Microblaze LE not support ASM optimized lib func. Disable OPT_LIB_ASM.
+#endif
 #include <linux/linkage.h>
        .text
        .globl  memcpy
diff --git a/arch/microblaze/lib/libgcc.h b/arch/microblaze/lib/libgcc.h
new file mode 100644
index 000000000000..05909d58e2fe
--- /dev/null
+++ b/arch/microblaze/lib/libgcc.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_LIBGCC_H
+#define __ASM_LIBGCC_H
+#include <asm/byteorder.h>
+typedef int word_type __attribute__ ((mode (__word__))); /* integer in the compiler's __word__ mode */
+#ifdef __BIG_ENDIAN /* high half is declared first */
+struct DWstruct {
+        int high, low;
+};
+#elif defined(__LITTLE_ENDIAN) /* low half is declared first */
+struct DWstruct {
+        int low, high;
+};
+#else /* neither byte-order macro is defined */
+#error I feel sick.
+#endif
+typedef union { /* a long long viewed either whole (ll) or as two int halves (s) */
+        struct DWstruct s;
+        long long ll;
+} DWunion;
+#endif /* __ASM_LIBGCC_H */
diff --git a/arch/microblaze/lib/lshrdi3.c b/arch/microblaze/lib/lshrdi3.c
new file mode 100644
index 000000000000..dcf8d6810b7c
--- /dev/null
+++ b/arch/microblaze/lib/lshrdi3.c
@@ -0,0 +1,29 @@
+#include <linux/module.h>
+#include "libgcc.h"
+long long __lshrdi3(long long u, word_type b) /* returns u shifted right by b bits, zero-filling the top */
+{
+        DWunion uu, w; /* uu: input viewed as two 32-bit halves; w: result being built */
+        word_type bm; /* 32 - b; its sign selects the case below */
+        if (b == 0) /* nothing to shift */
+                return u;
+        uu.ll = u;
+        bm = 32 - b;
+        if (bm <= 0) { /* b >= 32: the high word empties and its bits land in the low word */
+                w.s.high = 0;
+                w.s.low = (unsigned int) uu.s.high >> -bm; /* -bm == b - 32 */
+        } else { /* b < 32 (b == 0 was handled above): bits cross the word boundary */
+                const unsigned int carries = (unsigned int) uu.s.high << bm; /* low b bits of the high word */
+                w.s.high = (unsigned int) uu.s.high >> b; /* unsigned shift: zeros enter at the top */
+                w.s.low = ((unsigned int) uu.s.low >> b) | carries;
+        }
+        return w.ll;
+}
+EXPORT_SYMBOL(__lshrdi3);
diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c
index 014bac92bdff..52746e718dfa 100644
--- a/arch/microblaze/lib/memcpy.c
+++ b/arch/microblaze/lib/memcpy.c
@@ -33,17 +33,24 @@
 #include <asm/system.h>
 #ifdef __HAVE_ARCH_MEMCPY
+#ifndef CONFIG_OPT_LIB_FUNCTION
 void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 {
        const char *src = v_src;
        char *dst = v_dst;
-#ifndef CONFIG_OPT_LIB_FUNCTION
        /* Simple, byte oriented memcpy. */
        while (c--)
                *dst++ = *src++;
        return v_dst;
-#else
+}
+#else /* CONFIG_OPT_LIB_FUNCTION */
+void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
+{
+        const char *src = v_src;
+        char *dst = v_dst;
        /* The following code tries to optimize the copy by using unsigned
         * alignment. This will work fine if both source and destination are
         * aligned on the same boundary. However, if they are aligned on
@@ -56,8 +63,8 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
        if (likely(c >= 4)) {
                unsigned  value, buf_hold;
-                /* Align the dstination to a word boundry. */
+                /* Align the destination to a word boundary. */
-                /* This is done in an endian independant manner. */
+                /* This is done in an endian independent manner. */
                switch ((unsigned long)dst & 3) {
                case 1:
                        *dst++ = *src++;
@@ -73,7 +80,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
                i_dst = (void *)dst;
                /* Choose a copy scheme based on the source */
-                /* alignment relative to dstination. */
+                /* alignment relative to destination. */
                switch ((unsigned long)src & 3) {
                case 0x0:       /* Both byte offsets are aligned */
                        i_src  = (const void *)src;
@@ -86,7 +93,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
                case 0x1:       /* Unaligned - Off by 1 */
                        /* Word align the source */
                        i_src = (const void *) ((unsigned)src & ~3);
+#ifndef __MICROBLAZEEL__
                        /* Load the holding buffer */
                        buf_hold = *i_src++ << 8;
@@ -95,7 +102,16 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
                                *i_dst++ = buf_hold | value >> 24;
                                buf_hold = value << 8;
                        }
+#else
+                        /* Load the holding buffer */
+                        buf_hold = (*i_src++ & 0xFFFFFF00) >>8;
+                        for (; c >= 4; c -= 4) {
+                                value = *i_src++;
+                                *i_dst++ = buf_hold | ((value & 0xFF) << 24);
+                                buf_hold = (value & 0xFFFFFF00) >>8;
+                        }
+#endif
                        /* Realign the source */
                        src = (const void *)i_src;
                        src -= 3;
@@ -103,7 +119,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
                case 0x2:       /* Unaligned - Off by 2 */
                        /* Word align the source */
                        i_src = (const void *) ((unsigned)src & ~3);
+#ifndef __MICROBLAZEEL__
                        /* Load the holding buffer */
                        buf_hold = *i_src++ << 16;
@@ -112,7 +128,16 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
                                *i_dst++ = buf_hold | value >> 16;
                                buf_hold = value << 16;
                        }
+#else
+                        /* Load the holding buffer */
+                        buf_hold = (*i_src++ & 0xFFFF0000 )>>16;
+                        for (; c >= 4; c -= 4) {
+                                value = *i_src++;
+                                *i_dst++ = buf_hold | ((value & 0xFFFF)<<16);
+                                buf_hold = (value & 0xFFFF0000) >>16;
+                        }
+#endif
                        /* Realign the source */
                        src = (const void *)i_src;
                        src -= 2;
@@ -120,7 +145,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
                case 0x3:       /* Unaligned - Off by 3 */
                        /* Word align the source */
                        i_src = (const void *) ((unsigned)src & ~3);
+#ifndef __MICROBLAZEEL__
                        /* Load the holding buffer */
                        buf_hold = *i_src++ << 24;
@@ -129,7 +154,16 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
                                *i_dst++ = buf_hold | value >> 8;
                                buf_hold = value << 24;
                        }
+#else
+                        /* Load the holding buffer */
+                        buf_hold = (*i_src++ & 0xFF000000) >> 24;
+                        for (; c >= 4; c -= 4) {
+                                value = *i_src++;
+                                *i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
+                                buf_hold = (value & 0xFF000000) >> 24;
+                        }
+#endif
                        /* Realign the source */
                        src = (const void *)i_src;
                        src -= 1;
@@ -139,7 +173,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
        }
        /* Finish off any remaining bytes */
-        /* simple fast copy, ... unless a cache boundry is crossed */
+        /* simple fast copy, ... unless a cache boundary is crossed */
        switch (c) {
        case 3:
                *dst++ = *src++;
@@ -150,7 +184,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
        }
        return v_dst;
-#endif
 }
+#endif /* CONFIG_OPT_LIB_FUNCTION */
 EXPORT_SYMBOL(memcpy);
 #endif /* __HAVE_ARCH_MEMCPY */
diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c
index 0929198c5e68..2146c3752a80 100644
--- a/arch/microblaze/lib/memmove.c
+++ b/arch/microblaze/lib/memmove.c
@@ -31,16 +31,12 @@
 #include <linux/string.h>
 #ifdef __HAVE_ARCH_MEMMOVE
+#ifndef CONFIG_OPT_LIB_FUNCTION
 void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 {
        const char *src = v_src;
        char *dst = v_dst;
-#ifdef CONFIG_OPT_LIB_FUNCTION
-        const uint32_t *i_src;
-        uint32_t *i_dst;
-#endif
        if (!c)
                return v_dst;
@@ -48,7 +44,6 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
        if (v_dst <= v_src)
                return memcpy(v_dst, v_src, c);
-#ifndef CONFIG_OPT_LIB_FUNCTION
        /* copy backwards, from end to beginning */
        src += c;
        dst += c;
@@ -58,7 +53,22 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
                *--dst = *--src;
        return v_dst;
-#else
+}
+#else /* CONFIG_OPT_LIB_FUNCTION */
+void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
+{
+        const char *src = v_src;
+        char *dst = v_dst;
+        const uint32_t *i_src;
+        uint32_t *i_dst;
+        if (!c)
+                return v_dst;
+        /* Use memcpy when source is higher than dest */
+        if (v_dst <= v_src)
+                return memcpy(v_dst, v_src, c);
        /* The following code tries to optimize the copy by using unsigned
         * alignment. This will work fine if both source and destination are
         * aligned on the same boundary. However, if they are aligned on
@@ -73,8 +83,8 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
        if (c >= 4) {
                unsigned  value, buf_hold;
-                /* Align the destination to a word boundry. */
+                /* Align the destination to a word boundary. */
-                /* This is done in an endian independant manner. */
+                /* This is done in an endian independent manner. */
                switch ((unsigned long)dst & 3) {
                case 3:
@@ -104,7 +114,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
                case 0x1:       /* Unaligned - Off by 1 */
                        /* Word align the source */
                        i_src = (const void *) (((unsigned)src + 4) & ~3);
+#ifndef __MICROBLAZEEL__
                        /* Load the holding buffer */
                        buf_hold = *--i_src >> 24;
@@ -113,7 +123,16 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
                                *--i_dst = buf_hold << 8 | value;
                                buf_hold = value >> 24;
                        }
+#else
+                        /* Load the holding buffer */
+                        buf_hold = (*--i_src & 0xFF) << 24;
+                        for (; c >= 4; c -= 4) {
+                                value = *--i_src;
+                                *--i_dst = buf_hold | ((value & 0xFFFFFF00)>>8);
+                                buf_hold = (value  & 0xFF) << 24;
+                        }
+#endif
                        /* Realign the source */
                        src = (const void *)i_src;
                        src += 1;
@@ -121,7 +140,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
                case 0x2:       /* Unaligned - Off by 2 */
                        /* Word align the source */
                        i_src = (const void *) (((unsigned)src + 4) & ~3);
+#ifndef __MICROBLAZEEL__
                        /* Load the holding buffer */
                        buf_hold = *--i_src >> 16;
@@ -130,7 +149,16 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
                                *--i_dst = buf_hold << 16 | value;
                                buf_hold = value >> 16;
                        }
+#else
+                        /* Load the holding buffer */
+                        buf_hold = (*--i_src & 0xFFFF) << 16;
+                        for (; c >= 4; c -= 4) {
+                                value = *--i_src;
+                                *--i_dst = buf_hold | ((value & 0xFFFF0000)>>16);
+                                buf_hold = (value & 0xFFFF) << 16;
+                        }
+#endif
                        /* Realign the source */
                        src = (const void *)i_src;
                        src += 2;
@@ -138,7 +166,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
                case 0x3:       /* Unaligned - Off by 3 */
                        /* Word align the source */
                        i_src = (const void *) (((unsigned)src + 4) & ~3);
+#ifndef __MICROBLAZEEL__
                        /* Load the holding buffer */
                        buf_hold = *--i_src >> 8;
@@ -147,7 +175,16 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
                                *--i_dst = buf_hold << 24 | value;
                                buf_hold = value >> 8;
                        }
+#else
+                        /* Load the holding buffer */
+                        buf_hold = (*--i_src & 0xFFFFFF) << 8;
+                        for (; c >= 4; c -= 4) {
+                                value = *--i_src;
+                                *--i_dst = buf_hold | ((value & 0xFF000000)>> 24);
+                                buf_hold = (value & 0xFFFFFF) << 8;
+                        }
+#endif
                        /* Realign the source */
                        src = (const void *)i_src;
                        src += 3;
@@ -156,7 +193,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
                dst = (void *)i_dst;
        }
-        /* simple fast copy, ... unless a cache boundry is crossed */
+        /* simple fast copy, ... unless a cache boundary is crossed */
        /* Finish off any remaining bytes */
        switch (c) {
        case 4:
@@ -169,7 +206,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
                *--dst = *--src;
        }
        return v_dst;
-#endif
 }
+#endif /* CONFIG_OPT_LIB_FUNCTION */
 EXPORT_SYMBOL(memmove);
 #endif /* __HAVE_ARCH_MEMMOVE */
diff --git a/arch/microblaze/lib/memset.c b/arch/microblaze/lib/memset.c
index ecfb663e1fc1..ddf67939576d 100644
--- a/arch/microblaze/lib/memset.c
+++ b/arch/microblaze/lib/memset.c
@@ -31,17 +31,30 @@
 #include <linux/string.h>
 #ifdef __HAVE_ARCH_MEMSET
+#ifndef CONFIG_OPT_LIB_FUNCTION
+void *memset(void *v_src, int c, __kernel_size_t n)
+{
+        char *src = v_src;
+        /* Truncate c to 8 bits */
+        c = (c & 0xFF);
+        /* Simple, byte oriented memset or the rest of count. */
+        while (n--)
+                *src++ = c;
+        return v_src;
+}
+#else /* CONFIG_OPT_LIB_FUNCTION */
 void *memset(void *v_src, int c, __kernel_size_t n)
 {
        char *src = v_src;
-#ifdef CONFIG_OPT_LIB_FUNCTION
        uint32_t *i_src;
        uint32_t w32 = 0;
-#endif
        /* Truncate c to 8 bits */
        c = (c & 0xFF);
-#ifdef CONFIG_OPT_LIB_FUNCTION
        if (unlikely(c)) {
                /* Make a repeating word out of it */
                w32 = c;
@@ -51,7 +64,7 @@ void *memset(void *v_src, int c, __kernel_size_t n)
        if (likely(n >= 4)) {
                /* Align the destination to a word boundary */
-                /* This is done in an endian independant manner */
+                /* This is done in an endian independent manner */
                switch ((unsigned) src & 3) {
                case 1:
                        *src++ = c;
@@ -72,12 +85,13 @@ void *memset(void *v_src, int c, __kernel_size_t n)
                src  = (void *)i_src;
        }
-#endif
        /* Simple, byte oriented memset or the rest of count. */
        while (n--)
                *src++ = c;
        return v_src;
 }
+#endif /* CONFIG_OPT_LIB_FUNCTION */
 EXPORT_SYMBOL(memset);
 #endif /* __HAVE_ARCH_MEMSET */
diff --git a/arch/microblaze/lib/modsi3.S b/arch/microblaze/lib/modsi3.S
new file mode 100644
index 000000000000..84e0bee6e8c7
--- /dev/null
+++ b/arch/microblaze/lib/modsi3.S
@@ -0,0 +1,73 @@
+#include <linux/linkage.h>
+/*
+* modulo operation for 32 bit integers.
+*       Input : op1 in Reg r5
+*               op2 in Reg r6
+*       Output: op1 mod op2 in Reg r3
+*
+*       Signed shift-and-subtract remainder. Negative operands are
+*       negated, the remainder is accumulated in r3 (the quotient in
+*       r30 is discarded), and the result is negated when op1 was
+*       negative. A zero op1 or a zero op2 returns 0. r28-r31 are
+*       saved on the stack and reloaded before returning; r5 and r6
+*       are overwritten.
+*/
+        .text
+        .globl  __modsi3
+        .type __modsi3,  @function
+        .ent __modsi3
+__modsi3:
+        .frame  r1, 0, r15
+        addik   r1, r1, -16 /* make room for r28-r31 */
+        swi     r28, r1, 0
+        swi     r29, r1, 4
+        swi     r30, r1, 8
+        swi     r31, r1, 12
+        beqi    r6, div_by_zero /* div_by_zero division error */
+        beqi    r5, result_is_zero /* result is zero */
+        bgeid   r5, r5_pos /* the add in the delay slot runs on both paths */
+        /* get the sign of the result [ depends only on the first arg] */
+        add     r28, r5, r0
+        rsubi   r5, r5, 0        /* make r5 positive */
+r5_pos:
+        bgei    r6, r6_pos
+        rsubi   r6, r6, 0        /* make r6 positive */
+r6_pos:
+        addik   r3, r0, 0 /* clear mod */
+        addik   r30, r0, 0 /* clear div */
+        addik   r29, r0, 32 /* initialize the loop count */
+/*
+* first part try to find the first '1' in the r5
+* NOTE(review): unlike the other division routines in this directory there
+* is no "blti r5, div2" guard here. If r5 is 0x80000000 on entry the first
+* shift yields 0 and this loop does not appear to terminate - confirm.
+*/
+div1:
+        add     r5, r5, r5 /* left shift logical r5 */
+        bgeid   r5, div1 /* keep shifting while r5 is not negative */
+        addik   r29, r29, -1 /* delay slot: one bit fewer left to process */
+div2:
+        /* left shift logical r5 get the '1' into the carry */
+        add     r5, r5, r5
+        addc    r3, r3, r3 /* move that bit into the mod register */
+        rsub    r31, r6, r3 /* try to subtract (r3 - r6) */
+        blti    r31, mod_too_small
+        /* move the r31 to mod since the result was positive */
+        or      r3, r0, r31
+        addik   r30, r30, 1
+mod_too_small:
+        addik   r29, r29, -1
+        beqi    r29, loop_end /* every remaining bit has been handled */
+        add     r30, r30, r30 /* shift in the '1' into div */
+        bri     div2 /* div2 */
+loop_end:
+        bgei    r28, return_here /* op1 was not negative: keep r3 as is */
+        brid    return_here
+        rsubi   r3, r3, 0 /* negate the result */
+div_by_zero:
+result_is_zero:
+        or      r3, r0, r0 /* set result to 0 [both mod as well as div are 0] */
+return_here:
+/* reload the saved registers r28-r31 and return */
+        lwi     r28, r1, 0
+        lwi     r29, r1, 4
+        lwi     r30, r1, 8
+        lwi     r31, r1, 12
+        rtsd    r15, 8
+        addik   r1, r1, 16 /* delay slot: release the 16-byte save area */
+.size __modsi3,  . - __modsi3
+.end __modsi3
diff --git a/arch/microblaze/lib/muldi3.c b/arch/microblaze/lib/muldi3.c
new file mode 100644
index 000000000000..0585bccb7fad
--- /dev/null
+++ b/arch/microblaze/lib/muldi3.c
@@ -0,0 +1,61 @@
+#include <linux/module.h>
+#include "libgcc.h"
+#define DWtype long long /* double-word type handled by __muldi3 */
+#define UWtype unsigned long /* unsigned single word */
+#define UHWtype unsigned short /* unsigned half word */
+#define W_TYPE_SIZE 32 /* bits per word assumed by the helpers below */
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) /* 1 << 16: the half-word base */
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) /* low 16 bits of t */
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) /* t shifted down by 16 bits */
+/*
+ * If we still don't have umul_ppmm, define it using plain C.
+ * umul_ppmm(w1, w0, u, v) splits u and v into half words, forms the four
+ * partial products and stores the upper word of the product in w1 and
+ * the lower word in w0.
+ */
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)                                         \
+        do {                                                            \
+                UWtype __x0, __x1, __x2, __x3;                          \
+                UHWtype __ul, __vl, __uh, __vh;                         \
+                                                                        \
+                __ul = __ll_lowpart(u);                                 \
+                __uh = __ll_highpart(u);                                \
+                __vl = __ll_lowpart(v);                                 \
+                __vh = __ll_highpart(v);                                \
+                                                                        \
+                __x0 = (UWtype) __ul * __vl;                            \
+                __x1 = (UWtype) __ul * __vh;                            \
+                __x2 = (UWtype) __uh * __vl;                            \
+                __x3 = (UWtype) __uh * __vh;                            \
+                                                                        \
+                __x1 += __ll_highpart(__x0); /* this can't give carry */\
+                __x1 += __x2; /* but this indeed can */                 \
+                if (__x1 < __x2) /* did we get it? */                   \
+                __x3 += __ll_B; /* yes, add it in the proper pos */     \
+                                                                        \
+                (w1) = __x3 + __ll_highpart(__x1);                      \
+                (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
+        } while (0)
+#endif
+#if !defined(__umulsidi3) /* fallback: build the double-word product of u and v through umul_ppmm */
+#define __umulsidi3(u, v) ({                            \
+        DWunion __w;                                    \
+        umul_ppmm(__w.s.high, __w.s.low, u, v);         \
+        __w.ll;                                         \
+        })
+#endif
+DWtype __muldi3(DWtype u, DWtype v) /* returns u * v, keeping the low two words of the product */
+{
+        const DWunion uu = {.ll = u}; /* u viewed as high/low words */
+        const DWunion vv = {.ll = v}; /* v viewed as high/low words */
+        DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)}; /* full product of the two low words */
+        w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high /* cross terms only affect the high word */
+                + (UWtype) uu.s.high * (UWtype) vv.s.low);
+        return w.ll;
+}
+EXPORT_SYMBOL(__muldi3);
diff --git a/arch/microblaze/lib/mulsi3.S b/arch/microblaze/lib/mulsi3.S
new file mode 100644
index 000000000000..90bd7b93afe6
--- /dev/null
+++ b/arch/microblaze/lib/mulsi3.S
@@ -0,0 +1,46 @@
+#include <linux/linkage.h>
+/*
+ * Multiply operation for 32 bit integers.
+ *      Input : Operand1 in Reg r5
+ *              Operand2 in Reg r6
+ *      Output: Result [op1 * op2] in Reg r3
+ *
+ *      Shift-and-add multiply. The sign of the result is taken from
+ *      r5 xor r6 (kept in r4), negative operands are negated, and r5 is
+ *      added into r3 for every set bit of r6. r4 and r7 are used as
+ *      scratch without being saved; r5 and r6 are overwritten. The
+ *      stack pointer is not touched.
+ */
+        .text
+        .globl  __mulsi3
+        .type __mulsi3,  @function
+        .ent __mulsi3
+__mulsi3:
+        .frame  r1, 0, r15
+        add     r3, r0, r0 /* clear the accumulator */
+        beqi    r5, result_is_zero /* multiply by zero */
+        beqi    r6, result_is_zero /* multiply by zero */
+        bgeid   r5, r5_pos /* the xor in the delay slot runs on both paths */
+        xor     r4, r5, r6 /* get the sign of the result */
+        rsubi   r5, r5, 0 /* make r5 positive */
+r5_pos:
+        bgei    r6, r6_pos
+        rsubi   r6, r6, 0 /* make r6 positive */
+r6_pos:
+        bri     l1 /* enter the loop without shifting r5 first */
+l2:
+        add     r5, r5, r5 /* r5 <<= 1 for the next bit of r6 */
+l1:
+        srl     r6, r6 /* shift the lowest bit of r6 out into the carry */
+        addc    r7, r0, r0 /* r7 = that bit */
+        beqi    r7, l2 /* bit clear: nothing to add this round */
+        bneid   r6, l2 /* bit set: loop again while r6 still has bits */
+        add     r3, r3, r5 /* delay slot: accumulate r5 into the result */
+        blti    r4, negateresult /* operand signs differed */
+        rtsd    r15, 8
+        nop
+negateresult:
+        rtsd    r15, 8
+        rsub    r3, r3, r0 /* delay slot: r3 = 0 - r3 */
+result_is_zero:
+        rtsd    r15, 8
+        addi    r3, r0, 0 /* delay slot: return 0 */
+.size __mulsi3,  . - __mulsi3
+.end __mulsi3
diff --git a/arch/microblaze/lib/udivsi3.S b/arch/microblaze/lib/udivsi3.S
new file mode 100644
index 000000000000..64cf57e4bb85
--- /dev/null
+++ b/arch/microblaze/lib/udivsi3.S
@@ -0,0 +1,84 @@
+#include <linux/linkage.h>
+/*
+* Unsigned divide operation.
+*       Input : Dividend in Reg r5
+*               Divisor in Reg r6
+*       Output: Result in Reg r3
+*
+*       Returns 0 when either operand is zero. Equal operands, a
+*       divisor larger than the dividend, and a divisor with bit 31
+*       set are answered directly; everything else goes through a
+*       shift-and-subtract loop. r29-r31 are saved on the stack and
+*       reloaded before returning; r18 is used as scratch without
+*       being saved, and r5 is overwritten.
+*/
+        .text
+        .globl  __udivsi3
+        .type __udivsi3, @function
+        .ent __udivsi3
+__udivsi3:
+        .frame  r1, 0, r15
+        addik   r1, r1, -12 /* make room for r29-r31 */
+        swi     r29, r1, 0
+        swi     r30, r1, 4
+        swi     r31, r1, 8
+        beqi    r6, div_by_zero /* div_by_zero - division error */
+        beqid   r5, result_is_zero /* result is zero */
+        addik   r30, r0, 0 /* clear mod */
+        addik   r29, r0, 32 /* initialize the loop count */
+/* check if r6 and r5 are equal - if yes, return 1 */
+        rsub    r18, r5, r6
+        beqid   r18, return_here
+        addik   r3, r0, 1 /* delay slot: result for the equal case */
+/* check if (uns)r6 is greater than (uns)r5. in that case, just return 0 */
+        xor     r18, r5, r6 /* bit 31 of r18 is set when the top bits differ */
+        bgeid   r18, 16 /* top bits equal: skip 16 bytes ahead, presumably to the rsub below - TODO confirm */
+        add     r3, r0, r0 /* we would anyways clear r3 */
+        blti    r6, return_here /* r6[bit 31 = 1] hence is greater */
+        bri     checkr6
+        rsub    r18, r6, r5 /* microblazecmp */
+        blti    r18, return_here /* r5 - r6 is negative: quotient is 0 */
+/* if r6 [bit 31] is set, then return result as 1 */
+checkr6:
+        bgti    r6, div0
+        brid    return_here
+        addik   r3, r0, 1 /* delay slot: quotient is 1 */
+/* first part try to find the first '1' in the r5 */
+div0:
+        blti    r5, div2 /* bit 31 of r5 is already set: no pre-shift needed */
+div1:
+        add     r5, r5, r5 /* left shift logical r5 */
+        bgtid   r5, div1 /* keep shifting while r5 is still positive */
+        addik   r29, r29, -1 /* delay slot: one bit fewer left to process */
+div2:
+/* left shift logical r5 get the '1' into the carry */
+        add     r5, r5, r5
+        addc    r30, r30, r30 /* move that bit into the mod register */
+        rsub    r31, r6, r30 /* try to subtract (r30 - r6) */
+        blti    r31, mod_too_small
+/* move the r31 to mod since the result was positive */
+        or      r30, r0, r31
+        addik   r3, r3, 1
+mod_too_small:
+        addik   r29, r29, -1
+        beqi    r29, loop_end /* every remaining bit has been handled */
+        add     r3, r3, r3 /* shift in the '1' into div */
+        bri     div2 /* div2 */
+loop_end:
+        bri     return_here
+div_by_zero:
+result_is_zero:
+        or      r3, r0, r0 /* set result to 0 */
+return_here:
+/* reload the saved registers r29-r31 and return */
+        lwi     r29, r1, 0
+        lwi     r30, r1, 4
+        lwi     r31, r1, 8
+        rtsd    r15, 8
+        addik   r1, r1, 12 /* delay slot: release the 12-byte save area */
+.size __udivsi3, . - __udivsi3
+.end __udivsi3
diff --git a/arch/microblaze/lib/umodsi3.S b/arch/microblaze/lib/umodsi3.S
new file mode 100644
index 000000000000..17d16bafae58
--- /dev/null
+++ b/arch/microblaze/lib/umodsi3.S
@@ -0,0 +1,86 @@
+#include <linux/linkage.h>
+/*
+ * Unsigned modulo operation for 32 bit integers.
+ *      Input : op1 in Reg r5
+ *              op2 in Reg r6
+ *      Output: op1 mod op2 in Reg r3
+ *
+ *      Returns 0 when either operand is zero. Equal operands, an op2
+ *      larger than op1, and an op2 with bit 31 set are answered
+ *      directly; everything else goes through a shift-and-subtract
+ *      loop that accumulates the remainder in r3 (the quotient in r30
+ *      is discarded). r29-r31 are saved on the stack and reloaded
+ *      before returning; r18 is used as scratch without being saved,
+ *      and r5 and r6 may be overwritten.
+ */
+        .text
+        .globl  __umodsi3
+        .type __umodsi3, @function
+        .ent __umodsi3
+__umodsi3:
+        .frame  r1, 0, r15
+        addik   r1, r1, -12 /* make room for r29-r31 */
+        swi     r29, r1, 0
+        swi     r30, r1, 4
+        swi     r31, r1, 8
+        beqi    r6, div_by_zero /* div_by_zero - division error */
+        beqid   r5, result_is_zero /* result is zero */
+        addik   r3, r0, 0 /* clear div */
+        addik   r30, r0, 0 /* clear mod */
+        addik   r29, r0, 32 /* initialize the loop count */
+/* check if r6 and r5 are equal - if yes, return 0 */
+        rsub    r18, r5, r6
+        beqi    r18, return_here /* r3 is still 0 here */
+/* check if (uns)r6 is greater than (uns)r5. in that case, just return r5 */
+        xor     r18, r5, r6 /* bit 31 of r18 is set when the top bits differ */
+        bgeid   r18, 16 /* top bits equal: skip 16 bytes ahead, presumably to the rsub below - TODO confirm */
+        addik   r3, r5, 0 /* delay slot: preload the result with r5 */
+        blti    r6, return_here /* r6 has bit 31 set and r5 does not: r6 is greater */
+        bri     $lcheckr6
+        rsub    r18, r5, r6 /* microblazecmp */
+        bgti    r18, return_here /* r6 - r5 is positive: remainder is r5 */
+/* if r6 [bit 31] is set, then return result as r5-r6 */
+$lcheckr6:
+        bgtid   r6, div0
+        addik   r3, r0, 0 /* delay slot: clear the remainder accumulator */
+        addik   r18, r0, 0x7fffffff /* mask that drops bit 31 */
+        and     r5, r5, r18
+        and     r6, r6, r18
+        brid    return_here
+        rsub    r3, r6, r5 /* delay slot: r3 = r5 - r6 */
+/* first part: try to find the first '1' in the r5 */
+div0:
+        blti    r5, div2 /* bit 31 of r5 is already set: no pre-shift needed */
+div1:
+        add     r5, r5, r5 /* left shift logical r5 */
+        bgeid   r5, div1 /* keep shifting while r5 is not negative */
+        addik   r29, r29, -1 /* delay slot: one bit fewer left to process */
+div2:
+        /* left shift logical r5 get the '1' into the carry */
+        add     r5, r5, r5
+        addc    r3, r3, r3 /* move that bit into the mod register */
+        rsub    r31, r6, r3 /* try to subtract (r3 - r6) */
+        blti    r31, mod_too_small
+        /* move the r31 to mod since the result was positive */
+        or      r3, r0, r31
+        addik   r30, r30, 1
+mod_too_small:
+        addik   r29, r29, -1
+        beqi    r29, loop_end /* every remaining bit has been handled */
+        add     r30, r30, r30 /* shift in the '1' into div */
+        bri     div2 /* div2 */
+loop_end:
+        bri     return_here
+div_by_zero:
+result_is_zero:
+        or      r3, r0, r0 /* set result to 0 */
+return_here:
+/* reload the saved registers r29-r31 and return */
+        lwi     r29, r1, 0
+        lwi     r30, r1, 4
+        lwi     r31, r1, 8
+        rtsd    r15, 8
+        addik   r1, r1, 12 /* delay slot: release the 12-byte save area */
+.size __umodsi3, . - __umodsi3
+.end __umodsi3
author	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit	c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree	ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/microblaze/lib
parent	ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent	6a00f206debf8a5c8899055726ad127dbeeed098 (diff)