author    Chris Metcalf <cmetcalf@tilera.com>  2011-05-04 14:38:26 -0400
committer Chris Metcalf <cmetcalf@tilera.com>  2011-05-12 15:52:12 -0400
commit    18aecc2b645bbb07851b196452a2af314222069b
tree      959f765f69af01046c6e26db12b45c3390799d3e /arch/tile/lib
parent    be84cb43833ee40a42e08f5425d20310f16229c7
arch/tile: finish enabling support for TILE-Gx 64-bit chip
This support was partially present in the existing code (look for
"__tilegx__" ifdefs) but with this change you can build a working kernel
using the TILE-Gx toolchain and ARCH=tilegx.

Most of these files are new, generally adding a foo_64.c file where
previously there was just a foo_32.c file.

The ARCH=tilegx directive redirects to arch/tile, not arch/tilegx, using
the existing SRCARCH mechanism in the top-level Makefile.

Changes to existing files:

- <asm/bitops.h> and <asm/bitops_32.h> changed to factor the include of
  <asm-generic/bitops/non-atomic.h> in the common header.

- <asm/compat.h> and arch/tile/kernel/compat.c changed to remove the
  "const" markers I had put on compat_sys_execve() when trying to match
  some recent similar changes to the non-compat execve.  It turns out the
  compat version wasn't "upgraded" to use const.

- <asm/opcode-tile_64.h> and <asm/opcode_constants_64.h> were previously
  included accidentally, with the 32-bit contents.  Now they have the
  proper 64-bit contents.

Finally, I had to hack the existing hacky drivers/input/input-compat.h
to add yet another "#ifdef" for INPUT_COMPAT_TEST (same as x86_64).

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> [drivers/input]
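For reference, a cross build with this change in place would look something
like the following; the tilegx-linux- toolchain prefix and the use of the
default configuration are illustrative assumptions, not something this
commit specifies:

    make ARCH=tilegx CROSS_COMPILE=tilegx-linux- defconfig
    make ARCH=tilegx CROSS_COMPILE=tilegx-linux-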
Diffstat (limited to 'arch/tile/lib')
-rw-r--r--  arch/tile/lib/memchr_64.c        71
-rw-r--r--  arch/tile/lib/memcpy_64.c       220
-rw-r--r--  arch/tile/lib/memcpy_user_64.c   86
-rw-r--r--  arch/tile/lib/memset_64.c       145
-rw-r--r--  arch/tile/lib/spinlock_64.c     104
-rw-r--r--  arch/tile/lib/strchr_64.c        67
-rw-r--r--  arch/tile/lib/strlen_64.c        38
-rw-r--r--  arch/tile/lib/usercopy_64.S     196
8 files changed, 927 insertions(+), 0 deletions(-)
diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c
new file mode 100644
index 00000000000..84fdc8d8e73
--- /dev/null
+++ b/arch/tile/lib/memchr_64.c
@@ -0,0 +1,71 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>

void *memchr(const void *s, int c, size_t n)
{
        const uint64_t *last_word_ptr;
        const uint64_t *p;
        const char *last_byte_ptr;
        uintptr_t s_int;
        uint64_t goal, before_mask, v, bits;
        char *ret;

        if (__builtin_expect(n == 0, 0)) {
                /* Don't dereference any memory if the array is empty. */
                return NULL;
        }

        /* Get an aligned pointer. */
        s_int = (uintptr_t) s;
        p = (const uint64_t *)(s_int & -8);

        /* Create eight copies of the byte for which we are looking. */
        goal = 0x0101010101010101ULL * (uint8_t) c;

        /* Read the first word, but munge it so that bytes before the array
         * will not match goal.
         *
         * Note that this shift count expression works because we know
         * shift counts are taken mod 64.
         */
        before_mask = (1ULL << (s_int << 3)) - 1;
        v = (*p | before_mask) ^ (goal & before_mask);

        /* Compute the address of the last byte. */
        last_byte_ptr = (const char *)s + n - 1;

        /* Compute the address of the word containing the last byte. */
        last_word_ptr = (const uint64_t *)((uintptr_t) last_byte_ptr & -8);

        while ((bits = __insn_v1cmpeq(v, goal)) == 0) {
                if (__builtin_expect(p == last_word_ptr, 0)) {
                        /* We already read the last word in the array,
                         * so give up.
                         */
                        return NULL;
                }
                v = *++p;
        }

        /* We found a match, but it might be in a byte past the end
         * of the array.
         */
        ret = ((char *)p) + (__insn_ctz(bits) >> 3);
        return (ret <= last_byte_ptr) ? ret : NULL;
}
EXPORT_SYMBOL(memchr);
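The loop above leans on two tilegx-specific instructions: __insn_v1cmpeq
compares the eight bytes of a word against the replicated goal byte, and
__insn_ctz locates the first match.  As a rough, portable illustration of the
same idea (not part of this commit; it assumes a little-endian machine where
byte 0 of a loaded word comes from the lowest address), the classic SWAR
trick can stand in for those intrinsics:

#include <stdint.h>

/* Return the index (0..7) of the first byte of 'word' equal to 'c', or 8 if
 * no byte matches.  Byte 0 is the least-significant byte of the word.
 */
static inline unsigned first_matching_byte(uint64_t word, uint8_t c)
{
        uint64_t goal = 0x0101010101010101ULL * c;      /* replicate the byte */
        uint64_t x = word ^ goal;                       /* matching bytes become 0 */
        /* Set the top bit of each zero byte of x; bits below the first zero
         * byte are never set, so the lowest set bit marks the first match. */
        uint64_t hits = (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
        return hits ? (unsigned)(__builtin_ctzll(hits) >> 3) : 8;
}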
diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c
new file mode 100644
index 00000000000..3fab9a6a2bb
--- /dev/null
+++ b/arch/tile/lib/memcpy_64.c
@@ -0,0 +1,220 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
#define __memcpy memcpy
/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */

/* Must be 8 bytes in size. */
#define word_t uint64_t

#if CHIP_L2_LINE_SIZE() != 64 && CHIP_L2_LINE_SIZE() != 128
#error "Assumes 64 or 128 byte line size"
#endif

/* How many cache lines ahead should we prefetch? */
#define PREFETCH_LINES_AHEAD 3

/*
 * Provide "base versions" of load and store for the normal code path.
 * The kernel provides other versions for userspace copies.
 */
#define ST(p, v) (*(p) = (v))
#define LD(p) (*(p))

#ifndef USERCOPY_FUNC
#define ST1 ST
#define ST2 ST
#define ST4 ST
#define ST8 ST
#define LD1 LD
#define LD2 LD
#define LD4 LD
#define LD8 LD
#define RETVAL dstv
void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n)
#else
/*
 * Special kernel version will provide implementation of the LDn/STn
 * macros to return a count of uncopied bytes due to mm fault.
 */
#define RETVAL 0
int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
#endif
{
        char *__restrict dst1 = (char *)dstv;
        const char *__restrict src1 = (const char *)srcv;
        const char *__restrict src1_end;
        const char *__restrict prefetch;
        word_t *__restrict dst8;    /* 8-byte pointer to destination memory. */
        word_t final; /* Final bytes to write to trailing word, if any */
        long i;

        if (n < 16) {
                for (; n; n--)
                        ST1(dst1++, LD1(src1++));
                return RETVAL;
        }

        /*
         * Locate the end of source memory we will copy.  Don't
         * prefetch past this.
         */
        src1_end = src1 + n - 1;

        /* Prefetch ahead a few cache lines, but not past the end. */
        prefetch = src1;
        for (i = 0; i < PREFETCH_LINES_AHEAD; i++) {
                __insn_prefetch(prefetch);
                prefetch += CHIP_L2_LINE_SIZE();
                prefetch = (prefetch < src1_end) ? prefetch : src1;
        }

        /* Copy bytes until dst is word-aligned. */
        for (; (uintptr_t)dst1 & (sizeof(word_t) - 1); n--)
                ST1(dst1++, LD1(src1++));

        /* 8-byte pointer to destination memory. */
        dst8 = (word_t *)dst1;

        if (__builtin_expect((uintptr_t)src1 & (sizeof(word_t) - 1), 0)) {
                /*
                 * Misaligned copy.  Copy 8 bytes at a time, but don't
                 * bother with other fanciness.
                 *
                 * TODO: Consider prefetching and using wh64 as well.
                 */

                /* Create an aligned src8. */
                const word_t *__restrict src8 =
                        (const word_t *)((uintptr_t)src1 & -sizeof(word_t));
                word_t b;

                word_t a = LD8(src8++);
                for (; n >= sizeof(word_t); n -= sizeof(word_t)) {
                        b = LD8(src8++);
                        a = __insn_dblalign(a, b, src1);
                        ST8(dst8++, a);
                        a = b;
                }

                if (n == 0)
                        return RETVAL;

                b = ((const char *)src8 <= src1_end) ? *src8 : 0;

                /*
                 * Final source bytes to write to trailing partial
                 * word, if any.
                 */
                final = __insn_dblalign(a, b, src1);
        } else {
                /* Aligned copy. */

                const word_t *__restrict src8 = (const word_t *)src1;

                /* src8 and dst8 are both word-aligned. */
                if (n >= CHIP_L2_LINE_SIZE()) {
                        /* Copy until 'dst' is cache-line-aligned. */
                        for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1);
                             n -= sizeof(word_t))
                                ST8(dst8++, LD8(src8++));

                        for (; n >= CHIP_L2_LINE_SIZE(); ) {
                                __insn_wh64(dst8);

                                /*
                                 * Prefetch and advance to next line
                                 * to prefetch, but don't go past the end
                                 */
                                __insn_prefetch(prefetch);
                                prefetch += CHIP_L2_LINE_SIZE();
                                prefetch = (prefetch < src1_end) ? prefetch :
                                        (const char *)src8;

                                /*
                                 * Copy an entire cache line.  Manually
                                 * unrolled to avoid idiosyncrasies of
                                 * compiler unrolling.
                                 */
#define COPY_WORD(offset) ({ ST8(dst8+offset, LD8(src8+offset)); n -= 8; })
                                COPY_WORD(0);
                                COPY_WORD(1);
                                COPY_WORD(2);
                                COPY_WORD(3);
                                COPY_WORD(4);
                                COPY_WORD(5);
                                COPY_WORD(6);
                                COPY_WORD(7);
#if CHIP_L2_LINE_SIZE() == 128
                                COPY_WORD(8);
                                COPY_WORD(9);
                                COPY_WORD(10);
                                COPY_WORD(11);
                                COPY_WORD(12);
                                COPY_WORD(13);
                                COPY_WORD(14);
                                COPY_WORD(15);
#elif CHIP_L2_LINE_SIZE() != 64
# error Fix code that assumes particular L2 cache line sizes
#endif

                                dst8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
                                src8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
                        }
                }

                for (; n >= sizeof(word_t); n -= sizeof(word_t))
                        ST8(dst8++, LD8(src8++));

                if (__builtin_expect(n == 0, 1))
                        return RETVAL;

                final = LD8(src8);
        }

        /* n != 0 if we get here.  Write out any trailing bytes. */
        dst1 = (char *)dst8;
        if (n & 4) {
                ST4((uint32_t *)dst1, final);
                dst1 += 4;
                final >>= 32;
                n &= 3;
        }
        if (n & 2) {
                ST2((uint16_t *)dst1, final);
                dst1 += 2;
                final >>= 16;
                n &= 1;
        }
        if (n)
                ST1((uint8_t *)dst1, final);

        return RETVAL;
}


#ifdef USERCOPY_FUNC
#undef ST1
#undef ST2
#undef ST4
#undef ST8
#undef LD1
#undef LD2
#undef LD4
#undef LD8
#undef USERCOPY_FUNC
#endif
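For readers unfamiliar with __insn_dblalign: in the misaligned loop above it
extracts the eight bytes that start at the (unaligned) source address from
the two aligned words already loaded.  A plain-C sketch of the effect being
relied on (not part of this commit; it assumes little-endian byte order and
a nonzero misalignment, both of which hold on that path) looks like this:

#include <stdint.h>

/* 'a' is the aligned word covering address p, 'b' is the word after it;
 * shift is nonzero because this is only reached when p is misaligned.
 */
static inline uint64_t unaligned_extract(uint64_t a, uint64_t b, const void *p)
{
        unsigned shift = 8 * ((uintptr_t)p & 7);        /* byte offset within the word */
        return (a >> shift) | (b << (64 - shift));      /* 8 bytes starting at p */
}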
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
new file mode 100644
index 00000000000..4763b3aff1c
--- /dev/null
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -0,0 +1,86 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 *
 * Do memcpy(), but trap and return "n" when a load or store faults.
 *
 * Note: this idiom only works when memcpy() compiles to a leaf function.
 * If "sp" is updated during memcpy, the "jrp lr" will be incorrect.
 *
 * Also note that we are capturing "n" from the containing scope here.
 */

#define _ST(p, inst, v) \
        ({ \
                asm("1: " #inst " %0, %1;" \
                    ".pushsection .coldtext.memcpy,\"ax\";" \
                    "2: { move r0, %2; jrp lr };" \
                    ".section __ex_table,\"a\";" \
                    ".quad 1b, 2b;" \
                    ".popsection" \
                    : "=m" (*(p)) : "r" (v), "r" (n)); \
        })

#define _LD(p, inst) \
        ({ \
                unsigned long __v; \
                asm("1: " #inst " %0, %1;" \
                    ".pushsection .coldtext.memcpy,\"ax\";" \
                    "2: { move r0, %2; jrp lr };" \
                    ".section __ex_table,\"a\";" \
                    ".quad 1b, 2b;" \
                    ".popsection" \
                    : "=r" (__v) : "m" (*(p)), "r" (n)); \
                __v; \
        })

#define USERCOPY_FUNC __copy_to_user_inatomic
#define ST1(p, v) _ST((p), st1, (v))
#define ST2(p, v) _ST((p), st2, (v))
#define ST4(p, v) _ST((p), st4, (v))
#define ST8(p, v) _ST((p), st, (v))
#define LD1 LD
#define LD2 LD
#define LD4 LD
#define LD8 LD
#include "memcpy_64.c"

#define USERCOPY_FUNC __copy_from_user_inatomic
#define ST1 ST
#define ST2 ST
#define ST4 ST
#define ST8 ST
#define LD1(p) _LD((p), ld1u)
#define LD2(p) _LD((p), ld2u)
#define LD4(p) _LD((p), ld4u)
#define LD8(p) _LD((p), ld)
#include "memcpy_64.c"

#define USERCOPY_FUNC __copy_in_user_inatomic
#define ST1(p, v) _ST((p), st1, (v))
#define ST2(p, v) _ST((p), st2, (v))
#define ST4(p, v) _ST((p), st4, (v))
#define ST8(p, v) _ST((p), st, (v))
#define LD1(p) _LD((p), ld1u)
#define LD2(p) _LD((p), ld2u)
#define LD4(p) _LD((p), ld4u)
#define LD8(p) _LD((p), ld)
#include "memcpy_64.c"

unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
                                       unsigned long n)
{
        unsigned long rc = __copy_from_user_inatomic(to, from, n);
        if (unlikely(rc))
                memset(to + n - rc, 0, rc);
        return rc;
}
diff --git a/arch/tile/lib/memset_64.c b/arch/tile/lib/memset_64.c
new file mode 100644
index 00000000000..3873085711d
--- /dev/null
+++ b/arch/tile/lib/memset_64.c
@@ -0,0 +1,145 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <arch/chip.h>

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>

#undef memset

void *memset(void *s, int c, size_t n)
{
        uint64_t *out64;
        int n64, to_align64;
        uint64_t v64;
        uint8_t *out8 = s;

        /* Experimentation shows that a trivial tight loop is a win up until
         * around a size of 20, where writing a word at a time starts to win.
         */
#define BYTE_CUTOFF 20

#if BYTE_CUTOFF < 7
        /* This must be at least this big, or some code later
         * on doesn't work.
         */
#error "BYTE_CUTOFF is too small"
#endif

        if (n < BYTE_CUTOFF) {
                /* Strangely, this turns out to be the tightest way to
                 * write this loop.
                 */
                if (n != 0) {
                        do {
                                /* Strangely, combining these into one line
                                 * performs worse.
                                 */
                                *out8 = c;
                                out8++;
                        } while (--n != 0);
                }

                return s;
        }

        /* Align 'out8'. We know n >= 7 so this won't write past the end. */
        while (((uintptr_t) out8 & 7) != 0) {
                *out8++ = c;
                --n;
        }

        /* Align 'n'. */
        while (n & 7)
                out8[--n] = c;

        out64 = (uint64_t *) out8;
        n64 = n >> 3;

        /* Tile input byte out to 64 bits. */
        /* KLUDGE */
        v64 = 0x0101010101010101ULL * (uint8_t)c;

        /* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)

        /* Determine how many words we need to emit before the 'out64'
         * pointer becomes aligned modulo the cache line size.
         */
        to_align64 = (-((uintptr_t)out64 >> 3)) &
                (CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1);

        /* Only bother aligning and using wh64 if there is at least
         * one full cache line to process.  This check also prevents
         * overrunning the end of the buffer with alignment words.
         */
        if (to_align64 <= n64 - CACHE_LINE_SIZE_IN_DOUBLEWORDS) {
                int lines_left;

                /* Align out64 mod the cache line size so we can use wh64. */
                n64 -= to_align64;
                for (; to_align64 != 0; to_align64--) {
                        *out64 = v64;
                        out64++;
                }

                /* Use unsigned divide to turn this into a right shift. */
                lines_left = (unsigned)n64 / CACHE_LINE_SIZE_IN_DOUBLEWORDS;

                do {
                        /* Only wh64 a few lines at a time, so we don't
                         * exceed the maximum number of victim lines.
                         */
                        int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())
                                 ? lines_left
                                 : CHIP_MAX_OUTSTANDING_VICTIMS());
                        uint64_t *wh = out64;
                        int i = x;
                        int j;

                        lines_left -= x;

                        do {
                                __insn_wh64(wh);
                                wh += CACHE_LINE_SIZE_IN_DOUBLEWORDS;
                        } while (--i);

                        for (j = x * (CACHE_LINE_SIZE_IN_DOUBLEWORDS / 4);
                             j != 0; j--) {
                                *out64++ = v64;
                                *out64++ = v64;
                                *out64++ = v64;
                                *out64++ = v64;
                        }
                } while (lines_left != 0);

                /* We processed all full lines above, so only this many
                 * words remain to be processed.
                 */
                n64 &= CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1;
        }

        /* Now handle any leftover values. */
        if (n64 != 0) {
                do {
                        *out64 = v64;
                        out64++;
                } while (--n64 != 0);
        }

        return s;
}
EXPORT_SYMBOL(memset);
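As a worked example of the alignment arithmetic above: with a 64-byte L2
line, CACHE_LINE_SIZE_IN_DOUBLEWORDS is 8, so if out64 starts three
doublewords into a line, to_align64 = (-3) & 7 = 5, and five single-word
stores bring the pointer up to the next line boundary.  From there each
batched iteration issues at most CHIP_MAX_OUTSTANDING_VICTIMS() wh64
operations and then the stores that fill those same lines.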
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
new file mode 100644
index 00000000000..d6fb9581e98
--- /dev/null
+++ b/arch/tile/lib/spinlock_64.c
@@ -0,0 +1,104 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/spinlock.h>
#include <linux/module.h>
#include <asm/processor.h>

#include "spinlock_common.h"

/*
 * Read the spinlock value without allocating in our cache and without
 * causing an invalidation to another cpu with a copy of the cacheline.
 * This is important when we are spinning waiting for the lock.
 */
static inline u32 arch_spin_read_noalloc(void *lock)
{
        return atomic_cmpxchg((atomic_t *)lock, -1, -1);
}

/*
 * Wait until the high bits (current) match my ticket.
 * If we notice the overflow bit set on entry, we clear it.
 */
void arch_spin_lock_slow(arch_spinlock_t *lock, u32 my_ticket)
{
        if (unlikely(my_ticket & __ARCH_SPIN_NEXT_OVERFLOW)) {
                __insn_fetchand4(&lock->lock, ~__ARCH_SPIN_NEXT_OVERFLOW);
                my_ticket &= ~__ARCH_SPIN_NEXT_OVERFLOW;
        }

        for (;;) {
                u32 val = arch_spin_read_noalloc(lock);
                u32 delta = my_ticket - arch_spin_current(val);
                if (delta == 0)
                        return;
                relax((128 / CYCLES_PER_RELAX_LOOP) * delta);
        }
}
EXPORT_SYMBOL(arch_spin_lock_slow);

/*
 * Check the lock to see if it is plausible, and try to get it with cmpxchg().
 */
int arch_spin_trylock(arch_spinlock_t *lock)
{
        u32 val = arch_spin_read_noalloc(lock);
        if (unlikely(arch_spin_current(val) != arch_spin_next(val)))
                return 0;
        return cmpxchg(&lock->lock, val, (val + 1) & ~__ARCH_SPIN_NEXT_OVERFLOW)
                == val;
}
EXPORT_SYMBOL(arch_spin_trylock);

void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
        u32 iterations = 0;
        while (arch_spin_is_locked(lock))
                delay_backoff(iterations++);
}
EXPORT_SYMBOL(arch_spin_unlock_wait);

/*
 * If the read lock fails due to a writer, we retry periodically
 * until the value is positive and we write our incremented reader count.
 */
void __read_lock_failed(arch_rwlock_t *rw)
{
        u32 val;
        int iterations = 0;
        do {
                delay_backoff(iterations++);
                val = __insn_fetchaddgez4(&rw->lock, 1);
        } while (unlikely(arch_write_val_locked(val)));
}
EXPORT_SYMBOL(__read_lock_failed);

/*
 * If we failed because there were readers, clear the "writer" bit
 * so we don't block additional readers.  Otherwise, there was another
 * writer anyway, so our "fetchor" made no difference.  Then wait,
 * issuing periodic fetchor instructions, till we get the lock.
 */
void __write_lock_failed(arch_rwlock_t *rw, u32 val)
{
        int iterations = 0;
        do {
                if (!arch_write_val_locked(val))
                        val = __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);
                delay_backoff(iterations++);
                val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
        } while (val != 0);
}
EXPORT_SYMBOL(__write_lock_failed);
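arch_spin_lock_slow() above is the contended half of a ticket lock: the
caller already holds a ticket and spins until the "current" field catches up
to it.  The tilegx field layout lives in the spinlock header rather than in
this diff; as a generic illustration of the ordering the slow path is waiting
on (not the tilegx encoding), a ticket lock can be sketched with C11 atomics:

#include <stdatomic.h>

struct ticket_lock {
        atomic_uint next;       /* next ticket to hand out */
        atomic_uint current;    /* ticket currently being served */
};

static void ticket_lock(struct ticket_lock *l)
{
        unsigned int me = atomic_fetch_add(&l->next, 1);        /* take a ticket */
        while (atomic_load(&l->current) != me)
                ;                                               /* spin until served */
}

static void ticket_unlock(struct ticket_lock *l)
{
        atomic_fetch_add(&l->current, 1);                       /* serve the next waiter */
}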
diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c
new file mode 100644
index 00000000000..617a9273aaa
--- /dev/null
+++ b/arch/tile/lib/strchr_64.c
@@ -0,0 +1,67 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>

#undef strchr

char *strchr(const char *s, int c)
{
        int z, g;

        /* Get an aligned pointer. */
        const uintptr_t s_int = (uintptr_t) s;
        const uint64_t *p = (const uint64_t *)(s_int & -8);

        /* Create eight copies of the byte for which we are looking. */
        const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;

        /* Read the first aligned word, but force bytes before the string to
         * match neither zero nor goal (we make sure the high bit of each
         * byte is 1, and the low 7 bits are all the opposite of the goal
         * byte).
         *
         * Note that this shift count expression works because we know shift
         * counts are taken mod 64.
         */
        const uint64_t before_mask = (1ULL << (s_int << 3)) - 1;
        uint64_t v = (*p | before_mask) ^
                (goal & __insn_v1shrsi(before_mask, 1));

        uint64_t zero_matches, goal_matches;
        while (1) {
                /* Look for a terminating '\0'. */
                zero_matches = __insn_v1cmpeqi(v, 0);

                /* Look for the goal byte. */
                goal_matches = __insn_v1cmpeq(v, goal);

                if (__builtin_expect((zero_matches | goal_matches) != 0, 0))
                        break;

                v = *++p;
        }

        z = __insn_ctz(zero_matches);
        g = __insn_ctz(goal_matches);

        /* If we found c before '\0' we got a match.  Note that if c == '\0'
         * then g == z, and we correctly return the address of the '\0'
         * rather than NULL.
         */
        return (g <= z) ? ((char *)p) + (g >> 3) : NULL;
}
EXPORT_SYMBOL(strchr);
diff --git a/arch/tile/lib/strlen_64.c b/arch/tile/lib/strlen_64.c
new file mode 100644
index 00000000000..1c92d46202a
--- /dev/null
+++ b/arch/tile/lib/strlen_64.c
@@ -0,0 +1,38 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>

#undef strlen

size_t strlen(const char *s)
{
        /* Get an aligned pointer. */
        const uintptr_t s_int = (uintptr_t) s;
        const uint64_t *p = (const uint64_t *)(s_int & -8);

        /* Read the first word, but force bytes before the string to be nonzero.
         * This expression works because we know shift counts are taken mod 64.
         */
        uint64_t v = *p | ((1ULL << (s_int << 3)) - 1);

        uint64_t bits;
        while ((bits = __insn_v1cmpeqi(v, 0)) == 0)
                v = *++p;

        return ((const char *)p) + (__insn_ctz(bits) >> 3) - s;
}
EXPORT_SYMBOL(strlen);
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
new file mode 100644
index 00000000000..2ff44f87b78
--- /dev/null
+++ b/arch/tile/lib/usercopy_64.S
@@ -0,0 +1,196 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cache.h>
#include <arch/chip.h>

/* Access user memory, but use MMU to avoid propagating kernel exceptions. */

        .pushsection .fixup,"ax"

get_user_fault:
        { movei r1, -EFAULT; move r0, zero }
        jrp lr
        ENDPROC(get_user_fault)

put_user_fault:
        { movei r0, -EFAULT; jrp lr }
        ENDPROC(put_user_fault)

        .popsection

/*
 * __get_user_N functions take a pointer in r0, and return 0 in r1
 * on success, with the value in r0; or else -EFAULT in r1.
 */
#define __get_user_N(bytes, LOAD) \
        STD_ENTRY(__get_user_##bytes); \
1:      { LOAD r0, r0; move r1, zero }; \
        jrp lr; \
        STD_ENDPROC(__get_user_##bytes); \
        .pushsection __ex_table,"a"; \
        .quad 1b, get_user_fault; \
        .popsection

__get_user_N(1, ld1u)
__get_user_N(2, ld2u)
__get_user_N(4, ld4u)
__get_user_N(8, ld)

/*
 * __put_user_N functions take a value in r0 and a pointer in r1,
 * and return 0 in r0 on success or -EFAULT on failure.
 */
#define __put_user_N(bytes, STORE) \
        STD_ENTRY(__put_user_##bytes); \
1:      { STORE r1, r0; move r0, zero }; \
        jrp lr; \
        STD_ENDPROC(__put_user_##bytes); \
        .pushsection __ex_table,"a"; \
        .quad 1b, put_user_fault; \
        .popsection

__put_user_N(1, st1)
__put_user_N(2, st2)
__put_user_N(4, st4)
__put_user_N(8, st)

/*
 * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
 * It returns the length, including the terminating NUL, or zero on exception.
 * If length is greater than the bound, returns one plus the bound.
 */
STD_ENTRY(strnlen_user_asm)
        { beqz r1, 2f; addi r3, r0, -1 }  /* bias down to include NUL */
1:      { ld1u r4, r0; addi r1, r1, -1 }
        beqz r4, 2f
        { bnezt r1, 1b; addi r0, r0, 1 }
2:      { sub r0, r0, r3; jrp lr }
        STD_ENDPROC(strnlen_user_asm)
        .pushsection .fixup,"ax"
strnlen_user_fault:
        { move r0, zero; jrp lr }
        ENDPROC(strnlen_user_fault)
        .section __ex_table,"a"
        .quad 1b, strnlen_user_fault
        .popsection

/*
 * strncpy_from_user_asm takes the kernel target pointer in r0,
 * the userspace source pointer in r1, and the length bound (including
 * the trailing NUL) in r2.  On success, it returns the string length
 * (not including the trailing NUL), or -EFAULT on failure.
 */
STD_ENTRY(strncpy_from_user_asm)
        { beqz r2, 2f; move r3, r0 }
1:      { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
        { st1 r0, r4; addi r0, r0, 1 }
        beqz r2, 2f
        bnezt r4, 1b
        addi r0, r0, -1   /* don't count the trailing NUL */
2:      { sub r0, r0, r3; jrp lr }
        STD_ENDPROC(strncpy_from_user_asm)
        .pushsection .fixup,"ax"
strncpy_from_user_fault:
        { movei r0, -EFAULT; jrp lr }
        ENDPROC(strncpy_from_user_fault)
        .section __ex_table,"a"
        .quad 1b, strncpy_from_user_fault
        .popsection

/*
 * clear_user_asm takes the user target address in r0 and the
 * number of bytes to zero in r1.
 * It returns the number of uncopiable bytes (hopefully zero) in r0.
 * Note that we don't use a separate .fixup section here since we fall
 * through into the "fixup" code as the last straight-line bundle anyway.
 */
STD_ENTRY(clear_user_asm)
        { beqz r1, 2f; or r2, r0, r1 }
        andi r2, r2, 7
        beqzt r2, .Lclear_aligned_user_asm
1:      { st1 r0, zero; addi r0, r0, 1; addi r1, r1, -1 }
        bnezt r1, 1b
2:      { move r0, r1; jrp lr }
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection

.Lclear_aligned_user_asm:
1:      { st r0, zero; addi r0, r0, 8; addi r1, r1, -8 }
        bnezt r1, 1b
2:      { move r0, r1; jrp lr }
        STD_ENDPROC(clear_user_asm)
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection

/*
 * flush_user_asm takes the user target address in r0 and the
 * number of bytes to flush in r1.
 * It returns the number of unflushable bytes (hopefully zero) in r0.
 */
STD_ENTRY(flush_user_asm)
        beqz r1, 2f
        { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
        { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
        { and r0, r0, r2; and r1, r1, r2 }
        { sub r1, r1, r0 }
1:      { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() }
        { addi r0, r0, CHIP_FLUSH_STRIDE(); bnezt r1, 1b }
2:      { move r0, r1; jrp lr }
        STD_ENDPROC(flush_user_asm)
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection

/*
 * inv_user_asm takes the user target address in r0 and the
 * number of bytes to invalidate in r1.
 * It returns the number of not inv'able bytes (hopefully zero) in r0.
 */
STD_ENTRY(inv_user_asm)
        beqz r1, 2f
        { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
        { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
        { and r0, r0, r2; and r1, r1, r2 }
        { sub r1, r1, r0 }
1:      { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
        { addi r0, r0, CHIP_INV_STRIDE(); bnezt r1, 1b }
2:      { move r0, r1; jrp lr }
        STD_ENDPROC(inv_user_asm)
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection

/*
 * finv_user_asm takes the user target address in r0 and the
 * number of bytes to flush-invalidate in r1.
 * It returns the number of not finv'able bytes (hopefully zero) in r0.
 */
STD_ENTRY(finv_user_asm)
        beqz r1, 2f
        { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
        { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
        { and r0, r0, r2; and r1, r1, r2 }
        { sub r1, r1, r0 }
1:      { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() }
        { addi r0, r0, CHIP_FINV_STRIDE(); bnezt r1, 1b }
2:      { move r0, r1; jrp lr }
        STD_ENDPROC(finv_user_asm)
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection