path: root/arch/tile/lib
author		Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-17 16:15:55 -0500
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-17 16:15:55 -0500
commit		8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree		a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/tile/lib
parent		406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'arch/tile/lib')
-rw-r--r--  arch/tile/lib/Makefile           |  1
-rw-r--r--  arch/tile/lib/atomic_32.c        | 47
-rw-r--r--  arch/tile/lib/cacheflush.c       | 30
-rw-r--r--  arch/tile/lib/checksum.c         | 15
-rw-r--r--  arch/tile/lib/exports.c          | 13
-rw-r--r--  arch/tile/lib/memchr_64.c        |  8
-rw-r--r--  arch/tile/lib/memcpy_64.c        | 23
-rw-r--r--  arch/tile/lib/memcpy_tile64.c    |  8
-rw-r--r--  arch/tile/lib/memcpy_user_64.c   |  8
-rw-r--r--  arch/tile/lib/spinlock_32.c      |  2
-rw-r--r--  arch/tile/lib/spinlock_common.h  |  2
-rw-r--r--  arch/tile/lib/strchr_64.c        | 15
-rw-r--r--  arch/tile/lib/string-endian.h    | 33
-rw-r--r--  arch/tile/lib/strlen_64.c        | 11
-rw-r--r--  arch/tile/lib/usercopy_32.S      | 76
-rw-r--r--  arch/tile/lib/usercopy_64.S      | 49
16 files changed, 229 insertions, 112 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 985f5985823..0c26086ecbe 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,7 +7,6 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
 	strchr_$(BITS).o strlen_$(BITS).o
 
 ifeq ($(CONFIG_TILEGX),y)
-CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer
 lib-y += memcpy_user_64.o
 else
 lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index f5cada70c3c..771b251b409 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/atomic.h>
+#include <asm/futex.h>
 #include <arch/chip.h>
 
 /* See <asm/atomic_32.h> */
@@ -49,7 +50,7 @@ int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
 
 #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 
-int *__atomic_hashed_lock(volatile void *v)
+static inline int *__atomic_hashed_lock(volatile void *v)
 {
 	/* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
 #if ATOMIC_LOCKS_FOUND_VIA_TABLE()
@@ -190,6 +191,47 @@ u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
 EXPORT_SYMBOL(_atomic64_cmpxchg);
 
 
+static inline int *__futex_setup(int __user *v)
+{
+	/*
+	 * Issue a prefetch to the counter to bring it into cache.
+	 * As for __atomic_setup, but we can't do a read into the L1
+	 * since it might fault; instead we do a prefetch into the L2.
+	 */
+	__insn_prefetch(v);
+	return __atomic_hashed_lock((int __force *)v);
+}
+
+struct __get_user futex_set(u32 __user *v, int i)
+{
+	return __atomic_xchg((int __force *)v, __futex_setup(v), i);
+}
+
+struct __get_user futex_add(u32 __user *v, int n)
+{
+	return __atomic_xchg_add((int __force *)v, __futex_setup(v), n);
+}
+
+struct __get_user futex_or(u32 __user *v, int n)
+{
+	return __atomic_or((int __force *)v, __futex_setup(v), n);
+}
+
+struct __get_user futex_andn(u32 __user *v, int n)
+{
+	return __atomic_andn((int __force *)v, __futex_setup(v), n);
+}
+
+struct __get_user futex_xor(u32 __user *v, int n)
+{
+	return __atomic_xor((int __force *)v, __futex_setup(v), n);
+}
+
+struct __get_user futex_cmpxchg(u32 __user *v, int o, int n)
+{
+	return __atomic_cmpxchg((int __force *)v, __futex_setup(v), o, n);
+}
+
 /*
  * If any of the atomic or futex routines hit a bad address (not in
  * the page tables at kernel PL) this routine is called. The futex
@@ -281,4 +323,7 @@ void __init __init_atomic_per_cpu(void)
 	BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
 
 #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+	/* The futex code makes this assumption, so we validate it here. */
+	BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
 }
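
The futex_* routines added above all reduce to one pattern: hash the user
address to one of a fixed pool of spinlocks, prefetch the word, then do the
read-modify-write under that lock. A minimal user-space sketch of the hashing
idea, using pthreads in place of the tile atomic helpers (the lock count,
shift, and helper names are illustrative, and the fault-reporting side of
struct __get_user is omitted):

	#include <pthread.h>
	#include <stdint.h>

	#define NLOCKS 1024	/* power of two, like the kernel's hash table */

	static pthread_mutex_t locks[NLOCKS] =
		{ [0 ... NLOCKS - 1] = PTHREAD_MUTEX_INITIALIZER };	/* GCC extension */

	/* Hash a word-aligned address to one lock, as __atomic_hashed_lock does. */
	static pthread_mutex_t *hashed_lock(volatile void *v)
	{
		return &locks[((uintptr_t)v >> 2) & (NLOCKS - 1)];
	}

	/* Exchange-add under the hashed lock; models futex_add's semantics. */
	static int locked_xchg_add(volatile int *v, int n)
	{
		pthread_mutex_t *l = hashed_lock(v);
		pthread_mutex_lock(l);
		int old = *v;
		*v = old + n;
		pthread_mutex_unlock(l);
		return old;	/* futex ops want the previous value */
	}
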
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index db4fb89e12d..8928aace7a6 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -39,21 +39,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 {
 	char *p, *base;
 	size_t step_size, load_count;
-
-	/*
-	 * On TILEPro the striping granularity is a fixed 8KB; on
-	 * TILE-Gx it is configurable, and we rely on the fact that
-	 * the hypervisor always configures maximum striping, so that
-	 * bits 9 and 10 of the PA are part of the stripe function, so
-	 * every 512 bytes we hit a striping boundary.
-	 *
-	 */
-#ifdef __tilegx__
-	const unsigned long STRIPE_WIDTH = 512;
-#else
 	const unsigned long STRIPE_WIDTH = 8192;
-#endif
-
 #ifdef __tilegx__
 	/*
 	 * On TILE-Gx, we must disable the dstream prefetcher before doing
@@ -88,7 +74,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 	 * memory, that one load would be sufficient, but since we may
 	 * be, we also need to back up to the last load issued to
 	 * another memory controller, which would be the point where
-	 * we crossed a "striping" boundary (the granularity of striping
+	 * we crossed an 8KB boundary (the granularity of striping
 	 * across memory controllers).  Keep backing up and doing this
 	 * until we are before the beginning of the buffer, or have
 	 * hit all the controllers.
@@ -102,22 +88,12 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 	 * every cache line on a full memory stripe on each
 	 * controller" that we simply do that, to simplify the logic.
 	 *
-	 * On TILE-Gx the hash-for-home function is much more complex,
-	 * with the upshot being we can't readily guarantee we have
-	 * hit both entries in the 128-entry AMT that were hit by any
-	 * load in the entire range, so we just re-load them all.
-	 * With larger buffers, we may want to consider using a hypervisor
-	 * trap to issue loads directly to each hash-for-home tile for
-	 * each controller (doing it from Linux would trash the TLB).
+	 * FIXME: See bug 9535 for some issues with this code.
 	 */
 	if (hfh) {
 		step_size = L2_CACHE_BYTES;
-#ifdef __tilegx__
-		load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
-#else
 		load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
 			(1 << CHIP_LOG_NUM_MSHIMS());
-#endif
 	} else {
 		step_size = STRIPE_WIDTH;
 		load_count = (1 << CHIP_LOG_NUM_MSHIMS());
@@ -133,7 +109,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 
 	/* Figure out how far back we need to go. */
 	base = p - (step_size * (load_count - 2));
-	if ((unsigned long)base < (unsigned long)buffer)
+	if ((long)base < (long)buffer)
 		base = buffer;
 
 	/*
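
The sizing logic in finv_buffer_remote() is easier to follow pulled out on
its own. A sketch under the assumptions the comments state (64-byte L2 lines,
a fixed 8KB stripe, 2^log_mshims memory controllers; the function name is
illustrative):

	#include <stddef.h>

	#define L2_CACHE_BYTES	64
	#define STRIPE_WIDTH	8192UL

	static void finv_sizing(int hfh, int log_mshims,
				size_t *step_size, size_t *load_count)
	{
		if (hfh) {
			/* Hash-for-home: touch every line of a full stripe
			 * on each controller. */
			*step_size = L2_CACHE_BYTES;
			*load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
				(1UL << log_mshims);
		} else {
			/* Linearly striped: one load per stripe per controller. */
			*step_size = STRIPE_WIDTH;
			*load_count = 1UL << log_mshims;
		}
	}

With log_mshims = 2 and hfh clear, for example, load_count is 4, so the code
backs base up by step_size * (load_count - 2) = 16KB from the last load issued.
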
diff --git a/arch/tile/lib/checksum.c b/arch/tile/lib/checksum.c
index c3ca3e64d9d..e4bab5bd3f3 100644
--- a/arch/tile/lib/checksum.c
+++ b/arch/tile/lib/checksum.c
@@ -16,6 +16,19 @@
 #include <net/checksum.h>
 #include <linux/module.h>
 
+static inline unsigned int longto16(unsigned long x)
+{
+	unsigned long ret;
+#ifdef __tilegx__
+	ret = __insn_v2sadu(x, 0);
+	ret = __insn_v2sadu(ret, 0);
+#else
+	ret = __insn_sadh_u(x, 0);
+	ret = __insn_sadh_u(ret, 0);
+#endif
+	return ret;
+}
+
 __wsum do_csum(const unsigned char *buff, int len)
 {
 	int odd, count;
@@ -81,7 +94,7 @@ __wsum do_csum(const unsigned char *buff, int len)
 	}
 	if (len & 1)
 		result += *buff;
-	result = csum_long(result);
+	result = longto16(result);
 	if (odd)
 		result = swab16(result);
 out:
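
The new longto16() folds the running sum with two sum-of-absolute-differences
instructions. In portable C the same 16-bit fold is usually written as
shift-and-add steps that feed each carry back in; a sketch (the result agrees
with the instruction-based fold modulo 0xffff, which is all a one's-complement
checksum needs):

	#include <stdint.h>

	/* Fold a 64-bit one's-complement sum down to 16 bits. */
	static unsigned int fold_to_16(uint64_t x)
	{
		x = (x & 0xffffffffULL) + (x >> 32);	/* <= 33 bits */
		x = (x & 0xffff) + (x >> 16);		/* <= 18 bits */
		x = (x & 0xffff) + (x >> 16);		/* <= 17 bits */
		x = (x & 0xffff) + (x >> 16);		/* fits in 16 bits */
		return (unsigned int)x;
	}
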
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index dd5f0a33fda..49284fae9d0 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -18,6 +18,14 @@
 
 /* arch/tile/lib/usercopy.S */
 #include <linux/uaccess.h>
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__get_user_8);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
 EXPORT_SYMBOL(strnlen_user_asm);
 EXPORT_SYMBOL(strncpy_from_user_asm);
 EXPORT_SYMBOL(clear_user_asm);
@@ -31,9 +39,6 @@ EXPORT_SYMBOL(finv_user_asm)
 EXPORT_SYMBOL(current_text_addr);
 EXPORT_SYMBOL(dump_stack);
 
-/* arch/tile/kernel/head.S */
-EXPORT_SYMBOL(empty_zero_page);
-
 /* arch/tile/lib/, various memcpy files */
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(__copy_to_user_inatomic);
@@ -74,6 +79,8 @@ EXPORT_SYMBOL(__umoddi3);
 int64_t __moddi3(int64_t dividend, int64_t divisor);
 EXPORT_SYMBOL(__moddi3);
 #ifndef __tilegx__
+uint64_t __ll_mul(uint64_t n0, uint64_t n1);
+EXPORT_SYMBOL(__ll_mul);
 int64_t __muldi3(int64_t, int64_t);
 EXPORT_SYMBOL(__muldi3);
 uint64_t __lshrdi3(uint64_t, unsigned int);
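
The libgcc helpers exported at the bottom follow this file's standard pattern
for out-of-line asm routines: declare a C prototype so the compiler knows the
symbol's type, then EXPORT_SYMBOL it so modules can link against it. A sketch
with a hypothetical helper:

	#include <linux/module.h>

	long my_asm_helper(long a, long b);	/* implemented in a .S file */
	EXPORT_SYMBOL(my_asm_helper);
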
diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c
index 6f867dbf7c5..84fdc8d8e73 100644
--- a/arch/tile/lib/memchr_64.c
+++ b/arch/tile/lib/memchr_64.c
@@ -15,7 +15,6 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/module.h>
-#include "string-endian.h"
 
 void *memchr(const void *s, int c, size_t n)
 {
@@ -40,8 +39,11 @@ void *memchr(const void *s, int c, size_t n)
 
 	/* Read the first word, but munge it so that bytes before the array
 	 * will not match goal.
+	 *
+	 * Note that this shift count expression works because we know
+	 * shift counts are taken mod 64.
 	 */
-	before_mask = MASK(s_int);
+	before_mask = (1ULL << (s_int << 3)) - 1;
 	v = (*p | before_mask) ^ (goal & before_mask);
 
 	/* Compute the address of the last byte. */
@@ -63,7 +65,7 @@ void *memchr(const void *s, int c, size_t n)
 	/* We found a match, but it might be in a byte past the end
 	 * of the array.
 	 */
-	ret = ((char *)p) + (CFZ(bits) >> 3);
+	ret = ((char *)p) + (__insn_ctz(bits) >> 3);
 	return (ret <= last_byte_ptr) ? ret : NULL;
 }
 EXPORT_SYMBOL(memchr);
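
The expression (1ULL << (s_int << 3)) - 1 that replaces MASK(s_int) leans on
the hardware reducing shift counts mod 64, as the new comment notes: for an
unaligned pointer, s_int << 3 is far larger than 63, and only its low six bits
matter. Portable C must do that reduction explicitly; a sketch:

	#include <stdint.h>

	/* Ones in every byte lane that precedes s within its aligned word. */
	static uint64_t before_mask(const void *s)
	{
		uintptr_t s_int = (uintptr_t)s;
		unsigned int shift = (s_int & 7) << 3;	/* 0, 8, ..., 56 */
		return (1ULL << shift) - 1;
	}

When s is already 8-byte aligned the shift is 0 and the mask is empty, which
is exactly what the mod-64 behavior produces on tile.
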
diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c
index c79b8e7c682..3fab9a6a2bb 100644
--- a/arch/tile/lib/memcpy_64.c
+++ b/arch/tile/lib/memcpy_64.c
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/module.h>
+#define __memcpy memcpy
 /* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */
 
 /* Must be 8 bytes in size. */
@@ -187,7 +188,6 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
 
 	/* n != 0 if we get here.  Write out any trailing bytes. */
 	dst1 = (char *)dst8;
-#ifndef __BIG_ENDIAN__
 	if (n & 4) {
 		ST4((uint32_t *)dst1, final);
 		dst1 += 4;
@@ -202,30 +202,11 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
 	}
 	if (n)
 		ST1((uint8_t *)dst1, final);
-#else
-	if (n & 4) {
-		ST4((uint32_t *)dst1, final >> 32);
-		dst1 += 4;
-	}
-	else
-	{
-		final >>= 32;
-	}
-	if (n & 2) {
-		ST2((uint16_t *)dst1, final >> 16);
-		dst1 += 2;
-	}
-	else
-	{
-		final >>= 16;
-	}
-	if (n & 1)
-		ST1((uint8_t *)dst1, final >> 8);
-#endif
 
 	return RETVAL;
 }
 
+
 #ifdef USERCOPY_FUNC
 #undef ST1
 #undef ST2
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
index 3bc4b4e40d9..b2fe15e0107 100644
--- a/arch/tile/lib/memcpy_tile64.c
+++ b/arch/tile/lib/memcpy_tile64.c
@@ -160,7 +160,7 @@ retry_source:
 			break;
 		if (get_remote_cache_cpu(src_pte) == smp_processor_id())
 			break;
-		src_page = pfn_to_page(pte_pfn(src_pte));
+		src_page = pfn_to_page(hv_pte_get_pfn(src_pte));
 		get_page(src_page);
 		if (pte_val(src_pte) != pte_val(*src_ptep)) {
 			put_page(src_page);
@@ -168,7 +168,7 @@ retry_source:
 		}
 		if (pte_huge(src_pte)) {
 			/* Adjust the PTE to correspond to a small page */
-			int pfn = pte_pfn(src_pte);
+			int pfn = hv_pte_get_pfn(src_pte);
 			pfn += (((unsigned long)source & (HPAGE_SIZE-1))
 				>> PAGE_SHIFT);
 			src_pte = pfn_pte(pfn, src_pte);
@@ -188,7 +188,7 @@ retry_dest:
 			put_page(src_page);
 			break;
 		}
-		dst_page = pfn_to_page(pte_pfn(dst_pte));
+		dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte));
 		if (dst_page == src_page) {
 			/*
 			 * Source and dest are on the same page; this
@@ -206,7 +206,7 @@ retry_dest:
 		}
 		if (pte_huge(dst_pte)) {
 			/* Adjust the PTE to correspond to a small page */
-			int pfn = pte_pfn(dst_pte);
+			int pfn = hv_pte_get_pfn(dst_pte);
 			pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
 				>> PAGE_SHIFT);
 			dst_pte = pfn_pte(pfn, dst_pte);
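
The huge-page adjustment repeated in these hunks is plain pfn arithmetic: take
the pfn at the huge page's base and add the index of the small page that the
target address falls in. A worked sketch (the huge-page size here is
illustrative):

	#define PAGE_SHIFT	12
	#define HPAGE_SHIFT	24	/* e.g. 16MB huge pages */
	#define HPAGE_SIZE	(1UL << HPAGE_SHIFT)

	/* pfn of the small page containing vaddr, given its huge page's pfn. */
	static unsigned long small_pfn(unsigned long huge_pfn, unsigned long vaddr)
	{
		return huge_pfn + ((vaddr & (HPAGE_SIZE - 1)) >> PAGE_SHIFT);
	}
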
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 37440caa737..4763b3aff1c 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -14,13 +14,7 @@
  * Do memcpy(), but trap and return "n" when a load or store faults.
  *
  * Note: this idiom only works when memcpy() compiles to a leaf function.
- * Here leaf function not only means it does not have calls, but also
- * requires no stack operations (sp, stack frame pointer) and no
- * use of callee-saved registers, else "jrp lr" will be incorrect since
- * unwinding stack frame is bypassed. Since memcpy() is not complex so
- * these conditions are satisfied here, but we need to be careful when
- * modifying this file. This is not a clean solution but is the best
- * one so far.
+ * If "sp" is updated during memcpy, the "jrp lr" will be incorrect.
  *
  * Also note that we are capturing "n" from the containing scope here.
  */
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index b16ac49a968..cb0999fb64b 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -144,7 +144,7 @@ void arch_read_unlock(arch_rwlock_t *rwlock)
 	for (;;) {
 		__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
 		val = __insn_tns((int *)&rwlock->lock);
-		if (likely((val & 1) == 0)) {
+		if (likely(val & 1) == 0) {
 			rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
 			__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
 			break;
diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h
index 6ac37509fac..c1010980913 100644
--- a/arch/tile/lib/spinlock_common.h
+++ b/arch/tile/lib/spinlock_common.h
@@ -60,5 +60,5 @@ static void delay_backoff(int iterations)
 	loops += __insn_crc32_32(stack_pointer, get_cycles_low()) &
 		(loops - 1);
 
-	relax(loops);
+	relax(1 << exponent);
 }
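
Note what this hunk reverts to: relax(loops) consumed the jitter computed on
the line above, while relax(1 << exponent) discards it and spins for the bare
power of two. A user-space sketch of the jittered variant (the exponent cap
and the rand32() entropy source are assumptions standing in for the kernel's
bound and its crc32-of-cycle-counter trick):

	#include <stdint.h>

	extern uint32_t rand32(void);	/* any cheap per-cpu entropy */

	static void relax(unsigned int loops)
	{
		for (volatile unsigned int i = 0; i < loops; i++)
			;	/* spin */
	}

	static void delay_backoff(int iterations)
	{
		int exponent = iterations + 1;
		if (exponent > 8)		/* cap the worst-case delay */
			exponent = 8;

		unsigned int loops = 1u << exponent;
		loops += rand32() & (loops - 1);	/* up to ~2x jitter */
		relax(loops);
	}
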
diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c
index f39f9dc422b..617a9273aaa 100644
--- a/arch/tile/lib/strchr_64.c
+++ b/arch/tile/lib/strchr_64.c
@@ -15,7 +15,8 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/module.h>
-#include "string-endian.h"
+
+#undef strchr
 
 char *strchr(const char *s, int c)
 {
@@ -32,9 +33,13 @@ char *strchr(const char *s, int c)
 	 * match neither zero nor goal (we make sure the high bit of each
 	 * byte is 1, and the low 7 bits are all the opposite of the goal
 	 * byte).
+	 *
+	 * Note that this shift count expression works because we know shift
+	 * counts are taken mod 64.
 	 */
-	const uint64_t before_mask = MASK(s_int);
-	uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui(before_mask, 1));
+	const uint64_t before_mask = (1ULL << (s_int << 3)) - 1;
+	uint64_t v = (*p | before_mask) ^
+		(goal & __insn_v1shrsi(before_mask, 1));
 
 	uint64_t zero_matches, goal_matches;
 	while (1) {
@@ -50,8 +55,8 @@ char *strchr(const char *s, int c)
 		v = *++p;
 	}
 
-	z = CFZ(zero_matches);
-	g = CFZ(goal_matches);
+	z = __insn_ctz(zero_matches);
+	g = __insn_ctz(goal_matches);
 
 	/* If we found c before '\0' we got a match. Note that if c == '\0'
 	 * then g == z, and we correctly return the address of the '\0'
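
The __insn_v1shrsi(before_mask, 1) term is a per-byte shift: every 0xff byte
of the mask becomes 0x7f. Since before_mask only ever holds 0x00 or 0xff
bytes, a portable model can simply AND with repeated 0x7f; a sketch of the
whole munge:

	#include <stdint.h>

	static uint64_t munge_first_word(uint64_t word, uint64_t goal,
					 uint64_t before_mask)
	{
		/* Per-byte shift right by one: 0xff -> 0x7f, 0x00 -> 0x00. */
		uint64_t mask7 = before_mask & 0x7f7f7f7f7f7f7f7fULL;
		return (word | before_mask) ^ (goal & mask7);
	}

Each masked byte comes out as 0xff XOR (goal & 0x7f): its high bit is set, so
it cannot match '\0', and its low seven bits are the complement of goal's, so
it cannot match the goal byte either.
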
diff --git a/arch/tile/lib/string-endian.h b/arch/tile/lib/string-endian.h
deleted file mode 100644
index c0eed7ce69c..00000000000
--- a/arch/tile/lib/string-endian.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for
- * more details.
- *
- * Provide a mask based on the pointer alignment that
- * sets up non-zero bytes before the beginning of the string.
- * The MASK expression works because shift counts are taken mod 64.
- * Also, specify how to count "first" and "last" bits
- * when the bits have been read as a word.
- */
-
-#include <asm/byteorder.h>
-
-#ifdef __LITTLE_ENDIAN
-#define MASK(x) (__insn_shl(1ULL, (x << 3)) - 1)
-#define NULMASK(x) ((2ULL << x) - 1)
-#define CFZ(x) __insn_ctz(x)
-#define REVCZ(x) __insn_clz(x)
-#else
-#define MASK(x) (__insn_shl(-2LL, ((-x << 3) - 1)))
-#define NULMASK(x) (-2LL << (63 - x))
-#define CFZ(x) __insn_clz(x)
-#define REVCZ(x) __insn_ctz(x)
-#endif
diff --git a/arch/tile/lib/strlen_64.c b/arch/tile/lib/strlen_64.c
index 9583fc3361f..1c92d46202a 100644
--- a/arch/tile/lib/strlen_64.c
+++ b/arch/tile/lib/strlen_64.c
@@ -15,7 +15,8 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/module.h>
-#include "string-endian.h"
+
+#undef strlen
 
 size_t strlen(const char *s)
 {
@@ -23,13 +24,15 @@ size_t strlen(const char *s)
 	const uintptr_t s_int = (uintptr_t) s;
 	const uint64_t *p = (const uint64_t *)(s_int & -8);
 
-	/* Read and MASK the first word. */
-	uint64_t v = *p | MASK(s_int);
+	/* Read the first word, but force bytes before the string to be nonzero.
+	 * This expression works because we know shift counts are taken mod 64.
+	 */
+	uint64_t v = *p | ((1ULL << (s_int << 3)) - 1);
 
 	uint64_t bits;
 	while ((bits = __insn_v1cmpeqi(v, 0)) == 0)
 		v = *++p;
 
-	return ((const char *)p) + (CFZ(bits) >> 3) - s;
+	return ((const char *)p) + (__insn_ctz(bits) >> 3) - s;
 }
 EXPORT_SYMBOL(strlen);
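
The loop exits when __insn_v1cmpeqi(v, 0) flags a zero byte. Without a
per-byte compare instruction, the standard portable stand-in is the "haszero"
bit trick; a sketch:

	#include <stdint.h>

	/* Nonzero iff v contains a zero byte.  Unlike v1cmpeqi this marks a
	 * zero byte with 0x80 rather than 0x01, and bytes above the first
	 * zero can false-positive, but the lowest set bit still locates the
	 * first zero byte, which is all strlen needs.
	 */
	static uint64_t zero_bytes(uint64_t v)
	{
		return (v - 0x0101010101010101ULL) & ~v & 0x8080808080808080ULL;
	}
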
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
index b62d002af00..979f76d8374 100644
--- a/arch/tile/lib/usercopy_32.S
+++ b/arch/tile/lib/usercopy_32.S
@@ -19,6 +19,82 @@
 
 /* Access user memory, but use MMU to avoid propagating kernel exceptions. */
 
+	.pushsection .fixup,"ax"
+
+get_user_fault:
+	{ move r0, zero; move r1, zero }
+	{ movei r2, -EFAULT; jrp lr }
+	ENDPROC(get_user_fault)
+
+put_user_fault:
+	{ movei r0, -EFAULT; jrp lr }
+	ENDPROC(put_user_fault)
+
+	.popsection
+
+/*
+ * __get_user_N functions take a pointer in r0, and return 0 in r2
+ * on success, with the value in r0; or else -EFAULT in r2.
+ */
+#define __get_user_N(bytes, LOAD) \
+	STD_ENTRY(__get_user_##bytes); \
+1:	{ LOAD r0, r0; move r1, zero; move r2, zero }; \
+	jrp lr; \
+	STD_ENDPROC(__get_user_##bytes); \
+	.pushsection __ex_table,"a"; \
+	.word 1b, get_user_fault; \
+	.popsection
+
+__get_user_N(1, lb_u)
+__get_user_N(2, lh_u)
+__get_user_N(4, lw)
+
+/*
+ * __get_user_8 takes a pointer in r0, and returns 0 in r2
+ * on success, with the value in r0/r1; or else -EFAULT in r2.
+ */
+	STD_ENTRY(__get_user_8);
+1:	{ lw r0, r0; addi r1, r0, 4 };
+2:	{ lw r1, r1; move r2, zero };
+	jrp lr;
+	STD_ENDPROC(__get_user_8);
+	.pushsection __ex_table,"a";
+	.word 1b, get_user_fault;
+	.word 2b, get_user_fault;
+	.popsection
+
+/*
+ * __put_user_N functions take a value in r0 and a pointer in r1,
+ * and return 0 in r0 on success or -EFAULT on failure.
+ */
+#define __put_user_N(bytes, STORE) \
+	STD_ENTRY(__put_user_##bytes); \
+1:	{ STORE r1, r0; move r0, zero }; \
+	jrp lr; \
+	STD_ENDPROC(__put_user_##bytes); \
+	.pushsection __ex_table,"a"; \
+	.word 1b, put_user_fault; \
+	.popsection
+
+__put_user_N(1, sb)
+__put_user_N(2, sh)
+__put_user_N(4, sw)
+
+/*
+ * __put_user_8 takes a value in r0/r1 and a pointer in r2,
+ * and returns 0 in r0 on success or -EFAULT on failure.
+ */
+STD_ENTRY(__put_user_8)
+1:	{ sw r2, r0; addi r2, r2, 4 }
+2:	{ sw r2, r1; move r0, zero }
+	jrp lr
+	STD_ENDPROC(__put_user_8)
+	.pushsection __ex_table,"a"
+	.word 1b, put_user_fault
+	.word 2b, put_user_fault
+	.popsection
+
+
 /*
  * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
  * It returns the length, including the terminating NUL, or zero on exception.
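
Each 1:/2: label above is paired with a __ex_table entry naming the
instruction that may fault and the fixup to resume at. Conceptually the trap
handler consumes the table like this (a simplified linear search; the real
kernel sorts the table and binary-searches it):

	struct exception_table_entry {
		unsigned long insn;	/* address of the 1b/2b instruction */
		unsigned long fixup;	/* get_user_fault / put_user_fault */
	};

	static unsigned long search_fixup(const struct exception_table_entry *tbl,
					  int n, unsigned long fault_pc)
	{
		for (int i = 0; i < n; i++)
			if (tbl[i].insn == fault_pc)
				return tbl[i].fixup;
		return 0;	/* no fixup: a genuine kernel fault */
	}
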
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
index adb2dbbc70c..2ff44f87b78 100644
--- a/arch/tile/lib/usercopy_64.S
+++ b/arch/tile/lib/usercopy_64.S
@@ -19,6 +19,55 @@
 
 /* Access user memory, but use MMU to avoid propagating kernel exceptions. */
 
+	.pushsection .fixup,"ax"
+
+get_user_fault:
+	{ movei r1, -EFAULT; move r0, zero }
+	jrp lr
+	ENDPROC(get_user_fault)
+
+put_user_fault:
+	{ movei r0, -EFAULT; jrp lr }
+	ENDPROC(put_user_fault)
+
+	.popsection
+
+/*
+ * __get_user_N functions take a pointer in r0, and return 0 in r1
+ * on success, with the value in r0; or else -EFAULT in r1.
+ */
+#define __get_user_N(bytes, LOAD) \
+	STD_ENTRY(__get_user_##bytes); \
+1:	{ LOAD r0, r0; move r1, zero }; \
+	jrp lr; \
+	STD_ENDPROC(__get_user_##bytes); \
+	.pushsection __ex_table,"a"; \
+	.quad 1b, get_user_fault; \
+	.popsection
+
+__get_user_N(1, ld1u)
+__get_user_N(2, ld2u)
+__get_user_N(4, ld4u)
+__get_user_N(8, ld)
+
+/*
+ * __put_user_N functions take a value in r0 and a pointer in r1,
+ * and return 0 in r0 on success or -EFAULT on failure.
+ */
+#define __put_user_N(bytes, STORE) \
+	STD_ENTRY(__put_user_##bytes); \
+1:	{ STORE r1, r0; move r0, zero }; \
+	jrp lr; \
+	STD_ENDPROC(__put_user_##bytes); \
+	.pushsection __ex_table,"a"; \
+	.quad 1b, put_user_fault; \
+	.popsection
+
+__put_user_N(1, st1)
+__put_user_N(2, st2)
+__put_user_N(4, st4)
+__put_user_N(8, st)
+
 /*
  * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
  * It returns the length, including the terminating NUL, or zero on exception.