diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch) | |
tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/tile/lib | |
parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff) |
Patched in Tegra support.
Diffstat (limited to 'arch/tile/lib')
-rw-r--r-- | arch/tile/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/tile/lib/atomic_32.c | 47 | ||||
-rw-r--r-- | arch/tile/lib/cacheflush.c | 30 | ||||
-rw-r--r-- | arch/tile/lib/checksum.c | 15 | ||||
-rw-r--r-- | arch/tile/lib/exports.c | 13 | ||||
-rw-r--r-- | arch/tile/lib/memchr_64.c | 8 | ||||
-rw-r--r-- | arch/tile/lib/memcpy_64.c | 23 | ||||
-rw-r--r-- | arch/tile/lib/memcpy_tile64.c | 8 | ||||
-rw-r--r-- | arch/tile/lib/memcpy_user_64.c | 8 | ||||
-rw-r--r-- | arch/tile/lib/spinlock_32.c | 2 | ||||
-rw-r--r-- | arch/tile/lib/spinlock_common.h | 2 | ||||
-rw-r--r-- | arch/tile/lib/strchr_64.c | 15 | ||||
-rw-r--r-- | arch/tile/lib/string-endian.h | 33 | ||||
-rw-r--r-- | arch/tile/lib/strlen_64.c | 11 | ||||
-rw-r--r-- | arch/tile/lib/usercopy_32.S | 76 | ||||
-rw-r--r-- | arch/tile/lib/usercopy_64.S | 49 |
16 files changed, 229 insertions, 112 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 985f5985823..0c26086ecbe 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile | |||
@@ -7,7 +7,6 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ | |||
7 | strchr_$(BITS).o strlen_$(BITS).o | 7 | strchr_$(BITS).o strlen_$(BITS).o |
8 | 8 | ||
9 | ifeq ($(CONFIG_TILEGX),y) | 9 | ifeq ($(CONFIG_TILEGX),y) |
10 | CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer | ||
11 | lib-y += memcpy_user_64.o | 10 | lib-y += memcpy_user_64.o |
12 | else | 11 | else |
13 | lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o | 12 | lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o |
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index f5cada70c3c..771b251b409 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
20 | #include <linux/atomic.h> | 20 | #include <linux/atomic.h> |
21 | #include <asm/futex.h> | ||
21 | #include <arch/chip.h> | 22 | #include <arch/chip.h> |
22 | 23 | ||
23 | /* See <asm/atomic_32.h> */ | 24 | /* See <asm/atomic_32.h> */ |
@@ -49,7 +50,7 @@ int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss; | |||
49 | 50 | ||
50 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ | 51 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ |
51 | 52 | ||
52 | int *__atomic_hashed_lock(volatile void *v) | 53 | static inline int *__atomic_hashed_lock(volatile void *v) |
53 | { | 54 | { |
54 | /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ | 55 | /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ |
55 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() | 56 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() |
@@ -190,6 +191,47 @@ u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) | |||
190 | EXPORT_SYMBOL(_atomic64_cmpxchg); | 191 | EXPORT_SYMBOL(_atomic64_cmpxchg); |
191 | 192 | ||
192 | 193 | ||
194 | static inline int *__futex_setup(int __user *v) | ||
195 | { | ||
196 | /* | ||
197 | * Issue a prefetch to the counter to bring it into cache. | ||
198 | * As for __atomic_setup, but we can't do a read into the L1 | ||
199 | * since it might fault; instead we do a prefetch into the L2. | ||
200 | */ | ||
201 | __insn_prefetch(v); | ||
202 | return __atomic_hashed_lock((int __force *)v); | ||
203 | } | ||
204 | |||
205 | struct __get_user futex_set(u32 __user *v, int i) | ||
206 | { | ||
207 | return __atomic_xchg((int __force *)v, __futex_setup(v), i); | ||
208 | } | ||
209 | |||
210 | struct __get_user futex_add(u32 __user *v, int n) | ||
211 | { | ||
212 | return __atomic_xchg_add((int __force *)v, __futex_setup(v), n); | ||
213 | } | ||
214 | |||
215 | struct __get_user futex_or(u32 __user *v, int n) | ||
216 | { | ||
217 | return __atomic_or((int __force *)v, __futex_setup(v), n); | ||
218 | } | ||
219 | |||
220 | struct __get_user futex_andn(u32 __user *v, int n) | ||
221 | { | ||
222 | return __atomic_andn((int __force *)v, __futex_setup(v), n); | ||
223 | } | ||
224 | |||
225 | struct __get_user futex_xor(u32 __user *v, int n) | ||
226 | { | ||
227 | return __atomic_xor((int __force *)v, __futex_setup(v), n); | ||
228 | } | ||
229 | |||
230 | struct __get_user futex_cmpxchg(u32 __user *v, int o, int n) | ||
231 | { | ||
232 | return __atomic_cmpxchg((int __force *)v, __futex_setup(v), o, n); | ||
233 | } | ||
234 | |||
193 | /* | 235 | /* |
194 | * If any of the atomic or futex routines hit a bad address (not in | 236 | * If any of the atomic or futex routines hit a bad address (not in |
195 | * the page tables at kernel PL) this routine is called. The futex | 237 | * the page tables at kernel PL) this routine is called. The futex |
@@ -281,4 +323,7 @@ void __init __init_atomic_per_cpu(void) | |||
281 | BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE); | 323 | BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE); |
282 | 324 | ||
283 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ | 325 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ |
326 | |||
327 | /* The futex code makes this assumption, so we validate it here. */ | ||
328 | BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int)); | ||
284 | } | 329 | } |
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index db4fb89e12d..8928aace7a6 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c | |||
@@ -39,21 +39,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) | |||
39 | { | 39 | { |
40 | char *p, *base; | 40 | char *p, *base; |
41 | size_t step_size, load_count; | 41 | size_t step_size, load_count; |
42 | |||
43 | /* | ||
44 | * On TILEPro the striping granularity is a fixed 8KB; on | ||
45 | * TILE-Gx it is configurable, and we rely on the fact that | ||
46 | * the hypervisor always configures maximum striping, so that | ||
47 | * bits 9 and 10 of the PA are part of the stripe function, so | ||
48 | * every 512 bytes we hit a striping boundary. | ||
49 | * | ||
50 | */ | ||
51 | #ifdef __tilegx__ | ||
52 | const unsigned long STRIPE_WIDTH = 512; | ||
53 | #else | ||
54 | const unsigned long STRIPE_WIDTH = 8192; | 42 | const unsigned long STRIPE_WIDTH = 8192; |
55 | #endif | ||
56 | |||
57 | #ifdef __tilegx__ | 43 | #ifdef __tilegx__ |
58 | /* | 44 | /* |
59 | * On TILE-Gx, we must disable the dstream prefetcher before doing | 45 | * On TILE-Gx, we must disable the dstream prefetcher before doing |
@@ -88,7 +74,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) | |||
88 | * memory, that one load would be sufficient, but since we may | 74 | * memory, that one load would be sufficient, but since we may |
89 | * be, we also need to back up to the last load issued to | 75 | * be, we also need to back up to the last load issued to |
90 | * another memory controller, which would be the point where | 76 | * another memory controller, which would be the point where |
91 | * we crossed a "striping" boundary (the granularity of striping | 77 | * we crossed an 8KB boundary (the granularity of striping |
92 | * across memory controllers). Keep backing up and doing this | 78 | * across memory controllers). Keep backing up and doing this |
93 | * until we are before the beginning of the buffer, or have | 79 | * until we are before the beginning of the buffer, or have |
94 | * hit all the controllers. | 80 | * hit all the controllers. |
@@ -102,22 +88,12 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) | |||
102 | * every cache line on a full memory stripe on each | 88 | * every cache line on a full memory stripe on each |
103 | * controller" that we simply do that, to simplify the logic. | 89 | * controller" that we simply do that, to simplify the logic. |
104 | * | 90 | * |
105 | * On TILE-Gx the hash-for-home function is much more complex, | 91 | * FIXME: See bug 9535 for some issues with this code. |
106 | * with the upshot being we can't readily guarantee we have | ||
107 | * hit both entries in the 128-entry AMT that were hit by any | ||
108 | * load in the entire range, so we just re-load them all. | ||
109 | * With larger buffers, we may want to consider using a hypervisor | ||
110 | * trap to issue loads directly to each hash-for-home tile for | ||
111 | * each controller (doing it from Linux would trash the TLB). | ||
112 | */ | 92 | */ |
113 | if (hfh) { | 93 | if (hfh) { |
114 | step_size = L2_CACHE_BYTES; | 94 | step_size = L2_CACHE_BYTES; |
115 | #ifdef __tilegx__ | ||
116 | load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES; | ||
117 | #else | ||
118 | load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * | 95 | load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * |
119 | (1 << CHIP_LOG_NUM_MSHIMS()); | 96 | (1 << CHIP_LOG_NUM_MSHIMS()); |
120 | #endif | ||
121 | } else { | 97 | } else { |
122 | step_size = STRIPE_WIDTH; | 98 | step_size = STRIPE_WIDTH; |
123 | load_count = (1 << CHIP_LOG_NUM_MSHIMS()); | 99 | load_count = (1 << CHIP_LOG_NUM_MSHIMS()); |
@@ -133,7 +109,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) | |||
133 | 109 | ||
134 | /* Figure out how far back we need to go. */ | 110 | /* Figure out how far back we need to go. */ |
135 | base = p - (step_size * (load_count - 2)); | 111 | base = p - (step_size * (load_count - 2)); |
136 | if ((unsigned long)base < (unsigned long)buffer) | 112 | if ((long)base < (long)buffer) |
137 | base = buffer; | 113 | base = buffer; |
138 | 114 | ||
139 | /* | 115 | /* |
diff --git a/arch/tile/lib/checksum.c b/arch/tile/lib/checksum.c index c3ca3e64d9d..e4bab5bd3f3 100644 --- a/arch/tile/lib/checksum.c +++ b/arch/tile/lib/checksum.c | |||
@@ -16,6 +16,19 @@ | |||
16 | #include <net/checksum.h> | 16 | #include <net/checksum.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | 18 | ||
19 | static inline unsigned int longto16(unsigned long x) | ||
20 | { | ||
21 | unsigned long ret; | ||
22 | #ifdef __tilegx__ | ||
23 | ret = __insn_v2sadu(x, 0); | ||
24 | ret = __insn_v2sadu(ret, 0); | ||
25 | #else | ||
26 | ret = __insn_sadh_u(x, 0); | ||
27 | ret = __insn_sadh_u(ret, 0); | ||
28 | #endif | ||
29 | return ret; | ||
30 | } | ||
31 | |||
19 | __wsum do_csum(const unsigned char *buff, int len) | 32 | __wsum do_csum(const unsigned char *buff, int len) |
20 | { | 33 | { |
21 | int odd, count; | 34 | int odd, count; |
@@ -81,7 +94,7 @@ __wsum do_csum(const unsigned char *buff, int len) | |||
81 | } | 94 | } |
82 | if (len & 1) | 95 | if (len & 1) |
83 | result += *buff; | 96 | result += *buff; |
84 | result = csum_long(result); | 97 | result = longto16(result); |
85 | if (odd) | 98 | if (odd) |
86 | result = swab16(result); | 99 | result = swab16(result); |
87 | out: | 100 | out: |
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index dd5f0a33fda..49284fae9d0 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c | |||
@@ -18,6 +18,14 @@ | |||
18 | 18 | ||
19 | /* arch/tile/lib/usercopy.S */ | 19 | /* arch/tile/lib/usercopy.S */ |
20 | #include <linux/uaccess.h> | 20 | #include <linux/uaccess.h> |
21 | EXPORT_SYMBOL(__get_user_1); | ||
22 | EXPORT_SYMBOL(__get_user_2); | ||
23 | EXPORT_SYMBOL(__get_user_4); | ||
24 | EXPORT_SYMBOL(__get_user_8); | ||
25 | EXPORT_SYMBOL(__put_user_1); | ||
26 | EXPORT_SYMBOL(__put_user_2); | ||
27 | EXPORT_SYMBOL(__put_user_4); | ||
28 | EXPORT_SYMBOL(__put_user_8); | ||
21 | EXPORT_SYMBOL(strnlen_user_asm); | 29 | EXPORT_SYMBOL(strnlen_user_asm); |
22 | EXPORT_SYMBOL(strncpy_from_user_asm); | 30 | EXPORT_SYMBOL(strncpy_from_user_asm); |
23 | EXPORT_SYMBOL(clear_user_asm); | 31 | EXPORT_SYMBOL(clear_user_asm); |
@@ -31,9 +39,6 @@ EXPORT_SYMBOL(finv_user_asm); | |||
31 | EXPORT_SYMBOL(current_text_addr); | 39 | EXPORT_SYMBOL(current_text_addr); |
32 | EXPORT_SYMBOL(dump_stack); | 40 | EXPORT_SYMBOL(dump_stack); |
33 | 41 | ||
34 | /* arch/tile/kernel/head.S */ | ||
35 | EXPORT_SYMBOL(empty_zero_page); | ||
36 | |||
37 | /* arch/tile/lib/, various memcpy files */ | 42 | /* arch/tile/lib/, various memcpy files */ |
38 | EXPORT_SYMBOL(memcpy); | 43 | EXPORT_SYMBOL(memcpy); |
39 | EXPORT_SYMBOL(__copy_to_user_inatomic); | 44 | EXPORT_SYMBOL(__copy_to_user_inatomic); |
@@ -74,6 +79,8 @@ EXPORT_SYMBOL(__umoddi3); | |||
74 | int64_t __moddi3(int64_t dividend, int64_t divisor); | 79 | int64_t __moddi3(int64_t dividend, int64_t divisor); |
75 | EXPORT_SYMBOL(__moddi3); | 80 | EXPORT_SYMBOL(__moddi3); |
76 | #ifndef __tilegx__ | 81 | #ifndef __tilegx__ |
82 | uint64_t __ll_mul(uint64_t n0, uint64_t n1); | ||
83 | EXPORT_SYMBOL(__ll_mul); | ||
77 | int64_t __muldi3(int64_t, int64_t); | 84 | int64_t __muldi3(int64_t, int64_t); |
78 | EXPORT_SYMBOL(__muldi3); | 85 | EXPORT_SYMBOL(__muldi3); |
79 | uint64_t __lshrdi3(uint64_t, unsigned int); | 86 | uint64_t __lshrdi3(uint64_t, unsigned int); |
diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c index 6f867dbf7c5..84fdc8d8e73 100644 --- a/arch/tile/lib/memchr_64.c +++ b/arch/tile/lib/memchr_64.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include "string-endian.h" | ||
19 | 18 | ||
20 | void *memchr(const void *s, int c, size_t n) | 19 | void *memchr(const void *s, int c, size_t n) |
21 | { | 20 | { |
@@ -40,8 +39,11 @@ void *memchr(const void *s, int c, size_t n) | |||
40 | 39 | ||
41 | /* Read the first word, but munge it so that bytes before the array | 40 | /* Read the first word, but munge it so that bytes before the array |
42 | * will not match goal. | 41 | * will not match goal. |
42 | * | ||
43 | * Note that this shift count expression works because we know | ||
44 | * shift counts are taken mod 64. | ||
43 | */ | 45 | */ |
44 | before_mask = MASK(s_int); | 46 | before_mask = (1ULL << (s_int << 3)) - 1; |
45 | v = (*p | before_mask) ^ (goal & before_mask); | 47 | v = (*p | before_mask) ^ (goal & before_mask); |
46 | 48 | ||
47 | /* Compute the address of the last byte. */ | 49 | /* Compute the address of the last byte. */ |
@@ -63,7 +65,7 @@ void *memchr(const void *s, int c, size_t n) | |||
63 | /* We found a match, but it might be in a byte past the end | 65 | /* We found a match, but it might be in a byte past the end |
64 | * of the array. | 66 | * of the array. |
65 | */ | 67 | */ |
66 | ret = ((char *)p) + (CFZ(bits) >> 3); | 68 | ret = ((char *)p) + (__insn_ctz(bits) >> 3); |
67 | return (ret <= last_byte_ptr) ? ret : NULL; | 69 | return (ret <= last_byte_ptr) ? ret : NULL; |
68 | } | 70 | } |
69 | EXPORT_SYMBOL(memchr); | 71 | EXPORT_SYMBOL(memchr); |
diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c index c79b8e7c682..3fab9a6a2bb 100644 --- a/arch/tile/lib/memcpy_64.c +++ b/arch/tile/lib/memcpy_64.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #define __memcpy memcpy | ||
18 | /* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */ | 19 | /* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */ |
19 | 20 | ||
20 | /* Must be 8 bytes in size. */ | 21 | /* Must be 8 bytes in size. */ |
@@ -187,7 +188,6 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n) | |||
187 | 188 | ||
188 | /* n != 0 if we get here. Write out any trailing bytes. */ | 189 | /* n != 0 if we get here. Write out any trailing bytes. */ |
189 | dst1 = (char *)dst8; | 190 | dst1 = (char *)dst8; |
190 | #ifndef __BIG_ENDIAN__ | ||
191 | if (n & 4) { | 191 | if (n & 4) { |
192 | ST4((uint32_t *)dst1, final); | 192 | ST4((uint32_t *)dst1, final); |
193 | dst1 += 4; | 193 | dst1 += 4; |
@@ -202,30 +202,11 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n) | |||
202 | } | 202 | } |
203 | if (n) | 203 | if (n) |
204 | ST1((uint8_t *)dst1, final); | 204 | ST1((uint8_t *)dst1, final); |
205 | #else | ||
206 | if (n & 4) { | ||
207 | ST4((uint32_t *)dst1, final >> 32); | ||
208 | dst1 += 4; | ||
209 | } | ||
210 | else | ||
211 | { | ||
212 | final >>= 32; | ||
213 | } | ||
214 | if (n & 2) { | ||
215 | ST2((uint16_t *)dst1, final >> 16); | ||
216 | dst1 += 2; | ||
217 | } | ||
218 | else | ||
219 | { | ||
220 | final >>= 16; | ||
221 | } | ||
222 | if (n & 1) | ||
223 | ST1((uint8_t *)dst1, final >> 8); | ||
224 | #endif | ||
225 | 205 | ||
226 | return RETVAL; | 206 | return RETVAL; |
227 | } | 207 | } |
228 | 208 | ||
209 | |||
229 | #ifdef USERCOPY_FUNC | 210 | #ifdef USERCOPY_FUNC |
230 | #undef ST1 | 211 | #undef ST1 |
231 | #undef ST2 | 212 | #undef ST2 |
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c index 3bc4b4e40d9..b2fe15e0107 100644 --- a/arch/tile/lib/memcpy_tile64.c +++ b/arch/tile/lib/memcpy_tile64.c | |||
@@ -160,7 +160,7 @@ retry_source: | |||
160 | break; | 160 | break; |
161 | if (get_remote_cache_cpu(src_pte) == smp_processor_id()) | 161 | if (get_remote_cache_cpu(src_pte) == smp_processor_id()) |
162 | break; | 162 | break; |
163 | src_page = pfn_to_page(pte_pfn(src_pte)); | 163 | src_page = pfn_to_page(hv_pte_get_pfn(src_pte)); |
164 | get_page(src_page); | 164 | get_page(src_page); |
165 | if (pte_val(src_pte) != pte_val(*src_ptep)) { | 165 | if (pte_val(src_pte) != pte_val(*src_ptep)) { |
166 | put_page(src_page); | 166 | put_page(src_page); |
@@ -168,7 +168,7 @@ retry_source: | |||
168 | } | 168 | } |
169 | if (pte_huge(src_pte)) { | 169 | if (pte_huge(src_pte)) { |
170 | /* Adjust the PTE to correspond to a small page */ | 170 | /* Adjust the PTE to correspond to a small page */ |
171 | int pfn = pte_pfn(src_pte); | 171 | int pfn = hv_pte_get_pfn(src_pte); |
172 | pfn += (((unsigned long)source & (HPAGE_SIZE-1)) | 172 | pfn += (((unsigned long)source & (HPAGE_SIZE-1)) |
173 | >> PAGE_SHIFT); | 173 | >> PAGE_SHIFT); |
174 | src_pte = pfn_pte(pfn, src_pte); | 174 | src_pte = pfn_pte(pfn, src_pte); |
@@ -188,7 +188,7 @@ retry_dest: | |||
188 | put_page(src_page); | 188 | put_page(src_page); |
189 | break; | 189 | break; |
190 | } | 190 | } |
191 | dst_page = pfn_to_page(pte_pfn(dst_pte)); | 191 | dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte)); |
192 | if (dst_page == src_page) { | 192 | if (dst_page == src_page) { |
193 | /* | 193 | /* |
194 | * Source and dest are on the same page; this | 194 | * Source and dest are on the same page; this |
@@ -206,7 +206,7 @@ retry_dest: | |||
206 | } | 206 | } |
207 | if (pte_huge(dst_pte)) { | 207 | if (pte_huge(dst_pte)) { |
208 | /* Adjust the PTE to correspond to a small page */ | 208 | /* Adjust the PTE to correspond to a small page */ |
209 | int pfn = pte_pfn(dst_pte); | 209 | int pfn = hv_pte_get_pfn(dst_pte); |
210 | pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) | 210 | pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) |
211 | >> PAGE_SHIFT); | 211 | >> PAGE_SHIFT); |
212 | dst_pte = pfn_pte(pfn, dst_pte); | 212 | dst_pte = pfn_pte(pfn, dst_pte); |
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 37440caa737..4763b3aff1c 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c | |||
@@ -14,13 +14,7 @@ | |||
14 | * Do memcpy(), but trap and return "n" when a load or store faults. | 14 | * Do memcpy(), but trap and return "n" when a load or store faults. |
15 | * | 15 | * |
16 | * Note: this idiom only works when memcpy() compiles to a leaf function. | 16 | * Note: this idiom only works when memcpy() compiles to a leaf function. |
17 | * Here leaf function not only means it does not have calls, but also | 17 | * If "sp" is updated during memcpy, the "jrp lr" will be incorrect. |
18 | * requires no stack operations (sp, stack frame pointer) and no | ||
19 | * use of callee-saved registers, else "jrp lr" will be incorrect since | ||
20 | * unwinding stack frame is bypassed. Since memcpy() is not complex so | ||
21 | * these conditions are satisfied here, but we need to be careful when | ||
22 | * modifying this file. This is not a clean solution but is the best | ||
23 | * one so far. | ||
24 | * | 18 | * |
25 | * Also note that we are capturing "n" from the containing scope here. | 19 | * Also note that we are capturing "n" from the containing scope here. |
26 | */ | 20 | */ |
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index b16ac49a968..cb0999fb64b 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c | |||
@@ -144,7 +144,7 @@ void arch_read_unlock(arch_rwlock_t *rwlock) | |||
144 | for (;;) { | 144 | for (;;) { |
145 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); | 145 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); |
146 | val = __insn_tns((int *)&rwlock->lock); | 146 | val = __insn_tns((int *)&rwlock->lock); |
147 | if (likely((val & 1) == 0)) { | 147 | if (likely(val & 1) == 0) { |
148 | rwlock->lock = val - (1 << _RD_COUNT_SHIFT); | 148 | rwlock->lock = val - (1 << _RD_COUNT_SHIFT); |
149 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | 149 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); |
150 | break; | 150 | break; |
diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h index 6ac37509fac..c1010980913 100644 --- a/arch/tile/lib/spinlock_common.h +++ b/arch/tile/lib/spinlock_common.h | |||
@@ -60,5 +60,5 @@ static void delay_backoff(int iterations) | |||
60 | loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & | 60 | loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & |
61 | (loops - 1); | 61 | (loops - 1); |
62 | 62 | ||
63 | relax(loops); | 63 | relax(1 << exponent); |
64 | } | 64 | } |
diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c index f39f9dc422b..617a9273aaa 100644 --- a/arch/tile/lib/strchr_64.c +++ b/arch/tile/lib/strchr_64.c | |||
@@ -15,7 +15,8 @@ | |||
15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include "string-endian.h" | 18 | |
19 | #undef strchr | ||
19 | 20 | ||
20 | char *strchr(const char *s, int c) | 21 | char *strchr(const char *s, int c) |
21 | { | 22 | { |
@@ -32,9 +33,13 @@ char *strchr(const char *s, int c) | |||
32 | * match neither zero nor goal (we make sure the high bit of each | 33 | * match neither zero nor goal (we make sure the high bit of each |
33 | * byte is 1, and the low 7 bits are all the opposite of the goal | 34 | * byte is 1, and the low 7 bits are all the opposite of the goal |
34 | * byte). | 35 | * byte). |
36 | * | ||
37 | * Note that this shift count expression works because we know shift | ||
38 | * counts are taken mod 64. | ||
35 | */ | 39 | */ |
36 | const uint64_t before_mask = MASK(s_int); | 40 | const uint64_t before_mask = (1ULL << (s_int << 3)) - 1; |
37 | uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui(before_mask, 1)); | 41 | uint64_t v = (*p | before_mask) ^ |
42 | (goal & __insn_v1shrsi(before_mask, 1)); | ||
38 | 43 | ||
39 | uint64_t zero_matches, goal_matches; | 44 | uint64_t zero_matches, goal_matches; |
40 | while (1) { | 45 | while (1) { |
@@ -50,8 +55,8 @@ char *strchr(const char *s, int c) | |||
50 | v = *++p; | 55 | v = *++p; |
51 | } | 56 | } |
52 | 57 | ||
53 | z = CFZ(zero_matches); | 58 | z = __insn_ctz(zero_matches); |
54 | g = CFZ(goal_matches); | 59 | g = __insn_ctz(goal_matches); |
55 | 60 | ||
56 | /* If we found c before '\0' we got a match. Note that if c == '\0' | 61 | /* If we found c before '\0' we got a match. Note that if c == '\0' |
57 | * then g == z, and we correctly return the address of the '\0' | 62 | * then g == z, and we correctly return the address of the '\0' |
diff --git a/arch/tile/lib/string-endian.h b/arch/tile/lib/string-endian.h deleted file mode 100644 index c0eed7ce69c..00000000000 --- a/arch/tile/lib/string-endian.h +++ /dev/null | |||
@@ -1,33 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2011 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * Provide a mask based on the pointer alignment that | ||
15 | * sets up non-zero bytes before the beginning of the string. | ||
16 | * The MASK expression works because shift counts are taken mod 64. | ||
17 | * Also, specify how to count "first" and "last" bits | ||
18 | * when the bits have been read as a word. | ||
19 | */ | ||
20 | |||
21 | #include <asm/byteorder.h> | ||
22 | |||
23 | #ifdef __LITTLE_ENDIAN | ||
24 | #define MASK(x) (__insn_shl(1ULL, (x << 3)) - 1) | ||
25 | #define NULMASK(x) ((2ULL << x) - 1) | ||
26 | #define CFZ(x) __insn_ctz(x) | ||
27 | #define REVCZ(x) __insn_clz(x) | ||
28 | #else | ||
29 | #define MASK(x) (__insn_shl(-2LL, ((-x << 3) - 1))) | ||
30 | #define NULMASK(x) (-2LL << (63 - x)) | ||
31 | #define CFZ(x) __insn_clz(x) | ||
32 | #define REVCZ(x) __insn_ctz(x) | ||
33 | #endif | ||
diff --git a/arch/tile/lib/strlen_64.c b/arch/tile/lib/strlen_64.c index 9583fc3361f..1c92d46202a 100644 --- a/arch/tile/lib/strlen_64.c +++ b/arch/tile/lib/strlen_64.c | |||
@@ -15,7 +15,8 @@ | |||
15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include "string-endian.h" | 18 | |
19 | #undef strlen | ||
19 | 20 | ||
20 | size_t strlen(const char *s) | 21 | size_t strlen(const char *s) |
21 | { | 22 | { |
@@ -23,13 +24,15 @@ size_t strlen(const char *s) | |||
23 | const uintptr_t s_int = (uintptr_t) s; | 24 | const uintptr_t s_int = (uintptr_t) s; |
24 | const uint64_t *p = (const uint64_t *)(s_int & -8); | 25 | const uint64_t *p = (const uint64_t *)(s_int & -8); |
25 | 26 | ||
26 | /* Read and MASK the first word. */ | 27 | /* Read the first word, but force bytes before the string to be nonzero. |
27 | uint64_t v = *p | MASK(s_int); | 28 | * This expression works because we know shift counts are taken mod 64. |
29 | */ | ||
30 | uint64_t v = *p | ((1ULL << (s_int << 3)) - 1); | ||
28 | 31 | ||
29 | uint64_t bits; | 32 | uint64_t bits; |
30 | while ((bits = __insn_v1cmpeqi(v, 0)) == 0) | 33 | while ((bits = __insn_v1cmpeqi(v, 0)) == 0) |
31 | v = *++p; | 34 | v = *++p; |
32 | 35 | ||
33 | return ((const char *)p) + (CFZ(bits) >> 3) - s; | 36 | return ((const char *)p) + (__insn_ctz(bits) >> 3) - s; |
34 | } | 37 | } |
35 | EXPORT_SYMBOL(strlen); | 38 | EXPORT_SYMBOL(strlen); |
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S index b62d002af00..979f76d8374 100644 --- a/arch/tile/lib/usercopy_32.S +++ b/arch/tile/lib/usercopy_32.S | |||
@@ -19,6 +19,82 @@ | |||
19 | 19 | ||
20 | /* Access user memory, but use MMU to avoid propagating kernel exceptions. */ | 20 | /* Access user memory, but use MMU to avoid propagating kernel exceptions. */ |
21 | 21 | ||
22 | .pushsection .fixup,"ax" | ||
23 | |||
24 | get_user_fault: | ||
25 | { move r0, zero; move r1, zero } | ||
26 | { movei r2, -EFAULT; jrp lr } | ||
27 | ENDPROC(get_user_fault) | ||
28 | |||
29 | put_user_fault: | ||
30 | { movei r0, -EFAULT; jrp lr } | ||
31 | ENDPROC(put_user_fault) | ||
32 | |||
33 | .popsection | ||
34 | |||
35 | /* | ||
36 | * __get_user_N functions take a pointer in r0, and return 0 in r2 | ||
37 | * on success, with the value in r0; or else -EFAULT in r2. | ||
38 | */ | ||
39 | #define __get_user_N(bytes, LOAD) \ | ||
40 | STD_ENTRY(__get_user_##bytes); \ | ||
41 | 1: { LOAD r0, r0; move r1, zero; move r2, zero }; \ | ||
42 | jrp lr; \ | ||
43 | STD_ENDPROC(__get_user_##bytes); \ | ||
44 | .pushsection __ex_table,"a"; \ | ||
45 | .word 1b, get_user_fault; \ | ||
46 | .popsection | ||
47 | |||
48 | __get_user_N(1, lb_u) | ||
49 | __get_user_N(2, lh_u) | ||
50 | __get_user_N(4, lw) | ||
51 | |||
52 | /* | ||
53 | * __get_user_8 takes a pointer in r0, and returns 0 in r2 | ||
54 | * on success, with the value in r0/r1; or else -EFAULT in r2. | ||
55 | */ | ||
56 | STD_ENTRY(__get_user_8); | ||
57 | 1: { lw r0, r0; addi r1, r0, 4 }; | ||
58 | 2: { lw r1, r1; move r2, zero }; | ||
59 | jrp lr; | ||
60 | STD_ENDPROC(__get_user_8); | ||
61 | .pushsection __ex_table,"a"; | ||
62 | .word 1b, get_user_fault; | ||
63 | .word 2b, get_user_fault; | ||
64 | .popsection | ||
65 | |||
66 | /* | ||
67 | * __put_user_N functions take a value in r0 and a pointer in r1, | ||
68 | * and return 0 in r0 on success or -EFAULT on failure. | ||
69 | */ | ||
70 | #define __put_user_N(bytes, STORE) \ | ||
71 | STD_ENTRY(__put_user_##bytes); \ | ||
72 | 1: { STORE r1, r0; move r0, zero }; \ | ||
73 | jrp lr; \ | ||
74 | STD_ENDPROC(__put_user_##bytes); \ | ||
75 | .pushsection __ex_table,"a"; \ | ||
76 | .word 1b, put_user_fault; \ | ||
77 | .popsection | ||
78 | |||
79 | __put_user_N(1, sb) | ||
80 | __put_user_N(2, sh) | ||
81 | __put_user_N(4, sw) | ||
82 | |||
83 | /* | ||
84 | * __put_user_8 takes a value in r0/r1 and a pointer in r2, | ||
85 | * and returns 0 in r0 on success or -EFAULT on failure. | ||
86 | */ | ||
87 | STD_ENTRY(__put_user_8) | ||
88 | 1: { sw r2, r0; addi r2, r2, 4 } | ||
89 | 2: { sw r2, r1; move r0, zero } | ||
90 | jrp lr | ||
91 | STD_ENDPROC(__put_user_8) | ||
92 | .pushsection __ex_table,"a" | ||
93 | .word 1b, put_user_fault | ||
94 | .word 2b, put_user_fault | ||
95 | .popsection | ||
96 | |||
97 | |||
22 | /* | 98 | /* |
23 | * strnlen_user_asm takes the pointer in r0, and the length bound in r1. | 99 | * strnlen_user_asm takes the pointer in r0, and the length bound in r1. |
24 | * It returns the length, including the terminating NUL, or zero on exception. | 100 | * It returns the length, including the terminating NUL, or zero on exception. |
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S index adb2dbbc70c..2ff44f87b78 100644 --- a/arch/tile/lib/usercopy_64.S +++ b/arch/tile/lib/usercopy_64.S | |||
@@ -19,6 +19,55 @@ | |||
19 | 19 | ||
20 | /* Access user memory, but use MMU to avoid propagating kernel exceptions. */ | 20 | /* Access user memory, but use MMU to avoid propagating kernel exceptions. */ |
21 | 21 | ||
22 | .pushsection .fixup,"ax" | ||
23 | |||
24 | get_user_fault: | ||
25 | { movei r1, -EFAULT; move r0, zero } | ||
26 | jrp lr | ||
27 | ENDPROC(get_user_fault) | ||
28 | |||
29 | put_user_fault: | ||
30 | { movei r0, -EFAULT; jrp lr } | ||
31 | ENDPROC(put_user_fault) | ||
32 | |||
33 | .popsection | ||
34 | |||
35 | /* | ||
36 | * __get_user_N functions take a pointer in r0, and return 0 in r1 | ||
37 | * on success, with the value in r0; or else -EFAULT in r1. | ||
38 | */ | ||
39 | #define __get_user_N(bytes, LOAD) \ | ||
40 | STD_ENTRY(__get_user_##bytes); \ | ||
41 | 1: { LOAD r0, r0; move r1, zero }; \ | ||
42 | jrp lr; \ | ||
43 | STD_ENDPROC(__get_user_##bytes); \ | ||
44 | .pushsection __ex_table,"a"; \ | ||
45 | .quad 1b, get_user_fault; \ | ||
46 | .popsection | ||
47 | |||
48 | __get_user_N(1, ld1u) | ||
49 | __get_user_N(2, ld2u) | ||
50 | __get_user_N(4, ld4u) | ||
51 | __get_user_N(8, ld) | ||
52 | |||
53 | /* | ||
54 | * __put_user_N functions take a value in r0 and a pointer in r1, | ||
55 | * and return 0 in r0 on success or -EFAULT on failure. | ||
56 | */ | ||
57 | #define __put_user_N(bytes, STORE) \ | ||
58 | STD_ENTRY(__put_user_##bytes); \ | ||
59 | 1: { STORE r1, r0; move r0, zero }; \ | ||
60 | jrp lr; \ | ||
61 | STD_ENDPROC(__put_user_##bytes); \ | ||
62 | .pushsection __ex_table,"a"; \ | ||
63 | .quad 1b, put_user_fault; \ | ||
64 | .popsection | ||
65 | |||
66 | __put_user_N(1, st1) | ||
67 | __put_user_N(2, st2) | ||
68 | __put_user_N(4, st4) | ||
69 | __put_user_N(8, st) | ||
70 | |||
22 | /* | 71 | /* |
23 | * strnlen_user_asm takes the pointer in r0, and the length bound in r1. | 72 | * strnlen_user_asm takes the pointer in r0, and the length bound in r1. |
24 | * It returns the length, including the terminating NUL, or zero on exception. | 73 | * It returns the length, including the terminating NUL, or zero on exception. |