aboutsummaryrefslogtreecommitdiffstats
path: root/arch/tile/lib
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2010-08-13 08:52:19 -0400
committerChris Metcalf <cmetcalf@tilera.com>2010-08-13 08:52:19 -0400
commitc745a8a11fa1df6078bfc61fc29492ed43f71c2b (patch)
tree2db1cdf9cd0d0e892f4f92de1fd2700ac319f04a /arch/tile/lib
parent1fcbe027b5d29ec9cd0eeb753c14fb366ae852ac (diff)
arch/tile: Various cleanups.
This change rolls up random cleanups not representing any actual bugs. - Remove a stale CONFIG_ value from the default tile_defconfig - Remove unused tns_atomic_xxx() family of methods from <asm/atomic.h> - Optimize get_order() using Tile's "clz" instruction - Fix a bad hypervisor upcall name (not currently used in Linux anyway) - Use __copy_in_user_inatomic() name for consistency, and export it - Export some additional hypervisor driver I/O upcalls and some homecache calls - Remove the obfuscating MEMCPY_TEST_WH64 support code - Other stray comment cleanups, #if 0 removal, etc. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/lib')
-rw-r--r--arch/tile/lib/Makefile4
-rw-r--r--arch/tile/lib/exports.c16
-rw-r--r--arch/tile/lib/memcpy_32.S20
-rw-r--r--arch/tile/lib/memset_32.c25
4 files changed, 18 insertions, 47 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 438af38bc9eb..746dc81ed3c4 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,7 +7,9 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o \
7 memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \ 7 memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \
8 strchr_$(BITS).o strlen_$(BITS).o 8 strchr_$(BITS).o strlen_$(BITS).o
9 9
10ifneq ($(CONFIG_TILEGX),y) 10ifeq ($(CONFIG_TILEGX),y)
11lib-y += memcpy_user_64.o
12else
11lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o 13lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
12endif 14endif
13 15
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 6bc7b52b4aa0..ce5dbf56578f 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -36,21 +36,29 @@ EXPORT_SYMBOL(clear_user_asm);
36EXPORT_SYMBOL(current_text_addr); 36EXPORT_SYMBOL(current_text_addr);
37EXPORT_SYMBOL(dump_stack); 37EXPORT_SYMBOL(dump_stack);
38 38
39/* arch/tile/lib/__memcpy.S */ 39/* arch/tile/lib/, various memcpy files */
40/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */
41EXPORT_SYMBOL(memcpy); 40EXPORT_SYMBOL(memcpy);
42EXPORT_SYMBOL(__copy_to_user_inatomic); 41EXPORT_SYMBOL(__copy_to_user_inatomic);
43EXPORT_SYMBOL(__copy_from_user_inatomic); 42EXPORT_SYMBOL(__copy_from_user_inatomic);
44EXPORT_SYMBOL(__copy_from_user_zeroing); 43EXPORT_SYMBOL(__copy_from_user_zeroing);
44#ifdef __tilegx__
45EXPORT_SYMBOL(__copy_in_user_inatomic);
46#endif
45 47
46/* hypervisor glue */ 48/* hypervisor glue */
47#include <hv/hypervisor.h> 49#include <hv/hypervisor.h>
48EXPORT_SYMBOL(hv_dev_open); 50EXPORT_SYMBOL(hv_dev_open);
49EXPORT_SYMBOL(hv_dev_pread); 51EXPORT_SYMBOL(hv_dev_pread);
50EXPORT_SYMBOL(hv_dev_pwrite); 52EXPORT_SYMBOL(hv_dev_pwrite);
53EXPORT_SYMBOL(hv_dev_preada);
54EXPORT_SYMBOL(hv_dev_pwritea);
55EXPORT_SYMBOL(hv_dev_poll);
56EXPORT_SYMBOL(hv_dev_poll_cancel);
51EXPORT_SYMBOL(hv_dev_close); 57EXPORT_SYMBOL(hv_dev_close);
58EXPORT_SYMBOL(hv_sysconf);
59EXPORT_SYMBOL(hv_confstr);
52 60
53/* -ltile-cc */ 61/* libgcc.a */
54uint32_t __udivsi3(uint32_t dividend, uint32_t divisor); 62uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
55EXPORT_SYMBOL(__udivsi3); 63EXPORT_SYMBOL(__udivsi3);
56int32_t __divsi3(int32_t dividend, int32_t divisor); 64int32_t __divsi3(int32_t dividend, int32_t divisor);
@@ -70,8 +78,6 @@ EXPORT_SYMBOL(__moddi3);
70#ifndef __tilegx__ 78#ifndef __tilegx__
71uint64_t __ll_mul(uint64_t n0, uint64_t n1); 79uint64_t __ll_mul(uint64_t n0, uint64_t n1);
72EXPORT_SYMBOL(__ll_mul); 80EXPORT_SYMBOL(__ll_mul);
73#endif
74#ifndef __tilegx__
75int64_t __muldi3(int64_t, int64_t); 81int64_t __muldi3(int64_t, int64_t);
76EXPORT_SYMBOL(__muldi3); 82EXPORT_SYMBOL(__muldi3);
77uint64_t __lshrdi3(uint64_t, unsigned int); 83uint64_t __lshrdi3(uint64_t, unsigned int);
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index f92984bf60ec..30c3b7ebb55d 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -17,10 +17,6 @@
17 17
18#include <arch/chip.h> 18#include <arch/chip.h>
19 19
20#if CHIP_HAS_WH64() || defined(MEMCPY_TEST_WH64)
21#define MEMCPY_USE_WH64
22#endif
23
24 20
25#include <linux/linkage.h> 21#include <linux/linkage.h>
26 22
@@ -160,7 +156,7 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
160 156
161 { addi r3, r1, 60; andi r9, r9, -64 } 157 { addi r3, r1, 60; andi r9, r9, -64 }
162 158
163#ifdef MEMCPY_USE_WH64 159#if CHIP_HAS_WH64()
164 /* No need to prefetch dst, we'll just do the wh64 160 /* No need to prefetch dst, we'll just do the wh64
165 * right before we copy a line. 161 * right before we copy a line.
166 */ 162 */
@@ -173,7 +169,7 @@ EX: { lw r6, r3; addi r3, r3, 64 }
173 /* Intentionally stall for a few cycles to leave L2 cache alone. */ 169 /* Intentionally stall for a few cycles to leave L2 cache alone. */
174 { bnzt zero, . } 170 { bnzt zero, . }
175EX: { lw r7, r3; addi r3, r3, 64 } 171EX: { lw r7, r3; addi r3, r3, 64 }
176#ifndef MEMCPY_USE_WH64 172#if !CHIP_HAS_WH64()
177 /* Prefetch the dest */ 173 /* Prefetch the dest */
178 /* Intentionally stall for a few cycles to leave L2 cache alone. */ 174 /* Intentionally stall for a few cycles to leave L2 cache alone. */
179 { bnzt zero, . } 175 { bnzt zero, . }
@@ -288,15 +284,7 @@ EX: { lw r7, r3; addi r3, r3, 64 }
288 /* Fill second L1D line. */ 284 /* Fill second L1D line. */
289EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ 285EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
290 286
291#ifdef MEMCPY_TEST_WH64 287#if CHIP_HAS_WH64()
292 /* Issue a fake wh64 that clobbers the destination words
293 * with random garbage, for testing.
294 */
295 { movei r19, 64; crc32_32 r10, r2, r9 }
296.Lwh64_test_loop:
297EX: { sw r9, r10; addi r9, r9, 4; addi r19, r19, -4 }
298 { bnzt r19, .Lwh64_test_loop; crc32_32 r10, r10, r19 }
299#elif CHIP_HAS_WH64()
300 /* Prepare destination line for writing. */ 288 /* Prepare destination line for writing. */
301EX: { wh64 r9; addi r9, r9, 64 } 289EX: { wh64 r9; addi r9, r9, 64 }
302#else 290#else
@@ -340,7 +328,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
340EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ 328EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
341EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ 329EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
342EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ 330EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
343#ifdef MEMCPY_USE_WH64 331#if CHIP_HAS_WH64()
344EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ 332EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
345#else 333#else
346 /* Back up the r9 to a cache line we are already storing to 334 /* Back up the r9 to a cache line we are already storing to
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
index bfde5d864df1..d014c1fbcbc2 100644
--- a/arch/tile/lib/memset_32.c
+++ b/arch/tile/lib/memset_32.c
@@ -141,7 +141,6 @@ void *memset(void *s, int c, size_t n)
141 */ 141 */
142 __insn_prefetch(&out32[ahead32]); 142 __insn_prefetch(&out32[ahead32]);
143 143
144#if 1
145#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0 144#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
146#error "Unhandled CACHE_LINE_SIZE_IN_WORDS" 145#error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
147#endif 146#endif
@@ -157,30 +156,6 @@ void *memset(void *s, int c, size_t n)
157 *out32++ = v32; 156 *out32++ = v32;
158 *out32++ = v32; 157 *out32++ = v32;
159 } 158 }
160#else
161 /* Unfortunately, due to a code generator flaw this
162 * allocates a separate register for each of these
163 * stores, which requires a large number of spills,
164 * which makes this procedure enormously bigger
165 * (something like 70%)
166 */
167 *out32++ = v32;
168 *out32++ = v32;
169 *out32++ = v32;
170 *out32++ = v32;
171 *out32++ = v32;
172 *out32++ = v32;
173 *out32++ = v32;
174 *out32++ = v32;
175 *out32++ = v32;
176 *out32++ = v32;
177 *out32++ = v32;
178 *out32++ = v32;
179 *out32++ = v32;
180 *out32++ = v32;
181 *out32++ = v32;
182 n32 -= 16;
183#endif
184 159
185 /* To save compiled code size, reuse this loop even 160 /* To save compiled code size, reuse this loop even
186 * when we run out of prefetching to do by dropping 161 * when we run out of prefetching to do by dropping