aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/tile/configs/tile_defconfig1
-rw-r--r--arch/tile/include/asm/atomic_32.h37
-rw-r--r--arch/tile/include/asm/page.h6
-rw-r--r--arch/tile/include/asm/uaccess.h4
-rw-r--r--arch/tile/include/hv/hypervisor.h8
-rw-r--r--arch/tile/lib/Makefile4
-rw-r--r--arch/tile/lib/exports.c16
-rw-r--r--arch/tile/lib/memcpy_32.S20
-rw-r--r--arch/tile/lib/memset_32.c25
-rw-r--r--arch/tile/mm/fault.c8
-rw-r--r--arch/tile/mm/homecache.c3
11 files changed, 40 insertions, 92 deletions
diff --git a/arch/tile/configs/tile_defconfig b/arch/tile/configs/tile_defconfig
index f34c70b46c6..5ad1a71ae3c 100644
--- a/arch/tile/configs/tile_defconfig
+++ b/arch/tile/configs/tile_defconfig
@@ -231,7 +231,6 @@ CONFIG_HARDWALL=y
231CONFIG_MEMPROF=y 231CONFIG_MEMPROF=y
232CONFIG_XGBE=y 232CONFIG_XGBE=y
233CONFIG_NET_TILE=y 233CONFIG_NET_TILE=y
234CONFIG_PSEUDO_NAPI=y
235CONFIG_TILEPCI_ENDP=y 234CONFIG_TILEPCI_ENDP=y
236CONFIG_TILEPCI_HOST_SUBSET=m 235CONFIG_TILEPCI_HOST_SUBSET=m
237CONFIG_TILE_IDE_GPIO=y 236CONFIG_TILE_IDE_GPIO=y
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 40a5a3a876d..ed359aee883 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -255,43 +255,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
255#define smp_mb__after_atomic_dec() do { } while (0) 255#define smp_mb__after_atomic_dec() do { } while (0)
256#define smp_mb__after_atomic_inc() do { } while (0) 256#define smp_mb__after_atomic_inc() do { } while (0)
257 257
258
259/*
260 * Support "tns" atomic integers. These are atomic integers that can
261 * hold any value but "1". They are more efficient than regular atomic
262 * operations because the "lock" (aka acquire) step is a single "tns"
263 * in the uncontended case, and the "unlock" (aka release) step is a
264 * single "store" without an mf. (However, note that on tilepro the
265 * "tns" will evict the local cache line, so it's not all upside.)
266 *
267 * Note that you can ONLY observe the value stored in the pointer
268 * using these operations; a direct read of the value may confusingly
269 * return the special value "1".
270 */
271
272int __tns_atomic_acquire(atomic_t *);
273void __tns_atomic_release(atomic_t *p, int v);
274
275static inline void tns_atomic_set(atomic_t *v, int i)
276{
277 __tns_atomic_acquire(v);
278 __tns_atomic_release(v, i);
279}
280
281static inline int tns_atomic_cmpxchg(atomic_t *v, int o, int n)
282{
283 int ret = __tns_atomic_acquire(v);
284 __tns_atomic_release(v, (ret == o) ? n : ret);
285 return ret;
286}
287
288static inline int tns_atomic_xchg(atomic_t *v, int n)
289{
290 int ret = __tns_atomic_acquire(v);
291 __tns_atomic_release(v, n);
292 return ret;
293}
294
295#endif /* !__ASSEMBLY__ */ 258#endif /* !__ASSEMBLY__ */
296 259
297/* 260/*
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index f894a9016da..7d90641cf18 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -129,6 +129,11 @@ static inline u64 pmd_val(pmd_t pmd)
129 129
130#endif 130#endif
131 131
132static inline __attribute_const__ int get_order(unsigned long size)
133{
134 return BITS_PER_LONG - __builtin_clzl((size - 1) >> PAGE_SHIFT);
135}
136
132#endif /* !__ASSEMBLY__ */ 137#endif /* !__ASSEMBLY__ */
133 138
134#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 139#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
@@ -332,7 +337,6 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
332 (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 337 (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
333 338
334#include <asm-generic/memory_model.h> 339#include <asm-generic/memory_model.h>
335#include <asm-generic/getorder.h>
336 340
337#endif /* __KERNEL__ */ 341#endif /* __KERNEL__ */
338 342
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index ed17a80ec0e..ef34d2caa5b 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -389,14 +389,14 @@ static inline unsigned long __must_check copy_from_user(void *to,
389 * Returns number of bytes that could not be copied. 389 * Returns number of bytes that could not be copied.
390 * On success, this will be zero. 390 * On success, this will be zero.
391 */ 391 */
392extern unsigned long __copy_in_user_asm( 392extern unsigned long __copy_in_user_inatomic(
393 void __user *to, const void __user *from, unsigned long n); 393 void __user *to, const void __user *from, unsigned long n);
394 394
395static inline unsigned long __must_check 395static inline unsigned long __must_check
396__copy_in_user(void __user *to, const void __user *from, unsigned long n) 396__copy_in_user(void __user *to, const void __user *from, unsigned long n)
397{ 397{
398 might_sleep(); 398 might_sleep();
399 return __copy_in_user_asm(to, from, n); 399 return __copy_in_user_inatomic(to, from, n);
400} 400}
401 401
402static inline unsigned long __must_check 402static inline unsigned long __must_check
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index 59b46dc5399..9bd303a141b 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -532,11 +532,11 @@ void hv_disable_intr(HV_IntrMask disab_mask);
532 */ 532 */
533void hv_clear_intr(HV_IntrMask clear_mask); 533void hv_clear_intr(HV_IntrMask clear_mask);
534 534
535/** Assert a set of device interrupts. 535/** Raise a set of device interrupts.
536 * 536 *
537 * @param assert_mask Bitmap of interrupts to clear. 537 * @param raise_mask Bitmap of interrupts to raise.
538 */ 538 */
539void hv_assert_intr(HV_IntrMask assert_mask); 539void hv_raise_intr(HV_IntrMask raise_mask);
540 540
541/** Trigger a one-shot interrupt on some tile 541/** Trigger a one-shot interrupt on some tile
542 * 542 *
@@ -1712,7 +1712,7 @@ typedef struct
1712 * @param cache_control This argument allows you to specify a length of 1712 * @param cache_control This argument allows you to specify a length of
1713 * physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN). 1713 * physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN).
1714 * You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache. 1714 * You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache.
1715 * You can "or" in HV_FLUSH_EVICT_LI1 to flush the whole LII cache. 1715 * You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache.
1716 * HV_FLUSH_ALL flushes all caches. 1716 * HV_FLUSH_ALL flushes all caches.
1717 * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of 1717 * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of
1718 * tile indices to perform cache flush on. The low bit of the first 1718 * tile indices to perform cache flush on. The low bit of the first
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 438af38bc9e..746dc81ed3c 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,7 +7,9 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o \
7 memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \ 7 memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \
8 strchr_$(BITS).o strlen_$(BITS).o 8 strchr_$(BITS).o strlen_$(BITS).o
9 9
10ifneq ($(CONFIG_TILEGX),y) 10ifeq ($(CONFIG_TILEGX),y)
11lib-y += memcpy_user_64.o
12else
11lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o 13lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
12endif 14endif
13 15
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 6bc7b52b4aa..ce5dbf56578 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -36,21 +36,29 @@ EXPORT_SYMBOL(clear_user_asm);
36EXPORT_SYMBOL(current_text_addr); 36EXPORT_SYMBOL(current_text_addr);
37EXPORT_SYMBOL(dump_stack); 37EXPORT_SYMBOL(dump_stack);
38 38
39/* arch/tile/lib/__memcpy.S */ 39/* arch/tile/lib/, various memcpy files */
40/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */
41EXPORT_SYMBOL(memcpy); 40EXPORT_SYMBOL(memcpy);
42EXPORT_SYMBOL(__copy_to_user_inatomic); 41EXPORT_SYMBOL(__copy_to_user_inatomic);
43EXPORT_SYMBOL(__copy_from_user_inatomic); 42EXPORT_SYMBOL(__copy_from_user_inatomic);
44EXPORT_SYMBOL(__copy_from_user_zeroing); 43EXPORT_SYMBOL(__copy_from_user_zeroing);
44#ifdef __tilegx__
45EXPORT_SYMBOL(__copy_in_user_inatomic);
46#endif
45 47
46/* hypervisor glue */ 48/* hypervisor glue */
47#include <hv/hypervisor.h> 49#include <hv/hypervisor.h>
48EXPORT_SYMBOL(hv_dev_open); 50EXPORT_SYMBOL(hv_dev_open);
49EXPORT_SYMBOL(hv_dev_pread); 51EXPORT_SYMBOL(hv_dev_pread);
50EXPORT_SYMBOL(hv_dev_pwrite); 52EXPORT_SYMBOL(hv_dev_pwrite);
53EXPORT_SYMBOL(hv_dev_preada);
54EXPORT_SYMBOL(hv_dev_pwritea);
55EXPORT_SYMBOL(hv_dev_poll);
56EXPORT_SYMBOL(hv_dev_poll_cancel);
51EXPORT_SYMBOL(hv_dev_close); 57EXPORT_SYMBOL(hv_dev_close);
58EXPORT_SYMBOL(hv_sysconf);
59EXPORT_SYMBOL(hv_confstr);
52 60
53/* -ltile-cc */ 61/* libgcc.a */
54uint32_t __udivsi3(uint32_t dividend, uint32_t divisor); 62uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
55EXPORT_SYMBOL(__udivsi3); 63EXPORT_SYMBOL(__udivsi3);
56int32_t __divsi3(int32_t dividend, int32_t divisor); 64int32_t __divsi3(int32_t dividend, int32_t divisor);
@@ -70,8 +78,6 @@ EXPORT_SYMBOL(__moddi3);
70#ifndef __tilegx__ 78#ifndef __tilegx__
71uint64_t __ll_mul(uint64_t n0, uint64_t n1); 79uint64_t __ll_mul(uint64_t n0, uint64_t n1);
72EXPORT_SYMBOL(__ll_mul); 80EXPORT_SYMBOL(__ll_mul);
73#endif
74#ifndef __tilegx__
75int64_t __muldi3(int64_t, int64_t); 81int64_t __muldi3(int64_t, int64_t);
76EXPORT_SYMBOL(__muldi3); 82EXPORT_SYMBOL(__muldi3);
77uint64_t __lshrdi3(uint64_t, unsigned int); 83uint64_t __lshrdi3(uint64_t, unsigned int);
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index f92984bf60e..30c3b7ebb55 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -17,10 +17,6 @@
17 17
18#include <arch/chip.h> 18#include <arch/chip.h>
19 19
20#if CHIP_HAS_WH64() || defined(MEMCPY_TEST_WH64)
21#define MEMCPY_USE_WH64
22#endif
23
24 20
25#include <linux/linkage.h> 21#include <linux/linkage.h>
26 22
@@ -160,7 +156,7 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
160 156
161 { addi r3, r1, 60; andi r9, r9, -64 } 157 { addi r3, r1, 60; andi r9, r9, -64 }
162 158
163#ifdef MEMCPY_USE_WH64 159#if CHIP_HAS_WH64()
164 /* No need to prefetch dst, we'll just do the wh64 160 /* No need to prefetch dst, we'll just do the wh64
165 * right before we copy a line. 161 * right before we copy a line.
166 */ 162 */
@@ -173,7 +169,7 @@ EX: { lw r6, r3; addi r3, r3, 64 }
173 /* Intentionally stall for a few cycles to leave L2 cache alone. */ 169 /* Intentionally stall for a few cycles to leave L2 cache alone. */
174 { bnzt zero, . } 170 { bnzt zero, . }
175EX: { lw r7, r3; addi r3, r3, 64 } 171EX: { lw r7, r3; addi r3, r3, 64 }
176#ifndef MEMCPY_USE_WH64 172#if !CHIP_HAS_WH64()
177 /* Prefetch the dest */ 173 /* Prefetch the dest */
178 /* Intentionally stall for a few cycles to leave L2 cache alone. */ 174 /* Intentionally stall for a few cycles to leave L2 cache alone. */
179 { bnzt zero, . } 175 { bnzt zero, . }
@@ -288,15 +284,7 @@ EX: { lw r7, r3; addi r3, r3, 64 }
288 /* Fill second L1D line. */ 284 /* Fill second L1D line. */
289EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ 285EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
290 286
291#ifdef MEMCPY_TEST_WH64 287#if CHIP_HAS_WH64()
292 /* Issue a fake wh64 that clobbers the destination words
293 * with random garbage, for testing.
294 */
295 { movei r19, 64; crc32_32 r10, r2, r9 }
296.Lwh64_test_loop:
297EX: { sw r9, r10; addi r9, r9, 4; addi r19, r19, -4 }
298 { bnzt r19, .Lwh64_test_loop; crc32_32 r10, r10, r19 }
299#elif CHIP_HAS_WH64()
300 /* Prepare destination line for writing. */ 288 /* Prepare destination line for writing. */
301EX: { wh64 r9; addi r9, r9, 64 } 289EX: { wh64 r9; addi r9, r9, 64 }
302#else 290#else
@@ -340,7 +328,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
340EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ 328EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
341EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ 329EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
342EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ 330EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
343#ifdef MEMCPY_USE_WH64 331#if CHIP_HAS_WH64()
344EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ 332EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
345#else 333#else
346 /* Back up the r9 to a cache line we are already storing to 334 /* Back up the r9 to a cache line we are already storing to
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
index bfde5d864df..d014c1fbcbc 100644
--- a/arch/tile/lib/memset_32.c
+++ b/arch/tile/lib/memset_32.c
@@ -141,7 +141,6 @@ void *memset(void *s, int c, size_t n)
141 */ 141 */
142 __insn_prefetch(&out32[ahead32]); 142 __insn_prefetch(&out32[ahead32]);
143 143
144#if 1
145#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0 144#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
146#error "Unhandled CACHE_LINE_SIZE_IN_WORDS" 145#error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
147#endif 146#endif
@@ -157,30 +156,6 @@ void *memset(void *s, int c, size_t n)
157 *out32++ = v32; 156 *out32++ = v32;
158 *out32++ = v32; 157 *out32++ = v32;
159 } 158 }
160#else
161 /* Unfortunately, due to a code generator flaw this
162 * allocates a separate register for each of these
163 * stores, which requires a large number of spills,
164 * which makes this procedure enormously bigger
165 * (something like 70%)
166 */
167 *out32++ = v32;
168 *out32++ = v32;
169 *out32++ = v32;
170 *out32++ = v32;
171 *out32++ = v32;
172 *out32++ = v32;
173 *out32++ = v32;
174 *out32++ = v32;
175 *out32++ = v32;
176 *out32++ = v32;
177 *out32++ = v32;
178 *out32++ = v32;
179 *out32++ = v32;
180 *out32++ = v32;
181 *out32++ = v32;
182 n32 -= 16;
183#endif
184 159
185 /* To save compiled code size, reuse this loop even 160 /* To save compiled code size, reuse this loop even
186 * when we run out of prefetching to do by dropping 161 * when we run out of prefetching to do by dropping
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 0011f06b4fe..704f3e8a438 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -567,6 +567,14 @@ do_sigbus:
567 * since that might indicate we have not yet squirreled the SPR 567 * since that might indicate we have not yet squirreled the SPR
568 * contents away and can thus safely take a recursive interrupt. 568 * contents away and can thus safely take a recursive interrupt.
569 * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2. 569 * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2.
570 *
571 * Note that this routine is called before homecache_tlb_defer_enter(),
572 * which means that we can properly unlock any atomics that might
573 * be used there (good), but also means we must be very sensitive
574 * to not touch any data structures that might be located in memory
575 * that could migrate, as we could be entering the kernel on a dataplane
576 * cpu that has been deferring kernel TLB updates. This means, for
577 * example, that we can't migrate init_mm or its pgd.
570 */ 578 */
571struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, 579struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
572 unsigned long address, 580 unsigned long address,
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 97c478e7be2..fb3b4a55cec 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -29,6 +29,7 @@
29#include <linux/timex.h> 29#include <linux/timex.h>
30#include <linux/cache.h> 30#include <linux/cache.h>
31#include <linux/smp.h> 31#include <linux/smp.h>
32#include <linux/module.h>
32 33
33#include <asm/page.h> 34#include <asm/page.h>
34#include <asm/sections.h> 35#include <asm/sections.h>
@@ -348,6 +349,7 @@ pte_t pte_set_home(pte_t pte, int home)
348 349
349 return pte; 350 return pte;
350} 351}
352EXPORT_SYMBOL(pte_set_home);
351 353
352/* 354/*
353 * The routines in this section are the "static" versions of the normal 355 * The routines in this section are the "static" versions of the normal
@@ -403,6 +405,7 @@ struct page *homecache_alloc_pages(gfp_t gfp_mask,
403 homecache_change_page_home(page, order, home); 405 homecache_change_page_home(page, order, home);
404 return page; 406 return page;
405} 407}
408EXPORT_SYMBOL(homecache_alloc_pages);
406 409
407struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, 410struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
408 unsigned int order, int home) 411 unsigned int order, int home)