aboutsummaryrefslogtreecommitdiffstats
path: root/arch/tile/lib/memcpy_tile64.c
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2010-05-28 23:09:12 -0400
committerChris Metcalf <cmetcalf@tilera.com>2010-06-04 17:11:18 -0400
commit867e359b97c970a60626d5d76bbe2a8fadbf38fb (patch)
treec5ccbb7f5172e8555977119608ecb1eee3cc37e3 /arch/tile/lib/memcpy_tile64.c
parent5360bd776f73d0a7da571d72a09a03f237e99900 (diff)
arch/tile: core support for Tilera 32-bit chips.
This change is the core kernel support for TILEPro and TILE64 chips. No driver support (except the console driver) is included yet. This includes the relevant Linux headers in asm/; the low-level "Tile architecture" headers in arch/, which are shared with the hypervisor, etc., and are build-system agnostic; and the relevant hypervisor headers in hv/. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Reviewed-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/tile/lib/memcpy_tile64.c')
-rw-r--r--arch/tile/lib/memcpy_tile64.c271
1 files changed, 271 insertions, 0 deletions
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
new file mode 100644
index 00000000000..4f004734246
--- /dev/null
+++ b/arch/tile/lib/memcpy_tile64.c
@@ -0,0 +1,271 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/string.h>
16#include <linux/smp.h>
17#include <linux/module.h>
18#include <linux/uaccess.h>
19#include <asm/fixmap.h>
20#include <asm/kmap_types.h>
21#include <asm/tlbflush.h>
22#include <hv/hypervisor.h>
23#include <arch/chip.h>
24
25
26#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
27
28/* Defined in memcpy.S */
29extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
30extern unsigned long __copy_to_user_inatomic_asm(
31 void __user *to, const void *from, unsigned long n);
32extern unsigned long __copy_from_user_inatomic_asm(
33 void *to, const void __user *from, unsigned long n);
34extern unsigned long __copy_from_user_zeroing_asm(
35 void *to, const void __user *from, unsigned long n);
36
37typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
38
/*
 * Copies of at least this many bytes are candidates for the
 * TLB-remapping fast path; anything smaller goes straight to the
 * assembly routine.
 */
#define LARGE_COPY_CUTOFF 2048

/*
 * Tell the simulator what we are up to: writes the
 * ALLOW_MULTIPLE_CACHING control word, with (b) placed in the operand
 * field, to SPR_SIM_CONTROL.  Callers in this file pass 1 before the
 * incoherent copy and 0 once it is finished.
 */
#define sim_allow_multiple_caching(b)					\
	__insn_mtspr(SPR_SIM_CONTROL,					\
		     SIM_CONTROL_ALLOW_MULTIPLE_CACHING |		\
		     ((b) << _SIM_CONTROL_OPERATOR_BITS))
46
47/*
48 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
49 *
50 * We set up our own source and destination PTEs that we fully control.
51 * This is the only way to guarantee that we don't race with another
52 * thread that is modifying the PTE; we can't afford to try the
53 * copy_{to,from}_user() technique of catching the interrupt, since
54 * we must run with interrupts disabled to avoid the risk of some
55 * other code seeing the incoherent data in our cache. (Recall that
56 * our cache is indexed by PA, so even if the other code doesn't use
57 * our KM_MEMCPY virtual addresses, they'll still hit in cache using
58 * the normal VAs that aren't supposed to hit in cache.)
59 */
60static void memcpy_multicache(void *dest, const void *source,
61 pte_t dst_pte, pte_t src_pte, int len)
62{
63 int idx, i;
64 unsigned long flags, newsrc, newdst, endsrc;
65 pmd_t *pmdp;
66 pte_t *ptep;
67 int cpu = get_cpu();
68
69 /*
70 * Disable interrupts so that we don't recurse into memcpy()
71 * in an interrupt handler, nor accidentally reference
72 * the PA of the source from an interrupt routine. Also
73 * notify the simulator that we're playing games so we don't
74 * generate spurious coherency warnings.
75 */
76 local_irq_save(flags);
77 sim_allow_multiple_caching(1);
78
79 /* Set up the new dest mapping */
80 idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + KM_MEMCPY0;
81 newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
82 pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
83 ptep = pte_offset_kernel(pmdp, newdst);
84 if (pte_val(*ptep) != pte_val(dst_pte)) {
85 set_pte(ptep, dst_pte);
86 local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
87 }
88
89 /* Set up the new source mapping */
90 idx += (KM_MEMCPY0 - KM_MEMCPY1);
91 src_pte = hv_pte_set_nc(src_pte);
92 src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */
93 newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
94 pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
95 ptep = pte_offset_kernel(pmdp, newsrc);
96 *ptep = src_pte; /* set_pte() would be confused by this */
97 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
98
99 /* Actually move the data. */
100 __memcpy_asm((void *)newdst, (const void *)newsrc, len);
101
102 /*
103 * Remap the source as locally-cached and not OLOC'ed so that
104 * we can inval without also invaling the remote cpu's cache.
105 * This also avoids known errata with inv'ing cacheable oloc data.
106 */
107 src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
108 src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
109 *ptep = src_pte; /* set_pte() would be confused by this */
110 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
111
112 /*
113 * Do the actual invalidation, covering the full L2 cache line
114 * at the end since __memcpy_asm() is somewhat aggressive.
115 */
116 __inv_buffer((void *)newsrc, len);
117
118 /*
119 * We're done: notify the simulator that all is back to normal,
120 * and re-enable interrupts and pre-emption.
121 */
122 sim_allow_multiple_caching(0);
123 local_irq_restore(flags);
124 put_cpu_no_resched();
125}
126
127/*
128 * Identify large copies from remotely-cached memory, and copy them
129 * via memcpy_multicache() if they look good, otherwise fall back
130 * to the particular kind of copying passed as the memcpy_t function.
131 */
132static unsigned long fast_copy(void *dest, const void *source, int len,
133 memcpy_t func)
134{
135 /*
136 * Check if it's big enough to bother with. We may end up doing a
137 * small copy via TLB manipulation if we're near a page boundary,
138 * but presumably we'll make it up when we hit the second page.
139 */
140 while (len >= LARGE_COPY_CUTOFF) {
141 int copy_size, bytes_left_on_page;
142 pte_t *src_ptep, *dst_ptep;
143 pte_t src_pte, dst_pte;
144 struct page *src_page, *dst_page;
145
146 /* Is the source page oloc'ed to a remote cpu? */
147retry_source:
148 src_ptep = virt_to_pte(current->mm, (unsigned long)source);
149 if (src_ptep == NULL)
150 break;
151 src_pte = *src_ptep;
152 if (!hv_pte_get_present(src_pte) ||
153 !hv_pte_get_readable(src_pte) ||
154 hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
155 break;
156 if (get_remote_cache_cpu(src_pte) == smp_processor_id())
157 break;
158 src_page = pfn_to_page(hv_pte_get_pfn(src_pte));
159 get_page(src_page);
160 if (pte_val(src_pte) != pte_val(*src_ptep)) {
161 put_page(src_page);
162 goto retry_source;
163 }
164 if (pte_huge(src_pte)) {
165 /* Adjust the PTE to correspond to a small page */
166 int pfn = hv_pte_get_pfn(src_pte);
167 pfn += (((unsigned long)source & (HPAGE_SIZE-1))
168 >> PAGE_SHIFT);
169 src_pte = pfn_pte(pfn, src_pte);
170 src_pte = pte_mksmall(src_pte);
171 }
172
173 /* Is the destination page writable? */
174retry_dest:
175 dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
176 if (dst_ptep == NULL) {
177 put_page(src_page);
178 break;
179 }
180 dst_pte = *dst_ptep;
181 if (!hv_pte_get_present(dst_pte) ||
182 !hv_pte_get_writable(dst_pte)) {
183 put_page(src_page);
184 break;
185 }
186 dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte));
187 if (dst_page == src_page) {
188 /*
189 * Source and dest are on the same page; this
190 * potentially exposes us to incoherence if any
191 * part of src and dest overlap on a cache line.
192 * Just give up rather than trying to be precise.
193 */
194 put_page(src_page);
195 break;
196 }
197 get_page(dst_page);
198 if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
199 put_page(dst_page);
200 goto retry_dest;
201 }
202 if (pte_huge(dst_pte)) {
203 /* Adjust the PTE to correspond to a small page */
204 int pfn = hv_pte_get_pfn(dst_pte);
205 pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
206 >> PAGE_SHIFT);
207 dst_pte = pfn_pte(pfn, dst_pte);
208 dst_pte = pte_mksmall(dst_pte);
209 }
210
211 /* All looks good: create a cachable PTE and copy from it */
212 copy_size = len;
213 bytes_left_on_page =
214 PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
215 if (copy_size > bytes_left_on_page)
216 copy_size = bytes_left_on_page;
217 bytes_left_on_page =
218 PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
219 if (copy_size > bytes_left_on_page)
220 copy_size = bytes_left_on_page;
221 memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);
222
223 /* Release the pages */
224 put_page(dst_page);
225 put_page(src_page);
226
227 /* Continue on the next page */
228 dest += copy_size;
229 source += copy_size;
230 len -= copy_size;
231 }
232
233 return func(dest, source, len);
234}
235
236void *memcpy(void *to, const void *from, __kernel_size_t n)
237{
238 if (n < LARGE_COPY_CUTOFF)
239 return (void *)__memcpy_asm(to, from, n);
240 else
241 return (void *)fast_copy(to, from, n, __memcpy_asm);
242}
243
244unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
245 unsigned long n)
246{
247 if (n < LARGE_COPY_CUTOFF)
248 return __copy_to_user_inatomic_asm(to, from, n);
249 else
250 return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
251}
252
253unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
254 unsigned long n)
255{
256 if (n < LARGE_COPY_CUTOFF)
257 return __copy_from_user_inatomic_asm(to, from, n);
258 else
259 return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
260}
261
262unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
263 unsigned long n)
264{
265 if (n < LARGE_COPY_CUTOFF)
266 return __copy_from_user_zeroing_asm(to, from, n);
267 else
268 return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
269}
270
271#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */