Diffstat (limited to 'arch/tile/lib/memcpy_tile64.c')
-rw-r--r--	arch/tile/lib/memcpy_tile64.c	280
1 file changed, 0 insertions, 280 deletions
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
deleted file mode 100644
index 0290c222847b..000000000000
--- a/arch/tile/lib/memcpy_tile64.c
+++ /dev/null
@@ -1,280 +0,0 @@
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/string.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/fixmap.h>
#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
#include <hv/hypervisor.h>
#include <arch/chip.h>


#if !CHIP_HAS_COHERENT_LOCAL_CACHE()

/* Defined in memcpy.S */
extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
extern unsigned long __copy_to_user_inatomic_asm(
	void __user *to, const void *from, unsigned long n);
extern unsigned long __copy_from_user_inatomic_asm(
	void *to, const void __user *from, unsigned long n);
extern unsigned long __copy_from_user_zeroing_asm(
	void *to, const void __user *from, unsigned long n);

typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);

/* Size above which to consider TLB games for performance */
#define LARGE_COPY_CUTOFF 2048

/* Communicate to the simulator what we are trying to do. */
#define sim_allow_multiple_caching(b) \
	__insn_mtspr(SPR_SIM_CONTROL, \
		     SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))

/*
 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
 *
 * We set up our own source and destination PTEs that we fully control.
 * This is the only way to guarantee that we don't race with another
 * thread that is modifying the PTE; we can't afford to try the
 * copy_{to,from}_user() technique of catching the interrupt, since
 * we must run with interrupts disabled to avoid the risk of some
 * other code seeing the incoherent data in our cache.  (Recall that
 * our cache is indexed by PA, so even if the other code doesn't use
 * our kmap_atomic virtual addresses, they'll still hit in cache using
 * the normal VAs that aren't supposed to hit in cache.)
 */
static void memcpy_multicache(void *dest, const void *source,
			      pte_t dst_pte, pte_t src_pte, int len)
{
	int idx;
	unsigned long flags, newsrc, newdst;
	pmd_t *pmdp;
	pte_t *ptep;
	int type0, type1;
	int cpu = smp_processor_id();

	/*
	 * Disable interrupts so that we don't recurse into memcpy()
	 * in an interrupt handler, nor accidentally reference
	 * the PA of the source from an interrupt routine.  Also
	 * notify the simulator that we're playing games so we don't
	 * generate spurious coherency warnings.
	 */
	local_irq_save(flags);
	sim_allow_multiple_caching(1);

	/* Set up the new dest mapping */
	type0 = kmap_atomic_idx_push();
	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
	ptep = pte_offset_kernel(pmdp, newdst);
	if (pte_val(*ptep) != pte_val(dst_pte)) {
		set_pte(ptep, dst_pte);
		local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
	}

	/* Set up the new source mapping */
	type1 = kmap_atomic_idx_push();
	idx += (type0 - type1);
	src_pte = hv_pte_set_nc(src_pte);
	src_pte = hv_pte_clear_writable(src_pte);  /* be paranoid */
	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
	ptep = pte_offset_kernel(pmdp, newsrc);
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/* Actually move the data. */
	__memcpy_asm((void *)newdst, (const void *)newsrc, len);

	/*
	 * Remap the source as locally-cached and not OLOC'ed so that
	 * we can inval without also invaling the remote cpu's cache.
	 * This also avoids known errata with inv'ing cacheable oloc data.
	 */
	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/*
	 * Do the actual invalidation, covering the full L2 cache line
	 * at the end since __memcpy_asm() is somewhat aggressive.
	 */
	__inv_buffer((void *)newsrc, len);

	/*
	 * We're done: notify the simulator that all is back to normal,
	 * and re-enable interrupts and pre-emption.
	 */
	kmap_atomic_idx_pop();
	kmap_atomic_idx_pop();
	sim_allow_multiple_caching(0);
	local_irq_restore(flags);
}

/*
 * Identify large copies from remotely-cached memory, and copy them
 * via memcpy_multicache() if they look good, otherwise fall back
 * to the particular kind of copying passed as the memcpy_t function.
 */
static unsigned long fast_copy(void *dest, const void *source, int len,
			       memcpy_t func)
{
	int cpu = get_cpu();
	unsigned long retval;

	/*
	 * Check if it's big enough to bother with.  We may end up doing a
	 * small copy via TLB manipulation if we're near a page boundary,
	 * but presumably we'll make it up when we hit the second page.
	 */
	while (len >= LARGE_COPY_CUTOFF) {
		int copy_size, bytes_left_on_page;
		pte_t *src_ptep, *dst_ptep;
		pte_t src_pte, dst_pte;
		struct page *src_page, *dst_page;

		/* Is the source page oloc'ed to a remote cpu? */
retry_source:
		src_ptep = virt_to_pte(current->mm, (unsigned long)source);
		if (src_ptep == NULL)
			break;
		src_pte = *src_ptep;
		if (!hv_pte_get_present(src_pte) ||
		    !hv_pte_get_readable(src_pte) ||
		    hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
			break;
		if (get_remote_cache_cpu(src_pte) == cpu)
			break;
		src_page = pfn_to_page(pte_pfn(src_pte));
		get_page(src_page);
		if (pte_val(src_pte) != pte_val(*src_ptep)) {
			put_page(src_page);
			goto retry_source;
		}
		if (pte_huge(src_pte)) {
			/* Adjust the PTE to correspond to a small page */
			int pfn = pte_pfn(src_pte);
			pfn += (((unsigned long)source & (HPAGE_SIZE-1))
				>> PAGE_SHIFT);
			src_pte = pfn_pte(pfn, src_pte);
			src_pte = pte_mksmall(src_pte);
		}

		/* Is the destination page writable? */
retry_dest:
		dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
		if (dst_ptep == NULL) {
			put_page(src_page);
			break;
		}
		dst_pte = *dst_ptep;
		if (!hv_pte_get_present(dst_pte) ||
		    !hv_pte_get_writable(dst_pte)) {
			put_page(src_page);
			break;
		}
		dst_page = pfn_to_page(pte_pfn(dst_pte));
		if (dst_page == src_page) {
			/*
			 * Source and dest are on the same page; this
			 * potentially exposes us to incoherence if any
			 * part of src and dest overlap on a cache line.
			 * Just give up rather than trying to be precise.
			 */
			put_page(src_page);
			break;
		}
		get_page(dst_page);
		if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
			put_page(dst_page);
			goto retry_dest;
		}
		if (pte_huge(dst_pte)) {
			/* Adjust the PTE to correspond to a small page */
			int pfn = pte_pfn(dst_pte);
			pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
				>> PAGE_SHIFT);
			dst_pte = pfn_pte(pfn, dst_pte);
			dst_pte = pte_mksmall(dst_pte);
		}

		/* All looks good: create a cacheable PTE and copy from it */
		copy_size = len;
		bytes_left_on_page =
			PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
		if (copy_size > bytes_left_on_page)
			copy_size = bytes_left_on_page;
		bytes_left_on_page =
			PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
		if (copy_size > bytes_left_on_page)
			copy_size = bytes_left_on_page;
		memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);

		/* Release the pages */
		put_page(dst_page);
		put_page(src_page);

		/* Continue on the next page */
		dest += copy_size;
		source += copy_size;
		len -= copy_size;
	}

	retval = func(dest, source, len);
	put_cpu();
	return retval;
}

void *memcpy(void *to, const void *from, __kernel_size_t n)
{
	if (n < LARGE_COPY_CUTOFF)
		return (void *)__memcpy_asm(to, from, n);
	else
		return (void *)fast_copy(to, from, n, __memcpy_asm);
}

unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
				      unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_to_user_inatomic_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
}

unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
					unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_from_user_inatomic_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
}

unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
				       unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_from_user_zeroing_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
}

#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */
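
For context on the skeleton the deleted file was built around: every entry point dispatches on LARGE_COPY_CUTOFF, and fast_copy() walks a large copy one page-bounded chunk at a time, clamping each chunk to the bytes remaining on both the source and destination pages. The following is a minimal user-space sketch of just that dispatch-and-clamp pattern; the names sketch_memcpy, bytes_left_on_page, and SKETCH_PAGE_SIZE are hypothetical, and an ordinary memcpy() stands in for memcpy_multicache().

#include <stdio.h>
#include <string.h>

#define SKETCH_PAGE_SIZE   4096UL  /* hypothetical page size for the sketch */
#define LARGE_COPY_CUTOFF  2048    /* same threshold the file above used */

/* Bytes from p to the end of its (sketch) page, mirroring the
 * bytes_left_on_page computation in fast_copy(). */
static size_t bytes_left_on_page(const void *p)
{
	return SKETCH_PAGE_SIZE - ((unsigned long)p & (SKETCH_PAGE_SIZE - 1));
}

static void *sketch_memcpy(void *dest, const void *source, size_t len)
{
	char *d = dest;
	const char *s = source;

	/* Large copies are walked one chunk at a time, never crossing
	 * a page boundary on either the source or destination side. */
	while (len >= LARGE_COPY_CUTOFF) {
		size_t chunk = len;

		if (chunk > bytes_left_on_page(s))
			chunk = bytes_left_on_page(s);
		if (chunk > bytes_left_on_page(d))
			chunk = bytes_left_on_page(d);

		memcpy(d, s, chunk);	/* stand-in for memcpy_multicache() */
		d += chunk;
		s += chunk;
		len -= chunk;
	}

	/* Small copies, and the tail of a large one, go straight through
	 * (the kernel version calls the raw asm routine here). */
	memcpy(d, s, len);
	return dest;
}

int main(void)
{
	static char src[3 * 4096], dst[3 * 4096];

	memset(src, 0x5a, sizeof(src));
	sketch_memcpy(dst, src + 100, 5000);
	printf("%s\n", !memcmp(dst, src + 100, 5000) ? "ok" : "mismatch");
	return 0;
}

Clamping to both pages is what let the kernel version set up exactly one small-page source mapping and one destination mapping per iteration, even when a copy began mid-page or spanned a huge page.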