diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ia64/sn/kernel/bte.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/ia64/sn/kernel/bte.c')
-rw-r--r-- | arch/ia64/sn/kernel/bte.c | 453 |
1 files changed, 453 insertions, 0 deletions
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c new file mode 100644 index 000000000000..ce0bc4085eae --- /dev/null +++ b/arch/ia64/sn/kernel/bte.c | |||
@@ -0,0 +1,453 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
7 | */ | ||
8 | |||
9 | #include <linux/config.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <asm/sn/nodepda.h> | ||
12 | #include <asm/sn/addrs.h> | ||
13 | #include <asm/sn/arch.h> | ||
14 | #include <asm/sn/sn_cpuid.h> | ||
15 | #include <asm/sn/pda.h> | ||
16 | #include <asm/sn/shubio.h> | ||
17 | #include <asm/nodedata.h> | ||
18 | #include <asm/delay.h> | ||
19 | |||
20 | #include <linux/bootmem.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/sched.h> | ||
23 | |||
24 | #include <asm/sn/bte.h> | ||
25 | |||
/* Mask selecting the byte offset within an L1 cache line. */
#if !defined(L1_CACHE_MASK)
#define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
#endif

/*
 * Number of candidate BTE interfaces examined on each acquisition
 * pass: two interfaces on each of two nodes.
 */
#define MAX_INTERFACES_TO_TRY 4
32 | |||
33 | static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) | ||
34 | { | ||
35 | nodepda_t *tmp_nodepda; | ||
36 | |||
37 | tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); | ||
38 | return &tmp_nodepda->bte_if[interface]; | ||
39 | |||
40 | } | ||
41 | |||
42 | /************************************************************************ | ||
43 | * Block Transfer Engine copy related functions. | ||
44 | * | ||
45 | ***********************************************************************/ | ||
46 | |||
47 | /* | ||
48 | * bte_copy(src, dest, len, mode, notification) | ||
49 | * | ||
50 | * Use the block transfer engine to move kernel memory from src to dest | ||
51 | * using the assigned mode. | ||
52 | * | ||
53 | * Paramaters: | ||
54 | * src - physical address of the transfer source. | ||
55 | * dest - physical address of the transfer destination. | ||
56 | * len - number of bytes to transfer from source to dest. | ||
57 | * mode - hardware defined. See reference information | ||
58 | * for IBCT0/1 in the SHUB Programmers Reference | ||
59 | * notification - kernel virtual address of the notification cache | ||
60 | * line. If NULL, the default is used and | ||
61 | * the bte_copy is synchronous. | ||
62 | * | ||
63 | * NOTE: This function requires src, dest, and len to | ||
64 | * be cacheline aligned. | ||
65 | */ | ||
66 | bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) | ||
67 | { | ||
68 | u64 transfer_size; | ||
69 | u64 transfer_stat; | ||
70 | struct bteinfo_s *bte; | ||
71 | bte_result_t bte_status; | ||
72 | unsigned long irq_flags; | ||
73 | unsigned long itc_end = 0; | ||
74 | struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY]; | ||
75 | int bte_if_index; | ||
76 | int bte_pri, bte_sec; | ||
77 | |||
78 | BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n", | ||
79 | src, dest, len, mode, notification)); | ||
80 | |||
81 | if (len == 0) { | ||
82 | return BTE_SUCCESS; | ||
83 | } | ||
84 | |||
85 | BUG_ON((len & L1_CACHE_MASK) || | ||
86 | (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)); | ||
87 | BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT))); | ||
88 | |||
89 | /* CPU 0 (per node) tries bte0 first, CPU 1 try bte1 first */ | ||
90 | if (cpuid_to_subnode(smp_processor_id()) == 0) { | ||
91 | bte_pri = 0; | ||
92 | bte_sec = 1; | ||
93 | } else { | ||
94 | bte_pri = 1; | ||
95 | bte_sec = 0; | ||
96 | } | ||
97 | |||
98 | if (mode & BTE_USE_DEST) { | ||
99 | /* try remote then local */ | ||
100 | btes_to_try[0] = bte_if_on_node(NASID_GET(dest), bte_pri); | ||
101 | btes_to_try[1] = bte_if_on_node(NASID_GET(dest), bte_sec); | ||
102 | if (mode & BTE_USE_ANY) { | ||
103 | btes_to_try[2] = bte_if_on_node(get_nasid(), bte_pri); | ||
104 | btes_to_try[3] = bte_if_on_node(get_nasid(), bte_sec); | ||
105 | } else { | ||
106 | btes_to_try[2] = NULL; | ||
107 | btes_to_try[3] = NULL; | ||
108 | } | ||
109 | } else { | ||
110 | /* try local then remote */ | ||
111 | btes_to_try[0] = bte_if_on_node(get_nasid(), bte_pri); | ||
112 | btes_to_try[1] = bte_if_on_node(get_nasid(), bte_sec); | ||
113 | if (mode & BTE_USE_ANY) { | ||
114 | btes_to_try[2] = bte_if_on_node(NASID_GET(dest), bte_pri); | ||
115 | btes_to_try[3] = bte_if_on_node(NASID_GET(dest), bte_sec); | ||
116 | } else { | ||
117 | btes_to_try[2] = NULL; | ||
118 | btes_to_try[3] = NULL; | ||
119 | } | ||
120 | } | ||
121 | |||
122 | retry_bteop: | ||
123 | do { | ||
124 | local_irq_save(irq_flags); | ||
125 | |||
126 | bte_if_index = 0; | ||
127 | |||
128 | /* Attempt to lock one of the BTE interfaces. */ | ||
129 | while (bte_if_index < MAX_INTERFACES_TO_TRY) { | ||
130 | bte = btes_to_try[bte_if_index++]; | ||
131 | |||
132 | if (bte == NULL) { | ||
133 | continue; | ||
134 | } | ||
135 | |||
136 | if (spin_trylock(&bte->spinlock)) { | ||
137 | if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) || | ||
138 | (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) { | ||
139 | /* Got the lock but BTE still busy */ | ||
140 | spin_unlock(&bte->spinlock); | ||
141 | } else { | ||
142 | /* we got the lock and it's not busy */ | ||
143 | break; | ||
144 | } | ||
145 | } | ||
146 | bte = NULL; | ||
147 | } | ||
148 | |||
149 | if (bte != NULL) { | ||
150 | break; | ||
151 | } | ||
152 | |||
153 | local_irq_restore(irq_flags); | ||
154 | |||
155 | if (!(mode & BTE_WACQUIRE)) { | ||
156 | return BTEFAIL_NOTAVAIL; | ||
157 | } | ||
158 | } while (1); | ||
159 | |||
160 | if (notification == NULL) { | ||
161 | /* User does not want to be notified. */ | ||
162 | bte->most_rcnt_na = &bte->notify; | ||
163 | } else { | ||
164 | bte->most_rcnt_na = notification; | ||
165 | } | ||
166 | |||
167 | /* Calculate the number of cache lines to transfer. */ | ||
168 | transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK); | ||
169 | |||
170 | /* Initialize the notification to a known value. */ | ||
171 | *bte->most_rcnt_na = BTE_WORD_BUSY; | ||
172 | |||
173 | /* Set the status reg busy bit and transfer length */ | ||
174 | BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size)); | ||
175 | BTE_LNSTAT_STORE(bte, IBLS_BUSY | transfer_size); | ||
176 | |||
177 | /* Set the source and destination registers */ | ||
178 | BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src)))); | ||
179 | BTE_SRC_STORE(bte, TO_PHYS(src)); | ||
180 | BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest)))); | ||
181 | BTE_DEST_STORE(bte, TO_PHYS(dest)); | ||
182 | |||
183 | /* Set the notification register */ | ||
184 | BTE_PRINTKV(("IBNA = 0x%lx)\n", | ||
185 | TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)))); | ||
186 | BTE_NOTIF_STORE(bte, | ||
187 | TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na))); | ||
188 | |||
189 | /* Initiate the transfer */ | ||
190 | BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); | ||
191 | BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode)); | ||
192 | |||
193 | itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); | ||
194 | |||
195 | spin_unlock_irqrestore(&bte->spinlock, irq_flags); | ||
196 | |||
197 | if (notification != NULL) { | ||
198 | return BTE_SUCCESS; | ||
199 | } | ||
200 | |||
201 | while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) { | ||
202 | if (ia64_get_itc() > itc_end) { | ||
203 | BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n", | ||
204 | NASID_GET(bte->bte_base_addr), bte->bte_num, | ||
205 | BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) ); | ||
206 | bte->bte_error_count++; | ||
207 | bte->bh_error = IBLS_ERROR; | ||
208 | bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode)); | ||
209 | *bte->most_rcnt_na = BTE_WORD_AVAILABLE; | ||
210 | goto retry_bteop; | ||
211 | } | ||
212 | } | ||
213 | |||
214 | BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n", | ||
215 | BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); | ||
216 | |||
217 | if (transfer_stat & IBLS_ERROR) { | ||
218 | bte_status = transfer_stat & ~IBLS_ERROR; | ||
219 | } else { | ||
220 | bte_status = BTE_SUCCESS; | ||
221 | } | ||
222 | *bte->most_rcnt_na = BTE_WORD_AVAILABLE; | ||
223 | |||
224 | BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n", | ||
225 | BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); | ||
226 | |||
227 | return bte_status; | ||
228 | } | ||
229 | |||
230 | EXPORT_SYMBOL(bte_copy); | ||
231 | |||
232 | /* | ||
233 | * bte_unaligned_copy(src, dest, len, mode) | ||
234 | * | ||
235 | * use the block transfer engine to move kernel | ||
236 | * memory from src to dest using the assigned mode. | ||
237 | * | ||
238 | * Paramaters: | ||
239 | * src - physical address of the transfer source. | ||
240 | * dest - physical address of the transfer destination. | ||
241 | * len - number of bytes to transfer from source to dest. | ||
242 | * mode - hardware defined. See reference information | ||
243 | * for IBCT0/1 in the SGI documentation. | ||
244 | * | ||
245 | * NOTE: If the source, dest, and len are all cache line aligned, | ||
246 | * then it would be _FAR_ preferrable to use bte_copy instead. | ||
247 | */ | ||
248 | bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) | ||
249 | { | ||
250 | int destFirstCacheOffset; | ||
251 | u64 headBteSource; | ||
252 | u64 headBteLen; | ||
253 | u64 headBcopySrcOffset; | ||
254 | u64 headBcopyDest; | ||
255 | u64 headBcopyLen; | ||
256 | u64 footBteSource; | ||
257 | u64 footBteLen; | ||
258 | u64 footBcopyDest; | ||
259 | u64 footBcopyLen; | ||
260 | bte_result_t rv; | ||
261 | char *bteBlock, *bteBlock_unaligned; | ||
262 | |||
263 | if (len == 0) { | ||
264 | return BTE_SUCCESS; | ||
265 | } | ||
266 | |||
267 | /* temporary buffer used during unaligned transfers */ | ||
268 | bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, | ||
269 | GFP_KERNEL | GFP_DMA); | ||
270 | if (bteBlock_unaligned == NULL) { | ||
271 | return BTEFAIL_NOTAVAIL; | ||
272 | } | ||
273 | bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned); | ||
274 | |||
275 | headBcopySrcOffset = src & L1_CACHE_MASK; | ||
276 | destFirstCacheOffset = dest & L1_CACHE_MASK; | ||
277 | |||
278 | /* | ||
279 | * At this point, the transfer is broken into | ||
280 | * (up to) three sections. The first section is | ||
281 | * from the start address to the first physical | ||
282 | * cache line, the second is from the first physical | ||
283 | * cache line to the last complete cache line, | ||
284 | * and the third is from the last cache line to the | ||
285 | * end of the buffer. The first and third sections | ||
286 | * are handled by bte copying into a temporary buffer | ||
287 | * and then bcopy'ing the necessary section into the | ||
288 | * final location. The middle section is handled with | ||
289 | * a standard bte copy. | ||
290 | * | ||
291 | * One nasty exception to the above rule is when the | ||
292 | * source and destination are not symetrically | ||
293 | * mis-aligned. If the source offset from the first | ||
294 | * cache line is different from the destination offset, | ||
295 | * we make the first section be the entire transfer | ||
296 | * and the bcopy the entire block into place. | ||
297 | */ | ||
298 | if (headBcopySrcOffset == destFirstCacheOffset) { | ||
299 | |||
300 | /* | ||
301 | * Both the source and destination are the same | ||
302 | * distance from a cache line boundary so we can | ||
303 | * use the bte to transfer the bulk of the | ||
304 | * data. | ||
305 | */ | ||
306 | headBteSource = src & ~L1_CACHE_MASK; | ||
307 | headBcopyDest = dest; | ||
308 | if (headBcopySrcOffset) { | ||
309 | headBcopyLen = | ||
310 | (len > | ||
311 | (L1_CACHE_BYTES - | ||
312 | headBcopySrcOffset) ? L1_CACHE_BYTES | ||
313 | - headBcopySrcOffset : len); | ||
314 | headBteLen = L1_CACHE_BYTES; | ||
315 | } else { | ||
316 | headBcopyLen = 0; | ||
317 | headBteLen = 0; | ||
318 | } | ||
319 | |||
320 | if (len > headBcopyLen) { | ||
321 | footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK; | ||
322 | footBteLen = L1_CACHE_BYTES; | ||
323 | |||
324 | footBteSource = src + len - footBcopyLen; | ||
325 | footBcopyDest = dest + len - footBcopyLen; | ||
326 | |||
327 | if (footBcopyDest == (headBcopyDest + headBcopyLen)) { | ||
328 | /* | ||
329 | * We have two contigous bcopy | ||
330 | * blocks. Merge them. | ||
331 | */ | ||
332 | headBcopyLen += footBcopyLen; | ||
333 | headBteLen += footBteLen; | ||
334 | } else if (footBcopyLen > 0) { | ||
335 | rv = bte_copy(footBteSource, | ||
336 | ia64_tpa((unsigned long)bteBlock), | ||
337 | footBteLen, mode, NULL); | ||
338 | if (rv != BTE_SUCCESS) { | ||
339 | kfree(bteBlock_unaligned); | ||
340 | return rv; | ||
341 | } | ||
342 | |||
343 | memcpy(__va(footBcopyDest), | ||
344 | (char *)bteBlock, footBcopyLen); | ||
345 | } | ||
346 | } else { | ||
347 | footBcopyLen = 0; | ||
348 | footBteLen = 0; | ||
349 | } | ||
350 | |||
351 | if (len > (headBcopyLen + footBcopyLen)) { | ||
352 | /* now transfer the middle. */ | ||
353 | rv = bte_copy((src + headBcopyLen), | ||
354 | (dest + | ||
355 | headBcopyLen), | ||
356 | (len - headBcopyLen - | ||
357 | footBcopyLen), mode, NULL); | ||
358 | if (rv != BTE_SUCCESS) { | ||
359 | kfree(bteBlock_unaligned); | ||
360 | return rv; | ||
361 | } | ||
362 | |||
363 | } | ||
364 | } else { | ||
365 | |||
366 | /* | ||
367 | * The transfer is not symetric, we will | ||
368 | * allocate a buffer large enough for all the | ||
369 | * data, bte_copy into that buffer and then | ||
370 | * bcopy to the destination. | ||
371 | */ | ||
372 | |||
373 | /* Add the leader from source */ | ||
374 | headBteLen = len + (src & L1_CACHE_MASK); | ||
375 | /* Add the trailing bytes from footer. */ | ||
376 | headBteLen += L1_CACHE_BYTES - (headBteLen & L1_CACHE_MASK); | ||
377 | headBteSource = src & ~L1_CACHE_MASK; | ||
378 | headBcopySrcOffset = src & L1_CACHE_MASK; | ||
379 | headBcopyDest = dest; | ||
380 | headBcopyLen = len; | ||
381 | } | ||
382 | |||
383 | if (headBcopyLen > 0) { | ||
384 | rv = bte_copy(headBteSource, | ||
385 | ia64_tpa((unsigned long)bteBlock), headBteLen, | ||
386 | mode, NULL); | ||
387 | if (rv != BTE_SUCCESS) { | ||
388 | kfree(bteBlock_unaligned); | ||
389 | return rv; | ||
390 | } | ||
391 | |||
392 | memcpy(__va(headBcopyDest), ((char *)bteBlock + | ||
393 | headBcopySrcOffset), headBcopyLen); | ||
394 | } | ||
395 | kfree(bteBlock_unaligned); | ||
396 | return BTE_SUCCESS; | ||
397 | } | ||
398 | |||
399 | EXPORT_SYMBOL(bte_unaligned_copy); | ||
400 | |||
401 | /************************************************************************ | ||
402 | * Block Transfer Engine initialization functions. | ||
403 | * | ||
404 | ***********************************************************************/ | ||
405 | |||
406 | /* | ||
407 | * bte_init_node(nodepda, cnode) | ||
408 | * | ||
409 | * Initialize the nodepda structure with BTE base addresses and | ||
410 | * spinlocks. | ||
411 | */ | ||
412 | void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode) | ||
413 | { | ||
414 | int i; | ||
415 | |||
416 | /* | ||
417 | * Indicate that all the block transfer engines on this node | ||
418 | * are available. | ||
419 | */ | ||
420 | |||
421 | /* | ||
422 | * Allocate one bte_recover_t structure per node. It holds | ||
423 | * the recovery lock for node. All the bte interface structures | ||
424 | * will point at this one bte_recover structure to get the lock. | ||
425 | */ | ||
426 | spin_lock_init(&mynodepda->bte_recovery_lock); | ||
427 | init_timer(&mynodepda->bte_recovery_timer); | ||
428 | mynodepda->bte_recovery_timer.function = bte_error_handler; | ||
429 | mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda; | ||
430 | |||
431 | for (i = 0; i < BTES_PER_NODE; i++) { | ||
432 | /* Which link status register should we use? */ | ||
433 | unsigned long link_status = (i == 0 ? IIO_IBLS0 : IIO_IBLS1); | ||
434 | mynodepda->bte_if[i].bte_base_addr = (u64 *) | ||
435 | REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), link_status); | ||
436 | |||
437 | /* | ||
438 | * Initialize the notification and spinlock | ||
439 | * so the first transfer can occur. | ||
440 | */ | ||
441 | mynodepda->bte_if[i].most_rcnt_na = | ||
442 | &(mynodepda->bte_if[i].notify); | ||
443 | mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE; | ||
444 | spin_lock_init(&mynodepda->bte_if[i].spinlock); | ||
445 | |||
446 | mynodepda->bte_if[i].bte_cnode = cnode; | ||
447 | mynodepda->bte_if[i].bte_error_count = 0; | ||
448 | mynodepda->bte_if[i].bte_num = i; | ||
449 | mynodepda->bte_if[i].cleanup_active = 0; | ||
450 | mynodepda->bte_if[i].bh_error = 0; | ||
451 | } | ||
452 | |||
453 | } | ||