author		Graf Yang <graf.yang@analog.com>	2009-01-07 10:14:39 -0500
committer	Bryan Wu <cooloney@kernel.org>		2009-01-07 10:14:39 -0500
commit		c51b4488cd5bff08ed5690a8f303ff7f0894da2a (patch)
tree		1f6a2919e011b033ba5177efe3a612f4ebebb4b5 /arch/blackfin/mach-bf561/atomic.S
parent		2de73e71c298842db814556379cbe25f5c14691e (diff)
Blackfin arch: SMP supporting patchset: BF561 related code
The Blackfin dual-core BF561 processor can support SMP-like features.
https://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:smp-like

In this patch, we extend the BF561 kernel code to support SMP.
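
For reference, the ___raw_*_asm entry points added here are the low-level
backend for the generic spinlock/rwlock/atomic/bitops operations on BF561
SMP. A minimal sketch of the expected C-side wrapper is shown below (the
real declarations belong to the companion Blackfin header changes of this
patchset, e.g. asm/spinlock.h; the toolchain prefixes C symbols with an
underscore, so C-level __raw_spin_lock_asm assembles to
___raw_spin_lock_asm):

	/* sketch only -- illustrative types and wrapper shape */
	typedef struct { volatile unsigned int lock; } raw_spinlock_t;

	extern void __raw_spin_lock_asm(volatile unsigned int *ptr);
	extern void __raw_spin_unlock_asm(volatile unsigned int *ptr);

	static inline void __raw_spin_lock(raw_spinlock_t *lock)
	{
		__raw_spin_lock_asm(&lock->lock);
	}

	static inline void __raw_spin_unlock(raw_spinlock_t *lock)
	{
		__raw_spin_unlock_asm(&lock->lock);
	}
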
Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Diffstat (limited to 'arch/blackfin/mach-bf561/atomic.S')
-rw-r--r--	arch/blackfin/mach-bf561/atomic.S	919
1 file changed, 919 insertions, 0 deletions
diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S
new file mode 100644
index 000000000000..9439bc6bd01f
--- /dev/null
+++ b/arch/blackfin/mach-bf561/atomic.S
@@ -0,0 +1,919 @@
1 | /* | ||
2 | * File: arch/blackfin/mach-bf561/atomic.S | ||
3 | * Author: Philippe Gerum <rpm@xenomai.org> | ||
4 | * | ||
5 | * Copyright 2007 Analog Devices Inc. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, see the file COPYING, or write | ||
19 | * to the Free Software Foundation, Inc., | ||
20 | * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | #include <linux/linkage.h> | ||
24 | #include <asm/blackfin.h> | ||
25 | #include <asm/cache.h> | ||
26 | #include <asm/asm-offsets.h> | ||
27 | #include <asm/rwlock.h> | ||
28 | #include <asm/cplb.h> | ||
29 | |||
30 | .text | ||
31 | |||
32 | .macro coreslot_loadaddr reg:req | ||
33 | \reg\().l = _corelock; | ||
34 | \reg\().h = _corelock; | ||
35 | .endm | ||
36 | |||
37 | /* | ||
38 | * r0 = address of atomic data to flush and invalidate (32bit). | ||
39 | * | ||
40 | * Clear interrupts and return the old mask. | ||
41 | * We assume that no atomic data can span cachelines. | ||
42 | * | ||
43 | * Clobbers: r2:0, p0 | ||
44 | */ | ||
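/*
 * Rough C equivalent of the _get_core_lock/_put_core_lock pair below
 * (a sketch only; cli/sti/testset/flushinv/ssync stand for the
 * instructions used here, and "corelock" is the shared _corelock word
 * loaded by coreslot_loadaddr):
 *
 *	unsigned long get_core_lock(void *ptr)
 *	{
 *		unsigned long flags = cli();
 *		while (!testset(&corelock))
 *			ssync();
 *		flushinv(cacheline_of(ptr));	// drop any stale cached copy
 *		return flags;
 *	}
 *
 *	void put_core_lock(unsigned long flags)
 *	{
 *		corelock = 0;
 *		ssync();
 *		sti(flags);
 *	}
 */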
45 | ENTRY(_get_core_lock) | ||
46 | r1 = -L1_CACHE_BYTES; | ||
47 | r1 = r0 & r1; | ||
48 | cli r0; | ||
49 | coreslot_loadaddr p0; | ||
50 | .Lretry_corelock: | ||
51 | testset (p0); | ||
52 | if cc jump .Ldone_corelock; | ||
53 | SSYNC(r2); | ||
54 | jump .Lretry_corelock | ||
55 | .Ldone_corelock: | ||
56 | p0 = r1; | ||
57 | CSYNC(r2); | ||
58 | flushinv[p0]; | ||
59 | SSYNC(r2); | ||
60 | rts; | ||
61 | ENDPROC(_get_core_lock) | ||
62 | |||
63 | /* | ||
64 | * r0 = address of atomic data in uncacheable memory region (32bit). | ||
65 | * | ||
66 | * Clear interrupts and return the old mask. | ||
67 | * | ||
68 | * Clobbers: r0, p0 | ||
69 | */ | ||
70 | ENTRY(_get_core_lock_noflush) | ||
71 | cli r0; | ||
72 | coreslot_loadaddr p0; | ||
73 | .Lretry_corelock_noflush: | ||
74 | testset (p0); | ||
75 | if cc jump .Ldone_corelock_noflush; | ||
76 | SSYNC(r2); | ||
77 | jump .Lretry_corelock_noflush | ||
78 | .Ldone_corelock_noflush: | ||
79 | rts; | ||
80 | ENDPROC(_get_core_lock_noflush) | ||
81 | |||
82 | /* | ||
83 | * r0 = interrupt mask to restore. | ||
84 | * r1 = address of atomic data to flush and invalidate (32bit). | ||
85 | * | ||
86 | * Interrupts are masked on entry (see _get_core_lock). | ||
87 | * Clobbers: r2:0, p0 | ||
88 | */ | ||
89 | ENTRY(_put_core_lock) | ||
90 | /* Write-through cache assumed, so no flush needed here. */ | ||
91 | coreslot_loadaddr p0; | ||
92 | r1 = 0; | ||
93 | [p0] = r1; | ||
94 | SSYNC(r2); | ||
95 | sti r0; | ||
96 | rts; | ||
97 | ENDPROC(_put_core_lock) | ||
98 | |||
99 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
100 | |||
101 | ENTRY(___raw_smp_mark_barrier_asm) | ||
102 | [--sp] = rets; | ||
103 | [--sp] = ( r7:5 ); | ||
104 | [--sp] = r0; | ||
105 | [--sp] = p1; | ||
106 | [--sp] = p0; | ||
107 | call _get_core_lock_noflush; | ||
108 | |||
109 | /* | ||
110 | * Calculate current core mask | ||
111 | */ | ||
112 | GET_CPUID(p1, r7); | ||
113 | r6 = 1; | ||
114 | r6 <<= r7; | ||
115 | |||
116 | /* | ||
117 | * Set the bits of the other cores in the barrier mask; leave the current core's bit unchanged. | ||
118 | */ | ||
119 | p1.l = _barrier_mask; | ||
120 | p1.h = _barrier_mask; | ||
121 | r7 = [p1]; | ||
122 | r5 = r7 & r6; | ||
123 | r7 = ~r6; | ||
124 | cc = r5 == 0; | ||
125 | if cc jump 1f; | ||
126 | r7 = r7 | r6; | ||
127 | 1: | ||
128 | [p1] = r7; | ||
129 | SSYNC(r2); | ||
130 | |||
131 | call _put_core_lock; | ||
132 | p0 = [sp++]; | ||
133 | p1 = [sp++]; | ||
134 | r0 = [sp++]; | ||
135 | ( r7:5 ) = [sp++]; | ||
136 | rets = [sp++]; | ||
137 | rts; | ||
138 | ENDPROC(___raw_smp_mark_barrier_asm) | ||
139 | |||
140 | ENTRY(___raw_smp_check_barrier_asm) | ||
141 | [--sp] = rets; | ||
142 | [--sp] = ( r7:5 ); | ||
143 | [--sp] = r0; | ||
144 | [--sp] = p1; | ||
145 | [--sp] = p0; | ||
146 | call _get_core_lock_noflush; | ||
147 | |||
148 | /* | ||
149 | * Calculate current core mask | ||
150 | */ | ||
151 | GET_CPUID(p1, r7); | ||
152 | r6 = 1; | ||
153 | r6 <<= r7; | ||
154 | |||
155 | /* | ||
156 | * Clear current core bit in barrier mask if it is set. | ||
157 | */ | ||
158 | p1.l = _barrier_mask; | ||
159 | p1.h = _barrier_mask; | ||
160 | r7 = [p1]; | ||
161 | r5 = r7 & r6; | ||
162 | cc = r5 == 0; | ||
163 | if cc jump 1f; | ||
164 | r6 = ~r6; | ||
165 | r7 = r7 & r6; | ||
166 | [p1] = r7; | ||
167 | SSYNC(r2); | ||
168 | |||
169 | call _put_core_lock; | ||
170 | |||
171 | /* | ||
172 | * Invalidate the entire D-cache of current core. | ||
173 | */ | ||
174 | sp += -12; | ||
175 | call _resync_core_dcache | ||
176 | sp += 12; | ||
177 | jump 2f; | ||
178 | 1: | ||
179 | call _put_core_lock; | ||
180 | 2: | ||
181 | p0 = [sp++]; | ||
182 | p1 = [sp++]; | ||
183 | r0 = [sp++]; | ||
184 | ( r7:5 ) = [sp++]; | ||
185 | rets = [sp++]; | ||
186 | rts; | ||
187 | ENDPROC(___raw_smp_check_barrier_asm) | ||
188 | |||
189 | /* | ||
190 | * r0 = irqflags | ||
191 | * r1 = address of atomic data | ||
192 | * | ||
193 | * Clobbers: r2:0, p1:0 | ||
194 | */ | ||
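/*
 * The bookkeeping below, as a C-like sketch (names are illustrative;
 * the high nibble of the atomic word holds the per-core ownership
 * "fingerprint" bits):
 *
 *	prev_owners = (*ptr >> 28) & ~(1 << this_cpu);
 *	*ptr &= 0x0fffffff;		// clear the fingerprint nibble
 *	corelock = 0;			// drop the core lock, IRQs still off
 *	if (prev_owners)		// the other core owned it last:
 *		resync_core_dcache();	// our cached copy may be stale
 *	ssync();
 *	sti(flags);
 */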
195 | _start_lock_coherent: | ||
196 | |||
197 | [--sp] = rets; | ||
198 | [--sp] = ( r7:6 ); | ||
199 | r7 = r0; | ||
200 | p1 = r1; | ||
201 | |||
202 | /* | ||
203 | * Determine whether the atomic data was previously | ||
204 | * owned by another CPU (=r6). | ||
205 | */ | ||
206 | GET_CPUID(p0, r2); | ||
207 | r1 = 1; | ||
208 | r1 <<= r2; | ||
209 | r2 = ~r1; | ||
210 | |||
211 | r1 = [p1]; | ||
212 | r1 >>= 28; /* CPU fingerprints are stored in the high nibble. */ | ||
213 | r6 = r1 & r2; | ||
214 | r1 = [p1]; | ||
215 | r1 <<= 4; | ||
216 | r1 >>= 4; | ||
217 | [p1] = r1; | ||
218 | |||
219 | /* | ||
220 | * Release the core lock now, but keep IRQs disabled while we are | ||
221 | * performing the remaining housekeeping chores for the current CPU. | ||
222 | */ | ||
223 | coreslot_loadaddr p0; | ||
224 | r1 = 0; | ||
225 | [p0] = r1; | ||
226 | |||
227 | /* | ||
228 | * If another CPU has owned the same atomic section before us, | ||
229 | * then our D-cached copy of the shared data protected by the | ||
230 | * current spin/write_lock may be obsolete. | ||
231 | */ | ||
232 | cc = r6 == 0; | ||
233 | if cc jump .Lcache_synced | ||
234 | |||
235 | /* | ||
236 | * Invalidate the entire D-cache of the current core. | ||
237 | */ | ||
238 | sp += -12; | ||
239 | call _resync_core_dcache | ||
240 | sp += 12; | ||
241 | |||
242 | .Lcache_synced: | ||
243 | SSYNC(r2); | ||
244 | sti r7; | ||
245 | ( r7:6 ) = [sp++]; | ||
246 | rets = [sp++]; | ||
247 | rts | ||
248 | |||
249 | /* | ||
250 | * r0 = irqflags | ||
251 | * r1 = address of atomic data | ||
252 | * | ||
253 | * Clobbers: r2:0, p1:0 | ||
254 | */ | ||
255 | _end_lock_coherent: | ||
256 | |||
257 | p1 = r1; | ||
258 | GET_CPUID(p0, r2); | ||
259 | r2 += 28; | ||
260 | r1 = 1; | ||
261 | r1 <<= r2; | ||
262 | r2 = [p1]; | ||
263 | r2 = r1 | r2; | ||
264 | [p1] = r2; | ||
265 | r1 = p1; | ||
266 | jump _put_core_lock; | ||
267 | |||
268 | #endif /* __ARCH_SYNC_CORE_DCACHE */ | ||
269 | |||
270 | /* | ||
271 | * r0 = &spinlock->lock | ||
272 | * | ||
273 | * Clobbers: r3:0, p1:0 | ||
274 | */ | ||
275 | ENTRY(___raw_spin_is_locked_asm) | ||
276 | p1 = r0; | ||
277 | [--sp] = rets; | ||
278 | call _get_core_lock; | ||
279 | r3 = [p1]; | ||
280 | cc = bittst( r3, 0 ); | ||
281 | r3 = cc; | ||
282 | r1 = p1; | ||
283 | call _put_core_lock; | ||
284 | rets = [sp++]; | ||
285 | r0 = r3; | ||
286 | rts; | ||
287 | ENDPROC(___raw_spin_is_locked_asm) | ||
288 | |||
289 | /* | ||
290 | * r0 = &spinlock->lock | ||
291 | * | ||
292 | * Clobbers: r3:0, p1:0 | ||
293 | */ | ||
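/*
 * Equivalent logic in C (sketch; get_core_lock/put_core_lock as above):
 *
 *	for (;;) {
 *		flags = get_core_lock(&lock->lock);
 *		if (!(lock->lock & 1)) {
 *			lock->lock |= 1;	// take the lock bit
 *			break;			// _start_lock_coherent (or
 *						// _put_core_lock) drops the
 *						// core lock and restores IRQs
 *		}
 *		put_core_lock(flags);		// busy: back off and retry
 *	}
 */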
294 | ENTRY(___raw_spin_lock_asm) | ||
295 | p1 = r0; | ||
296 | [--sp] = rets; | ||
297 | .Lretry_spinlock: | ||
298 | call _get_core_lock; | ||
299 | r1 = p1; | ||
300 | r2 = [p1]; | ||
301 | cc = bittst( r2, 0 ); | ||
302 | if cc jump .Lbusy_spinlock | ||
303 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
304 | r3 = p1; | ||
305 | bitset ( r2, 0 ); /* Raise the lock bit. */ | ||
306 | [p1] = r2; | ||
307 | call _start_lock_coherent | ||
308 | #else | ||
309 | r2 = 1; | ||
310 | [p1] = r2; | ||
311 | call _put_core_lock; | ||
312 | #endif | ||
313 | rets = [sp++]; | ||
314 | rts; | ||
315 | |||
316 | .Lbusy_spinlock: | ||
317 | /* We don't touch the atomic area if busy, so that flush | ||
318 | will behave like nop in _put_core_lock. */ | ||
319 | call _put_core_lock; | ||
320 | SSYNC(r2); | ||
321 | r0 = p1; | ||
322 | jump .Lretry_spinlock | ||
323 | ENDPROC(___raw_spin_lock_asm) | ||
324 | |||
325 | /* | ||
326 | * r0 = &spinlock->lock | ||
327 | * | ||
328 | * Clobbers: r3:0, p1:0 | ||
329 | */ | ||
330 | ENTRY(___raw_spin_trylock_asm) | ||
331 | p1 = r0; | ||
332 | [--sp] = rets; | ||
333 | call _get_core_lock; | ||
334 | r1 = p1; | ||
335 | r3 = [p1]; | ||
336 | cc = bittst( r3, 0 ); | ||
337 | if cc jump .Lfailed_trylock | ||
338 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
339 | bitset ( r3, 0 ); /* Raise the lock bit. */ | ||
340 | [p1] = r3; | ||
341 | call _start_lock_coherent | ||
342 | #else | ||
343 | r2 = 1; | ||
344 | [p1] = r2; | ||
345 | call _put_core_lock; | ||
346 | #endif | ||
347 | r0 = 1; | ||
348 | rets = [sp++]; | ||
349 | rts; | ||
350 | .Lfailed_trylock: | ||
351 | call _put_core_lock; | ||
352 | r0 = 0; | ||
353 | rets = [sp++]; | ||
354 | rts; | ||
355 | ENDPROC(___raw_spin_trylock_asm) | ||
356 | |||
357 | /* | ||
358 | * r0 = &spinlock->lock | ||
359 | * | ||
360 | * Clobbers: r2:0, p1:0 | ||
361 | */ | ||
362 | ENTRY(___raw_spin_unlock_asm) | ||
363 | p1 = r0; | ||
364 | [--sp] = rets; | ||
365 | call _get_core_lock; | ||
366 | r2 = [p1]; | ||
367 | bitclr ( r2, 0 ); | ||
368 | [p1] = r2; | ||
369 | r1 = p1; | ||
370 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
371 | call _end_lock_coherent | ||
372 | #else | ||
373 | call _put_core_lock; | ||
374 | #endif | ||
375 | rets = [sp++]; | ||
376 | rts; | ||
377 | ENDPROC(___raw_spin_unlock_asm) | ||
378 | |||
379 | /* | ||
380 | * r0 = &rwlock->lock | ||
381 | * | ||
382 | * Clobbers: r2:0, p1:0 | ||
383 | */ | ||
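/*
 * Reader-side logic in C (sketch; the counter idles at RW_LOCK_BIAS and
 * a writer subtracts the whole bias, so a negative result after the
 * decrement means a writer holds the lock):
 *
 *	flags = get_core_lock(&rw->lock);
 *	for (;;) {
 *		if ((int)--rw->lock >= 0)
 *			break;			// got a read slot
 *		++rw->lock;			// undo, a writer is in
 *		do {
 *			put_core_lock(flags);	// let the writer finish
 *			flags = get_core_lock(&rw->lock);
 *		} while (rw->lock < 2);
 *	}
 *	// released through _start_lock_coherent / _put_core_lock
 */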
384 | ENTRY(___raw_read_lock_asm) | ||
385 | p1 = r0; | ||
386 | [--sp] = rets; | ||
387 | call _get_core_lock; | ||
388 | .Lrdlock_try: | ||
389 | r1 = [p1]; | ||
390 | r1 += -1; | ||
391 | [p1] = r1; | ||
392 | cc = r1 < 0; | ||
393 | if cc jump .Lrdlock_failed | ||
394 | r1 = p1; | ||
395 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
396 | call _start_lock_coherent | ||
397 | #else | ||
398 | call _put_core_lock; | ||
399 | #endif | ||
400 | rets = [sp++]; | ||
401 | rts; | ||
402 | |||
403 | .Lrdlock_failed: | ||
404 | r1 += 1; | ||
405 | [p1] = r1; | ||
406 | .Lrdlock_wait: | ||
407 | r1 = p1; | ||
408 | call _put_core_lock; | ||
409 | SSYNC(r2); | ||
410 | r0 = p1; | ||
411 | call _get_core_lock; | ||
412 | r1 = [p1]; | ||
413 | cc = r1 < 2; | ||
414 | if cc jump .Lrdlock_wait; | ||
415 | jump .Lrdlock_try | ||
416 | ENDPROC(___raw_read_lock_asm) | ||
417 | |||
418 | /* | ||
419 | * r0 = &rwlock->lock | ||
420 | * | ||
421 | * Clobbers: r3:0, p1:0 | ||
422 | */ | ||
423 | ENTRY(___raw_read_trylock_asm) | ||
424 | p1 = r0; | ||
425 | [--sp] = rets; | ||
426 | call _get_core_lock; | ||
427 | r1 = [p1]; | ||
428 | cc = r1 <= 0; | ||
429 | if cc jump .Lfailed_tryrdlock; | ||
430 | r1 += -1; | ||
431 | [p1] = r1; | ||
432 | r1 = p1; | ||
433 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
434 | call _start_lock_coherent | ||
435 | #else | ||
436 | call _put_core_lock; | ||
437 | #endif | ||
438 | rets = [sp++]; | ||
439 | r0 = 1; | ||
440 | rts; | ||
441 | .Lfailed_tryrdlock: | ||
442 | r1 = p1; | ||
443 | call _put_core_lock; | ||
444 | rets = [sp++]; | ||
445 | r0 = 0; | ||
446 | rts; | ||
447 | ENDPROC(___raw_read_trylock_asm) | ||
448 | |||
449 | /* | ||
450 | * r0 = &rwlock->lock | ||
451 | * | ||
452 | * Note: a section protected only by a reader lock does not modify | ||
453 | * the shared data, so it cannot leave the other core with a stale | ||
454 | * copy; we just release the core lock and exit (no _end_lock_coherent). | ||
455 | * | ||
456 | * Clobbers: r3:0, p1:0 | ||
457 | */ | ||
458 | ENTRY(___raw_read_unlock_asm) | ||
459 | p1 = r0; | ||
460 | [--sp] = rets; | ||
461 | call _get_core_lock; | ||
462 | r1 = [p1]; | ||
463 | r1 += 1; | ||
464 | [p1] = r1; | ||
465 | r1 = p1; | ||
466 | call _put_core_lock; | ||
467 | rets = [sp++]; | ||
468 | rts; | ||
469 | ENDPROC(___raw_read_unlock_asm) | ||
470 | |||
471 | /* | ||
472 | * r0 = &rwlock->lock | ||
473 | * | ||
474 | * Clobbers: r3:0, p1:0 | ||
475 | */ | ||
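/*
 * Writer-side logic in C (sketch; RW_LOCK_BIAS is the rwlock idle value,
 * and in the cache-coherent case the top ownership nibble is masked out
 * of the comparison):
 *
 *	flags = get_core_lock(&rw->lock);
 *	for (;;) {
 *		if ((rw->lock & 0x0fffffff) == RW_LOCK_BIAS) {
 *			rw->lock -= RW_LOCK_BIAS;	// no readers, no writer
 *			break;
 *		}
 *		do {
 *			put_core_lock(flags);
 *			flags = get_core_lock(&rw->lock);
 *		} while ((rw->lock & 0x0fffffff) != RW_LOCK_BIAS);
 *	}
 *	// released through _start_lock_coherent / _put_core_lock
 */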
476 | ENTRY(___raw_write_lock_asm) | ||
477 | p1 = r0; | ||
478 | r3.l = lo(RW_LOCK_BIAS); | ||
479 | r3.h = hi(RW_LOCK_BIAS); | ||
480 | [--sp] = rets; | ||
481 | call _get_core_lock; | ||
482 | .Lwrlock_try: | ||
483 | r1 = [p1]; | ||
484 | r1 = r1 - r3; | ||
485 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
486 | r2 = r1; | ||
487 | r2 <<= 4; | ||
488 | r2 >>= 4; | ||
489 | cc = r2 == 0; | ||
490 | #else | ||
491 | cc = r1 == 0; | ||
492 | #endif | ||
493 | if !cc jump .Lwrlock_wait | ||
494 | [p1] = r1; | ||
495 | r1 = p1; | ||
496 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
497 | call _start_lock_coherent | ||
498 | #else | ||
499 | call _put_core_lock; | ||
500 | #endif | ||
501 | rets = [sp++]; | ||
502 | rts; | ||
503 | |||
504 | .Lwrlock_wait: | ||
505 | r1 = p1; | ||
506 | call _put_core_lock; | ||
507 | SSYNC(r2); | ||
508 | r0 = p1; | ||
509 | call _get_core_lock; | ||
510 | r1 = [p1]; | ||
511 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
512 | r1 <<= 4; | ||
513 | r1 >>= 4; | ||
514 | #endif | ||
515 | cc = r1 == r3; | ||
516 | if !cc jump .Lwrlock_wait; | ||
517 | jump .Lwrlock_try | ||
518 | ENDPROC(___raw_write_lock_asm) | ||
519 | |||
520 | /* | ||
521 | * r0 = &rwlock->lock | ||
522 | * | ||
523 | * Clobbers: r3:0, p1:0 | ||
524 | */ | ||
525 | ENTRY(___raw_write_trylock_asm) | ||
526 | p1 = r0; | ||
527 | [--sp] = rets; | ||
528 | call _get_core_lock; | ||
529 | r1 = [p1]; | ||
530 | r2.l = lo(RW_LOCK_BIAS); | ||
531 | r2.h = hi(RW_LOCK_BIAS); | ||
532 | cc = r1 == r2; | ||
533 | if !cc jump .Lfailed_trywrlock; | ||
534 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
535 | r1 >>= 28; | ||
536 | r1 <<= 28; | ||
537 | #else | ||
538 | r1 = 0; | ||
539 | #endif | ||
540 | [p1] = r1; | ||
541 | r1 = p1; | ||
542 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
543 | call _start_lock_coherent | ||
544 | #else | ||
545 | call _put_core_lock; | ||
546 | #endif | ||
547 | rets = [sp++]; | ||
548 | r0 = 1; | ||
549 | rts; | ||
550 | |||
551 | .Lfailed_trywrlock: | ||
552 | r1 = p1; | ||
553 | call _put_core_lock; | ||
554 | rets = [sp++]; | ||
555 | r0 = 0; | ||
556 | rts; | ||
557 | ENDPROC(___raw_write_trylock_asm) | ||
558 | |||
559 | /* | ||
560 | * r0 = &rwlock->lock | ||
561 | * | ||
562 | * Clobbers: r3:0, p1:0 | ||
563 | */ | ||
564 | ENTRY(___raw_write_unlock_asm) | ||
565 | p1 = r0; | ||
566 | r3.l = lo(RW_LOCK_BIAS); | ||
567 | r3.h = hi(RW_LOCK_BIAS); | ||
568 | [--sp] = rets; | ||
569 | call _get_core_lock; | ||
570 | r1 = [p1]; | ||
571 | r1 = r1 + r3; | ||
572 | [p1] = r1; | ||
573 | r1 = p1; | ||
574 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
575 | call _end_lock_coherent | ||
576 | #else | ||
577 | call _put_core_lock; | ||
578 | #endif | ||
579 | rets = [sp++]; | ||
580 | rts; | ||
581 | ENDPROC(___raw_write_unlock_asm) | ||
582 | |||
583 | /* | ||
584 | * r0 = ptr | ||
585 | * r1 = value | ||
586 | * | ||
587 | * Add a signed value to a 32bit word and return the new value atomically. | ||
588 | * Clobbers: r3:0, p1:0 | ||
589 | */ | ||
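/*
 * All of the atomic helpers below follow the same pattern; in C terms
 * (sketch):
 *
 *	flags = get_core_lock(ptr);
 *	new = *ptr + value;	// clear/set/xor use &= ~mask, |= mask, ^= mask
 *	*ptr = new;
 *	put_core_lock(flags);
 *	return new;		// the mask variants return the old value instead
 */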
590 | ENTRY(___raw_atomic_update_asm) | ||
591 | p1 = r0; | ||
592 | r3 = r1; | ||
593 | [--sp] = rets; | ||
594 | call _get_core_lock; | ||
595 | r2 = [p1]; | ||
596 | r3 = r3 + r2; | ||
597 | [p1] = r3; | ||
598 | r1 = p1; | ||
599 | call _put_core_lock; | ||
600 | r0 = r3; | ||
601 | rets = [sp++]; | ||
602 | rts; | ||
603 | ENDPROC(___raw_atomic_update_asm) | ||
604 | |||
605 | /* | ||
606 | * r0 = ptr | ||
607 | * r1 = mask | ||
608 | * | ||
609 | * Clear the mask bits from a 32bit word and return the old 32bit value | ||
610 | * atomically. | ||
611 | * Clobbers: r3:0, p1:0 | ||
612 | */ | ||
613 | ENTRY(___raw_atomic_clear_asm) | ||
614 | p1 = r0; | ||
615 | r3 = ~r1; | ||
616 | [--sp] = rets; | ||
617 | call _get_core_lock; | ||
618 | r2 = [p1]; | ||
619 | r3 = r2 & r3; | ||
620 | [p1] = r3; | ||
621 | r3 = r2; | ||
622 | r1 = p1; | ||
623 | call _put_core_lock; | ||
624 | r0 = r3; | ||
625 | rets = [sp++]; | ||
626 | rts; | ||
627 | ENDPROC(___raw_atomic_clear_asm) | ||
628 | |||
629 | /* | ||
630 | * r0 = ptr | ||
631 | * r1 = mask | ||
632 | * | ||
633 | * Set the mask bits into a 32bit word and return the old 32bit value | ||
634 | * atomically. | ||
635 | * Clobbers: r3:0, p1:0 | ||
636 | */ | ||
637 | ENTRY(___raw_atomic_set_asm) | ||
638 | p1 = r0; | ||
639 | r3 = r1; | ||
640 | [--sp] = rets; | ||
641 | call _get_core_lock; | ||
642 | r2 = [p1]; | ||
643 | r3 = r2 | r3; | ||
644 | [p1] = r3; | ||
645 | r3 = r2; | ||
646 | r1 = p1; | ||
647 | call _put_core_lock; | ||
648 | r0 = r3; | ||
649 | rets = [sp++]; | ||
650 | rts; | ||
651 | ENDPROC(___raw_atomic_set_asm) | ||
652 | |||
653 | /* | ||
654 | * r0 = ptr | ||
655 | * r1 = mask | ||
656 | * | ||
657 | * XOR the mask bits with a 32bit word and return the old 32bit value | ||
658 | * atomically. | ||
659 | * Clobbers: r3:0, p1:0 | ||
660 | */ | ||
661 | ENTRY(___raw_atomic_xor_asm) | ||
662 | p1 = r0; | ||
663 | r3 = r1; | ||
664 | [--sp] = rets; | ||
665 | call _get_core_lock; | ||
666 | r2 = [p1]; | ||
667 | r3 = r2 ^ r3; | ||
668 | [p1] = r3; | ||
669 | r3 = r2; | ||
670 | r1 = p1; | ||
671 | call _put_core_lock; | ||
672 | r0 = r3; | ||
673 | rets = [sp++]; | ||
674 | rts; | ||
675 | ENDPROC(___raw_atomic_xor_asm) | ||
676 | |||
677 | /* | ||
678 | * r0 = ptr | ||
679 | * r1 = mask | ||
680 | * | ||
681 | * Perform a logical AND between the mask bits and a 32bit word, and | ||
682 | * return the masked value. We need this on this architecture in | ||
683 | * order to invalidate the local cache before testing. | ||
684 | * | ||
685 | * Clobbers: r3:0, p1:0 | ||
686 | */ | ||
687 | ENTRY(___raw_atomic_test_asm) | ||
688 | p1 = r0; | ||
689 | r3 = r1; | ||
690 | r1 = -L1_CACHE_BYTES; | ||
691 | r1 = r0 & r1; | ||
692 | p0 = r1; | ||
693 | flushinv[p0]; | ||
694 | SSYNC(r2); | ||
695 | r0 = [p1]; | ||
696 | r0 = r0 & r3; | ||
697 | rts; | ||
698 | ENDPROC(___raw_atomic_test_asm) | ||
699 | |||
700 | /* | ||
701 | * r0 = ptr | ||
702 | * r1 = value | ||
703 | * | ||
704 | * Swap *ptr with value and return the old 32bit value atomically. | ||
705 | * Clobbers: r3:0, p1:0 | ||
706 | */ | ||
707 | #define __do_xchg(src, dst) \ | ||
708 | p1 = r0; \ | ||
709 | r3 = r1; \ | ||
710 | [--sp] = rets; \ | ||
711 | call _get_core_lock; \ | ||
712 | r2 = src; \ | ||
713 | dst = r3; \ | ||
714 | r3 = r2; \ | ||
715 | r1 = p1; \ | ||
716 | call _put_core_lock; \ | ||
717 | r0 = r3; \ | ||
718 | rets = [sp++]; \ | ||
719 | rts; | ||
720 | |||
721 | ENTRY(___raw_xchg_1_asm) | ||
722 | __do_xchg(b[p1] (z), b[p1]) | ||
723 | ENDPROC(___raw_xchg_1_asm) | ||
724 | |||
725 | ENTRY(___raw_xchg_2_asm) | ||
726 | __do_xchg(w[p1] (z), w[p1]) | ||
727 | ENDPROC(___raw_xchg_2_asm) | ||
728 | |||
729 | ENTRY(___raw_xchg_4_asm) | ||
730 | __do_xchg([p1], [p1]) | ||
731 | ENDPROC(___raw_xchg_4_asm) | ||
732 | |||
733 | /* | ||
734 | * r0 = ptr | ||
735 | * r1 = new | ||
736 | * r2 = old | ||
737 | * | ||
738 | * Swap *ptr with new if *ptr == old and return the previous *ptr | ||
739 | * value atomically. | ||
740 | * | ||
741 | * Clobbers: r3:0, p1:0 | ||
742 | */ | ||
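/*
 * In C terms the macro body is (sketch, identical for the 1/2/4 byte
 * access widths selected by src/dst):
 *
 *	flags = get_core_lock(ptr);
 *	old = *ptr;
 *	if (old == expected)
 *		*ptr = new;
 *	put_core_lock(flags);
 *	return old;
 */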
743 | #define __do_cmpxchg(src, dst) \ | ||
744 | [--sp] = rets; \ | ||
745 | [--sp] = r4; \ | ||
746 | p1 = r0; \ | ||
747 | r3 = r1; \ | ||
748 | r4 = r2; \ | ||
749 | call _get_core_lock; \ | ||
750 | r2 = src; \ | ||
751 | cc = r2 == r4; \ | ||
752 | if !cc jump 1f; \ | ||
753 | dst = r3; \ | ||
754 | 1: r3 = r2; \ | ||
755 | r1 = p1; \ | ||
756 | call _put_core_lock; \ | ||
757 | r0 = r3; \ | ||
758 | r4 = [sp++]; \ | ||
759 | rets = [sp++]; \ | ||
760 | rts; | ||
761 | |||
762 | ENTRY(___raw_cmpxchg_1_asm) | ||
763 | __do_cmpxchg(b[p1] (z), b[p1]) | ||
764 | ENDPROC(___raw_cmpxchg_1_asm) | ||
765 | |||
766 | ENTRY(___raw_cmpxchg_2_asm) | ||
767 | __do_cmpxchg(w[p1] (z), w[p1]) | ||
768 | ENDPROC(___raw_cmpxchg_2_asm) | ||
769 | |||
770 | ENTRY(___raw_cmpxchg_4_asm) | ||
771 | __do_cmpxchg([p1], [p1]) | ||
772 | ENDPROC(___raw_cmpxchg_4_asm) | ||
773 | |||
774 | /* | ||
775 | * r0 = ptr | ||
776 | * r1 = bitnr | ||
777 | * | ||
778 | * Set a bit in a 32bit word and return the old 32bit value atomically. | ||
779 | * Clobbers: r3:0, p1:0 | ||
780 | */ | ||
781 | ENTRY(___raw_bit_set_asm) | ||
782 | r2 = r1; | ||
783 | r1 = 1; | ||
784 | r1 <<= r2; | ||
785 | jump ___raw_atomic_set_asm | ||
786 | ENDPROC(___raw_bit_set_asm) | ||
787 | |||
788 | /* | ||
789 | * r0 = ptr | ||
790 | * r1 = bitnr | ||
791 | * | ||
792 | * Clear a bit in a 32bit word and return the old 32bit value atomically. | ||
793 | * Clobbers: r3:0, p1:0 | ||
794 | */ | ||
795 | ENTRY(___raw_bit_clear_asm) | ||
796 | r2 = r1; | ||
797 | r1 = 1; | ||
798 | r1 <<= r2; | ||
799 | jump ___raw_atomic_clear_asm | ||
800 | ENDPROC(___raw_bit_clear_asm) | ||
801 | |||
802 | /* | ||
803 | * r0 = ptr | ||
804 | * r1 = bitnr | ||
805 | * | ||
806 | * Toggle a bit in a 32bit word and return the old 32bit value atomically. | ||
807 | * Clobbers: r3:0, p1:0 | ||
808 | */ | ||
809 | ENTRY(___raw_bit_toggle_asm) | ||
810 | r2 = r1; | ||
811 | r1 = 1; | ||
812 | r1 <<= r2; | ||
813 | jump ___raw_atomic_xor_asm | ||
814 | ENDPROC(___raw_bit_toggle_asm) | ||
815 | |||
816 | /* | ||
817 | * r0 = ptr | ||
818 | * r1 = bitnr | ||
819 | * | ||
820 | * Test-and-set a bit in a 32bit word and return the old bit value atomically. | ||
821 | * Clobbers: r3:0, p1:0 | ||
822 | */ | ||
823 | ENTRY(___raw_bit_test_set_asm) | ||
824 | [--sp] = rets; | ||
825 | [--sp] = r1; | ||
826 | call ___raw_bit_set_asm | ||
827 | r1 = [sp++]; | ||
828 | r2 = 1; | ||
829 | r2 <<= r1; | ||
830 | r0 = r0 & r2; | ||
831 | cc = r0 == 0; | ||
832 | if cc jump 1f | ||
833 | r0 = 1; | ||
834 | 1: | ||
835 | rets = [sp++]; | ||
836 | rts; | ||
837 | ENDPROC(___raw_bit_test_set_asm) | ||
838 | |||
839 | /* | ||
840 | * r0 = ptr | ||
841 | * r1 = bitnr | ||
842 | * | ||
843 | * Test-and-clear a bit in a 32bit word and return the old bit value atomically. | ||
844 | * Clobbers: r3:0, p1:0 | ||
845 | */ | ||
846 | ENTRY(___raw_bit_test_clear_asm) | ||
847 | [--sp] = rets; | ||
848 | [--sp] = r1; | ||
849 | call ___raw_bit_clear_asm | ||
850 | r1 = [sp++]; | ||
851 | r2 = 1; | ||
852 | r2 <<= r1; | ||
853 | r0 = r0 & r2; | ||
854 | cc = r0 == 0; | ||
855 | if cc jump 1f | ||
856 | r0 = 1; | ||
857 | 1: | ||
858 | rets = [sp++]; | ||
859 | rts; | ||
860 | ENDPROC(___raw_bit_test_clear_asm) | ||
861 | |||
862 | /* | ||
863 | * r0 = ptr | ||
864 | * r1 = bitnr | ||
865 | * | ||
866 | * Test-and-toggle a bit in a 32bit word, | ||
867 | * and return the old bit value atomically. | ||
868 | * Clobbers: r3:0, p1:0 | ||
869 | */ | ||
870 | ENTRY(___raw_bit_test_toggle_asm) | ||
871 | [--sp] = rets; | ||
872 | [--sp] = r1; | ||
873 | call ___raw_bit_toggle_asm | ||
874 | r1 = [sp++]; | ||
875 | r2 = 1; | ||
876 | r2 <<= r1; | ||
877 | r0 = r0 & r2; | ||
878 | cc = r0 == 0; | ||
879 | if cc jump 1f | ||
880 | r0 = 1; | ||
881 | 1: | ||
882 | rets = [sp++]; | ||
883 | rts; | ||
884 | ENDPROC(___raw_bit_test_toggle_asm) | ||
885 | |||
886 | /* | ||
887 | * r0 = ptr | ||
888 | * r1 = bitnr | ||
889 | * | ||
890 | * Test a bit in a 32bit word and return its value. | ||
891 | * We need this on this architecture in order to invalidate | ||
892 | * the local cache before testing. | ||
893 | * | ||
894 | * Clobbers: r3:0, p1:0 | ||
895 | */ | ||
896 | ENTRY(___raw_bit_test_asm) | ||
897 | r2 = r1; | ||
898 | r1 = 1; | ||
899 | r1 <<= r2; | ||
900 | jump ___raw_atomic_test_asm | ||
901 | ENDPROC(___raw_bit_test_asm) | ||
902 | |||
903 | /* | ||
904 | * r0 = ptr | ||
905 | * | ||
906 | * Fetch and return an uncached 32bit value. | ||
907 | * | ||
908 | * Clobbers: r2:0, p1:0 | ||
909 | */ | ||
910 | ENTRY(___raw_uncached_fetch_asm) | ||
911 | p1 = r0; | ||
912 | r1 = -L1_CACHE_BYTES; | ||
913 | r1 = r0 & r1; | ||
914 | p0 = r1; | ||
915 | flushinv[p0]; | ||
916 | SSYNC(r2); | ||
917 | r0 = [p1]; | ||
918 | rts; | ||
919 | ENDPROC(___raw_uncached_fetch_asm) | ||