Diffstat (limited to 'arch/tile/lib')
-rw-r--r--  arch/tile/lib/Makefile         |   5
-rw-r--r--  arch/tile/lib/atomic_32.c      |   5
-rw-r--r--  arch/tile/lib/atomic_asm_32.S  |   2
-rw-r--r--  arch/tile/lib/cacheflush.c     | 102
-rw-r--r--  arch/tile/lib/delay.c          |  21
-rw-r--r--  arch/tile/lib/exports.c        |  10
-rw-r--r--  arch/tile/lib/mb_incoherent.S  |  34
-rw-r--r--  arch/tile/lib/memcpy_tile64.c  |   4
-rw-r--r--  arch/tile/lib/spinlock_32.c    | 161
9 files changed, 228 insertions, 116 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 93122d5b1558..0c26086ecbef 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -2,9 +2,8 @@
 # Makefile for TILE-specific library files..
 #
 
-lib-y = cacheflush.o checksum.o cpumask.o delay.o \
-	mb_incoherent.o uaccess.o memmove.o \
-	memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \
+lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
+	memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \
 	strchr_$(BITS).o strlen_$(BITS).o
 
 ifeq ($(CONFIG_TILEGX),y)
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 7a5cc706ab62..f02040d3614e 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -46,14 +46,13 @@ struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
 #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 
 /* This page is remapped on startup to be hash-for-home. */
-int atomic_locks[PAGE_SIZE / sizeof(int) /* Only ATOMIC_HASH_SIZE is used */]
-	__attribute__((aligned(PAGE_SIZE), section(".bss.page_aligned")));
+int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
 
 #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 
 static inline int *__atomic_hashed_lock(volatile void *v)
 {
-	/* NOTE: this code must match "sys_cmpxchg" in kernel/intvec.S */
+	/* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
 #if ATOMIC_LOCKS_FOUND_VIA_TABLE()
 	unsigned long i =
 		(unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
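For readers unfamiliar with the hashed-lock scheme behind atomic_locks[], the stand-alone sketch below shows the general idea: derive a per-address lock slot from the address bits so that all atomics on the same word contend on the same slot. The page size, slot type, and hash here are illustrative assumptions, not the tile headers; the kernel's real hash must stay in sync with the assembly fast path in intvec_32.S.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL			/* assumed page size */

static int atomic_locks[PAGE_SIZE / sizeof(int)];	/* one page of lock words */

/*
 * Map an arbitrary data address to one lock slot.  Atomics on the same
 * (8-byte-aligned) word always pick the same slot; different words
 * spread across the table.
 */
static int *hashed_lock(volatile void *v)
{
	unsigned long i = ((unsigned long)v >> 3) % (PAGE_SIZE / sizeof(int));
	return &atomic_locks[i];
}

int main(void)
{
	long long x, y;
	printf("lock for &x: %p\n", (void *)hashed_lock(&x));
	printf("lock for &y: %p\n", (void *)hashed_lock(&y));
	printf("stable for &x: %d\n", hashed_lock(&x) == hashed_lock(&x));
	return 0;
}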
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 5a5514b77e78..82f64cc63658 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -14,7 +14,7 @@
  * Support routines for atomic operations. Each function takes:
  *
  * r0: address to manipulate
- * r1: pointer to atomic lock guarding this operation (for FUTEX_LOCK_REG)
+ * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
  * r2: new value to write, or for cmpxchg/add_unless, value to compare against
  * r3: (cmpxchg/xchg_add_unless) new value to write or add;
  *     (atomic64 ops) high word of value to write
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index 11b6164c2097..35c1d8ca5f38 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -21,3 +21,105 @@ void __flush_icache_range(unsigned long start, unsigned long end)
 {
 	invalidate_icache((const void *)start, end - start, PAGE_SIZE);
 }
+
+
+/* Force a load instruction to issue. */
+static inline void force_load(char *p)
+{
+	*(volatile char *)p;
+}
+
+/*
+ * Flush and invalidate a VA range that is homed remotely on a single
+ * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting
+ * until the memory controller holds the flushed values.
+ */
+void finv_buffer_remote(void *buffer, size_t size, int hfh)
+{
+	char *p, *base;
+	size_t step_size, load_count;
+	const unsigned long STRIPE_WIDTH = 8192;
+
+	/*
+	 * Flush and invalidate the buffer out of the local L1/L2
+	 * and request the home cache to flush and invalidate as well.
+	 */
+	__finv_buffer(buffer, size);
+
+	/*
+	 * Wait for the home cache to acknowledge that it has processed
+	 * all the flush-and-invalidate requests. This does not mean
+	 * that the flushed data has reached the memory controller yet,
+	 * but it does mean the home cache is processing the flushes.
+	 */
+	__insn_mf();
+
+	/*
+	 * Issue a load to the last cache line, which can't complete
+	 * until all the previously-issued flushes to the same memory
+	 * controller have also completed. If we weren't striping
+	 * memory, that one load would be sufficient, but since we may
+	 * be, we also need to back up to the last load issued to
+	 * another memory controller, which would be the point where
+	 * we crossed an 8KB boundary (the granularity of striping
+	 * across memory controllers). Keep backing up and doing this
+	 * until we are before the beginning of the buffer, or have
+	 * hit all the controllers.
+	 *
+	 * If we are flushing a hash-for-home buffer, it's even worse.
+	 * Each line may be homed on a different tile, and each tile
+	 * may have up to four lines that are on different
+	 * controllers. So as we walk backwards, we have to touch
+	 * enough cache lines to satisfy these constraints. In
+	 * practice this ends up being close enough to "load from
+	 * every cache line on a full memory stripe on each
+	 * controller" that we simply do that, to simplify the logic.
+	 *
+	 * FIXME: See bug 9535 for some issues with this code.
+	 */
+	if (hfh) {
+		step_size = L2_CACHE_BYTES;
+		load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
+			(1 << CHIP_LOG_NUM_MSHIMS());
+	} else {
+		step_size = STRIPE_WIDTH;
+		load_count = (1 << CHIP_LOG_NUM_MSHIMS());
+	}
+
+	/* Load the last byte of the buffer. */
+	p = (char *)buffer + size - 1;
+	force_load(p);
+
+	/* Bump down to the end of the previous stripe or cache line. */
+	p -= step_size;
+	p = (char *)((unsigned long)p | (step_size - 1));
+
+	/* Figure out how far back we need to go. */
+	base = p - (step_size * (load_count - 2));
+	if ((long)base < (long)buffer)
+		base = buffer;
+
+	/*
+	 * Fire all the loads we need. The MAF only has eight entries
+	 * so we can have at most eight outstanding loads, so we
+	 * unroll by that amount.
+	 */
+#pragma unroll 8
+	for (; p >= base; p -= step_size)
+		force_load(p);
+
+	/*
+	 * Repeat, but with inv's instead of loads, to get rid of the
+	 * data we just loaded into our own cache and the old home L3.
+	 * No need to unroll since inv's don't target a register.
+	 */
+	p = (char *)buffer + size - 1;
+	__insn_inv(p);
+	p -= step_size;
+	p = (char *)((unsigned long)p | (step_size - 1));
+	for (; p >= base; p -= step_size)
+		__insn_inv(p);
+
+	/* Wait for the load+inv's (and thus finvs) to have completed. */
+	__insn_mf();
+}
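To make the address arithmetic in finv_buffer_remote() easier to follow, here is a small stand-alone C model of its backward walk. It only prints the addresses that would be loaded; the stripe width, line size, and controller count are assumed example values rather than the values from the chip headers.

#include <stdio.h>
#include <stddef.h>

#define STRIPE_WIDTH	8192UL	/* assumed controller-striping granularity */
#define L2_CACHE_BYTES	64UL	/* assumed cache-line size */
#define NUM_MSHIMS	4UL	/* assumed number of memory controllers */

/* Print the addresses the backward walk would touch. */
static void walk(char *buffer, size_t size, int hfh)
{
	size_t step_size, load_count;
	char *p, *base;

	if (hfh) {
		/* hash-for-home: touch every line of a full stripe per controller */
		step_size = L2_CACHE_BYTES;
		load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * NUM_MSHIMS;
	} else {
		/* single home: one line per controller stripe */
		step_size = STRIPE_WIDTH;
		load_count = NUM_MSHIMS;
	}

	/* Last byte of the buffer, then back to the end of the previous stripe. */
	p = buffer + size - 1;
	printf("load %p\n", (void *)p);
	p -= step_size;
	p = (char *)((unsigned long)p | (step_size - 1));

	base = p - step_size * (load_count - 2);
	if ((unsigned long)base < (unsigned long)buffer)
		base = buffer;

	for (; p >= base; p -= step_size)
		printf("load %p\n", (void *)p);
}

int main(void)
{
	static char buf[4 * 8192];
	walk(buf, sizeof(buf), 0);	/* single-homed case */
	return 0;
}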
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c
index 5801b03c13ef..cdacdd11d360 100644
--- a/arch/tile/lib/delay.c
+++ b/arch/tile/lib/delay.c
@@ -15,20 +15,31 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/thread_info.h>
-#include <asm/fixmap.h>
-#include <hv/hypervisor.h>
+#include <asm/timex.h>
 
 void __udelay(unsigned long usecs)
 {
-	hv_nanosleep(usecs * 1000);
+	if (usecs > ULONG_MAX / 1000) {
+		WARN_ON_ONCE(usecs > ULONG_MAX / 1000);
+		usecs = ULONG_MAX / 1000;
+	}
+	__ndelay(usecs * 1000);
 }
 EXPORT_SYMBOL(__udelay);
 
 void __ndelay(unsigned long nsecs)
 {
-	hv_nanosleep(nsecs);
+	cycles_t target = get_cycles();
+	target += ns2cycles(nsecs);
+	while (get_cycles() < target)
+		cpu_relax();
 }
 EXPORT_SYMBOL(__ndelay);
 
-/* FIXME: should be declared in a header somewhere. */
+void __delay(unsigned long cycles)
+{
+	cycles_t target = get_cycles() + cycles;
+	while (get_cycles() < target)
+		cpu_relax();
+}
 EXPORT_SYMBOL(__delay);
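The new delay routines replace the hypervisor sleep call with a cycle-counting busy wait. Below is a minimal userspace sketch of that pattern, using clock_gettime() as a stand-in for get_cycles()/ns2cycles(); the clamp mirrors the new overflow check in __udelay(). Names and units here are illustrative, not the tile implementation.

#include <stdint.h>
#include <limits.h>
#include <time.h>

/* Monotonic nanosecond counter, standing in for get_cycles(). */
static uint64_t now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void ndelay_sketch(unsigned long nsecs)
{
	uint64_t target = now_ns() + nsecs;
	while (now_ns() < target)
		;	/* a cpu_relax() equivalent would go here */
}

static void udelay_sketch(unsigned long usecs)
{
	/* Clamp so usecs * 1000 cannot overflow, as __udelay() now does. */
	if (usecs > ULONG_MAX / 1000)
		usecs = ULONG_MAX / 1000;
	ndelay_sketch(usecs * 1000);
}

int main(void)
{
	udelay_sketch(100);	/* busy-wait roughly 100 microseconds */
	return 0;
}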
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 1509c5597653..49284fae9d09 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -29,6 +29,9 @@ EXPORT_SYMBOL(__put_user_8);
 EXPORT_SYMBOL(strnlen_user_asm);
 EXPORT_SYMBOL(strncpy_from_user_asm);
 EXPORT_SYMBOL(clear_user_asm);
+EXPORT_SYMBOL(flush_user_asm);
+EXPORT_SYMBOL(inv_user_asm);
+EXPORT_SYMBOL(finv_user_asm);
 
 /* arch/tile/kernel/entry.S */
 #include <linux/kernel.h>
@@ -45,9 +48,6 @@ EXPORT_SYMBOL(__copy_from_user_zeroing);
 EXPORT_SYMBOL(__copy_in_user_inatomic);
 #endif
 
-/* arch/tile/lib/mb_incoherent.S */
-EXPORT_SYMBOL(__mb_incoherent);
-
 /* hypervisor glue */
 #include <hv/hypervisor.h>
 EXPORT_SYMBOL(hv_dev_open);
@@ -85,4 +85,8 @@ int64_t __muldi3(int64_t, int64_t);
 EXPORT_SYMBOL(__muldi3);
 uint64_t __lshrdi3(uint64_t, unsigned int);
 EXPORT_SYMBOL(__lshrdi3);
+uint64_t __ashrdi3(uint64_t, unsigned int);
+EXPORT_SYMBOL(__ashrdi3);
+uint64_t __ashldi3(uint64_t, unsigned int);
+EXPORT_SYMBOL(__ashldi3);
 #endif
diff --git a/arch/tile/lib/mb_incoherent.S b/arch/tile/lib/mb_incoherent.S
deleted file mode 100644
index 989ad7b68d5a..000000000000
--- a/arch/tile/lib/mb_incoherent.S
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * Assembly code for invoking the HV's fence_incoherent syscall.
- */
-
-#include <linux/linkage.h>
-#include <hv/syscall_public.h>
-#include <arch/abi.h>
-#include <arch/chip.h>
-
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
-
-/*
- * Invoke the hypervisor's fence_incoherent syscall, which guarantees
- * that all victims for cachelines homed on this tile have reached memory.
- */
-STD_ENTRY(__mb_incoherent)
-	moveli TREG_SYSCALL_NR_NAME, HV_SYS_fence_incoherent
-	swint2
-	jrp lr
-	STD_ENDPROC(__mb_incoherent)
-
-#endif
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
index f7d4a6ad61e8..b2fe15e01075 100644
--- a/arch/tile/lib/memcpy_tile64.c
+++ b/arch/tile/lib/memcpy_tile64.c
@@ -96,7 +96,7 @@ static void memcpy_multicache(void *dest, const void *source,
 	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
 	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
 	ptep = pte_offset_kernel(pmdp, newsrc);
-	*ptep = src_pte; /* set_pte() would be confused by this */
+	__set_pte(ptep, src_pte); /* set_pte() would be confused by this */
 	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
 
 	/* Actually move the data. */
@@ -109,7 +109,7 @@ static void memcpy_multicache(void *dest, const void *source,
 	 */
 	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
 	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
-	*ptep = src_pte; /* set_pte() would be confused by this */
+	__set_pte(ptep, src_pte); /* set_pte() would be confused by this */
 	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
 
 	/*
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 5cd1c4004eca..cb0999fb64b4 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <asm/processor.h>
+#include <arch/spr_def.h>
 
 #include "spinlock_common.h"
 
@@ -91,75 +92,75 @@ EXPORT_SYMBOL(arch_spin_unlock_wait);
 #define RD_COUNT_MASK	((1 << RD_COUNT_WIDTH) - 1)
 
 
-/* Lock the word, spinning until there are no tns-ers. */
-static inline u32 get_rwlock(arch_rwlock_t *rwlock)
-{
-	u32 iterations = 0;
-	for (;;) {
-		u32 val = __insn_tns((int *)&rwlock->lock);
-		if (unlikely(val & 1)) {
-			delay_backoff(iterations++);
-			continue;
-		}
-		return val;
-	}
-}
-
-int arch_read_trylock_slow(arch_rwlock_t *rwlock)
-{
-	u32 val = get_rwlock(rwlock);
-	int locked = (val << RD_COUNT_WIDTH) == 0;
-	rwlock->lock = val + (locked << RD_COUNT_SHIFT);
-	return locked;
-}
-EXPORT_SYMBOL(arch_read_trylock_slow);
-
-void arch_read_unlock_slow(arch_rwlock_t *rwlock)
-{
-	u32 val = get_rwlock(rwlock);
-	rwlock->lock = val - (1 << RD_COUNT_SHIFT);
-}
-EXPORT_SYMBOL(arch_read_unlock_slow);
-
-void arch_write_unlock_slow(arch_rwlock_t *rwlock, u32 val)
+/*
+ * We can get the read lock if everything but the reader bits (which
+ * are in the high part of the word) is zero, i.e. no active or
+ * waiting writers, no tns.
+ *
+ * We guard the tns/store-back with an interrupt critical section to
+ * preserve the semantic that the same read lock can be acquired in an
+ * interrupt context.
+ */
+inline int arch_read_trylock(arch_rwlock_t *rwlock)
 {
-	u32 eq, mask = 1 << WR_CURR_SHIFT;
-	while (unlikely(val & 1)) {
-		/* Limited backoff since we are the highest-priority task. */
-		relax(4);
-		val = __insn_tns((int *)&rwlock->lock);
+	u32 val;
+	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
+	val = __insn_tns((int *)&rwlock->lock);
+	if (likely((val << _RD_COUNT_WIDTH) == 0)) {
+		val += 1 << RD_COUNT_SHIFT;
+		rwlock->lock = val;
+		__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+		BUG_ON(val == 0); /* we don't expect wraparound */
+		return 1;
 	}
-	val = __insn_addb(val, mask);
-	eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT));
-	val = __insn_mz(eq & mask, val);
-	rwlock->lock = val;
+	if ((val & 1) == 0)
+		rwlock->lock = val;
+	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+	return 0;
 }
-EXPORT_SYMBOL(arch_write_unlock_slow);
+EXPORT_SYMBOL(arch_read_trylock);
 
 /*
- * We spin until everything but the reader bits (which are in the high
- * part of the word) are zero, i.e. no active or waiting writers, no tns.
- *
+ * Spin doing arch_read_trylock() until we acquire the lock.
  * ISSUE: This approach can permanently starve readers. A reader who sees
  * a writer could instead take a ticket lock (just like a writer would),
  * and atomically enter read mode (with 1 reader) when it gets the ticket.
- * This way both readers and writers will always make forward progress
+ * This way both readers and writers would always make forward progress
  * in a finite time.
  */
-void arch_read_lock_slow(arch_rwlock_t *rwlock, u32 val)
+void arch_read_lock(arch_rwlock_t *rwlock)
 {
 	u32 iterations = 0;
-	do {
-		if (!(val & 1))
-			rwlock->lock = val;
+	while (unlikely(!arch_read_trylock(rwlock)))
 		delay_backoff(iterations++);
+}
+EXPORT_SYMBOL(arch_read_lock);
+
+void arch_read_unlock(arch_rwlock_t *rwlock)
+{
+	u32 val, iterations = 0;
+
+	mb(); /* guarantee anything modified under the lock is visible */
+	for (;;) {
+		__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
 		val = __insn_tns((int *)&rwlock->lock);
-	} while ((val << RD_COUNT_WIDTH) != 0);
-	rwlock->lock = val + (1 << RD_COUNT_SHIFT);
+		if (likely(val & 1) == 0) {
+			rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
+			__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+			break;
+		}
+		__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+		delay_backoff(iterations++);
+	}
 }
-EXPORT_SYMBOL(arch_read_lock_slow);
+EXPORT_SYMBOL(arch_read_unlock);
 
-void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
+/*
+ * We don't need an interrupt critical section here (unlike for
+ * arch_read_lock) since we should never use a bare write lock where
+ * it could be interrupted by code that could try to re-acquire it.
+ */
+void arch_write_lock(arch_rwlock_t *rwlock)
 {
 	/*
 	 * The trailing underscore on this variable (and curr_ below)
@@ -168,6 +169,12 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
 	 */
 	u32 my_ticket_;
 	u32 iterations = 0;
+	u32 val = __insn_tns((int *)&rwlock->lock);
+
+	if (likely(val == 0)) {
+		rwlock->lock = 1 << _WR_NEXT_SHIFT;
+		return;
+	}
 
 	/*
 	 * Wait until there are no readers, then bump up the next
@@ -206,23 +213,47 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
 		relax(4);
 	}
 }
-EXPORT_SYMBOL(arch_write_lock_slow);
+EXPORT_SYMBOL(arch_write_lock);
 
-int __tns_atomic_acquire(atomic_t *lock)
+int arch_write_trylock(arch_rwlock_t *rwlock)
 {
-	int ret;
-	u32 iterations = 0;
+	u32 val = __insn_tns((int *)&rwlock->lock);
 
-	BUG_ON(__insn_mfspr(SPR_INTERRUPT_CRITICAL_SECTION));
-	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
+	/*
+	 * If a tns is in progress, or there's a waiting or active locker,
+	 * or active readers, we can't take the lock, so give up.
+	 */
+	if (unlikely(val != 0)) {
+		if (!(val & 1))
+			rwlock->lock = val;
+		return 0;
+	}
 
-	while ((ret = __insn_tns((void *)&lock->counter)) == 1)
-		delay_backoff(iterations++);
-	return ret;
+	/* Set the "next" field to mark it locked. */
+	rwlock->lock = 1 << _WR_NEXT_SHIFT;
+	return 1;
 }
+EXPORT_SYMBOL(arch_write_trylock);
 
-void __tns_atomic_release(atomic_t *p, int v)
+void arch_write_unlock(arch_rwlock_t *rwlock)
 {
-	p->counter = v;
-	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+	u32 val, eq, mask;
+
+	mb(); /* guarantee anything modified under the lock is visible */
+	val = __insn_tns((int *)&rwlock->lock);
+	if (likely(val == (1 << _WR_NEXT_SHIFT))) {
+		rwlock->lock = 0;
+		return;
+	}
+	while (unlikely(val & 1)) {
+		/* Limited backoff since we are the highest-priority task. */
+		relax(4);
+		val = __insn_tns((int *)&rwlock->lock);
+	}
+	mask = 1 << WR_CURR_SHIFT;
+	val = __insn_addb(val, mask);
+	eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT));
+	val = __insn_mz(eq & mask, val);
+	rwlock->lock = val;
 }
+EXPORT_SYMBOL(arch_write_unlock);
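The rwlock word layout that arch_read_trylock() checks with (val << _RD_COUNT_WIDTH) == 0 can be modeled in a few lines: bit 0 marks a tns in progress, the low bits hold the writer ticket fields, and the top bits count readers. The field widths below are assumptions for illustration; the real constants live in the tile spinlock headers.

#include <assert.h>
#include <stdint.h>

#define RD_COUNT_WIDTH	8			/* assumed reader-count width */
#define RD_COUNT_SHIFT	(32 - RD_COUNT_WIDTH)	/* readers live in the top bits */

/* A read lock is available iff everything below the reader count is zero. */
static int read_lock_available(uint32_t val)
{
	return (uint32_t)(val << RD_COUNT_WIDTH) == 0;
}

static uint32_t add_reader(uint32_t val)
{
	return val + (1u << RD_COUNT_SHIFT);
}

int main(void)
{
	uint32_t lock = 0;

	assert(read_lock_available(lock));
	lock = add_reader(lock);		/* one reader holds the lock */
	assert(read_lock_available(lock));	/* more readers may still join */
	assert(!read_lock_available(lock | 1));	/* tns bit blocks new readers */
	return 0;
}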