diff options
Diffstat (limited to 'arch/tile/lib')
-rw-r--r-- | arch/tile/lib/Makefile | 5 | ||||
-rw-r--r-- | arch/tile/lib/atomic_32.c | 5 | ||||
-rw-r--r-- | arch/tile/lib/atomic_asm_32.S | 2 | ||||
-rw-r--r-- | arch/tile/lib/cacheflush.c | 102 | ||||
-rw-r--r-- | arch/tile/lib/delay.c | 21 | ||||
-rw-r--r-- | arch/tile/lib/exports.c | 10 | ||||
-rw-r--r-- | arch/tile/lib/mb_incoherent.S | 34 | ||||
-rw-r--r-- | arch/tile/lib/memcpy_tile64.c | 4 | ||||
-rw-r--r-- | arch/tile/lib/spinlock_32.c | 161 |
9 files changed, 228 insertions, 116 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 93122d5b155..0c26086ecbe 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile | |||
@@ -2,9 +2,8 @@ | |||
2 | # Makefile for TILE-specific library files.. | 2 | # Makefile for TILE-specific library files.. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y = cacheflush.o checksum.o cpumask.o delay.o \ | 5 | lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ |
6 | mb_incoherent.o uaccess.o memmove.o \ | 6 | memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ |
7 | memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ | ||
8 | strchr_$(BITS).o strlen_$(BITS).o | 7 | strchr_$(BITS).o strlen_$(BITS).o |
9 | 8 | ||
10 | ifeq ($(CONFIG_TILEGX),y) | 9 | ifeq ($(CONFIG_TILEGX),y) |
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 7a5cc706ab6..f02040d3614 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c | |||
@@ -46,14 +46,13 @@ struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE] | |||
46 | #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ | 46 | #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ |
47 | 47 | ||
48 | /* This page is remapped on startup to be hash-for-home. */ | 48 | /* This page is remapped on startup to be hash-for-home. */ |
49 | int atomic_locks[PAGE_SIZE / sizeof(int) /* Only ATOMIC_HASH_SIZE is used */] | 49 | int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss; |
50 | __attribute__((aligned(PAGE_SIZE), section(".bss.page_aligned"))); | ||
51 | 50 | ||
52 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ | 51 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ |
53 | 52 | ||
54 | static inline int *__atomic_hashed_lock(volatile void *v) | 53 | static inline int *__atomic_hashed_lock(volatile void *v) |
55 | { | 54 | { |
56 | /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec.S */ | 55 | /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ |
57 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() | 56 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() |
58 | unsigned long i = | 57 | unsigned long i = |
59 | (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); | 58 | (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); |
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 5a5514b77e7..82f64cc6365 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S | |||
@@ -14,7 +14,7 @@ | |||
14 | * Support routines for atomic operations. Each function takes: | 14 | * Support routines for atomic operations. Each function takes: |
15 | * | 15 | * |
16 | * r0: address to manipulate | 16 | * r0: address to manipulate |
17 | * r1: pointer to atomic lock guarding this operation (for FUTEX_LOCK_REG) | 17 | * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG) |
18 | * r2: new value to write, or for cmpxchg/add_unless, value to compare against | 18 | * r2: new value to write, or for cmpxchg/add_unless, value to compare against |
19 | * r3: (cmpxchg/xchg_add_unless) new value to write or add; | 19 | * r3: (cmpxchg/xchg_add_unless) new value to write or add; |
20 | * (atomic64 ops) high word of value to write | 20 | * (atomic64 ops) high word of value to write |
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 11b6164c209..35c1d8ca5f3 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c | |||
@@ -21,3 +21,105 @@ void __flush_icache_range(unsigned long start, unsigned long end) | |||
21 | { | 21 | { |
22 | invalidate_icache((const void *)start, end - start, PAGE_SIZE); | 22 | invalidate_icache((const void *)start, end - start, PAGE_SIZE); |
23 | } | 23 | } |
24 | |||
25 | |||
26 | /* Force a load instruction to issue. */ | ||
27 | static inline void force_load(char *p) | ||
28 | { | ||
29 | *(volatile char *)p; | ||
30 | } | ||
31 | |||
32 | /* | ||
33 | * Flush and invalidate a VA range that is homed remotely on a single | ||
34 | * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting | ||
35 | * until the memory controller holds the flushed values. | ||
36 | */ | ||
37 | void finv_buffer_remote(void *buffer, size_t size, int hfh) | ||
38 | { | ||
39 | char *p, *base; | ||
40 | size_t step_size, load_count; | ||
41 | const unsigned long STRIPE_WIDTH = 8192; | ||
42 | |||
43 | /* | ||
44 | * Flush and invalidate the buffer out of the local L1/L2 | ||
45 | * and request the home cache to flush and invalidate as well. | ||
46 | */ | ||
47 | __finv_buffer(buffer, size); | ||
48 | |||
49 | /* | ||
50 | * Wait for the home cache to acknowledge that it has processed | ||
51 | * all the flush-and-invalidate requests. This does not mean | ||
52 | * that the flushed data has reached the memory controller yet, | ||
53 | * but it does mean the home cache is processing the flushes. | ||
54 | */ | ||
55 | __insn_mf(); | ||
56 | |||
57 | /* | ||
58 | * Issue a load to the last cache line, which can't complete | ||
59 | * until all the previously-issued flushes to the same memory | ||
60 | * controller have also completed. If we weren't striping | ||
61 | * memory, that one load would be sufficient, but since we may | ||
62 | * be, we also need to back up to the last load issued to | ||
63 | * another memory controller, which would be the point where | ||
64 | * we crossed an 8KB boundary (the granularity of striping | ||
65 | * across memory controllers). Keep backing up and doing this | ||
66 | * until we are before the beginning of the buffer, or have | ||
67 | * hit all the controllers. | ||
68 | * | ||
69 | * If we are flushing a hash-for-home buffer, it's even worse. | ||
70 | * Each line may be homed on a different tile, and each tile | ||
71 | * may have up to four lines that are on different | ||
72 | * controllers. So as we walk backwards, we have to touch | ||
73 | * enough cache lines to satisfy these constraints. In | ||
74 | * practice this ends up being close enough to "load from | ||
75 | * every cache line on a full memory stripe on each | ||
76 | * controller" that we simply do that, to simplify the logic. | ||
77 | * | ||
78 | * FIXME: See bug 9535 for some issues with this code. | ||
79 | */ | ||
80 | if (hfh) { | ||
81 | step_size = L2_CACHE_BYTES; | ||
82 | load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * | ||
83 | (1 << CHIP_LOG_NUM_MSHIMS()); | ||
84 | } else { | ||
85 | step_size = STRIPE_WIDTH; | ||
86 | load_count = (1 << CHIP_LOG_NUM_MSHIMS()); | ||
87 | } | ||
88 | |||
89 | /* Load the last byte of the buffer. */ | ||
90 | p = (char *)buffer + size - 1; | ||
91 | force_load(p); | ||
92 | |||
93 | /* Bump down to the end of the previous stripe or cache line. */ | ||
94 | p -= step_size; | ||
95 | p = (char *)((unsigned long)p | (step_size - 1)); | ||
96 | |||
97 | /* Figure out how far back we need to go. */ | ||
98 | base = p - (step_size * (load_count - 2)); | ||
99 | if ((long)base < (long)buffer) | ||
100 | base = buffer; | ||
101 | |||
102 | /* | ||
103 | * Fire all the loads we need. The MAF only has eight entries | ||
104 | * so we can have at most eight outstanding loads, so we | ||
105 | * unroll by that amount. | ||
106 | */ | ||
107 | #pragma unroll 8 | ||
108 | for (; p >= base; p -= step_size) | ||
109 | force_load(p); | ||
110 | |||
111 | /* | ||
112 | * Repeat, but with inv's instead of loads, to get rid of the | ||
113 | * data we just loaded into our own cache and the old home L3. | ||
114 | * No need to unroll since inv's don't target a register. | ||
115 | */ | ||
116 | p = (char *)buffer + size - 1; | ||
117 | __insn_inv(p); | ||
118 | p -= step_size; | ||
119 | p = (char *)((unsigned long)p | (step_size - 1)); | ||
120 | for (; p >= base; p -= step_size) | ||
121 | __insn_inv(p); | ||
122 | |||
123 | /* Wait for the load+inv's (and thus finvs) to have completed. */ | ||
124 | __insn_mf(); | ||
125 | } | ||
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c index 5801b03c13e..cdacdd11d36 100644 --- a/arch/tile/lib/delay.c +++ b/arch/tile/lib/delay.c | |||
@@ -15,20 +15,31 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
17 | #include <linux/thread_info.h> | 17 | #include <linux/thread_info.h> |
18 | #include <asm/fixmap.h> | 18 | #include <asm/timex.h> |
19 | #include <hv/hypervisor.h> | ||
20 | 19 | ||
21 | void __udelay(unsigned long usecs) | 20 | void __udelay(unsigned long usecs) |
22 | { | 21 | { |
23 | hv_nanosleep(usecs * 1000); | 22 | if (usecs > ULONG_MAX / 1000) { |
23 | WARN_ON_ONCE(usecs > ULONG_MAX / 1000); | ||
24 | usecs = ULONG_MAX / 1000; | ||
25 | } | ||
26 | __ndelay(usecs * 1000); | ||
24 | } | 27 | } |
25 | EXPORT_SYMBOL(__udelay); | 28 | EXPORT_SYMBOL(__udelay); |
26 | 29 | ||
27 | void __ndelay(unsigned long nsecs) | 30 | void __ndelay(unsigned long nsecs) |
28 | { | 31 | { |
29 | hv_nanosleep(nsecs); | 32 | cycles_t target = get_cycles(); |
33 | target += ns2cycles(nsecs); | ||
34 | while (get_cycles() < target) | ||
35 | cpu_relax(); | ||
30 | } | 36 | } |
31 | EXPORT_SYMBOL(__ndelay); | 37 | EXPORT_SYMBOL(__ndelay); |
32 | 38 | ||
33 | /* FIXME: should be declared in a header somewhere. */ | 39 | void __delay(unsigned long cycles) |
40 | { | ||
41 | cycles_t target = get_cycles() + cycles; | ||
42 | while (get_cycles() < target) | ||
43 | cpu_relax(); | ||
44 | } | ||
34 | EXPORT_SYMBOL(__delay); | 45 | EXPORT_SYMBOL(__delay); |
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index 1509c559765..49284fae9d0 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c | |||
@@ -29,6 +29,9 @@ EXPORT_SYMBOL(__put_user_8); | |||
29 | EXPORT_SYMBOL(strnlen_user_asm); | 29 | EXPORT_SYMBOL(strnlen_user_asm); |
30 | EXPORT_SYMBOL(strncpy_from_user_asm); | 30 | EXPORT_SYMBOL(strncpy_from_user_asm); |
31 | EXPORT_SYMBOL(clear_user_asm); | 31 | EXPORT_SYMBOL(clear_user_asm); |
32 | EXPORT_SYMBOL(flush_user_asm); | ||
33 | EXPORT_SYMBOL(inv_user_asm); | ||
34 | EXPORT_SYMBOL(finv_user_asm); | ||
32 | 35 | ||
33 | /* arch/tile/kernel/entry.S */ | 36 | /* arch/tile/kernel/entry.S */ |
34 | #include <linux/kernel.h> | 37 | #include <linux/kernel.h> |
@@ -45,9 +48,6 @@ EXPORT_SYMBOL(__copy_from_user_zeroing); | |||
45 | EXPORT_SYMBOL(__copy_in_user_inatomic); | 48 | EXPORT_SYMBOL(__copy_in_user_inatomic); |
46 | #endif | 49 | #endif |
47 | 50 | ||
48 | /* arch/tile/lib/mb_incoherent.S */ | ||
49 | EXPORT_SYMBOL(__mb_incoherent); | ||
50 | |||
51 | /* hypervisor glue */ | 51 | /* hypervisor glue */ |
52 | #include <hv/hypervisor.h> | 52 | #include <hv/hypervisor.h> |
53 | EXPORT_SYMBOL(hv_dev_open); | 53 | EXPORT_SYMBOL(hv_dev_open); |
@@ -85,4 +85,8 @@ int64_t __muldi3(int64_t, int64_t); | |||
85 | EXPORT_SYMBOL(__muldi3); | 85 | EXPORT_SYMBOL(__muldi3); |
86 | uint64_t __lshrdi3(uint64_t, unsigned int); | 86 | uint64_t __lshrdi3(uint64_t, unsigned int); |
87 | EXPORT_SYMBOL(__lshrdi3); | 87 | EXPORT_SYMBOL(__lshrdi3); |
88 | uint64_t __ashrdi3(uint64_t, unsigned int); | ||
89 | EXPORT_SYMBOL(__ashrdi3); | ||
90 | uint64_t __ashldi3(uint64_t, unsigned int); | ||
91 | EXPORT_SYMBOL(__ashldi3); | ||
88 | #endif | 92 | #endif |
diff --git a/arch/tile/lib/mb_incoherent.S b/arch/tile/lib/mb_incoherent.S deleted file mode 100644 index 989ad7b68d5..00000000000 --- a/arch/tile/lib/mb_incoherent.S +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * Assembly code for invoking the HV's fence_incoherent syscall. | ||
15 | */ | ||
16 | |||
17 | #include <linux/linkage.h> | ||
18 | #include <hv/syscall_public.h> | ||
19 | #include <arch/abi.h> | ||
20 | #include <arch/chip.h> | ||
21 | |||
22 | #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() | ||
23 | |||
24 | /* | ||
25 | * Invoke the hypervisor's fence_incoherent syscall, which guarantees | ||
26 | * that all victims for cachelines homed on this tile have reached memory. | ||
27 | */ | ||
28 | STD_ENTRY(__mb_incoherent) | ||
29 | moveli TREG_SYSCALL_NR_NAME, HV_SYS_fence_incoherent | ||
30 | swint2 | ||
31 | jrp lr | ||
32 | STD_ENDPROC(__mb_incoherent) | ||
33 | |||
34 | #endif | ||
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c index f7d4a6ad61e..b2fe15e0107 100644 --- a/arch/tile/lib/memcpy_tile64.c +++ b/arch/tile/lib/memcpy_tile64.c | |||
@@ -96,7 +96,7 @@ static void memcpy_multicache(void *dest, const void *source, | |||
96 | newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); | 96 | newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); |
97 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); | 97 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); |
98 | ptep = pte_offset_kernel(pmdp, newsrc); | 98 | ptep = pte_offset_kernel(pmdp, newsrc); |
99 | *ptep = src_pte; /* set_pte() would be confused by this */ | 99 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ |
100 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | 100 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
101 | 101 | ||
102 | /* Actually move the data. */ | 102 | /* Actually move the data. */ |
@@ -109,7 +109,7 @@ static void memcpy_multicache(void *dest, const void *source, | |||
109 | */ | 109 | */ |
110 | src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); | 110 | src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); |
111 | src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ | 111 | src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ |
112 | *ptep = src_pte; /* set_pte() would be confused by this */ | 112 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ |
113 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | 113 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
114 | 114 | ||
115 | /* | 115 | /* |
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 5cd1c4004ec..cb0999fb64b 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
18 | #include <arch/spr_def.h> | ||
18 | 19 | ||
19 | #include "spinlock_common.h" | 20 | #include "spinlock_common.h" |
20 | 21 | ||
@@ -91,75 +92,75 @@ EXPORT_SYMBOL(arch_spin_unlock_wait); | |||
91 | #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) | 92 | #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) |
92 | 93 | ||
93 | 94 | ||
94 | /* Lock the word, spinning until there are no tns-ers. */ | 95 | /* |
95 | static inline u32 get_rwlock(arch_rwlock_t *rwlock) | 96 | * We can get the read lock if everything but the reader bits (which |
96 | { | 97 | * are in the high part of the word) is zero, i.e. no active or |
97 | u32 iterations = 0; | 98 | * waiting writers, no tns. |
98 | for (;;) { | 99 | * |
99 | u32 val = __insn_tns((int *)&rwlock->lock); | 100 | * We guard the tns/store-back with an interrupt critical section to |
100 | if (unlikely(val & 1)) { | 101 | * preserve the semantic that the same read lock can be acquired in an |
101 | delay_backoff(iterations++); | 102 | * interrupt context. |
102 | continue; | 103 | */ |
103 | } | 104 | inline int arch_read_trylock(arch_rwlock_t *rwlock) |
104 | return val; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | int arch_read_trylock_slow(arch_rwlock_t *rwlock) | ||
109 | { | ||
110 | u32 val = get_rwlock(rwlock); | ||
111 | int locked = (val << RD_COUNT_WIDTH) == 0; | ||
112 | rwlock->lock = val + (locked << RD_COUNT_SHIFT); | ||
113 | return locked; | ||
114 | } | ||
115 | EXPORT_SYMBOL(arch_read_trylock_slow); | ||
116 | |||
117 | void arch_read_unlock_slow(arch_rwlock_t *rwlock) | ||
118 | { | ||
119 | u32 val = get_rwlock(rwlock); | ||
120 | rwlock->lock = val - (1 << RD_COUNT_SHIFT); | ||
121 | } | ||
122 | EXPORT_SYMBOL(arch_read_unlock_slow); | ||
123 | |||
124 | void arch_write_unlock_slow(arch_rwlock_t *rwlock, u32 val) | ||
125 | { | 105 | { |
126 | u32 eq, mask = 1 << WR_CURR_SHIFT; | 106 | u32 val; |
127 | while (unlikely(val & 1)) { | 107 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); |
128 | /* Limited backoff since we are the highest-priority task. */ | 108 | val = __insn_tns((int *)&rwlock->lock); |
129 | relax(4); | 109 | if (likely((val << _RD_COUNT_WIDTH) == 0)) { |
130 | val = __insn_tns((int *)&rwlock->lock); | 110 | val += 1 << RD_COUNT_SHIFT; |
111 | rwlock->lock = val; | ||
112 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
113 | BUG_ON(val == 0); /* we don't expect wraparound */ | ||
114 | return 1; | ||
131 | } | 115 | } |
132 | val = __insn_addb(val, mask); | 116 | if ((val & 1) == 0) |
133 | eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); | 117 | rwlock->lock = val; |
134 | val = __insn_mz(eq & mask, val); | 118 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); |
135 | rwlock->lock = val; | 119 | return 0; |
136 | } | 120 | } |
137 | EXPORT_SYMBOL(arch_write_unlock_slow); | 121 | EXPORT_SYMBOL(arch_read_trylock); |
138 | 122 | ||
139 | /* | 123 | /* |
140 | * We spin until everything but the reader bits (which are in the high | 124 | * Spin doing arch_read_trylock() until we acquire the lock. |
141 | * part of the word) are zero, i.e. no active or waiting writers, no tns. | ||
142 | * | ||
143 | * ISSUE: This approach can permanently starve readers. A reader who sees | 125 | * ISSUE: This approach can permanently starve readers. A reader who sees |
144 | * a writer could instead take a ticket lock (just like a writer would), | 126 | * a writer could instead take a ticket lock (just like a writer would), |
145 | * and atomically enter read mode (with 1 reader) when it gets the ticket. | 127 | * and atomically enter read mode (with 1 reader) when it gets the ticket. |
146 | * This way both readers and writers will always make forward progress | 128 | * This way both readers and writers would always make forward progress |
147 | * in a finite time. | 129 | * in a finite time. |
148 | */ | 130 | */ |
149 | void arch_read_lock_slow(arch_rwlock_t *rwlock, u32 val) | 131 | void arch_read_lock(arch_rwlock_t *rwlock) |
150 | { | 132 | { |
151 | u32 iterations = 0; | 133 | u32 iterations = 0; |
152 | do { | 134 | while (unlikely(!arch_read_trylock(rwlock))) |
153 | if (!(val & 1)) | ||
154 | rwlock->lock = val; | ||
155 | delay_backoff(iterations++); | 135 | delay_backoff(iterations++); |
136 | } | ||
137 | EXPORT_SYMBOL(arch_read_lock); | ||
138 | |||
139 | void arch_read_unlock(arch_rwlock_t *rwlock) | ||
140 | { | ||
141 | u32 val, iterations = 0; | ||
142 | |||
143 | mb(); /* guarantee anything modified under the lock is visible */ | ||
144 | for (;;) { | ||
145 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); | ||
156 | val = __insn_tns((int *)&rwlock->lock); | 146 | val = __insn_tns((int *)&rwlock->lock); |
157 | } while ((val << RD_COUNT_WIDTH) != 0); | 147 | if (likely(val & 1) == 0) { |
158 | rwlock->lock = val + (1 << RD_COUNT_SHIFT); | 148 | rwlock->lock = val - (1 << _RD_COUNT_SHIFT); |
149 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
150 | break; | ||
151 | } | ||
152 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
153 | delay_backoff(iterations++); | ||
154 | } | ||
159 | } | 155 | } |
160 | EXPORT_SYMBOL(arch_read_lock_slow); | 156 | EXPORT_SYMBOL(arch_read_unlock); |
161 | 157 | ||
162 | void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | 158 | /* |
159 | * We don't need an interrupt critical section here (unlike for | ||
160 | * arch_read_lock) since we should never use a bare write lock where | ||
161 | * it could be interrupted by code that could try to re-acquire it. | ||
162 | */ | ||
163 | void arch_write_lock(arch_rwlock_t *rwlock) | ||
163 | { | 164 | { |
164 | /* | 165 | /* |
165 | * The trailing underscore on this variable (and curr_ below) | 166 | * The trailing underscore on this variable (and curr_ below) |
@@ -168,6 +169,12 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | |||
168 | */ | 169 | */ |
169 | u32 my_ticket_; | 170 | u32 my_ticket_; |
170 | u32 iterations = 0; | 171 | u32 iterations = 0; |
172 | u32 val = __insn_tns((int *)&rwlock->lock); | ||
173 | |||
174 | if (likely(val == 0)) { | ||
175 | rwlock->lock = 1 << _WR_NEXT_SHIFT; | ||
176 | return; | ||
177 | } | ||
171 | 178 | ||
172 | /* | 179 | /* |
173 | * Wait until there are no readers, then bump up the next | 180 | * Wait until there are no readers, then bump up the next |
@@ -206,23 +213,47 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | |||
206 | relax(4); | 213 | relax(4); |
207 | } | 214 | } |
208 | } | 215 | } |
209 | EXPORT_SYMBOL(arch_write_lock_slow); | 216 | EXPORT_SYMBOL(arch_write_lock); |
210 | 217 | ||
211 | int __tns_atomic_acquire(atomic_t *lock) | 218 | int arch_write_trylock(arch_rwlock_t *rwlock) |
212 | { | 219 | { |
213 | int ret; | 220 | u32 val = __insn_tns((int *)&rwlock->lock); |
214 | u32 iterations = 0; | ||
215 | 221 | ||
216 | BUG_ON(__insn_mfspr(SPR_INTERRUPT_CRITICAL_SECTION)); | 222 | /* |
217 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); | 223 | * If a tns is in progress, or there's a waiting or active locker, |
224 | * or active readers, we can't take the lock, so give up. | ||
225 | */ | ||
226 | if (unlikely(val != 0)) { | ||
227 | if (!(val & 1)) | ||
228 | rwlock->lock = val; | ||
229 | return 0; | ||
230 | } | ||
218 | 231 | ||
219 | while ((ret = __insn_tns((void *)&lock->counter)) == 1) | 232 | /* Set the "next" field to mark it locked. */ |
220 | delay_backoff(iterations++); | 233 | rwlock->lock = 1 << _WR_NEXT_SHIFT; |
221 | return ret; | 234 | return 1; |
222 | } | 235 | } |
236 | EXPORT_SYMBOL(arch_write_trylock); | ||
223 | 237 | ||
224 | void __tns_atomic_release(atomic_t *p, int v) | 238 | void arch_write_unlock(arch_rwlock_t *rwlock) |
225 | { | 239 | { |
226 | p->counter = v; | 240 | u32 val, eq, mask; |
227 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | 241 | |
242 | mb(); /* guarantee anything modified under the lock is visible */ | ||
243 | val = __insn_tns((int *)&rwlock->lock); | ||
244 | if (likely(val == (1 << _WR_NEXT_SHIFT))) { | ||
245 | rwlock->lock = 0; | ||
246 | return; | ||
247 | } | ||
248 | while (unlikely(val & 1)) { | ||
249 | /* Limited backoff since we are the highest-priority task. */ | ||
250 | relax(4); | ||
251 | val = __insn_tns((int *)&rwlock->lock); | ||
252 | } | ||
253 | mask = 1 << WR_CURR_SHIFT; | ||
254 | val = __insn_addb(val, mask); | ||
255 | eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); | ||
256 | val = __insn_mz(eq & mask, val); | ||
257 | rwlock->lock = val; | ||
228 | } | 258 | } |
259 | EXPORT_SYMBOL(arch_write_unlock); | ||