Diffstat (limited to 'arch')
52 files changed, 865 insertions, 557 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 08948e4e1503..f3b78701c219 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -1,5 +1,5 @@ | |||
1 | # For a description of the syntax of this configuration file, | 1 | # For a description of the syntax of this configuration file, |
2 | # see Documentation/kbuild/config-language.txt. | 2 | # see Documentation/kbuild/kconfig-language.txt. |
3 | 3 | ||
4 | config TILE | 4 | config TILE |
5 | def_bool y | 5 | def_bool y |
@@ -11,17 +11,18 @@ config TILE | |||
11 | select HAVE_GENERIC_HARDIRQS | 11 | select HAVE_GENERIC_HARDIRQS |
12 | select GENERIC_IRQ_PROBE | 12 | select GENERIC_IRQ_PROBE |
13 | select GENERIC_PENDING_IRQ if SMP | 13 | select GENERIC_PENDING_IRQ if SMP |
14 | select GENERIC_HARDIRQS_NO_DEPRECATED | ||
14 | 15 | ||
15 | # FIXME: investigate whether we need/want these options. | 16 | # FIXME: investigate whether we need/want these options. |
16 | # select HAVE_IOREMAP_PROT | 17 | # select HAVE_IOREMAP_PROT |
17 | # select HAVE_OPTPROBES | 18 | # select HAVE_OPTPROBES |
18 | # select HAVE_REGS_AND_STACK_ACCESS_API | 19 | # select HAVE_REGS_AND_STACK_ACCESS_API |
19 | # select HAVE_HW_BREAKPOINT | 20 | # select HAVE_HW_BREAKPOINT |
20 | # select PERF_EVENTS | 21 | # select PERF_EVENTS |
21 | # select HAVE_USER_RETURN_NOTIFIER | 22 | # select HAVE_USER_RETURN_NOTIFIER |
22 | # config NO_BOOTMEM | 23 | # config NO_BOOTMEM |
23 | # config ARCH_SUPPORTS_DEBUG_PAGEALLOC | 24 | # config ARCH_SUPPORTS_DEBUG_PAGEALLOC |
24 | # config HUGETLB_PAGE_SIZE_VARIABLE | 25 | # config HUGETLB_PAGE_SIZE_VARIABLE |
25 | 26 | ||
26 | config MMU | 27 | config MMU |
27 | def_bool y | 28 | def_bool y |
@@ -39,7 +40,7 @@ config HAVE_SETUP_PER_CPU_AREA | |||
39 | def_bool y | 40 | def_bool y |
40 | 41 | ||
41 | config NEED_PER_CPU_PAGE_FIRST_CHUNK | 42 | config NEED_PER_CPU_PAGE_FIRST_CHUNK |
42 | def_bool y | 43 | def_bool y |
43 | 44 | ||
44 | config SYS_SUPPORTS_HUGETLBFS | 45 | config SYS_SUPPORTS_HUGETLBFS |
45 | def_bool y | 46 | def_bool y |
@@ -201,12 +202,6 @@ config NODES_SHIFT | |||
201 | By default, 2, i.e. 2^2 == 4 DDR2 controllers. | 202 | By default, 2, i.e. 2^2 == 4 DDR2 controllers. |
202 | In a system with more controllers, this value should be raised. | 203 | In a system with more controllers, this value should be raised. |
203 | 204 | ||
204 | # Need 16MB areas to enable hugetlb | ||
205 | # See build-time check in arch/tile/mm/init.c. | ||
206 | config FORCE_MAX_ZONEORDER | ||
207 | int | ||
208 | default 9 | ||
209 | |||
210 | choice | 205 | choice |
211 | depends on !TILEGX | 206 | depends on !TILEGX |
212 | prompt "Memory split" if EXPERT | 207 | prompt "Memory split" if EXPERT |
@@ -233,8 +228,12 @@ choice | |||
233 | bool "3.5G/0.5G user/kernel split" | 228 | bool "3.5G/0.5G user/kernel split" |
234 | config VMSPLIT_3G | 229 | config VMSPLIT_3G |
235 | bool "3G/1G user/kernel split" | 230 | bool "3G/1G user/kernel split" |
236 | config VMSPLIT_3G_OPT | 231 | config VMSPLIT_2_75G |
237 | bool "3G/1G user/kernel split (for full 1G low memory)" | 232 | bool "2.75G/1.25G user/kernel split (for full 1G low memory)" |
233 | config VMSPLIT_2_5G | ||
234 | bool "2.5G/1.5G user/kernel split" | ||
235 | config VMSPLIT_2_25G | ||
236 | bool "2.25G/1.75G user/kernel split" | ||
238 | config VMSPLIT_2G | 237 | config VMSPLIT_2G |
239 | bool "2G/2G user/kernel split" | 238 | bool "2G/2G user/kernel split" |
240 | config VMSPLIT_1G | 239 | config VMSPLIT_1G |
@@ -245,7 +244,9 @@ config PAGE_OFFSET | |||
245 | hex | 244 | hex |
246 | default 0xF0000000 if VMSPLIT_3_75G | 245 | default 0xF0000000 if VMSPLIT_3_75G |
247 | default 0xE0000000 if VMSPLIT_3_5G | 246 | default 0xE0000000 if VMSPLIT_3_5G |
248 | default 0xB0000000 if VMSPLIT_3G_OPT | 247 | default 0xB0000000 if VMSPLIT_2_75G |
248 | default 0xA0000000 if VMSPLIT_2_5G | ||
249 | default 0x90000000 if VMSPLIT_2_25G | ||
249 | default 0x80000000 if VMSPLIT_2G | 250 | default 0x80000000 if VMSPLIT_2G |
250 | default 0x40000000 if VMSPLIT_1G | 251 | default 0x40000000 if VMSPLIT_1G |
251 | default 0xC0000000 | 252 | default 0xC0000000 |
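For reference, the renamed split options above line up with the PAGE_OFFSET defaults: 0xB0000000 is 11 x 256 MB = 2816 MB = 2.75 GB of user address space (leaving 1.25 GB for the kernel), while 0xA0000000 and 0x90000000 give the 2.5 GB and 2.25 GB user regions. The new VMSPLIT_2_75G name therefore matches the boundary that the old VMSPLIT_3G_OPT option actually used.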
diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h
index 9d0bfa7e59be..96b5710505b6 100644
--- a/arch/tile/include/arch/interrupts_32.h
+++ b/arch/tile/include/arch/interrupts_32.h
@@ -16,10 +16,11 @@ | |||
16 | #define __ARCH_INTERRUPTS_H__ | 16 | #define __ARCH_INTERRUPTS_H__ |
17 | 17 | ||
18 | /** Mask for an interrupt. */ | 18 | /** Mask for an interrupt. */ |
19 | #ifdef __ASSEMBLER__ | ||
20 | /* Note: must handle breaking interrupts into high and low words manually. */ | 19 | /* Note: must handle breaking interrupts into high and low words manually. */ |
21 | #define INT_MASK(intno) (1 << (intno)) | 20 | #define INT_MASK_LO(intno) (1 << (intno)) |
22 | #else | 21 | #define INT_MASK_HI(intno) (1 << ((intno) - 32)) |
22 | |||
23 | #ifndef __ASSEMBLER__ | ||
23 | #define INT_MASK(intno) (1ULL << (intno)) | 24 | #define INT_MASK(intno) (1ULL << (intno)) |
24 | #endif | 25 | #endif |
25 | 26 | ||
@@ -89,6 +90,7 @@ | |||
89 | 90 | ||
90 | #define NUM_INTERRUPTS 49 | 91 | #define NUM_INTERRUPTS 49 |
91 | 92 | ||
93 | #ifndef __ASSEMBLER__ | ||
92 | #define QUEUED_INTERRUPTS ( \ | 94 | #define QUEUED_INTERRUPTS ( \ |
93 | INT_MASK(INT_MEM_ERROR) | \ | 95 | INT_MASK(INT_MEM_ERROR) | \ |
94 | INT_MASK(INT_DMATLB_MISS) | \ | 96 | INT_MASK(INT_DMATLB_MISS) | \ |
@@ -301,4 +303,5 @@ | |||
301 | INT_MASK(INT_DOUBLE_FAULT) | \ | 303 | INT_MASK(INT_DOUBLE_FAULT) | \ |
302 | INT_MASK(INT_AUX_PERF_COUNT) | \ | 304 | INT_MASK(INT_AUX_PERF_COUNT) | \ |
303 | 0) | 305 | 0) |
306 | #endif /* !__ASSEMBLER__ */ | ||
304 | #endif /* !__ARCH_INTERRUPTS_H__ */ | 307 | #endif /* !__ARCH_INTERRUPTS_H__ */ |
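The LO/HI split exists because the 32-bit assembly code cannot build a 64-bit interrupt mask in one instruction; the two new macros give the bit position within the low or high word, and they recombine into the C-level INT_MASK() value. A minimal standalone sketch, illustration only, with the macros repeated from the patch so it compiles outside the kernel tree:

#include <assert.h>
#include <stdint.h>

#define INT_MASK(intno)     (1ULL << (intno))          /* C-only 64-bit mask */
#define INT_MASK_LO(intno)  (1U << (intno))            /* bit within low word */
#define INT_MASK_HI(intno)  (1U << ((intno) - 32))     /* bit within high word */

int main(void)
{
	/* The 32-bit halves recombine into the full 64-bit mask. */
	assert(INT_MASK(5)  == (uint64_t)INT_MASK_LO(5));
	assert(INT_MASK(40) == ((uint64_t)INT_MASK_HI(40) << 32));
	return 0;
}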
diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/arch/sim.h
index 74b7c1624d34..e54b7b0527f3 100644
--- a/arch/tile/include/arch/sim.h
+++ b/arch/tile/include/arch/sim.h
@@ -152,16 +152,33 @@ sim_dump(unsigned int mask) | |||
152 | /** | 152 | /** |
153 | * Print a string to the simulator stdout. | 153 | * Print a string to the simulator stdout. |
154 | * | 154 | * |
155 | * @param str The string to be written; a newline is automatically added. | 155 | * @param str The string to be written. |
156 | */ | ||
157 | static __inline void | ||
158 | sim_print(const char* str) | ||
159 | { | ||
160 | for ( ; *str != '\0'; str++) | ||
161 | { | ||
162 | __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | | ||
163 | (*str << _SIM_CONTROL_OPERATOR_BITS)); | ||
164 | } | ||
165 | __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | | ||
166 | (SIM_PUTC_FLUSH_BINARY << _SIM_CONTROL_OPERATOR_BITS)); | ||
167 | } | ||
168 | |||
169 | |||
170 | /** | ||
171 | * Print a string to the simulator stdout. | ||
172 | * | ||
173 | * @param str The string to be written (a newline is automatically added). | ||
156 | */ | 174 | */ |
157 | static __inline void | 175 | static __inline void |
158 | sim_print_string(const char* str) | 176 | sim_print_string(const char* str) |
159 | { | 177 | { |
160 | int i; | 178 | for ( ; *str != '\0'; str++) |
161 | for (i = 0; str[i] != 0; i++) | ||
162 | { | 179 | { |
163 | __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | | 180 | __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | |
164 | (str[i] << _SIM_CONTROL_OPERATOR_BITS)); | 181 | (*str << _SIM_CONTROL_OPERATOR_BITS)); |
165 | } | 182 | } |
166 | __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | | 183 | __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | |
167 | (SIM_PUTC_FLUSH_STRING << _SIM_CONTROL_OPERATOR_BITS)); | 184 | (SIM_PUTC_FLUSH_STRING << _SIM_CONTROL_OPERATOR_BITS)); |
@@ -203,7 +220,7 @@ sim_command(const char* str) | |||
203 | * we are passing to the simulator are actually valid in the registers | 220 | * we are passing to the simulator are actually valid in the registers |
204 | * (i.e. returned from memory) prior to the SIM_CONTROL spr. | 221 | * (i.e. returned from memory) prior to the SIM_CONTROL spr. |
205 | */ | 222 | */ |
206 | static __inline int _sim_syscall0(int val) | 223 | static __inline long _sim_syscall0(int val) |
207 | { | 224 | { |
208 | long result; | 225 | long result; |
209 | __asm__ __volatile__ ("mtspr SIM_CONTROL, r0" | 226 | __asm__ __volatile__ ("mtspr SIM_CONTROL, r0" |
@@ -211,7 +228,7 @@ static __inline int _sim_syscall0(int val) | |||
211 | return result; | 228 | return result; |
212 | } | 229 | } |
213 | 230 | ||
214 | static __inline int _sim_syscall1(int val, long arg1) | 231 | static __inline long _sim_syscall1(int val, long arg1) |
215 | { | 232 | { |
216 | long result; | 233 | long result; |
217 | __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }" | 234 | __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }" |
@@ -219,7 +236,7 @@ static __inline int _sim_syscall1(int val, long arg1) | |||
219 | return result; | 236 | return result; |
220 | } | 237 | } |
221 | 238 | ||
222 | static __inline int _sim_syscall2(int val, long arg1, long arg2) | 239 | static __inline long _sim_syscall2(int val, long arg1, long arg2) |
223 | { | 240 | { |
224 | long result; | 241 | long result; |
225 | __asm__ __volatile__ ("{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" | 242 | __asm__ __volatile__ ("{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" |
@@ -233,7 +250,7 @@ static __inline int _sim_syscall2(int val, long arg1, long arg2) | |||
233 | the register values for arguments 3 and up may still be in flight | 250 | the register values for arguments 3 and up may still be in flight |
234 | to the core from a stack frame reload. */ | 251 | to the core from a stack frame reload. */ |
235 | 252 | ||
236 | static __inline int _sim_syscall3(int val, long arg1, long arg2, long arg3) | 253 | static __inline long _sim_syscall3(int val, long arg1, long arg2, long arg3) |
237 | { | 254 | { |
238 | long result; | 255 | long result; |
239 | __asm__ __volatile__ ("{ and zero, r3, r3 };" | 256 | __asm__ __volatile__ ("{ and zero, r3, r3 };" |
@@ -244,7 +261,7 @@ static __inline int _sim_syscall3(int val, long arg1, long arg2, long arg3) | |||
244 | return result; | 261 | return result; |
245 | } | 262 | } |
246 | 263 | ||
247 | static __inline int _sim_syscall4(int val, long arg1, long arg2, long arg3, | 264 | static __inline long _sim_syscall4(int val, long arg1, long arg2, long arg3, |
248 | long arg4) | 265 | long arg4) |
249 | { | 266 | { |
250 | long result; | 267 | long result; |
@@ -256,7 +273,7 @@ static __inline int _sim_syscall4(int val, long arg1, long arg2, long arg3, | |||
256 | return result; | 273 | return result; |
257 | } | 274 | } |
258 | 275 | ||
259 | static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3, | 276 | static __inline long _sim_syscall5(int val, long arg1, long arg2, long arg3, |
260 | long arg4, long arg5) | 277 | long arg4, long arg5) |
261 | { | 278 | { |
262 | long result; | 279 | long result; |
@@ -268,7 +285,6 @@ static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3, | |||
268 | return result; | 285 | return result; |
269 | } | 286 | } |
270 | 287 | ||
271 | |||
272 | /** | 288 | /** |
273 | * Make a special syscall to the simulator itself, if running under | 289 | * Make a special syscall to the simulator itself, if running under |
274 | * simulation. This is used as the implementation of other functions | 290 | * simulation. This is used as the implementation of other functions |
@@ -281,7 +297,8 @@ static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3, | |||
281 | */ | 297 | */ |
282 | #define _sim_syscall(syscall_num, nr, args...) \ | 298 | #define _sim_syscall(syscall_num, nr, args...) \ |
283 | _sim_syscall##nr( \ | 299 | _sim_syscall##nr( \ |
284 | ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, args) | 300 | ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, \ |
301 | ##args) | ||
285 | 302 | ||
286 | 303 | ||
287 | /* Values for the "access_mask" parameters below. */ | 304 | /* Values for the "access_mask" parameters below. */ |
@@ -365,6 +382,13 @@ sim_validate_lines_evicted(unsigned long long pa, unsigned long length) | |||
365 | } | 382 | } |
366 | 383 | ||
367 | 384 | ||
385 | /* Return the current CPU speed in cycles per second. */ | ||
386 | static __inline long | ||
387 | sim_query_cpu_speed(void) | ||
388 | { | ||
389 | return _sim_syscall(SIM_SYSCALL_QUERY_CPU_SPEED, 0); | ||
390 | } | ||
391 | |||
368 | #endif /* !__DOXYGEN__ */ | 392 | #endif /* !__DOXYGEN__ */ |
369 | 393 | ||
370 | 394 | ||
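The new sim_query_cpu_speed() helper returns the simulated clock rate in cycles per second via the SIM_SYSCALL_QUERY_CPU_SPEED number added in sim_def.h below. A hedged usage sketch, assuming the caller runs under the simulator; the cycles_to_ns() wrapper is hypothetical, not part of the header:

#include <arch/sim.h>

/* Convert a cycle delta to nanoseconds using the simulator-reported rate. */
static unsigned long long cycles_to_ns(unsigned long long cycles)
{
	long hz = sim_query_cpu_speed();    /* cycles per second, per the patch */
	return hz > 0 ? (cycles * 1000000000ULL) / hz : 0;
}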
diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/arch/sim_def.h
index 7a17082c3773..4b44a2b6a09a 100644
--- a/arch/tile/include/arch/sim_def.h
+++ b/arch/tile/include/arch/sim_def.h
@@ -243,6 +243,9 @@ | |||
243 | */ | 243 | */ |
244 | #define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5 | 244 | #define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5 |
245 | 245 | ||
246 | /** Syscall number for sim_query_cpu_speed(). */ | ||
247 | #define SIM_SYSCALL_QUERY_CPU_SPEED 6 | ||
248 | |||
246 | 249 | ||
247 | /* | 250 | /* |
248 | * Bit masks which can be shifted by 8, combined with | 251 | * Bit masks which can be shifted by 8, combined with |
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 3b8f55b82dee..849ab2fa1f5c 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -1,3 +1,4 @@ | |||
1 | include include/asm-generic/Kbuild.asm | 1 | include include/asm-generic/Kbuild.asm |
2 | 2 | ||
3 | header-y += ucontext.h | 3 | header-y += ucontext.h |
4 | header-y += hardwall.h | ||
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index b8c49f98a44c..75a16028a952 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -32,7 +32,7 @@ | |||
32 | */ | 32 | */ |
33 | static inline int atomic_read(const atomic_t *v) | 33 | static inline int atomic_read(const atomic_t *v) |
34 | { | 34 | { |
35 | return v->counter; | 35 | return ACCESS_ONCE(v->counter); |
36 | } | 36 | } |
37 | 37 | ||
38 | /** | 38 | /** |
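The ACCESS_ONCE() change makes atomic_read() a single volatile load, so the compiler can neither cache the value across a loop nor read it twice. A sketch of roughly what it reduces to, illustrative only:

static inline int atomic_read_sketch(const atomic_t *v)
{
	/* ACCESS_ONCE(x) is essentially (*(volatile typeof(x) *)&(x)). */
	return *(volatile int *)&v->counter;
}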
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
index 7a93c001ac19..2638be51a164 100644
--- a/arch/tile/include/asm/bitops_32.h
+++ b/arch/tile/include/asm/bitops_32.h
@@ -122,7 +122,7 @@ static inline int test_and_change_bit(unsigned nr, | |||
122 | return (_atomic_xor(addr, mask) & mask) != 0; | 122 | return (_atomic_xor(addr, mask) & mask) != 0; |
123 | } | 123 | } |
124 | 124 | ||
125 | /* See discussion at smp_mb__before_atomic_dec() in <asm/atomic.h>. */ | 125 | /* See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>. */ |
126 | #define smp_mb__before_clear_bit() smp_mb() | 126 | #define smp_mb__before_clear_bit() smp_mb() |
127 | #define smp_mb__after_clear_bit() do {} while (0) | 127 | #define smp_mb__after_clear_bit() do {} while (0) |
128 | 128 | ||
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index 08a2815b5e4e..392e5333dd8b 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -40,7 +40,7 @@ | |||
40 | #define INTERNODE_CACHE_BYTES L2_CACHE_BYTES | 40 | #define INTERNODE_CACHE_BYTES L2_CACHE_BYTES |
41 | 41 | ||
42 | /* Group together read-mostly things to avoid cache false sharing */ | 42 | /* Group together read-mostly things to avoid cache false sharing */ |
43 | #define __read_mostly __attribute__((__section__(".data.read_mostly"))) | 43 | #define __read_mostly __attribute__((__section__(".data..read_mostly"))) |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Attribute for data that is kept read/write coherent until the end of | 46 | * Attribute for data that is kept read/write coherent until the end of |
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index 14a3f8556ace..12fb0fb330ee 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -138,55 +138,12 @@ static inline void finv_buffer(void *buffer, size_t size) | |||
138 | } | 138 | } |
139 | 139 | ||
140 | /* | 140 | /* |
141 | * Flush & invalidate a VA range that is homed remotely on a single core, | 141 | * Flush and invalidate a VA range that is homed remotely, waiting |
142 | * waiting until the memory controller holds the flushed values. | 142 | * until the memory controller holds the flushed values. If "hfh" is |
143 | * true, we will do a more expensive flush involving additional loads | ||
144 | * to make sure we have touched all the possible home cpus of a buffer | ||
145 | * that is homed with "hash for home". | ||
143 | */ | 146 | */ |
144 | static inline void finv_buffer_remote(void *buffer, size_t size) | 147 | void finv_buffer_remote(void *buffer, size_t size, int hfh); |
145 | { | ||
146 | char *p; | ||
147 | int i; | ||
148 | |||
149 | /* | ||
150 | * Flush and invalidate the buffer out of the local L1/L2 | ||
151 | * and request the home cache to flush and invalidate as well. | ||
152 | */ | ||
153 | __finv_buffer(buffer, size); | ||
154 | |||
155 | /* | ||
156 | * Wait for the home cache to acknowledge that it has processed | ||
157 | * all the flush-and-invalidate requests. This does not mean | ||
158 | * that the flushed data has reached the memory controller yet, | ||
159 | * but it does mean the home cache is processing the flushes. | ||
160 | */ | ||
161 | __insn_mf(); | ||
162 | |||
163 | /* | ||
164 | * Issue a load to the last cache line, which can't complete | ||
165 | * until all the previously-issued flushes to the same memory | ||
166 | * controller have also completed. If we weren't striping | ||
167 | * memory, that one load would be sufficient, but since we may | ||
168 | * be, we also need to back up to the last load issued to | ||
169 | * another memory controller, which would be the point where | ||
170 | * we crossed an 8KB boundary (the granularity of striping | ||
171 | * across memory controllers). Keep backing up and doing this | ||
172 | * until we are before the beginning of the buffer, or have | ||
173 | * hit all the controllers. | ||
174 | */ | ||
175 | for (i = 0, p = (char *)buffer + size - 1; | ||
176 | i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer; | ||
177 | ++i) { | ||
178 | const unsigned long STRIPE_WIDTH = 8192; | ||
179 | |||
180 | /* Force a load instruction to issue. */ | ||
181 | *(volatile char *)p; | ||
182 | |||
183 | /* Jump to end of previous stripe. */ | ||
184 | p -= STRIPE_WIDTH; | ||
185 | p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1)); | ||
186 | } | ||
187 | |||
188 | /* Wait for the loads (and thus flushes) to have completed. */ | ||
189 | __insn_mf(); | ||
190 | } | ||
191 | 148 | ||
192 | #endif /* _ASM_TILE_CACHEFLUSH_H */ | 149 | #endif /* _ASM_TILE_CACHEFLUSH_H */ |
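The out-of-lined finv_buffer_remote() keeps the old semantics but adds an "hfh" flag for hash-for-home buffers. A hedged call-site sketch; dma_buf, dma_buf_size and buf_is_hash_for_home are hypothetical names:

/* Flush and invalidate a remotely-homed buffer before handing it to a
 * device; pass a nonzero hfh so every possible home cpu is touched when
 * the buffer is cached hash-for-home. */
finv_buffer_remote(dma_buf, dma_buf_size, buf_is_hash_for_home);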
diff --git a/arch/tile/include/asm/edac.h b/arch/tile/include/asm/edac.h
new file mode 100644
index 000000000000..87fc83eeaffd
--- /dev/null
+++ b/arch/tile/include/asm/edac.h
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * Copyright 2011 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #ifndef _ASM_TILE_EDAC_H | ||
16 | #define _ASM_TILE_EDAC_H | ||
17 | |||
18 | /* ECC atomic, DMA, SMP and interrupt safe scrub function */ | ||
19 | |||
20 | static inline void atomic_scrub(void *va, u32 size) | ||
21 | { | ||
22 | /* | ||
23 | * There is nothing to be done here because the CE is | ||
24 | * corrected by the mshim. | ||
25 | */ | ||
26 | return; | ||
27 | } | ||
28 | |||
29 | #endif /* _ASM_TILE_EDAC_H */ | ||
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index 0521c277bbde..d396d1805163 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -54,7 +54,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, | |||
54 | static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | 54 | static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, |
55 | pte_t *ptep, pte_t pte) | 55 | pte_t *ptep, pte_t pte) |
56 | { | 56 | { |
57 | set_pte_order(ptep, pte, HUGETLB_PAGE_ORDER); | 57 | set_pte(ptep, pte); |
58 | } | 58 | } |
59 | 59 | ||
60 | static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, | 60 | static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, |
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index 641e4ff3d805..5db0ce54284d 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -18,6 +18,8 @@ | |||
18 | #include <arch/interrupts.h> | 18 | #include <arch/interrupts.h> |
19 | #include <arch/chip.h> | 19 | #include <arch/chip.h> |
20 | 20 | ||
21 | #if !defined(__tilegx__) && defined(__ASSEMBLY__) | ||
22 | |||
21 | /* | 23 | /* |
22 | * The set of interrupts we want to allow when interrupts are nominally | 24 | * The set of interrupts we want to allow when interrupts are nominally |
23 | * disabled. The remainder are effectively "NMI" interrupts from | 25 | * disabled. The remainder are effectively "NMI" interrupts from |
@@ -25,6 +27,16 @@ | |||
25 | * interrupts (aka "non-queued") are not blocked by the mask in any case. | 27 | * interrupts (aka "non-queued") are not blocked by the mask in any case. |
26 | */ | 28 | */ |
27 | #if CHIP_HAS_AUX_PERF_COUNTERS() | 29 | #if CHIP_HAS_AUX_PERF_COUNTERS() |
30 | #define LINUX_MASKABLE_INTERRUPTS_HI \ | ||
31 | (~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT))) | ||
32 | #else | ||
33 | #define LINUX_MASKABLE_INTERRUPTS_HI \ | ||
34 | (~(INT_MASK_HI(INT_PERF_COUNT))) | ||
35 | #endif | ||
36 | |||
37 | #else | ||
38 | |||
39 | #if CHIP_HAS_AUX_PERF_COUNTERS() | ||
28 | #define LINUX_MASKABLE_INTERRUPTS \ | 40 | #define LINUX_MASKABLE_INTERRUPTS \ |
29 | (~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT))) | 41 | (~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT))) |
30 | #else | 42 | #else |
@@ -32,6 +44,8 @@ | |||
32 | (~(INT_MASK(INT_PERF_COUNT))) | 44 | (~(INT_MASK(INT_PERF_COUNT))) |
33 | #endif | 45 | #endif |
34 | 46 | ||
47 | #endif | ||
48 | |||
35 | #ifndef __ASSEMBLY__ | 49 | #ifndef __ASSEMBLY__ |
36 | 50 | ||
37 | /* NOTE: we can't include <linux/percpu.h> due to #include dependencies. */ | 51 | /* NOTE: we can't include <linux/percpu.h> due to #include dependencies. */ |
@@ -224,11 +238,11 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); | |||
224 | #define IRQ_DISABLE(tmp0, tmp1) \ | 238 | #define IRQ_DISABLE(tmp0, tmp1) \ |
225 | { \ | 239 | { \ |
226 | movei tmp0, -1; \ | 240 | movei tmp0, -1; \ |
227 | moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS) \ | 241 | moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \ |
228 | }; \ | 242 | }; \ |
229 | { \ | 243 | { \ |
230 | mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp0; \ | 244 | mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp0; \ |
231 | auli tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS) \ | 245 | auli tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS_HI) \ |
232 | }; \ | 246 | }; \ |
233 | mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp1 | 247 | mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp1 |
234 | 248 | ||
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 7979a45430d3..3eb53525bf9d 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -16,10 +16,11 @@ | |||
16 | #define _ASM_TILE_PAGE_H | 16 | #define _ASM_TILE_PAGE_H |
17 | 17 | ||
18 | #include <linux/const.h> | 18 | #include <linux/const.h> |
19 | #include <hv/pagesize.h> | ||
19 | 20 | ||
20 | /* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */ | 21 | /* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */ |
21 | #define PAGE_SHIFT 16 | 22 | #define PAGE_SHIFT HV_LOG2_PAGE_SIZE_SMALL |
22 | #define HPAGE_SHIFT 24 | 23 | #define HPAGE_SHIFT HV_LOG2_PAGE_SIZE_LARGE |
23 | 24 | ||
24 | #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) | 25 | #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) |
25 | #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) | 26 | #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) |
@@ -29,25 +30,18 @@ | |||
29 | 30 | ||
30 | #ifdef __KERNEL__ | 31 | #ifdef __KERNEL__ |
31 | 32 | ||
32 | #include <hv/hypervisor.h> | ||
33 | #include <arch/chip.h> | ||
34 | |||
35 | /* | 33 | /* |
36 | * The {,H}PAGE_SHIFT values must match the HV_LOG2_PAGE_SIZE_xxx | 34 | * If the Kconfig doesn't specify, set a maximum zone order that |
37 | * definitions in <hv/hypervisor.h>. We validate this at build time | 35 | * is enough so that we can create huge pages from small pages given |
38 | * here, and again at runtime during early boot. We provide a | 36 | * the respective sizes of the two page types. See <linux/mmzone.h>. |
39 | * separate definition since userspace doesn't have <hv/hypervisor.h>. | ||
40 | * | ||
41 | * Be careful to distinguish PAGE_SHIFT from HV_PTE_INDEX_PFN, since | ||
42 | * they are the same on i386 but not TILE. | ||
43 | */ | 37 | */ |
44 | #if HV_LOG2_PAGE_SIZE_SMALL != PAGE_SHIFT | 38 | #ifndef CONFIG_FORCE_MAX_ZONEORDER |
45 | # error Small page size mismatch in Linux | 39 | #define CONFIG_FORCE_MAX_ZONEORDER (HPAGE_SHIFT - PAGE_SHIFT + 1) |
46 | #endif | ||
47 | #if HV_LOG2_PAGE_SIZE_LARGE != HPAGE_SHIFT | ||
48 | # error Huge page size mismatch in Linux | ||
49 | #endif | 40 | #endif |
50 | 41 | ||
42 | #include <hv/hypervisor.h> | ||
43 | #include <arch/chip.h> | ||
44 | |||
51 | #ifndef __ASSEMBLY__ | 45 | #ifndef __ASSEMBLY__ |
52 | 46 | ||
53 | #include <linux/types.h> | 47 | #include <linux/types.h> |
@@ -81,12 +75,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, | |||
81 | * Hypervisor page tables are made of the same basic structure. | 75 | * Hypervisor page tables are made of the same basic structure. |
82 | */ | 76 | */ |
83 | 77 | ||
84 | typedef __u64 pteval_t; | ||
85 | typedef __u64 pmdval_t; | ||
86 | typedef __u64 pudval_t; | ||
87 | typedef __u64 pgdval_t; | ||
88 | typedef __u64 pgprotval_t; | ||
89 | |||
90 | typedef HV_PTE pte_t; | 78 | typedef HV_PTE pte_t; |
91 | typedef HV_PTE pgd_t; | 79 | typedef HV_PTE pgd_t; |
92 | typedef HV_PTE pgprot_t; | 80 | typedef HV_PTE pgprot_t; |
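The FORCE_MAX_ZONEORDER block deleted from the Kconfig earlier is replaced by the fallback above: with the old constants PAGE_SHIFT = 16 and HPAGE_SHIFT = 24, HPAGE_SHIFT - PAGE_SHIFT + 1 = 24 - 16 + 1 = 9, exactly the value the Kconfig used to hard-code, so 16 MB huge pages can still be assembled from 64 KB small pages.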
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
index cf52791a5501..e919c0bdc22d 100644
--- a/arch/tile/include/asm/pgalloc.h
+++ b/arch/tile/include/asm/pgalloc.h
@@ -41,9 +41,9 @@ | |||
41 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) | 41 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) |
42 | { | 42 | { |
43 | #ifdef CONFIG_64BIT | 43 | #ifdef CONFIG_64BIT |
44 | set_pte_order(pmdp, pmd, L2_USER_PGTABLE_ORDER); | 44 | set_pte(pmdp, pmd); |
45 | #else | 45 | #else |
46 | set_pte_order(&pmdp->pud.pgd, pmd.pud.pgd, L2_USER_PGTABLE_ORDER); | 46 | set_pte(&pmdp->pud.pgd, pmd.pud.pgd); |
47 | #endif | 47 | #endif |
48 | } | 48 | } |
49 | 49 | ||
@@ -100,6 +100,9 @@ pte_t *get_prealloc_pte(unsigned long pfn); | |||
100 | /* During init, we can shatter kernel huge pages if needed. */ | 100 | /* During init, we can shatter kernel huge pages if needed. */ |
101 | void shatter_pmd(pmd_t *pmd); | 101 | void shatter_pmd(pmd_t *pmd); |
102 | 102 | ||
103 | /* After init, a more complex technique is required. */ | ||
104 | void shatter_huge_page(unsigned long addr); | ||
105 | |||
103 | #ifdef __tilegx__ | 106 | #ifdef __tilegx__ |
104 | /* We share a single page allocator for both L1 and L2 page tables. */ | 107 | /* We share a single page allocator for both L1 and L2 page tables. */ |
105 | #if HV_L1_SIZE != HV_L2_SIZE | 108 | #if HV_L1_SIZE != HV_L2_SIZE |
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index a6604e9485da..1a20b7ef8ea2 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -233,15 +233,23 @@ static inline void __pte_clear(pte_t *ptep) | |||
233 | #define pgd_ERROR(e) \ | 233 | #define pgd_ERROR(e) \ |
234 | pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e)) | 234 | pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e)) |
235 | 235 | ||
236 | /* Return PA and protection info for a given kernel VA. */ | ||
237 | int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte); | ||
238 | |||
239 | /* | ||
240 | * __set_pte() ensures we write the 64-bit PTE with 32-bit words in | ||
241 | * the right order on 32-bit platforms and also allows us to write | ||
242 | * hooks to check valid PTEs, etc., if we want. | ||
243 | */ | ||
244 | void __set_pte(pte_t *ptep, pte_t pte); | ||
245 | |||
236 | /* | 246 | /* |
237 | * set_pte_order() sets the given PTE and also sanity-checks the | 247 | * set_pte() sets the given PTE and also sanity-checks the |
238 | * requested PTE against the page homecaching. Unspecified parts | 248 | * requested PTE against the page homecaching. Unspecified parts |
239 | * of the PTE are filled in when it is written to memory, i.e. all | 249 | * of the PTE are filled in when it is written to memory, i.e. all |
240 | * caching attributes if "!forcecache", or the home cpu if "anyhome". | 250 | * caching attributes if "!forcecache", or the home cpu if "anyhome". |
241 | */ | 251 | */ |
242 | extern void set_pte_order(pte_t *ptep, pte_t pte, int order); | 252 | extern void set_pte(pte_t *ptep, pte_t pte); |
243 | |||
244 | #define set_pte(ptep, pteval) set_pte_order(ptep, pteval, 0) | ||
245 | #define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval) | 253 | #define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval) |
246 | #define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval) | 254 | #define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval) |
247 | 255 | ||
@@ -293,21 +301,6 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next); | |||
293 | #define __swp_entry_to_pte(swp) ((pte_t) { (((long long) ((swp).val)) << 32) }) | 301 | #define __swp_entry_to_pte(swp) ((pte_t) { (((long long) ((swp).val)) << 32) }) |
294 | 302 | ||
295 | /* | 303 | /* |
296 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); | ||
297 | * | ||
298 | * dst - pointer to pgd range anwhere on a pgd page | ||
299 | * src - "" | ||
300 | * count - the number of pgds to copy. | ||
301 | * | ||
302 | * dst and src can be on the same page, but the range must not overlap, | ||
303 | * and must not cross a page boundary. | ||
304 | */ | ||
305 | static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) | ||
306 | { | ||
307 | memcpy(dst, src, count * sizeof(pgd_t)); | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * Conversion functions: convert a page and protection to a page entry, | 304 | * Conversion functions: convert a page and protection to a page entry, |
312 | * and a page entry and page directory to the page they refer to. | 305 | * and a page entry and page directory to the page they refer to. |
313 | */ | 306 | */ |
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 53ec34884744..9f98529761fd 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -24,6 +24,7 @@ | |||
24 | #define PGDIR_SIZE HV_PAGE_SIZE_LARGE | 24 | #define PGDIR_SIZE HV_PAGE_SIZE_LARGE |
25 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) | 25 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) |
26 | #define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) | 26 | #define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) |
27 | #define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t)) | ||
27 | 28 | ||
28 | /* | 29 | /* |
29 | * The level-2 index is defined by the difference between the huge | 30 | * The level-2 index is defined by the difference between the huge |
@@ -33,6 +34,7 @@ | |||
33 | * this nomenclature is somewhat confusing. | 34 | * this nomenclature is somewhat confusing. |
34 | */ | 35 | */ |
35 | #define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)) | 36 | #define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)) |
37 | #define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t)) | ||
36 | 38 | ||
37 | #ifndef __ASSEMBLY__ | 39 | #ifndef __ASSEMBLY__ |
38 | 40 | ||
@@ -94,7 +96,6 @@ static inline int pgd_addr_invalid(unsigned long addr) | |||
94 | */ | 96 | */ |
95 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | 97 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
96 | #define __HAVE_ARCH_PTEP_SET_WRPROTECT | 98 | #define __HAVE_ARCH_PTEP_SET_WRPROTECT |
97 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR | ||
98 | 99 | ||
99 | extern int ptep_test_and_clear_young(struct vm_area_struct *, | 100 | extern int ptep_test_and_clear_young(struct vm_area_struct *, |
100 | unsigned long addr, pte_t *); | 101 | unsigned long addr, pte_t *); |
@@ -110,6 +111,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, | |||
110 | return pte; | 111 | return pte; |
111 | } | 112 | } |
112 | 113 | ||
114 | static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
115 | { | ||
116 | set_pte(&pmdp->pud.pgd, pmdval.pud.pgd); | ||
117 | } | ||
118 | |||
113 | /* Create a pmd from a PTFN. */ | 119 | /* Create a pmd from a PTFN. */ |
114 | static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) | 120 | static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) |
115 | { | 121 | { |
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index a9e7c8760334..e6889474038a 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -269,7 +269,6 @@ extern char chip_model[64]; | |||
269 | /* Data on which physical memory controller corresponds to which NUMA node. */ | 269 | /* Data on which physical memory controller corresponds to which NUMA node. */ |
270 | extern int node_controller[]; | 270 | extern int node_controller[]; |
271 | 271 | ||
272 | |||
273 | /* Do we dump information to the console when a user application crashes? */ | 272 | /* Do we dump information to the console when a user application crashes? */ |
274 | extern int show_crashinfo; | 273 | extern int show_crashinfo; |
275 | 274 | ||
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index ac6d343129d3..6be2246e015c 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -141,6 +141,9 @@ struct single_step_state { | |||
141 | /* Single-step the instruction at regs->pc */ | 141 | /* Single-step the instruction at regs->pc */ |
142 | extern void single_step_once(struct pt_regs *regs); | 142 | extern void single_step_once(struct pt_regs *regs); |
143 | 143 | ||
144 | /* Clean up after execve(). */ | ||
145 | extern void single_step_execve(void); | ||
146 | |||
144 | struct task_struct; | 147 | struct task_struct; |
145 | 148 | ||
146 | extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, | 149 | extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, |
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index 88efdde8dd2b..a8f2c6e31a87 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -78,13 +78,6 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock); | |||
78 | #define _RD_COUNT_SHIFT 24 | 78 | #define _RD_COUNT_SHIFT 24 |
79 | #define _RD_COUNT_WIDTH 8 | 79 | #define _RD_COUNT_WIDTH 8 |
80 | 80 | ||
81 | /* Internal functions; do not use. */ | ||
82 | void arch_read_lock_slow(arch_rwlock_t *, u32); | ||
83 | int arch_read_trylock_slow(arch_rwlock_t *); | ||
84 | void arch_read_unlock_slow(arch_rwlock_t *); | ||
85 | void arch_write_lock_slow(arch_rwlock_t *, u32); | ||
86 | void arch_write_unlock_slow(arch_rwlock_t *, u32); | ||
87 | |||
88 | /** | 81 | /** |
89 | * arch_read_can_lock() - would read_trylock() succeed? | 82 | * arch_read_can_lock() - would read_trylock() succeed? |
90 | */ | 83 | */ |
@@ -104,94 +97,32 @@ static inline int arch_write_can_lock(arch_rwlock_t *rwlock) | |||
104 | /** | 97 | /** |
105 | * arch_read_lock() - acquire a read lock. | 98 | * arch_read_lock() - acquire a read lock. |
106 | */ | 99 | */ |
107 | static inline void arch_read_lock(arch_rwlock_t *rwlock) | 100 | void arch_read_lock(arch_rwlock_t *rwlock); |
108 | { | ||
109 | u32 val = __insn_tns((int *)&rwlock->lock); | ||
110 | if (unlikely(val << _RD_COUNT_WIDTH)) { | ||
111 | arch_read_lock_slow(rwlock, val); | ||
112 | return; | ||
113 | } | ||
114 | rwlock->lock = val + (1 << _RD_COUNT_SHIFT); | ||
115 | } | ||
116 | 101 | ||
117 | /** | 102 | /** |
118 | * arch_read_lock() - acquire a write lock. | 103 | * arch_write_lock() - acquire a write lock. |
119 | */ | 104 | */ |
120 | static inline void arch_write_lock(arch_rwlock_t *rwlock) | 105 | void arch_write_lock(arch_rwlock_t *rwlock); |
121 | { | ||
122 | u32 val = __insn_tns((int *)&rwlock->lock); | ||
123 | if (unlikely(val != 0)) { | ||
124 | arch_write_lock_slow(rwlock, val); | ||
125 | return; | ||
126 | } | ||
127 | rwlock->lock = 1 << _WR_NEXT_SHIFT; | ||
128 | } | ||
129 | 106 | ||
130 | /** | 107 | /** |
131 | * arch_read_trylock() - try to acquire a read lock. | 108 | * arch_read_trylock() - try to acquire a read lock. |
132 | */ | 109 | */ |
133 | static inline int arch_read_trylock(arch_rwlock_t *rwlock) | 110 | int arch_read_trylock(arch_rwlock_t *rwlock); |
134 | { | ||
135 | int locked; | ||
136 | u32 val = __insn_tns((int *)&rwlock->lock); | ||
137 | if (unlikely(val & 1)) | ||
138 | return arch_read_trylock_slow(rwlock); | ||
139 | locked = (val << _RD_COUNT_WIDTH) == 0; | ||
140 | rwlock->lock = val + (locked << _RD_COUNT_SHIFT); | ||
141 | return locked; | ||
142 | } | ||
143 | 111 | ||
144 | /** | 112 | /** |
145 | * arch_write_trylock() - try to acquire a write lock. | 113 | * arch_write_trylock() - try to acquire a write lock. |
146 | */ | 114 | */ |
147 | static inline int arch_write_trylock(arch_rwlock_t *rwlock) | 115 | int arch_write_trylock(arch_rwlock_t *rwlock); |
148 | { | ||
149 | u32 val = __insn_tns((int *)&rwlock->lock); | ||
150 | |||
151 | /* | ||
152 | * If a tns is in progress, or there's a waiting or active locker, | ||
153 | * or active readers, we can't take the lock, so give up. | ||
154 | */ | ||
155 | if (unlikely(val != 0)) { | ||
156 | if (!(val & 1)) | ||
157 | rwlock->lock = val; | ||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | /* Set the "next" field to mark it locked. */ | ||
162 | rwlock->lock = 1 << _WR_NEXT_SHIFT; | ||
163 | return 1; | ||
164 | } | ||
165 | 116 | ||
166 | /** | 117 | /** |
167 | * arch_read_unlock() - release a read lock. | 118 | * arch_read_unlock() - release a read lock. |
168 | */ | 119 | */ |
169 | static inline void arch_read_unlock(arch_rwlock_t *rwlock) | 120 | void arch_read_unlock(arch_rwlock_t *rwlock); |
170 | { | ||
171 | u32 val; | ||
172 | mb(); /* guarantee anything modified under the lock is visible */ | ||
173 | val = __insn_tns((int *)&rwlock->lock); | ||
174 | if (unlikely(val & 1)) { | ||
175 | arch_read_unlock_slow(rwlock); | ||
176 | return; | ||
177 | } | ||
178 | rwlock->lock = val - (1 << _RD_COUNT_SHIFT); | ||
179 | } | ||
180 | 121 | ||
181 | /** | 122 | /** |
182 | * arch_write_unlock() - release a write lock. | 123 | * arch_write_unlock() - release a write lock. |
183 | */ | 124 | */ |
184 | static inline void arch_write_unlock(arch_rwlock_t *rwlock) | 125 | void arch_write_unlock(arch_rwlock_t *rwlock); |
185 | { | ||
186 | u32 val; | ||
187 | mb(); /* guarantee anything modified under the lock is visible */ | ||
188 | val = __insn_tns((int *)&rwlock->lock); | ||
189 | if (unlikely(val != (1 << _WR_NEXT_SHIFT))) { | ||
190 | arch_write_unlock_slow(rwlock, val); | ||
191 | return; | ||
192 | } | ||
193 | rwlock->lock = 0; | ||
194 | } | ||
195 | 126 | ||
196 | #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) | 127 | #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) |
197 | #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) | 128 | #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) |
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index f908473c322d..4d97a2db932e 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -18,13 +18,14 @@ | |||
18 | #include <linux/types.h> | 18 | #include <linux/types.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <asm/backtrace.h> | 20 | #include <asm/backtrace.h> |
21 | #include <asm/page.h> | ||
21 | #include <hv/hypervisor.h> | 22 | #include <hv/hypervisor.h> |
22 | 23 | ||
23 | /* Everything we need to keep track of a backtrace iteration */ | 24 | /* Everything we need to keep track of a backtrace iteration */ |
24 | struct KBacktraceIterator { | 25 | struct KBacktraceIterator { |
25 | BacktraceIterator it; | 26 | BacktraceIterator it; |
26 | struct task_struct *task; /* task we are backtracing */ | 27 | struct task_struct *task; /* task we are backtracing */ |
27 | HV_PTE *pgtable; /* page table for user space access */ | 28 | pte_t *pgtable; /* page table for user space access */ |
28 | int end; /* iteration complete. */ | 29 | int end; /* iteration complete. */ |
29 | int new_context; /* new context is starting */ | 30 | int new_context; /* new context is starting */ |
30 | int profile; /* profiling, so stop on async intrpt */ | 31 | int profile; /* profiling, so stop on async intrpt */ |
diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
index 5388850deeb2..23d1842f4839 100644
--- a/arch/tile/include/asm/system.h
+++ b/arch/tile/include/asm/system.h
@@ -90,7 +90,24 @@ | |||
90 | #endif | 90 | #endif |
91 | 91 | ||
92 | #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() | 92 | #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() |
93 | int __mb_incoherent(void); /* Helper routine for mb_incoherent(). */ | 93 | #include <hv/syscall_public.h> |
94 | /* | ||
95 | * Issue an uncacheable load to each memory controller, then | ||
96 | * wait until those loads have completed. | ||
97 | */ | ||
98 | static inline void __mb_incoherent(void) | ||
99 | { | ||
100 | long clobber_r10; | ||
101 | asm volatile("swint2" | ||
102 | : "=R10" (clobber_r10) | ||
103 | : "R10" (HV_SYS_fence_incoherent) | ||
104 | : "r0", "r1", "r2", "r3", "r4", | ||
105 | "r5", "r6", "r7", "r8", "r9", | ||
106 | "r11", "r12", "r13", "r14", | ||
107 | "r15", "r16", "r17", "r18", "r19", | ||
108 | "r20", "r21", "r22", "r23", "r24", | ||
109 | "r25", "r26", "r27", "r28", "r29"); | ||
110 | } | ||
94 | #endif | 111 | #endif |
95 | 112 | ||
96 | /* Fence to guarantee visibility of stores to incoherent memory. */ | 113 | /* Fence to guarantee visibility of stores to incoherent memory. */ |
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 3872f2b345d2..9e8e9c4dfa2a 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -68,6 +68,7 @@ struct thread_info { | |||
68 | #else | 68 | #else |
69 | #define THREAD_SIZE_ORDER (0) | 69 | #define THREAD_SIZE_ORDER (0) |
70 | #endif | 70 | #endif |
71 | #define THREAD_SIZE_PAGES (1 << THREAD_SIZE_ORDER) | ||
71 | 72 | ||
72 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) | 73 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) |
73 | #define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER) | 74 | #define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER) |
diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h
index 3baf5fc4c0a1..29921f0b86da 100644
--- a/arch/tile/include/asm/timex.h
+++ b/arch/tile/include/asm/timex.h
@@ -38,6 +38,9 @@ static inline cycles_t get_cycles(void) | |||
38 | 38 | ||
39 | cycles_t get_clock_rate(void); | 39 | cycles_t get_clock_rate(void); |
40 | 40 | ||
41 | /* Convert nanoseconds to core clock cycles. */ | ||
42 | cycles_t ns2cycles(unsigned long nsecs); | ||
43 | |||
41 | /* Called at cpu initialization to set some low-level constants. */ | 44 | /* Called at cpu initialization to set some low-level constants. */ |
42 | void setup_clock(void); | 45 | void setup_clock(void); |
43 | 46 | ||
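The new ns2cycles() declaration pairs with the hv_nanosleep() deprecation note further down: callers are expected to convert a nanosecond delay into cycles and spin on the cycle counter. A hedged sketch of such a conversion, using get_clock_rate() from this header; the in-tree implementation may scale differently to avoid the 64-bit divide:

static cycles_t ns2cycles_sketch(unsigned long nsecs)
{
	cycles_t hz = get_clock_rate();     /* core clock in cycles per second */
	return (cycles_t)(((unsigned long long)nsecs * hz) / 1000000000ULL);
}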
diff --git a/arch/tile/include/hv/drv_mshim_intf.h b/arch/tile/include/hv/drv_mshim_intf.h
new file mode 100644
index 000000000000..c6ef3bdc55cf
--- /dev/null
+++ b/arch/tile/include/hv/drv_mshim_intf.h
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright 2011 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | /** | ||
16 | * @file drv_mshim_intf.h | ||
17 | * Interface definitions for the Linux EDAC memory controller driver. | ||
18 | */ | ||
19 | |||
20 | #ifndef _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H | ||
21 | #define _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H | ||
22 | |||
23 | /** Number of memory controllers in the public API. */ | ||
24 | #define TILE_MAX_MSHIMS 4 | ||
25 | |||
26 | /** Memory info under each memory controller. */ | ||
27 | struct mshim_mem_info | ||
28 | { | ||
29 | uint64_t mem_size; /**< Total memory size in bytes. */ | ||
30 | uint8_t mem_type; /**< Memory type, DDR2 or DDR3. */ | ||
31 | uint8_t mem_ecc; /**< Memory supports ECC. */ | ||
32 | }; | ||
33 | |||
34 | /** | ||
35 | * DIMM error structure. | ||
36 | * For now, only correctable errors are counted and the mshim doesn't record | ||
37 | * the error PA. The HV panics upon uncorrectable errors. | ||
38 | */ | ||
39 | struct mshim_mem_error | ||
40 | { | ||
41 | uint32_t sbe_count; /**< Number of single-bit errors. */ | ||
42 | }; | ||
43 | |||
44 | /** Read this offset to get the memory info per mshim. */ | ||
45 | #define MSHIM_MEM_INFO_OFF 0x100 | ||
46 | |||
47 | /** Read this offset to check DIMM error. */ | ||
48 | #define MSHIM_MEM_ERROR_OFF 0x200 | ||
49 | |||
50 | #endif /* _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H */ | ||
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index f672544cd4f9..1b8bf03d62a0 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -338,9 +338,10 @@ typedef int HV_Errno; | |||
338 | #define HV_ENOTREADY -812 /**< Device not ready */ | 338 | #define HV_ENOTREADY -812 /**< Device not ready */ |
339 | #define HV_EIO -813 /**< I/O error */ | 339 | #define HV_EIO -813 /**< I/O error */ |
340 | #define HV_ENOMEM -814 /**< Out of memory */ | 340 | #define HV_ENOMEM -814 /**< Out of memory */ |
341 | #define HV_EAGAIN -815 /**< Try again */ | ||
341 | 342 | ||
342 | #define HV_ERR_MAX -801 /**< Largest HV error code */ | 343 | #define HV_ERR_MAX -801 /**< Largest HV error code */ |
343 | #define HV_ERR_MIN -814 /**< Smallest HV error code */ | 344 | #define HV_ERR_MIN -815 /**< Smallest HV error code */ |
344 | 345 | ||
345 | #ifndef __ASSEMBLER__ | 346 | #ifndef __ASSEMBLER__ |
346 | 347 | ||
@@ -867,6 +868,43 @@ typedef struct | |||
867 | */ | 868 | */ |
868 | HV_PhysAddrRange hv_inquire_physical(int idx); | 869 | HV_PhysAddrRange hv_inquire_physical(int idx); |
869 | 870 | ||
871 | /** Possible DIMM types. */ | ||
872 | typedef enum | ||
873 | { | ||
874 | NO_DIMM = 0, /**< No DIMM */ | ||
875 | DDR2 = 1, /**< DDR2 */ | ||
876 | DDR3 = 2 /**< DDR3 */ | ||
877 | } HV_DIMM_Type; | ||
878 | |||
879 | #ifdef __tilegx__ | ||
880 | |||
881 | /** Log2 of minimum DIMM bytes supported by the memory controller. */ | ||
882 | #define HV_MSH_MIN_DIMM_SIZE_SHIFT 29 | ||
883 | |||
884 | /** Max number of DIMMs contained by one memory controller. */ | ||
885 | #define HV_MSH_MAX_DIMMS 8 | ||
886 | |||
887 | #else | ||
888 | |||
889 | /** Log2 of minimum DIMM bytes supported by the memory controller. */ | ||
890 | #define HV_MSH_MIN_DIMM_SIZE_SHIFT 26 | ||
891 | |||
892 | /** Max number of DIMMs contained by one memory controller. */ | ||
893 | #define HV_MSH_MAX_DIMMS 2 | ||
894 | |||
895 | #endif | ||
896 | |||
897 | /** Number of bits to right-shift to get the DIMM type. */ | ||
898 | #define HV_DIMM_TYPE_SHIFT 0 | ||
899 | |||
900 | /** Bits to mask to get the DIMM type. */ | ||
901 | #define HV_DIMM_TYPE_MASK 0xf | ||
902 | |||
903 | /** Number of bits to right-shift to get the DIMM size. */ | ||
904 | #define HV_DIMM_SIZE_SHIFT 4 | ||
905 | |||
906 | /** Bits to mask to get the DIMM size. */ | ||
907 | #define HV_DIMM_SIZE_MASK 0xf | ||
870 | 908 | ||
871 | /** Memory controller information. */ | 909 | /** Memory controller information. */ |
872 | typedef struct | 910 | typedef struct |
@@ -964,6 +1002,11 @@ HV_ASIDRange hv_inquire_asid(int idx); | |||
964 | 1002 | ||
965 | /** Waits for at least the specified number of nanoseconds then returns. | 1003 | /** Waits for at least the specified number of nanoseconds then returns. |
966 | * | 1004 | * |
1005 | * NOTE: this deprecated function currently assumes a 750 MHz clock, | ||
1006 | * and is thus not generally suitable for use. New code should call | ||
1007 | * hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for, | ||
1008 | * and delay by looping while checking the cycle counter SPR. | ||
1009 | * | ||
967 | * @param nanosecs The number of nanoseconds to sleep. | 1010 | * @param nanosecs The number of nanoseconds to sleep. |
968 | */ | 1011 | */ |
969 | void hv_nanosleep(int nanosecs); | 1012 | void hv_nanosleep(int nanosecs); |
@@ -1038,6 +1081,7 @@ int hv_console_write(HV_VirtAddr bytes, int len); | |||
1038 | * downcall: | 1081 | * downcall: |
1039 | * | 1082 | * |
1040 | * INT_MESSAGE_RCV_DWNCL (hypervisor message available) | 1083 | * INT_MESSAGE_RCV_DWNCL (hypervisor message available) |
1084 | * INT_DEV_INTR_DWNCL (device interrupt) | ||
1041 | * INT_DMATLB_MISS_DWNCL (DMA TLB miss) | 1085 | * INT_DMATLB_MISS_DWNCL (DMA TLB miss) |
1042 | * INT_SNITLB_MISS_DWNCL (SNI TLB miss) | 1086 | * INT_SNITLB_MISS_DWNCL (SNI TLB miss) |
1043 | * INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation) | 1087 | * INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation) |
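The DIMM type and size shift/mask macros added to this header are presumably consumed by the new EDAC driver when it decodes a per-DIMM descriptor from the mshim. A hedged decode sketch; the packing of the size field relative to HV_MSH_MIN_DIMM_SIZE_SHIFT is an assumption, not something the header states:

static inline int hv_dimm_type(unsigned int desc)
{
	return (desc >> HV_DIMM_TYPE_SHIFT) & HV_DIMM_TYPE_MASK;  /* HV_DIMM_Type */
}

static inline unsigned long long hv_dimm_bytes(unsigned int desc)
{
	int steps = (desc >> HV_DIMM_SIZE_SHIFT) & HV_DIMM_SIZE_MASK;
	/* Assumption: the size field counts log2 steps above the minimum size. */
	return 1ULL << (HV_MSH_MIN_DIMM_SIZE_SHIFT + steps);
}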
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index fd8dc42abdcb..431e9ae60488 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -38,12 +38,6 @@ STD_ENTRY(kernel_execve) | |||
38 | jrp lr | 38 | jrp lr |
39 | STD_ENDPROC(kernel_execve) | 39 | STD_ENDPROC(kernel_execve) |
40 | 40 | ||
41 | /* Delay a fixed number of cycles. */ | ||
42 | STD_ENTRY(__delay) | ||
43 | { addi r0, r0, -1; bnzt r0, . } | ||
44 | jrp lr | ||
45 | STD_ENDPROC(__delay) | ||
46 | |||
47 | /* | 41 | /* |
48 | * We don't run this function directly, but instead copy it to a page | 42 | * We don't run this function directly, but instead copy it to a page |
49 | * we map into every user process. See vdso_setup(). | 43 | * we map into every user process. See vdso_setup(). |
@@ -97,23 +91,17 @@ STD_ENTRY(smp_nap) | |||
97 | 91 | ||
98 | /* | 92 | /* |
99 | * Enable interrupts racelessly and then nap until interrupted. | 93 | * Enable interrupts racelessly and then nap until interrupted. |
94 | * Architecturally, we are guaranteed that enabling interrupts via | ||
95 | * mtspr to INTERRUPT_CRITICAL_SECTION only interrupts at the next PC. | ||
100 | * This function's _cpu_idle_nap address is special; see intvec.S. | 96 | * This function's _cpu_idle_nap address is special; see intvec.S. |
101 | * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and | 97 | * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and |
102 | * as a result return to the function that called _cpu_idle(). | 98 | * as a result return to the function that called _cpu_idle(). |
103 | */ | 99 | */ |
104 | STD_ENTRY(_cpu_idle) | 100 | STD_ENTRY(_cpu_idle) |
105 | { | 101 | movei r1, 1 |
106 | lnk r0 | 102 | mtspr INTERRUPT_CRITICAL_SECTION, r1 |
107 | movei r1, KERNEL_PL | ||
108 | } | ||
109 | { | ||
110 | addli r0, r0, _cpu_idle_nap - . | ||
111 | mtspr INTERRUPT_CRITICAL_SECTION, r1 | ||
112 | } | ||
113 | IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ | 103 | IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ |
114 | mtspr SPR_EX_CONTEXT_K_1, r1 /* Kernel PL, ICS clear */ | 104 | mtspr INTERRUPT_CRITICAL_SECTION, zero |
115 | mtspr SPR_EX_CONTEXT_K_0, r0 | ||
116 | iret | ||
117 | .global _cpu_idle_nap | 105 | .global _cpu_idle_nap |
118 | _cpu_idle_nap: | 106 | _cpu_idle_nap: |
119 | nap | 107 | nap |
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
index 90e7c4435693..1a39b7c1c87e 100644
--- a/arch/tile/kernel/head_32.S
+++ b/arch/tile/kernel/head_32.S
@@ -133,7 +133,7 @@ ENTRY(_start) | |||
133 | } | 133 | } |
134 | ENDPROC(_start) | 134 | ENDPROC(_start) |
135 | 135 | ||
136 | .section ".bss.page_aligned","w" | 136 | __PAGE_ALIGNED_BSS |
137 | .align PAGE_SIZE | 137 | .align PAGE_SIZE |
138 | ENTRY(empty_zero_page) | 138 | ENTRY(empty_zero_page) |
139 | .fill PAGE_SIZE,1,0 | 139 | .fill PAGE_SIZE,1,0 |
@@ -145,10 +145,10 @@ ENTRY(empty_zero_page) | |||
145 | .endif | 145 | .endif |
146 | .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ | 146 | .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ |
147 | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) | 147 | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |
148 | .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN) | 148 | .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32)) |
149 | .endm | 149 | .endm |
150 | 150 | ||
151 | .section ".data.page_aligned","wa" | 151 | __PAGE_ALIGNED_DATA |
152 | .align PAGE_SIZE | 152 | .align PAGE_SIZE |
153 | ENTRY(swapper_pg_dir) | 153 | ENTRY(swapper_pg_dir) |
154 | /* | 154 | /* |
@@ -158,12 +158,14 @@ ENTRY(swapper_pg_dir) | |||
158 | */ | 158 | */ |
159 | .set addr, 0 | 159 | .set addr, 0 |
160 | .rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT | 160 | .rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT |
161 | PTE addr + PAGE_OFFSET, addr, HV_PTE_READABLE | HV_PTE_WRITABLE | 161 | PTE addr + PAGE_OFFSET, addr, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ |
162 | (1 << (HV_PTE_INDEX_WRITABLE - 32)) | ||
162 | .set addr, addr + PGDIR_SIZE | 163 | .set addr, addr + PGDIR_SIZE |
163 | .endr | 164 | .endr |
164 | 165 | ||
165 | /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ | 166 | /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ |
166 | PTE MEM_SV_INTRPT, 0, HV_PTE_READABLE | HV_PTE_EXECUTABLE | 167 | PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ |
168 | (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) | ||
167 | .org swapper_pg_dir + HV_L1_SIZE | 169 | .org swapper_pg_dir + HV_L1_SIZE |
168 | END(swapper_pg_dir) | 170 | END(swapper_pg_dir) |
169 | 171 | ||
@@ -176,6 +178,7 @@ ENTRY(swapper_pg_dir) | |||
176 | __INITDATA | 178 | __INITDATA |
177 | .align CHIP_L2_LINE_SIZE() | 179 | .align CHIP_L2_LINE_SIZE() |
178 | ENTRY(swapper_pgprot) | 180 | ENTRY(swapper_pgprot) |
179 | PTE 0, 0, HV_PTE_READABLE | HV_PTE_WRITABLE, 1 | 181 | PTE 0, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ |
182 | (1 << (HV_PTE_INDEX_WRITABLE - 32)), 1 | ||
180 | .align CHIP_L2_LINE_SIZE() | 183 | .align CHIP_L2_LINE_SIZE() |
181 | END(swapper_pgprot) | 184 | END(swapper_pgprot) |
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 5eed4a02bf62..fffcfa6b3a62 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -32,10 +32,6 @@ | |||
32 | # error "No support for kernel preemption currently" | 32 | # error "No support for kernel preemption currently" |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | #if INT_INTCTRL_K < 32 || INT_INTCTRL_K >= 48 | ||
36 | # error INT_INTCTRL_K coded to set high interrupt mask | ||
37 | #endif | ||
38 | |||
39 | #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) | 35 | #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) |
40 | 36 | ||
41 | #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) | 37 | #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) |
@@ -1199,46 +1195,6 @@ STD_ENTRY(interrupt_return) | |||
1199 | STD_ENDPROC(interrupt_return) | 1195 | STD_ENDPROC(interrupt_return) |
1200 | 1196 | ||
1201 | /* | 1197 | /* |
1202 | * This interrupt variant clears the INT_INTCTRL_K interrupt mask bit | ||
1203 | * before returning, so we can properly get more downcalls. | ||
1204 | */ | ||
1205 | .pushsection .text.handle_interrupt_downcall,"ax" | ||
1206 | handle_interrupt_downcall: | ||
1207 | finish_interrupt_save handle_interrupt_downcall | ||
1208 | check_single_stepping normal, .Ldispatch_downcall | ||
1209 | .Ldispatch_downcall: | ||
1210 | |||
1211 | /* Clear INTCTRL_K from the set of interrupts we ever enable. */ | ||
1212 | GET_INTERRUPTS_ENABLED_MASK_PTR(r30) | ||
1213 | { | ||
1214 | addi r30, r30, 4 | ||
1215 | movei r31, INT_MASK(INT_INTCTRL_K) | ||
1216 | } | ||
1217 | { | ||
1218 | lw r20, r30 | ||
1219 | nor r21, r31, zero | ||
1220 | } | ||
1221 | and r20, r20, r21 | ||
1222 | sw r30, r20 | ||
1223 | |||
1224 | { | ||
1225 | jalr r0 | ||
1226 | PTREGS_PTR(r0, PTREGS_OFFSET_BASE) | ||
1227 | } | ||
1228 | FEEDBACK_REENTER(handle_interrupt_downcall) | ||
1229 | |||
1230 | /* Allow INTCTRL_K to be enabled next time we enable interrupts. */ | ||
1231 | lw r20, r30 | ||
1232 | or r20, r20, r31 | ||
1233 | sw r30, r20 | ||
1234 | |||
1235 | { | ||
1236 | movei r30, 0 /* not an NMI */ | ||
1237 | j interrupt_return | ||
1238 | } | ||
1239 | STD_ENDPROC(handle_interrupt_downcall) | ||
1240 | |||
1241 | /* | ||
1242 | * Some interrupts don't check for single stepping | 1198 | * Some interrupts don't check for single stepping |
1243 | */ | 1199 | */ |
1244 | .pushsection .text.handle_interrupt_no_single_step,"ax" | 1200 | .pushsection .text.handle_interrupt_no_single_step,"ax" |
@@ -1600,7 +1556,10 @@ STD_ENTRY(_sys_clone) | |||
1600 | .align 64 | 1556 | .align 64 |
1601 | /* Align much later jump on the start of a cache line. */ | 1557 | /* Align much later jump on the start of a cache line. */ |
1602 | #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() | 1558 | #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() |
1603 | nop; nop | 1559 | nop |
1560 | #if PAGE_SIZE >= 0x10000 | ||
1561 | nop | ||
1562 | #endif | ||
1604 | #endif | 1563 | #endif |
1605 | ENTRY(sys_cmpxchg) | 1564 | ENTRY(sys_cmpxchg) |
1606 | 1565 | ||
@@ -1628,9 +1587,13 @@ ENTRY(sys_cmpxchg) | |||
1628 | * about aliasing among multiple mappings of the same physical page, | 1587 | * about aliasing among multiple mappings of the same physical page, |
1629 | * and we ignore the low 3 bits so we have one lock that covers | 1588 | * and we ignore the low 3 bits so we have one lock that covers |
1630 | * both a cmpxchg64() and a cmpxchg() on either its low or high word. | 1589 | * both a cmpxchg64() and a cmpxchg() on either its low or high word. |
1631 | * NOTE: this code must match __atomic_hashed_lock() in lib/atomic.c. | 1590 | * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c. |
1632 | */ | 1591 | */ |
1633 | 1592 | ||
1593 | #if (PAGE_OFFSET & 0xffff) != 0 | ||
1594 | # error Code here assumes PAGE_OFFSET can be loaded with just hi16() | ||
1595 | #endif | ||
1596 | |||
1634 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() | 1597 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() |
1635 | { | 1598 | { |
1636 | /* Check for unaligned input. */ | 1599 | /* Check for unaligned input. */ |
@@ -1723,11 +1686,14 @@ ENTRY(sys_cmpxchg) | |||
1723 | lw r26, r0 | 1686 | lw r26, r0 |
1724 | } | 1687 | } |
1725 | { | 1688 | { |
1726 | /* atomic_locks is page aligned so this suffices to get its addr. */ | 1689 | auli r21, zero, ha16(atomic_locks) |
1727 | auli r21, zero, hi16(atomic_locks) | ||
1728 | 1690 | ||
1729 | bbns r23, .Lcmpxchg_badaddr | 1691 | bbns r23, .Lcmpxchg_badaddr |
1730 | } | 1692 | } |
1693 | #if PAGE_SIZE < 0x10000 | ||
1694 | /* atomic_locks is page-aligned so for big pages we don't need this. */ | ||
1695 | addli r21, r21, lo16(atomic_locks) | ||
1696 | #endif | ||
1731 | { | 1697 | { |
1732 | /* | 1698 | /* |
1733 | * Insert the hash bits into the page-aligned pointer. | 1699 | * Insert the hash bits into the page-aligned pointer. |
@@ -1762,7 +1728,7 @@ ENTRY(sys_cmpxchg) | |||
1762 | 1728 | ||
1763 | /* | 1729 | /* |
1764 | * Perform the actual cmpxchg or atomic_update. | 1730 | * Perform the actual cmpxchg or atomic_update. |
1765 | * Note that __futex_mark_unlocked() in uClibc relies on | 1731 | * Note that the system <arch/atomic.h> header relies on |
1766 | * atomic_update() to always perform an "mf", so don't make | 1732 | * atomic_update() to always perform an "mf", so don't make |
1767 | * it optional or conditional without modifying that code. | 1733 | * it optional or conditional without modifying that code. |
1768 | */ | 1734 | */ |
@@ -2014,17 +1980,17 @@ int_unalign: | |||
2014 | #endif | 1980 | #endif |
2015 | int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr | 1981 | int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr |
2016 | int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ | 1982 | int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ |
2017 | hv_message_intr, handle_interrupt_downcall | 1983 | hv_message_intr |
2018 | int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \ | 1984 | int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \ |
2019 | tile_dev_intr, handle_interrupt_downcall | 1985 | tile_dev_intr |
2020 | int_hand INT_I_ASID, I_ASID, bad_intr | 1986 | int_hand INT_I_ASID, I_ASID, bad_intr |
2021 | int_hand INT_D_ASID, D_ASID, bad_intr | 1987 | int_hand INT_D_ASID, D_ASID, bad_intr |
2022 | int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \ | 1988 | int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \ |
2023 | do_page_fault, handle_interrupt_downcall | 1989 | do_page_fault |
2024 | int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \ | 1990 | int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \ |
2025 | do_page_fault, handle_interrupt_downcall | 1991 | do_page_fault |
2026 | int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \ | 1992 | int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \ |
2027 | do_page_fault, handle_interrupt_downcall | 1993 | do_page_fault |
2028 | int_hand INT_SN_CPL, SN_CPL, bad_intr | 1994 | int_hand INT_SN_CPL, SN_CPL, bad_intr |
2029 | int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap | 1995 | int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap |
2030 | #if CHIP_HAS_AUX_PERF_COUNTERS() | 1996 | #if CHIP_HAS_AUX_PERF_COUNTERS() |
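Two assumptions baked into the sys_cmpxchg fast path above are now checked or exploited explicitly: PAGE_OFFSET must be loadable with a bare hi16(), and atomic_locks is page aligned, so with 64 KB pages its low 16 bits are zero and the addli of lo16(atomic_locks) can be compiled out. The other invariant, called out in the updated comment, is the address hash that must stay in sync with __atomic_hashed_lock() in lib/atomic_32.c. A hedged C sketch of that hashing idea (names and masks here are illustrative, not the in-tree code):

/* One int-sized lock per 8-byte granule of the page, so a cmpxchg64()
 * and a cmpxchg() on either of its 32-bit halves share one lock. */
extern int atomic_locks[];  /* page aligned, PAGE_SIZE / sizeof(int) entries */

static inline int *hashed_lock(volatile void *v)
{
        unsigned long offset = ((unsigned long)v & (PAGE_SIZE - 1)) & ~7UL;
        return &atomic_locks[offset >> 3];
}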
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 128805ef8f2c..0baa7580121f 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c | |||
@@ -176,43 +176,43 @@ void disable_percpu_irq(unsigned int irq) | |||
176 | EXPORT_SYMBOL(disable_percpu_irq); | 176 | EXPORT_SYMBOL(disable_percpu_irq); |
177 | 177 | ||
178 | /* Mask an interrupt. */ | 178 | /* Mask an interrupt. */ |
179 | static void tile_irq_chip_mask(unsigned int irq) | 179 | static void tile_irq_chip_mask(struct irq_data *d) |
180 | { | 180 | { |
181 | mask_irqs(1UL << irq); | 181 | mask_irqs(1UL << d->irq); |
182 | } | 182 | } |
183 | 183 | ||
184 | /* Unmask an interrupt. */ | 184 | /* Unmask an interrupt. */ |
185 | static void tile_irq_chip_unmask(unsigned int irq) | 185 | static void tile_irq_chip_unmask(struct irq_data *d) |
186 | { | 186 | { |
187 | unmask_irqs(1UL << irq); | 187 | unmask_irqs(1UL << d->irq); |
188 | } | 188 | } |
189 | 189 | ||
190 | /* | 190 | /* |
191 | * Clear an interrupt before processing it so that any new assertions | 191 | * Clear an interrupt before processing it so that any new assertions |
192 | * will trigger another irq. | 192 | * will trigger another irq. |
193 | */ | 193 | */ |
194 | static void tile_irq_chip_ack(unsigned int irq) | 194 | static void tile_irq_chip_ack(struct irq_data *d) |
195 | { | 195 | { |
196 | if ((unsigned long)get_irq_chip_data(irq) != IS_HW_CLEARED) | 196 | if ((unsigned long)irq_data_get_irq_chip_data(d) != IS_HW_CLEARED) |
197 | clear_irqs(1UL << irq); | 197 | clear_irqs(1UL << d->irq); |
198 | } | 198 | } |
199 | 199 | ||
200 | /* | 200 | /* |
201 | * For per-cpu interrupts, we need to avoid unmasking any interrupts | 201 | * For per-cpu interrupts, we need to avoid unmasking any interrupts |
202 | * that we disabled via disable_percpu_irq(). | 202 | * that we disabled via disable_percpu_irq(). |
203 | */ | 203 | */ |
204 | static void tile_irq_chip_eoi(unsigned int irq) | 204 | static void tile_irq_chip_eoi(struct irq_data *d) |
205 | { | 205 | { |
206 | if (!(__get_cpu_var(irq_disable_mask) & (1UL << irq))) | 206 | if (!(__get_cpu_var(irq_disable_mask) & (1UL << d->irq))) |
207 | unmask_irqs(1UL << irq); | 207 | unmask_irqs(1UL << d->irq); |
208 | } | 208 | } |
209 | 209 | ||
210 | static struct irq_chip tile_irq_chip = { | 210 | static struct irq_chip tile_irq_chip = { |
211 | .name = "tile_irq_chip", | 211 | .name = "tile_irq_chip", |
212 | .ack = tile_irq_chip_ack, | 212 | .irq_ack = tile_irq_chip_ack, |
213 | .eoi = tile_irq_chip_eoi, | 213 | .irq_eoi = tile_irq_chip_eoi, |
214 | .mask = tile_irq_chip_mask, | 214 | .irq_mask = tile_irq_chip_mask, |
215 | .unmask = tile_irq_chip_unmask, | 215 | .irq_unmask = tile_irq_chip_unmask, |
216 | }; | 216 | }; |
217 | 217 | ||
218 | void __init init_IRQ(void) | 218 | void __init init_IRQ(void) |
@@ -277,8 +277,10 @@ int show_interrupts(struct seq_file *p, void *v) | |||
277 | } | 277 | } |
278 | 278 | ||
279 | if (i < NR_IRQS) { | 279 | if (i < NR_IRQS) { |
280 | raw_spin_lock_irqsave(&irq_desc[i].lock, flags); | 280 | struct irq_desc *desc = irq_to_desc(i); |
281 | action = irq_desc[i].action; | 281 | |
282 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
283 | action = desc->action; | ||
282 | if (!action) | 284 | if (!action) |
283 | goto skip; | 285 | goto skip; |
284 | seq_printf(p, "%3d: ", i); | 286 | seq_printf(p, "%3d: ", i); |
@@ -288,7 +290,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
288 | for_each_online_cpu(j) | 290 | for_each_online_cpu(j) |
289 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 291 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
290 | #endif | 292 | #endif |
291 | seq_printf(p, " %14s", irq_desc[i].chip->name); | 293 | seq_printf(p, " %14s", get_irq_desc_chip(desc)->name); |
292 | seq_printf(p, " %s", action->name); | 294 | seq_printf(p, " %s", action->name); |
293 | 295 | ||
294 | for (action = action->next; action; action = action->next) | 296 | for (action = action->next; action; action = action->next) |
@@ -296,7 +298,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
296 | 298 | ||
297 | seq_putc(p, '\n'); | 299 | seq_putc(p, '\n'); |
298 | skip: | 300 | skip: |
299 | raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); | 301 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
300 | } | 302 | } |
301 | return 0; | 303 | return 0; |
302 | } | 304 | } |
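The irq.c conversion above is the arch half of the GENERIC_HARDIRQS_NO_DEPRECATED switch selected in the Kconfig: irq_chip callbacks are renamed to their irq_* forms and take a struct irq_data pointer, from which the irq number and chip data are recovered. A minimal sketch of the pattern with a hypothetical chip (not the tile one):

#include <linux/irq.h>

static void example_irq_mask(struct irq_data *d)
{
        unsigned int irq = d->irq;                    /* formerly the argument */
        void *priv = irq_data_get_irq_chip_data(d);   /* formerly get_irq_chip_data(irq) */

        /* ... program the hardware to mask 'irq' using 'priv' ... */
        (void)irq;
        (void)priv;
}

static struct irq_chip example_chip = {
        .name     = "example",
        .irq_mask = example_irq_mask,
        /* .irq_unmask, .irq_ack and .irq_eoi follow the same signature */
};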
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index 0d8b9e933487..e00d7179989e 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c | |||
@@ -240,8 +240,11 @@ static void setup_quasi_va_is_pa(void) | |||
240 | pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); | 240 | pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); |
241 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); | 241 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); |
242 | 242 | ||
243 | for (i = 0; i < pgd_index(PAGE_OFFSET); i++) | 243 | for (i = 0; i < pgd_index(PAGE_OFFSET); i++) { |
244 | pgtable[i] = pfn_pte(i << (HPAGE_SHIFT - PAGE_SHIFT), pte); | 244 | unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT); |
245 | if (pfn_valid(pfn)) | ||
246 | __set_pte(&pgtable[i], pfn_pte(pfn, pte)); | ||
247 | } | ||
245 | } | 248 | } |
246 | 249 | ||
247 | 250 | ||
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index 5ad5e13b0fa6..658752b2835e 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c | |||
@@ -86,6 +86,21 @@ EXPORT_SYMBOL(dma_free_coherent); | |||
86 | * can count on nothing having been touched. | 86 | * can count on nothing having been touched. |
87 | */ | 87 | */ |
88 | 88 | ||
89 | /* Flush a PA range from cache page by page. */ | ||
90 | static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size) | ||
91 | { | ||
92 | struct page *page = pfn_to_page(PFN_DOWN(dma_addr)); | ||
93 | size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1)); | ||
94 | |||
95 | while ((ssize_t)size > 0) { | ||
96 | /* Flush the page. */ | ||
97 | homecache_flush_cache(page++, 0); | ||
98 | |||
99 | /* Figure out if we need to continue on the next page. */ | ||
100 | size -= bytesleft; | ||
101 | bytesleft = PAGE_SIZE; | ||
102 | } | ||
103 | } | ||
89 | 104 | ||
90 | /* | 105 | /* |
91 | * dma_map_single can be passed any memory address, and there appear | 106 | * dma_map_single can be passed any memory address, and there appear |
@@ -97,26 +112,12 @@ EXPORT_SYMBOL(dma_free_coherent); | |||
97 | dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, | 112 | dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, |
98 | enum dma_data_direction direction) | 113 | enum dma_data_direction direction) |
99 | { | 114 | { |
100 | struct page *page; | 115 | dma_addr_t dma_addr = __pa(ptr); |
101 | dma_addr_t dma_addr; | ||
102 | int thispage; | ||
103 | 116 | ||
104 | BUG_ON(!valid_dma_direction(direction)); | 117 | BUG_ON(!valid_dma_direction(direction)); |
105 | WARN_ON(size == 0); | 118 | WARN_ON(size == 0); |
106 | 119 | ||
107 | dma_addr = __pa(ptr); | 120 | __dma_map_pa_range(dma_addr, size); |
108 | |||
109 | /* We might have been handed a buffer that wraps a page boundary */ | ||
110 | while ((int)size > 0) { | ||
111 | /* The amount to flush that's on this page */ | ||
112 | thispage = PAGE_SIZE - ((unsigned long)ptr & (PAGE_SIZE - 1)); | ||
113 | thispage = min((int)thispage, (int)size); | ||
114 | /* Is this valid for any page we could be handed? */ | ||
115 | page = pfn_to_page(kaddr_to_pfn(ptr)); | ||
116 | homecache_flush_cache(page, 0); | ||
117 | ptr += thispage; | ||
118 | size -= thispage; | ||
119 | } | ||
120 | 121 | ||
121 | return dma_addr; | 122 | return dma_addr; |
122 | } | 123 | } |
@@ -140,10 +141,8 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, | |||
140 | WARN_ON(nents == 0 || sglist->length == 0); | 141 | WARN_ON(nents == 0 || sglist->length == 0); |
141 | 142 | ||
142 | for_each_sg(sglist, sg, nents, i) { | 143 | for_each_sg(sglist, sg, nents, i) { |
143 | struct page *page; | ||
144 | sg->dma_address = sg_phys(sg); | 144 | sg->dma_address = sg_phys(sg); |
145 | page = pfn_to_page(sg->dma_address >> PAGE_SHIFT); | 145 | __dma_map_pa_range(sg->dma_address, sg->length); |
146 | homecache_flush_cache(page, 0); | ||
147 | } | 146 | } |
148 | 147 | ||
149 | return nents; | 148 | return nents; |
@@ -163,6 +162,7 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page, | |||
163 | { | 162 | { |
164 | BUG_ON(!valid_dma_direction(direction)); | 163 | BUG_ON(!valid_dma_direction(direction)); |
165 | 164 | ||
165 | BUG_ON(offset + size > PAGE_SIZE); | ||
166 | homecache_flush_cache(page, 0); | 166 | homecache_flush_cache(page, 0); |
167 | 167 | ||
168 | return page_to_pa(page) + offset; | 168 | return page_to_pa(page) + offset; |
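The new __dma_map_pa_range() above flushes a possibly unaligned buffer page by page: a partial first page, then whole pages until the (signed) remaining size goes non-positive. As a worked illustration, the hypothetical helper below (not part of the patch, and assuming the usual <linux/kernel.h> and <linux/mm.h> definitions) computes how many pages that loop visits:

static inline size_t pages_flushed(dma_addr_t dma_addr, size_t size)
{
        size_t first = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1));

        if (size <= first)
                return 1;                                 /* fits in the first page */
        return 1 + DIV_ROUND_UP(size - first, PAGE_SIZE); /* first page plus the rest */
}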
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index e90eb53173b0..b9cd962e1d30 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c | |||
@@ -165,7 +165,7 @@ void free_thread_info(struct thread_info *info) | |||
165 | kfree(step_state); | 165 | kfree(step_state); |
166 | } | 166 | } |
167 | 167 | ||
168 | free_page((unsigned long)info); | 168 | free_pages((unsigned long)info, THREAD_SIZE_ORDER); |
169 | } | 169 | } |
170 | 170 | ||
171 | static void save_arch_state(struct thread_struct *t); | 171 | static void save_arch_state(struct thread_struct *t); |
@@ -574,6 +574,8 @@ SYSCALL_DEFINE4(execve, const char __user *, path, | |||
574 | goto out; | 574 | goto out; |
575 | error = do_execve(filename, argv, envp, regs); | 575 | error = do_execve(filename, argv, envp, regs); |
576 | putname(filename); | 576 | putname(filename); |
577 | if (error == 0) | ||
578 | single_step_execve(); | ||
577 | out: | 579 | out: |
578 | return error; | 580 | return error; |
579 | } | 581 | } |
@@ -593,6 +595,8 @@ long compat_sys_execve(const char __user *path, | |||
593 | goto out; | 595 | goto out; |
594 | error = compat_do_execve(filename, argv, envp, regs); | 596 | error = compat_do_execve(filename, argv, envp, regs); |
595 | putname(filename); | 597 | putname(filename); |
598 | if (error == 0) | ||
599 | single_step_execve(); | ||
596 | out: | 600 | out: |
597 | return error; | 601 | return error; |
598 | } | 602 | } |
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index f18573643ed1..3696b1832566 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c | |||
@@ -59,6 +59,8 @@ unsigned long __initdata node_memmap_pfn[MAX_NUMNODES]; | |||
59 | unsigned long __initdata node_percpu_pfn[MAX_NUMNODES]; | 59 | unsigned long __initdata node_percpu_pfn[MAX_NUMNODES]; |
60 | unsigned long __initdata node_free_pfn[MAX_NUMNODES]; | 60 | unsigned long __initdata node_free_pfn[MAX_NUMNODES]; |
61 | 61 | ||
62 | static unsigned long __initdata node_percpu[MAX_NUMNODES]; | ||
63 | |||
62 | #ifdef CONFIG_HIGHMEM | 64 | #ifdef CONFIG_HIGHMEM |
63 | /* Page frame index of end of lowmem on each controller. */ | 65 | /* Page frame index of end of lowmem on each controller. */ |
64 | unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; | 66 | unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; |
@@ -554,7 +556,6 @@ static void __init setup_bootmem_allocator(void) | |||
554 | reserve_bootmem(crashk_res.start, | 556 | reserve_bootmem(crashk_res.start, |
555 | crashk_res.end - crashk_res.start + 1, 0); | 557 | crashk_res.end - crashk_res.start + 1, 0); |
556 | #endif | 558 | #endif |
557 | |||
558 | } | 559 | } |
559 | 560 | ||
560 | void *__init alloc_remap(int nid, unsigned long size) | 561 | void *__init alloc_remap(int nid, unsigned long size) |
@@ -568,11 +569,13 @@ void *__init alloc_remap(int nid, unsigned long size) | |||
568 | 569 | ||
569 | static int __init percpu_size(void) | 570 | static int __init percpu_size(void) |
570 | { | 571 | { |
571 | int size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); | 572 | int size = __per_cpu_end - __per_cpu_start; |
572 | #ifdef CONFIG_MODULES | 573 | size += PERCPU_MODULE_RESERVE; |
573 | if (size < PERCPU_ENOUGH_ROOM) | 574 | size += PERCPU_DYNAMIC_EARLY_SIZE; |
574 | size = PERCPU_ENOUGH_ROOM; | 575 | if (size < PCPU_MIN_UNIT_SIZE) |
575 | #endif | 576 | size = PCPU_MIN_UNIT_SIZE; |
577 | size = roundup(size, PAGE_SIZE); | ||
578 | |||
576 | /* In several places we assume the per-cpu data fits on a huge page. */ | 579 | /* In several places we assume the per-cpu data fits on a huge page. */ |
577 | BUG_ON(kdata_huge && size > HPAGE_SIZE); | 580 | BUG_ON(kdata_huge && size > HPAGE_SIZE); |
578 | return size; | 581 | return size; |
@@ -589,7 +592,6 @@ static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal) | |||
589 | static void __init zone_sizes_init(void) | 592 | static void __init zone_sizes_init(void) |
590 | { | 593 | { |
591 | unsigned long zones_size[MAX_NR_ZONES] = { 0 }; | 594 | unsigned long zones_size[MAX_NR_ZONES] = { 0 }; |
592 | unsigned long node_percpu[MAX_NUMNODES] = { 0 }; | ||
593 | int size = percpu_size(); | 595 | int size = percpu_size(); |
594 | int num_cpus = smp_height * smp_width; | 596 | int num_cpus = smp_height * smp_width; |
595 | int i; | 597 | int i; |
@@ -674,7 +676,7 @@ static void __init zone_sizes_init(void) | |||
674 | NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; | 676 | NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; |
675 | 677 | ||
676 | free_area_init_node(i, zones_size, start, NULL); | 678 | free_area_init_node(i, zones_size, start, NULL); |
677 | printk(KERN_DEBUG " DMA zone: %ld per-cpu pages\n", | 679 | printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", |
678 | PFN_UP(node_percpu[i])); | 680 | PFN_UP(node_percpu[i])); |
679 | 681 | ||
680 | /* Track the type of memory on each node */ | 682 | /* Track the type of memory on each node */ |
@@ -1312,6 +1314,8 @@ static void *__init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) | |||
1312 | 1314 | ||
1313 | BUG_ON(size % PAGE_SIZE != 0); | 1315 | BUG_ON(size % PAGE_SIZE != 0); |
1314 | pfn_offset[nid] += size / PAGE_SIZE; | 1316 | pfn_offset[nid] += size / PAGE_SIZE; |
1317 | BUG_ON(node_percpu[nid] < size); | ||
1318 | node_percpu[nid] -= size; | ||
1315 | if (percpu_pfn[cpu] == 0) | 1319 | if (percpu_pfn[cpu] == 0) |
1316 | percpu_pfn[cpu] = pfn; | 1320 | percpu_pfn[cpu] = pfn; |
1317 | return pfn_to_kaddr(pfn); | 1321 | return pfn_to_kaddr(pfn); |
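percpu_size() above now budgets the static per-cpu data plus the module reserve and the early dynamic area, enforces the generic minimum unit size, and rounds the result to whole pages; node_percpu[] is promoted to file scope so pcpu_fc_alloc() can debit it and catch over-allocation with the new BUG_ON. A compact restatement of the sizing rule, assuming the usual <linux/percpu.h> constants are in scope:

static size_t percpu_unit_bytes(size_t static_size)
{
        size_t size = static_size;

        size += PERCPU_MODULE_RESERVE;       /* per-cpu space for modules */
        size += PERCPU_DYNAMIC_EARLY_SIZE;   /* early dynamic per-cpu allocations */
        if (size < PCPU_MIN_UNIT_SIZE)
                size = PCPU_MIN_UNIT_SIZE;
        return roundup(size, PAGE_SIZE);     /* whole pages per cpu */
}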
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 1eb3b39e36c7..84a729e06ec4 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c | |||
@@ -56,7 +56,7 @@ enum mem_op { | |||
56 | MEMOP_STORE_POSTINCR | 56 | MEMOP_STORE_POSTINCR |
57 | }; | 57 | }; |
58 | 58 | ||
59 | static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, int32_t offset) | 59 | static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) |
60 | { | 60 | { |
61 | tile_bundle_bits result; | 61 | tile_bundle_bits result; |
62 | 62 | ||
@@ -254,6 +254,18 @@ P("\n"); | |||
254 | return bundle; | 254 | return bundle; |
255 | } | 255 | } |
256 | 256 | ||
257 | /* | ||
258 | * Called after execve() has started the new image. This allows us | ||
259 | * to reset the info state. Note that the mmap'ed memory, if there | ||
260 | * was any, has already been unmapped by the exec. | ||
261 | */ | ||
262 | void single_step_execve(void) | ||
263 | { | ||
264 | struct thread_info *ti = current_thread_info(); | ||
265 | kfree(ti->step_state); | ||
266 | ti->step_state = NULL; | ||
267 | } | ||
268 | |||
257 | /** | 269 | /** |
258 | * single_step_once() - entry point when single stepping has been triggered. | 270 | * single_step_once() - entry point when single stepping has been triggered. |
259 | * @regs: The machine register state | 271 | * @regs: The machine register state |
@@ -373,7 +385,7 @@ void single_step_once(struct pt_regs *regs) | |||
373 | /* branches */ | 385 | /* branches */ |
374 | case BRANCH_OPCODE_X1: | 386 | case BRANCH_OPCODE_X1: |
375 | { | 387 | { |
376 | int32_t offset = signExtend17(get_BrOff_X1(bundle)); | 388 | s32 offset = signExtend17(get_BrOff_X1(bundle)); |
377 | 389 | ||
378 | /* | 390 | /* |
379 | * For branches, we use a rewriting trick to let the | 391 | * For branches, we use a rewriting trick to let the |
@@ -731,4 +743,9 @@ void single_step_once(struct pt_regs *regs) | |||
731 | __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); | 743 | __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); |
732 | } | 744 | } |
733 | 745 | ||
746 | void single_step_execve(void) | ||
747 | { | ||
748 | /* Nothing */ | ||
749 | } | ||
750 | |||
734 | #endif /* !__tilegx__ */ | 751 | #endif /* !__tilegx__ */ |
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 9575b37a8b75..a4293102ef81 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c | |||
@@ -36,6 +36,22 @@ static unsigned long __iomem *ipi_mappings[NR_CPUS]; | |||
36 | /* Set by smp_send_stop() to avoid recursive panics. */ | 36 | /* Set by smp_send_stop() to avoid recursive panics. */ |
37 | static int stopping_cpus; | 37 | static int stopping_cpus; |
38 | 38 | ||
39 | static void __send_IPI_many(HV_Recipient *recip, int nrecip, int tag) | ||
40 | { | ||
41 | int sent = 0; | ||
42 | while (sent < nrecip) { | ||
43 | int rc = hv_send_message(recip, nrecip, | ||
44 | (HV_VirtAddr)&tag, sizeof(tag)); | ||
45 | if (rc < 0) { | ||
46 | if (!stopping_cpus) /* avoid recursive panic */ | ||
47 | panic("hv_send_message returned %d", rc); | ||
48 | break; | ||
49 | } | ||
50 | WARN_ONCE(rc == 0, "hv_send_message() returned zero\n"); | ||
51 | sent += rc; | ||
52 | } | ||
53 | } | ||
54 | |||
39 | void send_IPI_single(int cpu, int tag) | 55 | void send_IPI_single(int cpu, int tag) |
40 | { | 56 | { |
41 | HV_Recipient recip = { | 57 | HV_Recipient recip = { |
@@ -43,14 +59,13 @@ void send_IPI_single(int cpu, int tag) | |||
43 | .x = cpu % smp_width, | 59 | .x = cpu % smp_width, |
44 | .state = HV_TO_BE_SENT | 60 | .state = HV_TO_BE_SENT |
45 | }; | 61 | }; |
46 | int rc = hv_send_message(&recip, 1, (HV_VirtAddr)&tag, sizeof(tag)); | 62 | __send_IPI_many(&recip, 1, tag); |
47 | BUG_ON(rc <= 0); | ||
48 | } | 63 | } |
49 | 64 | ||
50 | void send_IPI_many(const struct cpumask *mask, int tag) | 65 | void send_IPI_many(const struct cpumask *mask, int tag) |
51 | { | 66 | { |
52 | HV_Recipient recip[NR_CPUS]; | 67 | HV_Recipient recip[NR_CPUS]; |
53 | int cpu, sent; | 68 | int cpu; |
54 | int nrecip = 0; | 69 | int nrecip = 0; |
55 | int my_cpu = smp_processor_id(); | 70 | int my_cpu = smp_processor_id(); |
56 | for_each_cpu(cpu, mask) { | 71 | for_each_cpu(cpu, mask) { |
@@ -61,17 +76,7 @@ void send_IPI_many(const struct cpumask *mask, int tag) | |||
61 | r->x = cpu % smp_width; | 76 | r->x = cpu % smp_width; |
62 | r->state = HV_TO_BE_SENT; | 77 | r->state = HV_TO_BE_SENT; |
63 | } | 78 | } |
64 | sent = 0; | 79 | __send_IPI_many(recip, nrecip, tag); |
65 | while (sent < nrecip) { | ||
66 | int rc = hv_send_message(recip, nrecip, | ||
67 | (HV_VirtAddr)&tag, sizeof(tag)); | ||
68 | if (rc <= 0) { | ||
69 | if (!stopping_cpus) /* avoid recursive panic */ | ||
70 | panic("hv_send_message returned %d", rc); | ||
71 | break; | ||
72 | } | ||
73 | sent += rc; | ||
74 | } | ||
75 | } | 80 | } |
76 | 81 | ||
77 | void send_IPI_allbutself(int tag) | 82 | void send_IPI_allbutself(int tag) |
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 0d54106be3d6..dd81713a90dc 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c | |||
@@ -44,13 +44,6 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, VirtualAddress sp) | |||
44 | return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; | 44 | return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; |
45 | } | 45 | } |
46 | 46 | ||
47 | /* Is address in the specified kernel code? */ | ||
48 | static int in_kernel_text(VirtualAddress address) | ||
49 | { | ||
50 | return (address >= MEM_SV_INTRPT && | ||
51 | address < MEM_SV_INTRPT + HPAGE_SIZE); | ||
52 | } | ||
53 | |||
54 | /* Is address valid for reading? */ | 47 | /* Is address valid for reading? */ |
55 | static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) | 48 | static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) |
56 | { | 49 | { |
@@ -63,6 +56,23 @@ static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) | |||
63 | if (l1_pgtable == NULL) | 56 | if (l1_pgtable == NULL) |
64 | return 0; /* can't read user space in other tasks */ | 57 | return 0; /* can't read user space in other tasks */ |
65 | 58 | ||
59 | #ifdef CONFIG_64BIT | ||
60 | /* Find the real l1_pgtable by looking in the l0_pgtable. */ | ||
61 | pte = l1_pgtable[HV_L0_INDEX(address)]; | ||
62 | if (!hv_pte_get_present(pte)) | ||
63 | return 0; | ||
64 | pfn = hv_pte_get_pfn(pte); | ||
65 | if (pte_huge(pte)) { | ||
66 | if (!pfn_valid(pfn)) { | ||
67 | pr_err("L0 huge page has bad pfn %#lx\n", pfn); | ||
68 | return 0; | ||
69 | } | ||
70 | return hv_pte_get_present(pte) && hv_pte_get_readable(pte); | ||
71 | } | ||
72 | page = pfn_to_page(pfn); | ||
73 | BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */ | ||
74 | l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); | ||
75 | #endif | ||
66 | pte = l1_pgtable[HV_L1_INDEX(address)]; | 76 | pte = l1_pgtable[HV_L1_INDEX(address)]; |
67 | if (!hv_pte_get_present(pte)) | 77 | if (!hv_pte_get_present(pte)) |
68 | return 0; | 78 | return 0; |
@@ -92,7 +102,7 @@ static bool read_memory_func(void *result, VirtualAddress address, | |||
92 | { | 102 | { |
93 | int retval; | 103 | int retval; |
94 | struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; | 104 | struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; |
95 | if (in_kernel_text(address)) { | 105 | if (__kernel_text_address(address)) { |
96 | /* OK to read kernel code. */ | 106 | /* OK to read kernel code. */ |
97 | } else if (address >= PAGE_OFFSET) { | 107 | } else if (address >= PAGE_OFFSET) { |
98 | /* We only tolerate kernel-space reads of this task's stack */ | 108 | /* We only tolerate kernel-space reads of this task's stack */ |
@@ -132,7 +142,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) | |||
132 | } | 142 | } |
133 | } | 143 | } |
134 | if (EX1_PL(p->ex1) == KERNEL_PL && | 144 | if (EX1_PL(p->ex1) == KERNEL_PL && |
135 | in_kernel_text(p->pc) && | 145 | __kernel_text_address(p->pc) && |
136 | in_kernel_stack(kbt, p->sp) && | 146 | in_kernel_stack(kbt, p->sp) && |
137 | p->sp >= sp) { | 147 | p->sp >= sp) { |
138 | if (kbt->verbose) | 148 | if (kbt->verbose) |
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index f2e156e44692..49a605be94c5 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c | |||
@@ -224,3 +224,13 @@ int setup_profiling_timer(unsigned int multiplier) | |||
224 | { | 224 | { |
225 | return -EINVAL; | 225 | return -EINVAL; |
226 | } | 226 | } |
227 | |||
228 | /* | ||
229 | * Use the tile timer to convert nsecs to core clock cycles, relying | ||
230 | * on it having the same frequency as SPR_CYCLE. | ||
231 | */ | ||
232 | cycles_t ns2cycles(unsigned long nsecs) | ||
233 | { | ||
234 | struct clock_event_device *dev = &__get_cpu_var(tile_timer); | ||
235 | return ((u64)nsecs * dev->mult) >> dev->shift; | ||
236 | } | ||
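ns2cycles() above reuses the tile clockevent's (mult, shift) pair, which encodes cycles per nanosecond as a fixed-point fraction, so the conversion is one 64-bit multiply and a shift: cycles ~= nsecs * mult / 2^shift. A self-contained sketch of that arithmetic (parameter names are illustrative):

#include <linux/types.h>

/* Fixed-point conversion: mult/shift are calibrated so that
 * mult / 2^shift is approximately clock cycles per nanosecond. */
static inline u64 ns_to_cycles(u64 nsecs, u32 mult, u32 shift)
{
        return (nsecs * mult) >> shift;
}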
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index c6ce378e0678..38f64fafdc10 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S | |||
@@ -59,10 +59,7 @@ SECTIONS | |||
59 | 59 | ||
60 | . = ALIGN(PAGE_SIZE); | 60 | . = ALIGN(PAGE_SIZE); |
61 | VMLINUX_SYMBOL(_sinitdata) = .; | 61 | VMLINUX_SYMBOL(_sinitdata) = .; |
62 | .init.page : AT (ADDR(.init.page) - LOAD_OFFSET) { | 62 | INIT_DATA_SECTION(16) :data =0 |
63 | *(.init.page) | ||
64 | } :data =0 | ||
65 | INIT_DATA_SECTION(16) | ||
66 | PERCPU(L2_CACHE_BYTES, PAGE_SIZE) | 63 | PERCPU(L2_CACHE_BYTES, PAGE_SIZE) |
67 | . = ALIGN(PAGE_SIZE); | 64 | . = ALIGN(PAGE_SIZE); |
68 | VMLINUX_SYMBOL(_einitdata) = .; | 65 | VMLINUX_SYMBOL(_einitdata) = .; |
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 93122d5b1558..0c26086ecbef 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile | |||
@@ -2,9 +2,8 @@ | |||
2 | # Makefile for TILE-specific library files.. | 2 | # Makefile for TILE-specific library files.. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y = cacheflush.o checksum.o cpumask.o delay.o \ | 5 | lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ |
6 | mb_incoherent.o uaccess.o memmove.o \ | 6 | memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ |
7 | memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ | ||
8 | strchr_$(BITS).o strlen_$(BITS).o | 7 | strchr_$(BITS).o strlen_$(BITS).o |
9 | 8 | ||
10 | ifeq ($(CONFIG_TILEGX),y) | 9 | ifeq ($(CONFIG_TILEGX),y) |
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 7a5cc706ab62..f02040d3614e 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c | |||
@@ -46,14 +46,13 @@ struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE] | |||
46 | #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ | 46 | #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ |
47 | 47 | ||
48 | /* This page is remapped on startup to be hash-for-home. */ | 48 | /* This page is remapped on startup to be hash-for-home. */ |
49 | int atomic_locks[PAGE_SIZE / sizeof(int) /* Only ATOMIC_HASH_SIZE is used */] | 49 | int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss; |
50 | __attribute__((aligned(PAGE_SIZE), section(".bss.page_aligned"))); | ||
51 | 50 | ||
52 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ | 51 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ |
53 | 52 | ||
54 | static inline int *__atomic_hashed_lock(volatile void *v) | 53 | static inline int *__atomic_hashed_lock(volatile void *v) |
55 | { | 54 | { |
56 | /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec.S */ | 55 | /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ |
57 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() | 56 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() |
58 | unsigned long i = | 57 | unsigned long i = |
59 | (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); | 58 | (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); |
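The atomic_locks declaration above swaps the hand-rolled section/alignment attributes for the generic __page_aligned_bss helper, the C-side counterpart of the __PAGE_ALIGNED_BSS marker used in head_32.S. It expands to roughly the following; the exact section name comes from <linux/linkage.h>, not this patch, so treat it as an assumption:

/* Approximate expansion, for orientation only: */
#define __page_aligned_bss \
        __attribute__((__section__(".bss..page_aligned"))) __aligned(PAGE_SIZE)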
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 5a5514b77e78..82f64cc63658 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S | |||
@@ -14,7 +14,7 @@ | |||
14 | * Support routines for atomic operations. Each function takes: | 14 | * Support routines for atomic operations. Each function takes: |
15 | * | 15 | * |
16 | * r0: address to manipulate | 16 | * r0: address to manipulate |
17 | * r1: pointer to atomic lock guarding this operation (for FUTEX_LOCK_REG) | 17 | * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG) |
18 | * r2: new value to write, or for cmpxchg/add_unless, value to compare against | 18 | * r2: new value to write, or for cmpxchg/add_unless, value to compare against |
19 | * r3: (cmpxchg/xchg_add_unless) new value to write or add; | 19 | * r3: (cmpxchg/xchg_add_unless) new value to write or add; |
20 | * (atomic64 ops) high word of value to write | 20 | * (atomic64 ops) high word of value to write |
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 11b6164c2097..35c1d8ca5f38 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c | |||
@@ -21,3 +21,105 @@ void __flush_icache_range(unsigned long start, unsigned long end) | |||
21 | { | 21 | { |
22 | invalidate_icache((const void *)start, end - start, PAGE_SIZE); | 22 | invalidate_icache((const void *)start, end - start, PAGE_SIZE); |
23 | } | 23 | } |
24 | |||
25 | |||
26 | /* Force a load instruction to issue. */ | ||
27 | static inline void force_load(char *p) | ||
28 | { | ||
29 | *(volatile char *)p; | ||
30 | } | ||
31 | |||
32 | /* | ||
33 | * Flush and invalidate a VA range that is homed remotely on a single | ||
34 | * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting | ||
35 | * until the memory controller holds the flushed values. | ||
36 | */ | ||
37 | void finv_buffer_remote(void *buffer, size_t size, int hfh) | ||
38 | { | ||
39 | char *p, *base; | ||
40 | size_t step_size, load_count; | ||
41 | const unsigned long STRIPE_WIDTH = 8192; | ||
42 | |||
43 | /* | ||
44 | * Flush and invalidate the buffer out of the local L1/L2 | ||
45 | * and request the home cache to flush and invalidate as well. | ||
46 | */ | ||
47 | __finv_buffer(buffer, size); | ||
48 | |||
49 | /* | ||
50 | * Wait for the home cache to acknowledge that it has processed | ||
51 | * all the flush-and-invalidate requests. This does not mean | ||
52 | * that the flushed data has reached the memory controller yet, | ||
53 | * but it does mean the home cache is processing the flushes. | ||
54 | */ | ||
55 | __insn_mf(); | ||
56 | |||
57 | /* | ||
58 | * Issue a load to the last cache line, which can't complete | ||
59 | * until all the previously-issued flushes to the same memory | ||
60 | * controller have also completed. If we weren't striping | ||
61 | * memory, that one load would be sufficient, but since we may | ||
62 | * be, we also need to back up to the last load issued to | ||
63 | * another memory controller, which would be the point where | ||
64 | * we crossed an 8KB boundary (the granularity of striping | ||
65 | * across memory controllers). Keep backing up and doing this | ||
66 | * until we are before the beginning of the buffer, or have | ||
67 | * hit all the controllers. | ||
68 | * | ||
69 | * If we are flushing a hash-for-home buffer, it's even worse. | ||
70 | * Each line may be homed on a different tile, and each tile | ||
71 | * may have up to four lines that are on different | ||
72 | * controllers. So as we walk backwards, we have to touch | ||
73 | * enough cache lines to satisfy these constraints. In | ||
74 | * practice this ends up being close enough to "load from | ||
75 | * every cache line on a full memory stripe on each | ||
76 | * controller" that we simply do that, to simplify the logic. | ||
77 | * | ||
78 | * FIXME: See bug 9535 for some issues with this code. | ||
79 | */ | ||
80 | if (hfh) { | ||
81 | step_size = L2_CACHE_BYTES; | ||
82 | load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * | ||
83 | (1 << CHIP_LOG_NUM_MSHIMS()); | ||
84 | } else { | ||
85 | step_size = STRIPE_WIDTH; | ||
86 | load_count = (1 << CHIP_LOG_NUM_MSHIMS()); | ||
87 | } | ||
88 | |||
89 | /* Load the last byte of the buffer. */ | ||
90 | p = (char *)buffer + size - 1; | ||
91 | force_load(p); | ||
92 | |||
93 | /* Bump down to the end of the previous stripe or cache line. */ | ||
94 | p -= step_size; | ||
95 | p = (char *)((unsigned long)p | (step_size - 1)); | ||
96 | |||
97 | /* Figure out how far back we need to go. */ | ||
98 | base = p - (step_size * (load_count - 2)); | ||
99 | if ((long)base < (long)buffer) | ||
100 | base = buffer; | ||
101 | |||
102 | /* | ||
103 | * Fire all the loads we need. The MAF only has eight entries | ||
104 | * so we can have at most eight outstanding loads, so we | ||
105 | * unroll by that amount. | ||
106 | */ | ||
107 | #pragma unroll 8 | ||
108 | for (; p >= base; p -= step_size) | ||
109 | force_load(p); | ||
110 | |||
111 | /* | ||
112 | * Repeat, but with inv's instead of loads, to get rid of the | ||
113 | * data we just loaded into our own cache and the old home L3. | ||
114 | * No need to unroll since inv's don't target a register. | ||
115 | */ | ||
116 | p = (char *)buffer + size - 1; | ||
117 | __insn_inv(p); | ||
118 | p -= step_size; | ||
119 | p = (char *)((unsigned long)p | (step_size - 1)); | ||
120 | for (; p >= base; p -= step_size) | ||
121 | __insn_inv(p); | ||
122 | |||
123 | /* Wait for the load+inv's (and thus finvs) to have completed. */ | ||
124 | __insn_mf(); | ||
125 | } | ||
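The walk-back in finv_buffer_remote() above is sized from two quantities: the 8 KB striping granularity across memory controllers and the number of controllers, with the hash-for-home case additionally touching every cache line of a stripe. Plugging in example numbers (64-byte L2 lines, 4 controllers; the real values come from L2_CACHE_BYTES and CHIP_LOG_NUM_MSHIMS()) shows the cost difference:

/* Worked numbers only; treat the constants as examples, not chip facts. */
enum { STRIPE = 8192, LINE = 64, MSHIMS = 4 };

static inline unsigned touch_count(int hfh)
{
        /* hash-for-home: every line of one stripe, on every controller;
         * single remote home: one line per stripe per controller. */
        return hfh ? (STRIPE / LINE) * MSHIMS   /* 512 loads */
                   : MSHIMS;                    /* 4 loads */
}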
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c index 5801b03c13ef..cdacdd11d360 100644 --- a/arch/tile/lib/delay.c +++ b/arch/tile/lib/delay.c | |||
@@ -15,20 +15,31 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
17 | #include <linux/thread_info.h> | 17 | #include <linux/thread_info.h> |
18 | #include <asm/fixmap.h> | 18 | #include <asm/timex.h> |
19 | #include <hv/hypervisor.h> | ||
20 | 19 | ||
21 | void __udelay(unsigned long usecs) | 20 | void __udelay(unsigned long usecs) |
22 | { | 21 | { |
23 | hv_nanosleep(usecs * 1000); | 22 | if (usecs > ULONG_MAX / 1000) { |
23 | WARN_ON_ONCE(usecs > ULONG_MAX / 1000); | ||
24 | usecs = ULONG_MAX / 1000; | ||
25 | } | ||
26 | __ndelay(usecs * 1000); | ||
24 | } | 27 | } |
25 | EXPORT_SYMBOL(__udelay); | 28 | EXPORT_SYMBOL(__udelay); |
26 | 29 | ||
27 | void __ndelay(unsigned long nsecs) | 30 | void __ndelay(unsigned long nsecs) |
28 | { | 31 | { |
29 | hv_nanosleep(nsecs); | 32 | cycles_t target = get_cycles(); |
33 | target += ns2cycles(nsecs); | ||
34 | while (get_cycles() < target) | ||
35 | cpu_relax(); | ||
30 | } | 36 | } |
31 | EXPORT_SYMBOL(__ndelay); | 37 | EXPORT_SYMBOL(__ndelay); |
32 | 38 | ||
33 | /* FIXME: should be declared in a header somewhere. */ | 39 | void __delay(unsigned long cycles) |
40 | { | ||
41 | cycles_t target = get_cycles() + cycles; | ||
42 | while (get_cycles() < target) | ||
43 | cpu_relax(); | ||
44 | } | ||
34 | EXPORT_SYMBOL(__delay); | 45 | EXPORT_SYMBOL(__delay); |
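The clamp added to __udelay() above exists because the usecs-to-nsecs multiply is done in unsigned long: on a 32-bit build, ULONG_MAX / 1000 is 4,294,967 us, so anything beyond roughly 4.29 seconds would wrap before reaching __ndelay(). A tiny illustration of that bound (helper name is hypothetical; assumes <linux/kernel.h> for ULONG_MAX):

/* Largest usecs value for which usecs * 1000 still fits in unsigned long. */
static inline unsigned long clamp_udelay_usecs(unsigned long usecs)
{
        return usecs > ULONG_MAX / 1000 ? ULONG_MAX / 1000 : usecs;
}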
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index 1509c5597653..49284fae9d09 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c | |||
@@ -29,6 +29,9 @@ EXPORT_SYMBOL(__put_user_8); | |||
29 | EXPORT_SYMBOL(strnlen_user_asm); | 29 | EXPORT_SYMBOL(strnlen_user_asm); |
30 | EXPORT_SYMBOL(strncpy_from_user_asm); | 30 | EXPORT_SYMBOL(strncpy_from_user_asm); |
31 | EXPORT_SYMBOL(clear_user_asm); | 31 | EXPORT_SYMBOL(clear_user_asm); |
32 | EXPORT_SYMBOL(flush_user_asm); | ||
33 | EXPORT_SYMBOL(inv_user_asm); | ||
34 | EXPORT_SYMBOL(finv_user_asm); | ||
32 | 35 | ||
33 | /* arch/tile/kernel/entry.S */ | 36 | /* arch/tile/kernel/entry.S */ |
34 | #include <linux/kernel.h> | 37 | #include <linux/kernel.h> |
@@ -45,9 +48,6 @@ EXPORT_SYMBOL(__copy_from_user_zeroing); | |||
45 | EXPORT_SYMBOL(__copy_in_user_inatomic); | 48 | EXPORT_SYMBOL(__copy_in_user_inatomic); |
46 | #endif | 49 | #endif |
47 | 50 | ||
48 | /* arch/tile/lib/mb_incoherent.S */ | ||
49 | EXPORT_SYMBOL(__mb_incoherent); | ||
50 | |||
51 | /* hypervisor glue */ | 51 | /* hypervisor glue */ |
52 | #include <hv/hypervisor.h> | 52 | #include <hv/hypervisor.h> |
53 | EXPORT_SYMBOL(hv_dev_open); | 53 | EXPORT_SYMBOL(hv_dev_open); |
@@ -85,4 +85,8 @@ int64_t __muldi3(int64_t, int64_t); | |||
85 | EXPORT_SYMBOL(__muldi3); | 85 | EXPORT_SYMBOL(__muldi3); |
86 | uint64_t __lshrdi3(uint64_t, unsigned int); | 86 | uint64_t __lshrdi3(uint64_t, unsigned int); |
87 | EXPORT_SYMBOL(__lshrdi3); | 87 | EXPORT_SYMBOL(__lshrdi3); |
88 | uint64_t __ashrdi3(uint64_t, unsigned int); | ||
89 | EXPORT_SYMBOL(__ashrdi3); | ||
90 | uint64_t __ashldi3(uint64_t, unsigned int); | ||
91 | EXPORT_SYMBOL(__ashldi3); | ||
88 | #endif | 92 | #endif |
diff --git a/arch/tile/lib/mb_incoherent.S b/arch/tile/lib/mb_incoherent.S deleted file mode 100644 index 989ad7b68d5a..000000000000 --- a/arch/tile/lib/mb_incoherent.S +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * Assembly code for invoking the HV's fence_incoherent syscall. | ||
15 | */ | ||
16 | |||
17 | #include <linux/linkage.h> | ||
18 | #include <hv/syscall_public.h> | ||
19 | #include <arch/abi.h> | ||
20 | #include <arch/chip.h> | ||
21 | |||
22 | #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() | ||
23 | |||
24 | /* | ||
25 | * Invoke the hypervisor's fence_incoherent syscall, which guarantees | ||
26 | * that all victims for cachelines homed on this tile have reached memory. | ||
27 | */ | ||
28 | STD_ENTRY(__mb_incoherent) | ||
29 | moveli TREG_SYSCALL_NR_NAME, HV_SYS_fence_incoherent | ||
30 | swint2 | ||
31 | jrp lr | ||
32 | STD_ENDPROC(__mb_incoherent) | ||
33 | |||
34 | #endif | ||
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c index f7d4a6ad61e8..b2fe15e01075 100644 --- a/arch/tile/lib/memcpy_tile64.c +++ b/arch/tile/lib/memcpy_tile64.c | |||
@@ -96,7 +96,7 @@ static void memcpy_multicache(void *dest, const void *source, | |||
96 | newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); | 96 | newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); |
97 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); | 97 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); |
98 | ptep = pte_offset_kernel(pmdp, newsrc); | 98 | ptep = pte_offset_kernel(pmdp, newsrc); |
99 | *ptep = src_pte; /* set_pte() would be confused by this */ | 99 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ |
100 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | 100 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
101 | 101 | ||
102 | /* Actually move the data. */ | 102 | /* Actually move the data. */ |
@@ -109,7 +109,7 @@ static void memcpy_multicache(void *dest, const void *source, | |||
109 | */ | 109 | */ |
110 | src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); | 110 | src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); |
111 | src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ | 111 | src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ |
112 | *ptep = src_pte; /* set_pte() would be confused by this */ | 112 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ |
113 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | 113 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
114 | 114 | ||
115 | /* | 115 | /* |
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 5cd1c4004eca..cb0999fb64b4 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
18 | #include <arch/spr_def.h> | ||
18 | 19 | ||
19 | #include "spinlock_common.h" | 20 | #include "spinlock_common.h" |
20 | 21 | ||
@@ -91,75 +92,75 @@ EXPORT_SYMBOL(arch_spin_unlock_wait); | |||
91 | #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) | 92 | #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) |
92 | 93 | ||
93 | 94 | ||
94 | /* Lock the word, spinning until there are no tns-ers. */ | 95 | /* |
95 | static inline u32 get_rwlock(arch_rwlock_t *rwlock) | 96 | * We can get the read lock if everything but the reader bits (which |
96 | { | 97 | * are in the high part of the word) is zero, i.e. no active or |
97 | u32 iterations = 0; | 98 | * waiting writers, no tns. |
98 | for (;;) { | 99 | * |
99 | u32 val = __insn_tns((int *)&rwlock->lock); | 100 | * We guard the tns/store-back with an interrupt critical section to |
100 | if (unlikely(val & 1)) { | 101 | * preserve the semantic that the same read lock can be acquired in an |
101 | delay_backoff(iterations++); | 102 | * interrupt context. |
102 | continue; | 103 | */ |
103 | } | 104 | inline int arch_read_trylock(arch_rwlock_t *rwlock) |
104 | return val; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | int arch_read_trylock_slow(arch_rwlock_t *rwlock) | ||
109 | { | ||
110 | u32 val = get_rwlock(rwlock); | ||
111 | int locked = (val << RD_COUNT_WIDTH) == 0; | ||
112 | rwlock->lock = val + (locked << RD_COUNT_SHIFT); | ||
113 | return locked; | ||
114 | } | ||
115 | EXPORT_SYMBOL(arch_read_trylock_slow); | ||
116 | |||
117 | void arch_read_unlock_slow(arch_rwlock_t *rwlock) | ||
118 | { | ||
119 | u32 val = get_rwlock(rwlock); | ||
120 | rwlock->lock = val - (1 << RD_COUNT_SHIFT); | ||
121 | } | ||
122 | EXPORT_SYMBOL(arch_read_unlock_slow); | ||
123 | |||
124 | void arch_write_unlock_slow(arch_rwlock_t *rwlock, u32 val) | ||
125 | { | 105 | { |
126 | u32 eq, mask = 1 << WR_CURR_SHIFT; | 106 | u32 val; |
127 | while (unlikely(val & 1)) { | 107 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); |
128 | /* Limited backoff since we are the highest-priority task. */ | 108 | val = __insn_tns((int *)&rwlock->lock); |
129 | relax(4); | 109 | if (likely((val << _RD_COUNT_WIDTH) == 0)) { |
130 | val = __insn_tns((int *)&rwlock->lock); | 110 | val += 1 << RD_COUNT_SHIFT; |
111 | rwlock->lock = val; | ||
112 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
113 | BUG_ON(val == 0); /* we don't expect wraparound */ | ||
114 | return 1; | ||
131 | } | 115 | } |
132 | val = __insn_addb(val, mask); | 116 | if ((val & 1) == 0) |
133 | eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); | 117 | rwlock->lock = val; |
134 | val = __insn_mz(eq & mask, val); | 118 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); |
135 | rwlock->lock = val; | 119 | return 0; |
136 | } | 120 | } |
137 | EXPORT_SYMBOL(arch_write_unlock_slow); | 121 | EXPORT_SYMBOL(arch_read_trylock); |
138 | 122 | ||
139 | /* | 123 | /* |
140 | * We spin until everything but the reader bits (which are in the high | 124 | * Spin doing arch_read_trylock() until we acquire the lock. |
141 | * part of the word) are zero, i.e. no active or waiting writers, no tns. | ||
142 | * | ||
143 | * ISSUE: This approach can permanently starve readers. A reader who sees | 125 | * ISSUE: This approach can permanently starve readers. A reader who sees |
144 | * a writer could instead take a ticket lock (just like a writer would), | 126 | * a writer could instead take a ticket lock (just like a writer would), |
145 | * and atomically enter read mode (with 1 reader) when it gets the ticket. | 127 | * and atomically enter read mode (with 1 reader) when it gets the ticket. |
146 | * This way both readers and writers will always make forward progress | 128 | * This way both readers and writers would always make forward progress |
147 | * in a finite time. | 129 | * in a finite time. |
148 | */ | 130 | */ |
149 | void arch_read_lock_slow(arch_rwlock_t *rwlock, u32 val) | 131 | void arch_read_lock(arch_rwlock_t *rwlock) |
150 | { | 132 | { |
151 | u32 iterations = 0; | 133 | u32 iterations = 0; |
152 | do { | 134 | while (unlikely(!arch_read_trylock(rwlock))) |
153 | if (!(val & 1)) | ||
154 | rwlock->lock = val; | ||
155 | delay_backoff(iterations++); | 135 | delay_backoff(iterations++); |
136 | } | ||
137 | EXPORT_SYMBOL(arch_read_lock); | ||
138 | |||
139 | void arch_read_unlock(arch_rwlock_t *rwlock) | ||
140 | { | ||
141 | u32 val, iterations = 0; | ||
142 | |||
143 | mb(); /* guarantee anything modified under the lock is visible */ | ||
144 | for (;;) { | ||
145 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); | ||
156 | val = __insn_tns((int *)&rwlock->lock); | 146 | val = __insn_tns((int *)&rwlock->lock); |
157 | } while ((val << RD_COUNT_WIDTH) != 0); | 147 | if (likely((val & 1) == 0)) { |
158 | rwlock->lock = val + (1 << RD_COUNT_SHIFT); | 148 | rwlock->lock = val - (1 << _RD_COUNT_SHIFT); |
149 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
150 | break; | ||
151 | } | ||
152 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
153 | delay_backoff(iterations++); | ||
154 | } | ||
159 | } | 155 | } |
160 | EXPORT_SYMBOL(arch_read_lock_slow); | 156 | EXPORT_SYMBOL(arch_read_unlock); |
161 | 157 | ||
162 | void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | 158 | /* |
159 | * We don't need an interrupt critical section here (unlike for | ||
160 | * arch_read_lock) since we should never use a bare write lock where | ||
161 | * it could be interrupted by code that could try to re-acquire it. | ||
162 | */ | ||
163 | void arch_write_lock(arch_rwlock_t *rwlock) | ||
163 | { | 164 | { |
164 | /* | 165 | /* |
165 | * The trailing underscore on this variable (and curr_ below) | 166 | * The trailing underscore on this variable (and curr_ below) |
@@ -168,6 +169,12 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | |||
168 | */ | 169 | */ |
169 | u32 my_ticket_; | 170 | u32 my_ticket_; |
170 | u32 iterations = 0; | 171 | u32 iterations = 0; |
172 | u32 val = __insn_tns((int *)&rwlock->lock); | ||
173 | |||
174 | if (likely(val == 0)) { | ||
175 | rwlock->lock = 1 << _WR_NEXT_SHIFT; | ||
176 | return; | ||
177 | } | ||
171 | 178 | ||
172 | /* | 179 | /* |
173 | * Wait until there are no readers, then bump up the next | 180 | * Wait until there are no readers, then bump up the next |
@@ -206,23 +213,47 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | |||
206 | relax(4); | 213 | relax(4); |
207 | } | 214 | } |
208 | } | 215 | } |
209 | EXPORT_SYMBOL(arch_write_lock_slow); | 216 | EXPORT_SYMBOL(arch_write_lock); |
210 | 217 | ||
211 | int __tns_atomic_acquire(atomic_t *lock) | 218 | int arch_write_trylock(arch_rwlock_t *rwlock) |
212 | { | 219 | { |
213 | int ret; | 220 | u32 val = __insn_tns((int *)&rwlock->lock); |
214 | u32 iterations = 0; | ||
215 | 221 | ||
216 | BUG_ON(__insn_mfspr(SPR_INTERRUPT_CRITICAL_SECTION)); | 222 | /* |
217 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); | 223 | * If a tns is in progress, or there's a waiting or active locker, |
224 | * or active readers, we can't take the lock, so give up. | ||
225 | */ | ||
226 | if (unlikely(val != 0)) { | ||
227 | if (!(val & 1)) | ||
228 | rwlock->lock = val; | ||
229 | return 0; | ||
230 | } | ||
218 | 231 | ||
219 | while ((ret = __insn_tns((void *)&lock->counter)) == 1) | 232 | /* Set the "next" field to mark it locked. */ |
220 | delay_backoff(iterations++); | 233 | rwlock->lock = 1 << _WR_NEXT_SHIFT; |
221 | return ret; | 234 | return 1; |
222 | } | 235 | } |
236 | EXPORT_SYMBOL(arch_write_trylock); | ||
223 | 237 | ||
224 | void __tns_atomic_release(atomic_t *p, int v) | 238 | void arch_write_unlock(arch_rwlock_t *rwlock) |
225 | { | 239 | { |
226 | p->counter = v; | 240 | u32 val, eq, mask; |
227 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | 241 | |
242 | mb(); /* guarantee anything modified under the lock is visible */ | ||
243 | val = __insn_tns((int *)&rwlock->lock); | ||
244 | if (likely(val == (1 << _WR_NEXT_SHIFT))) { | ||
245 | rwlock->lock = 0; | ||
246 | return; | ||
247 | } | ||
248 | while (unlikely(val & 1)) { | ||
249 | /* Limited backoff since we are the highest-priority task. */ | ||
250 | relax(4); | ||
251 | val = __insn_tns((int *)&rwlock->lock); | ||
252 | } | ||
253 | mask = 1 << WR_CURR_SHIFT; | ||
254 | val = __insn_addb(val, mask); | ||
255 | eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); | ||
256 | val = __insn_mz(eq & mask, val); | ||
257 | rwlock->lock = val; | ||
228 | } | 258 | } |
259 | EXPORT_SYMBOL(arch_write_unlock); | ||
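All of the rwlock paths above decode a single 32-bit word: bit 0 is the transient tns busy flag, the writer ticket fields sit just above it, and the reader count occupies the top RD_COUNT_WIDTH bits. That is why a reader may enter when (val << RD_COUNT_WIDTH) == 0 (nothing but reader bits set) while a writer needs the whole word to be zero, and why the read-side tns/store-back is wrapped in an interrupt critical section, so the same read lock can still be taken from interrupt context on the same cpu. A hedged sketch of the two admission tests (field widths come from the spinlock header, not this patch):

#include <linux/types.h>

static inline int reader_may_enter(u32 val, unsigned int rd_count_width)
{
        /* Low bits (tns flag plus writer tickets) must all be clear. */
        return (val << rd_count_width) == 0;
}

static inline int writer_may_enter(u32 val)
{
        /* Writers additionally need the reader count to be zero. */
        return val == 0;
}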
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index dcebfc831cd6..758f597f488c 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c | |||
@@ -655,14 +655,6 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, | |||
655 | } | 655 | } |
656 | 656 | ||
657 | /* | 657 | /* |
658 | * NOTE: the one other type of access that might bring us here | ||
659 | * are the memory ops in __tns_atomic_acquire/__tns_atomic_release, | ||
660 | * but we don't have to check specially for them since we can | ||
661 | * always safely return to the address of the fault and retry, | ||
662 | * since no separate atomic locks are involved. | ||
663 | */ | ||
664 | |||
665 | /* | ||
666 | * Now that we have released the atomic lock (if necessary), | 658 | * Now that we have released the atomic lock (if necessary), |
667 | * it's safe to spin if the PTE that caused the fault was migrating. | 659 | * it's safe to spin if the PTE that caused the fault was migrating. |
668 | */ | 660 | */ |
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index d78df3a6ee15..cbe6f4f9eca3 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c | |||
@@ -179,23 +179,46 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, | |||
179 | panic("Unsafe to continue."); | 179 | panic("Unsafe to continue."); |
180 | } | 180 | } |
181 | 181 | ||
182 | void flush_remote_page(struct page *page, int order) | ||
183 | { | ||
184 | int i, pages = (1 << order); | ||
185 | for (i = 0; i < pages; ++i, ++page) { | ||
186 | void *p = kmap_atomic(page); | ||
187 | int hfh = 0; | ||
188 | int home = page_home(page); | ||
189 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
190 | if (home == PAGE_HOME_HASH) | ||
191 | hfh = 1; | ||
192 | else | ||
193 | #endif | ||
194 | BUG_ON(home < 0 || home >= NR_CPUS); | ||
195 | finv_buffer_remote(p, PAGE_SIZE, hfh); | ||
196 | kunmap_atomic(p); | ||
197 | } | ||
198 | } | ||
199 | |||
182 | void homecache_evict(const struct cpumask *mask) | 200 | void homecache_evict(const struct cpumask *mask) |
183 | { | 201 | { |
184 | flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); | 202 | flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); |
185 | } | 203 | } |
186 | 204 | ||
187 | /* Return a mask of the cpus whose caches currently own these pages. */ | 205 | /* |
188 | static void homecache_mask(struct page *page, int pages, | 206 | * Return a mask of the cpus whose caches currently own these pages. |
189 | struct cpumask *home_mask) | 207 | * The return value is whether the pages are all coherently cached |
208 | * (i.e. none are immutable, incoherent, or uncached). | ||
209 | */ | ||
210 | static int homecache_mask(struct page *page, int pages, | ||
211 | struct cpumask *home_mask) | ||
190 | { | 212 | { |
191 | int i; | 213 | int i; |
214 | int cached_coherently = 1; | ||
192 | cpumask_clear(home_mask); | 215 | cpumask_clear(home_mask); |
193 | for (i = 0; i < pages; ++i) { | 216 | for (i = 0; i < pages; ++i) { |
194 | int home = page_home(&page[i]); | 217 | int home = page_home(&page[i]); |
195 | if (home == PAGE_HOME_IMMUTABLE || | 218 | if (home == PAGE_HOME_IMMUTABLE || |
196 | home == PAGE_HOME_INCOHERENT) { | 219 | home == PAGE_HOME_INCOHERENT) { |
197 | cpumask_copy(home_mask, cpu_possible_mask); | 220 | cpumask_copy(home_mask, cpu_possible_mask); |
198 | return; | 221 | return 0; |
199 | } | 222 | } |
200 | #if CHIP_HAS_CBOX_HOME_MAP() | 223 | #if CHIP_HAS_CBOX_HOME_MAP() |
201 | if (home == PAGE_HOME_HASH) { | 224 | if (home == PAGE_HOME_HASH) { |
@@ -203,11 +226,14 @@ static void homecache_mask(struct page *page, int pages, | |||
203 | continue; | 226 | continue; |
204 | } | 227 | } |
205 | #endif | 228 | #endif |
206 | if (home == PAGE_HOME_UNCACHED) | 229 | if (home == PAGE_HOME_UNCACHED) { |
230 | cached_coherently = 0; | ||
207 | continue; | 231 | continue; |
232 | } | ||
208 | BUG_ON(home < 0 || home >= NR_CPUS); | 233 | BUG_ON(home < 0 || home >= NR_CPUS); |
209 | cpumask_set_cpu(home, home_mask); | 234 | cpumask_set_cpu(home, home_mask); |
210 | } | 235 | } |
236 | return cached_coherently; | ||
211 | } | 237 | } |
212 | 238 | ||
213 | /* | 239 | /* |
@@ -386,7 +412,7 @@ void homecache_change_page_home(struct page *page, int order, int home) | |||
386 | pte_t *ptep = virt_to_pte(NULL, kva); | 412 | pte_t *ptep = virt_to_pte(NULL, kva); |
387 | pte_t pteval = *ptep; | 413 | pte_t pteval = *ptep; |
388 | BUG_ON(!pte_present(pteval) || pte_huge(pteval)); | 414 | BUG_ON(!pte_present(pteval) || pte_huge(pteval)); |
389 | *ptep = pte_set_home(pteval, home); | 415 | __set_pte(ptep, pte_set_home(pteval, home)); |
390 | } | 416 | } |
391 | } | 417 | } |
392 | 418 | ||
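flush_remote_page() walks every constituent page of an order-N allocation through kmap_atomic() so its home cache can be flushed and invalidated, and homecache_mask() now also reports whether all of the pages were coherently cached. A hedged sketch of how a routine inside homecache.c might combine the two pieces (example_flush() and its use of the return value are hypothetical, not part of this change):

static void example_flush(struct page *page, int npages)
{
	struct cpumask home_mask;
	int coherent = homecache_mask(page, npages, &home_mask);

	/* Evict the L2 caches of whichever cpus may own the pages. */
	flush_remote(0, HV_FLUSH_EVICT_L2, &home_mask, 0, 0, 0, NULL, NULL, 0);

	/*
	 * The return value is a hint: zero means at least one page was
	 * immutable, incoherent, or uncached (and in the first two cases
	 * home_mask was widened to every possible cpu).
	 */
	if (!coherent)
		pr_debug("example: pages not all coherently cached\n");
}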
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 0b9ce69b0ee5..d6e87fda2fb2 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c | |||
@@ -53,22 +53,11 @@ | |||
53 | 53 | ||
54 | #include "migrate.h" | 54 | #include "migrate.h" |
55 | 55 | ||
56 | /* | ||
57 | * We could set FORCE_MAX_ZONEORDER to "(HPAGE_SHIFT - PAGE_SHIFT + 1)" | ||
58 | * in the Tile Kconfig, but this generates configure warnings. | ||
59 | * Do it here and force people to get it right to compile this file. | ||
60 | * The problem is that with 4KB small pages and 16MB huge pages, | ||
61 | * the default value doesn't allow us to group enough small pages | ||
62 | * together to make up a huge page. | ||
63 | */ | ||
64 | #if CONFIG_FORCE_MAX_ZONEORDER < HPAGE_SHIFT - PAGE_SHIFT + 1 | ||
65 | # error "Change FORCE_MAX_ZONEORDER in arch/tile/Kconfig to match page size" | ||
66 | #endif | ||
67 | |||
68 | #define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0)) | 56 | #define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0)) |
69 | 57 | ||
70 | #ifndef __tilegx__ | 58 | #ifndef __tilegx__ |
71 | unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; | 59 | unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; |
60 | EXPORT_SYMBOL(VMALLOC_RESERVE); | ||
72 | #endif | 61 | #endif |
73 | 62 | ||
74 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 63 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
@@ -445,7 +434,7 @@ static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) | |||
445 | 434 | ||
446 | /* Temporary page table we use for staging. */ | 435 | /* Temporary page table we use for staging. */ |
447 | static pgd_t pgtables[PTRS_PER_PGD] | 436 | static pgd_t pgtables[PTRS_PER_PGD] |
448 | __attribute__((section(".init.page"))); | 437 | __attribute__((aligned(HV_PAGE_TABLE_ALIGN))); |
449 | 438 | ||
450 | /* | 439 | /* |
451 | * This maps the physical memory to kernel virtual address space, a total | 440 | * This maps the physical memory to kernel virtual address space, a total |
@@ -653,6 +642,17 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
653 | memcpy(pgd_base, pgtables, sizeof(pgtables)); | 642 | memcpy(pgd_base, pgtables, sizeof(pgtables)); |
654 | __install_page_table(pgd_base, __get_cpu_var(current_asid), | 643 | __install_page_table(pgd_base, __get_cpu_var(current_asid), |
655 | swapper_pgprot); | 644 | swapper_pgprot); |
645 | |||
646 | /* | ||
647 | * We just read swapper_pgprot and thus brought it into the cache, | ||
648 | * with its new home & caching mode. When we start the other CPUs, | ||
649 | * they're going to reference swapper_pgprot via their initial fake | ||
650 | * VA-is-PA mappings, which cache everything locally. At that | ||
651 | * time, if it's in our cache with a conflicting home, the | ||
652 | * simulator's coherence checker will complain. So, flush it out | ||
653 | * of our cache; we're not going to ever use it again anyway. | ||
654 | */ | ||
655 | __insn_finv(&swapper_pgprot); | ||
656 | } | 656 | } |
657 | 657 | ||
658 | /* | 658 | /* |
@@ -950,11 +950,7 @@ struct kmem_cache *pgd_cache; | |||
950 | 950 | ||
951 | void __init pgtable_cache_init(void) | 951 | void __init pgtable_cache_init(void) |
952 | { | 952 | { |
953 | pgd_cache = kmem_cache_create("pgd", | 953 | pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL); |
954 | PTRS_PER_PGD*sizeof(pgd_t), | ||
955 | PTRS_PER_PGD*sizeof(pgd_t), | ||
956 | 0, | ||
957 | NULL); | ||
958 | if (!pgd_cache) | 954 | if (!pgd_cache) |
959 | panic("pgtable_cache_init(): Cannot create pgd cache"); | 955 | panic("pgtable_cache_init(): Cannot create pgd cache"); |
960 | } | 956 | } |
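The simplified call above still follows the usual slab idiom, with SIZEOF_PGD serving as both the object size and its alignment. A small sketch of the matching allocation and free paths (the example_* names are made up; the real pgd_alloc()/pgd_free() for this architecture do additional bookkeeping):

static pgd_t *example_pgd_alloc(void)
{
	return kmem_cache_alloc(pgd_cache, GFP_KERNEL | __GFP_ZERO);
}

static void example_pgd_free(pgd_t *pgd)
{
	kmem_cache_free(pgd_cache, pgd);
}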
@@ -989,7 +985,7 @@ static long __write_once initfree = 1; | |||
989 | static int __init set_initfree(char *str) | 985 | static int __init set_initfree(char *str) |
990 | { | 986 | { |
991 | long val; | 987 | long val; |
992 | if (strict_strtol(str, 0, &val)) { | 988 | if (strict_strtol(str, 0, &val) == 0) { |
993 | initfree = val; | 989 | initfree = val; |
994 | pr_info("initfree: %s free init pages\n", | 990 | pr_info("initfree: %s free init pages\n", |
995 | initfree ? "will" : "won't"); | 991 | initfree ? "will" : "won't"); |
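The corrected test reflects that strict_strtol() returns 0 on success and a negative value on error, so the parsed result may only be used when the call succeeds. A minimal sketch of the idiom with a made-up boot parameter (not part of this change):

static long example_value;

static int __init example_setup(char *str)
{
	long val;

	if (strict_strtol(str, 0, &val) == 0)
		example_value = val;
	else
		pr_info("example=: could not parse '%s'\n", str);
	return 1;
}
__setup("example=", example_setup);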
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S index f738765cd1e6..ac01a7cdf77f 100644 --- a/arch/tile/mm/migrate_32.S +++ b/arch/tile/mm/migrate_32.S | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/linkage.h> | 18 | #include <linux/linkage.h> |
19 | #include <linux/threads.h> | 19 | #include <linux/threads.h> |
20 | #include <asm/page.h> | 20 | #include <asm/page.h> |
21 | #include <asm/thread_info.h> | ||
21 | #include <asm/types.h> | 22 | #include <asm/types.h> |
22 | #include <asm/asm-offsets.h> | 23 | #include <asm/asm-offsets.h> |
23 | #include <hv/hypervisor.h> | 24 | #include <hv/hypervisor.h> |
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 1f5430c53d0d..1a2b36f8866d 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c | |||
@@ -142,6 +142,76 @@ pte_t *_pte_offset_map(pmd_t *dir, unsigned long address) | |||
142 | } | 142 | } |
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | /** | ||
146 | * shatter_huge_page() - ensure a given address is mapped by a small page. | ||
147 | * | ||
148 | * This function converts a huge PTE mapping kernel LOWMEM into a bunch | ||
149 | * of small PTEs with the same caching. No cache flush required, but we | ||
150 | * must do a global TLB flush. | ||
151 | * | ||
152 | * Any caller that wishes to modify a kernel mapping that might | ||
153 | * have been made with a huge page should call this function, | ||
154 | * since doing so properly avoids race conditions with installing the | ||
155 | * newly-shattered page and then flushing all the TLB entries. | ||
156 | * | ||
157 | * @addr: Address at which to shatter any existing huge page. | ||
158 | */ | ||
159 | void shatter_huge_page(unsigned long addr) | ||
160 | { | ||
161 | pgd_t *pgd; | ||
162 | pud_t *pud; | ||
163 | pmd_t *pmd; | ||
164 | unsigned long flags = 0; /* happy compiler */ | ||
165 | #ifdef __PAGETABLE_PMD_FOLDED | ||
166 | struct list_head *pos; | ||
167 | #endif | ||
168 | |||
169 | /* Get a pointer to the pmd entry that we need to change. */ | ||
170 | addr &= HPAGE_MASK; | ||
171 | BUG_ON(pgd_addr_invalid(addr)); | ||
172 | BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */ | ||
173 | pgd = swapper_pg_dir + pgd_index(addr); | ||
174 | pud = pud_offset(pgd, addr); | ||
175 | BUG_ON(!pud_present(*pud)); | ||
176 | pmd = pmd_offset(pud, addr); | ||
177 | BUG_ON(!pmd_present(*pmd)); | ||
178 | if (!pmd_huge_page(*pmd)) | ||
179 | return; | ||
180 | |||
181 | /* | ||
182 | * Grab the pgd_lock, since we may need it to walk the pgd_list, | ||
183 | * and since we need some kind of lock here to avoid races. | ||
184 | */ | ||
185 | spin_lock_irqsave(&pgd_lock, flags); | ||
186 | if (!pmd_huge_page(*pmd)) { | ||
187 | /* Lost the race to convert the huge page. */ | ||
188 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
189 | return; | ||
190 | } | ||
191 | |||
192 | /* Shatter the huge page into the preallocated L2 page table. */ | ||
193 | pmd_populate_kernel(&init_mm, pmd, | ||
194 | get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); | ||
195 | |||
196 | #ifdef __PAGETABLE_PMD_FOLDED | ||
197 | /* Walk every pgd on the system and update the pmd there. */ | ||
198 | list_for_each(pos, &pgd_list) { | ||
199 | pmd_t *copy_pmd; | ||
200 | pgd = list_to_pgd(pos) + pgd_index(addr); | ||
201 | pud = pud_offset(pgd, addr); | ||
202 | copy_pmd = pmd_offset(pud, addr); | ||
203 | __set_pmd(copy_pmd, *pmd); | ||
204 | } | ||
205 | #endif | ||
206 | |||
207 | /* Tell every cpu to notice the change. */ | ||
208 | flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE, | ||
209 | cpu_possible_mask, NULL, 0); | ||
210 | |||
211 | /* Hold the lock until the TLB flush is finished to avoid races. */ | ||
212 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
213 | } | ||
214 | |||
145 | /* | 215 | /* |
146 | * List of all pgd's needed so it can invalidate entries in both cached | 216 | * List of all pgd's needed so it can invalidate entries in both cached |
147 | * and uncached pgd's. This is essentially codepath-based locking | 217 | * and uncached pgd's. This is essentially codepath-based locking |
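As the comment above says, shatter_huge_page() should be called before rewriting any small kernel PTE that may currently be covered by a 16MB huge mapping. A hypothetical caller sketch (example_remap_one_page() is not part of this change) that alters the protection of a single lowmem page:

static void example_remap_one_page(unsigned long kva, pgprot_t prot)
{
	pte_t *ptep;

	/* Make sure the address is mapped by small PTEs first. */
	shatter_huge_page(kva);

	/* Now only this one small PTE needs to be rewritten. */
	ptep = virt_to_pte(NULL, kva);
	__set_pte(ptep, pfn_pte(__pa(kva) >> PAGE_SHIFT, prot));

	/* Make every cpu drop its stale translation. */
	flush_tlb_all();
}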
@@ -184,9 +254,9 @@ static void pgd_ctor(pgd_t *pgd) | |||
184 | BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); | 254 | BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); |
185 | #endif | 255 | #endif |
186 | 256 | ||
187 | clone_pgd_range(pgd + KERNEL_PGD_INDEX_START, | 257 | memcpy(pgd + KERNEL_PGD_INDEX_START, |
188 | swapper_pg_dir + KERNEL_PGD_INDEX_START, | 258 | swapper_pg_dir + KERNEL_PGD_INDEX_START, |
189 | KERNEL_PGD_PTRS); | 259 | KERNEL_PGD_PTRS * sizeof(pgd_t)); |
190 | 260 | ||
191 | pgd_list_add(pgd); | 261 | pgd_list_add(pgd); |
192 | spin_unlock_irqrestore(&pgd_lock, flags); | 262 | spin_unlock_irqrestore(&pgd_lock, flags); |
@@ -220,8 +290,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
220 | 290 | ||
221 | struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | 291 | struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) |
222 | { | 292 | { |
223 | gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP; | 293 | gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; |
224 | struct page *p; | 294 | struct page *p; |
295 | #if L2_USER_PGTABLE_ORDER > 0 | ||
296 | int i; | ||
297 | #endif | ||
225 | 298 | ||
226 | #ifdef CONFIG_HIGHPTE | 299 | #ifdef CONFIG_HIGHPTE |
227 | flags |= __GFP_HIGHMEM; | 300 | flags |= __GFP_HIGHMEM; |
@@ -231,6 +304,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
231 | if (p == NULL) | 304 | if (p == NULL) |
232 | return NULL; | 305 | return NULL; |
233 | 306 | ||
307 | #if L2_USER_PGTABLE_ORDER > 0 | ||
308 | /* | ||
309 | * Make every page have a page_count() of one, not just the first. | ||
310 | * We don't use __GFP_COMP since it doesn't look like it works | ||
311 | * correctly with tlb_remove_page(). | ||
312 | */ | ||
313 | for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
314 | init_page_count(p+i); | ||
315 | inc_zone_page_state(p+i, NR_PAGETABLE); | ||
316 | } | ||
317 | #endif | ||
318 | |||
234 | pgtable_page_ctor(p); | 319 | pgtable_page_ctor(p); |
235 | return p; | 320 | return p; |
236 | } | 321 | } |
@@ -242,8 +327,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
242 | */ | 327 | */ |
243 | void pte_free(struct mm_struct *mm, struct page *p) | 328 | void pte_free(struct mm_struct *mm, struct page *p) |
244 | { | 329 | { |
330 | int i; | ||
331 | |||
245 | pgtable_page_dtor(p); | 332 | pgtable_page_dtor(p); |
246 | __free_pages(p, L2_USER_PGTABLE_ORDER); | 333 | __free_page(p); |
334 | |||
335 | for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
336 | __free_page(p+i); | ||
337 | dec_zone_page_state(p+i, NR_PAGETABLE); | ||
338 | } | ||
247 | } | 339 | } |
248 | 340 | ||
249 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, | 341 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, |
@@ -252,18 +344,11 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, | |||
252 | int i; | 344 | int i; |
253 | 345 | ||
254 | pgtable_page_dtor(pte); | 346 | pgtable_page_dtor(pte); |
255 | tlb->need_flush = 1; | 347 | tlb_remove_page(tlb, pte); |
256 | if (tlb_fast_mode(tlb)) { | 348 | |
257 | struct page *pte_pages[L2_USER_PGTABLE_PAGES]; | 349 | for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { |
258 | for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) | 350 | tlb_remove_page(tlb, pte + i); |
259 | pte_pages[i] = pte + i; | 351 | dec_zone_page_state(pte + i, NR_PAGETABLE); |
260 | free_pages_and_swap_cache(pte_pages, L2_USER_PGTABLE_PAGES); | ||
261 | return; | ||
262 | } | ||
263 | for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
264 | tlb->pages[tlb->nr++] = pte + i; | ||
265 | if (tlb->nr >= FREE_PTE_NR) | ||
266 | tlb_flush_mmu(tlb, 0, 0); | ||
267 | } | 352 | } |
268 | } | 353 | } |
269 | 354 | ||
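Taken together, pte_alloc_one(), pte_free(), and __pte_free_tlb() now treat the order-N page-table allocation as L2_USER_PGTABLE_PAGES individually refcounted pages instead of a __GFP_COMP compound page. The same pattern in isolation (illustrative only, with made-up names):

static struct page *example_alloc(int order)
{
	struct page *p = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
	int i;

	if (p == NULL)
		return NULL;

	/*
	 * Only the first page of a non-compound high-order allocation
	 * starts out with a reference count; give each following page
	 * one as well so they can be freed independently.
	 */
	for (i = 1; i < (1 << order); ++i)
		init_page_count(p + i);
	return p;
}

static void example_free(struct page *p, int order)
{
	int i;

	/* Each page carries its own count, so free them one by one. */
	for (i = 0; i < (1 << order); ++i)
		__free_page(p + i);
}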
@@ -346,35 +431,51 @@ int get_remote_cache_cpu(pgprot_t prot) | |||
346 | return x + y * smp_width; | 431 | return x + y * smp_width; |
347 | } | 432 | } |
348 | 433 | ||
349 | void set_pte_order(pte_t *ptep, pte_t pte, int order) | 434 | /* |
435 | * Convert a kernel VA to a PA and homing information. | ||
436 | */ | ||
437 | int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte) | ||
350 | { | 438 | { |
351 | unsigned long pfn = pte_pfn(pte); | 439 | struct page *page = virt_to_page(va); |
352 | struct page *page = pfn_to_page(pfn); | 440 | pte_t null_pte = { 0 }; |
353 | 441 | ||
354 | /* Update the home of a PTE if necessary */ | 442 | *cpa = __pa(va); |
355 | pte = pte_set_home(pte, page_home(page)); | ||
356 | 443 | ||
444 | /* Note that this is not writing a page table, just returning a pte. */ | ||
445 | *pte = pte_set_home(null_pte, page_home(page)); | ||
446 | |||
447 | return 0; /* return non-zero if not hfh? */ | ||
448 | } | ||
449 | EXPORT_SYMBOL(va_to_cpa_and_pte); | ||
450 | |||
451 | void __set_pte(pte_t *ptep, pte_t pte) | ||
452 | { | ||
357 | #ifdef __tilegx__ | 453 | #ifdef __tilegx__ |
358 | *ptep = pte; | 454 | *ptep = pte; |
359 | #else | 455 | #else |
360 | /* | 456 | # if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 |
361 | * When setting a PTE, write the high bits first, then write | 457 | # error Must write the present and migrating bits last |
362 | * the low bits. This sets the "present" bit only after the | 458 | # endif |
363 | * other bits are in place. If a particular PTE update | 459 | if (pte_present(pte)) { |
364 | * involves transitioning from one valid PTE to another, it | 460 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); |
365 | * may be necessary to call set_pte_order() more than once, | 461 | barrier(); |
366 | * transitioning via a suitable intermediate state. | 462 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); |
367 | * Note that this sequence also means that if we are transitioning | 463 | } else { |
368 | * from any migrating PTE to a non-migrating one, we will not | 464 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); |
369 | * see a half-updated PTE with the migrating bit off. | 465 | barrier(); |
370 | */ | 466 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); |
371 | #if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 | 467 | } |
372 | # error Must write the present and migrating bits last | 468 | #endif /* __tilegx__ */ |
373 | #endif | 469 | } |
374 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); | 470 | |
375 | barrier(); | 471 | void set_pte(pte_t *ptep, pte_t pte) |
376 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); | 472 | { |
377 | #endif | 473 | struct page *page = pfn_to_page(pte_pfn(pte)); |
474 | |||
475 | /* Update the home of a PTE if necessary */ | ||
476 | pte = pte_set_home(pte, page_home(page)); | ||
477 | |||
478 | __set_pte(ptep, pte); | ||
378 | } | 479 | } |
379 | 480 | ||
380 | /* Can this mm load a PTE with cached_priority set? */ | 481 | /* Can this mm load a PTE with cached_priority set? */ |