28 files changed, 34 insertions(+), 1121 deletions(-)
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index b2be42524483..6e1ed55f6cfc 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -207,7 +207,7 @@ config SYSVIPC_COMPAT
 	def_bool y
 	depends on COMPAT && SYSVIPC
 
-# We do not currently support disabling HIGHMEM on tile64 and tilepro.
+# We do not currently support disabling HIGHMEM on tilepro.
 config HIGHMEM
 	bool # "Support for more than 512 MB of RAM"
 	default !TILEGX
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index e7fb5cfb9597..96156f5ba640 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -252,21 +252,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
  * Internal definitions only beyond this point.
  */
 
-#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \
-	(!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP))
-
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-/* Number of entries in atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L1_SHIFT 6
-#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT)
-
-/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2)
-#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT)
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 /*
  * Number of atomic locks in atomic_locks[]. Must be a power of two.
  * There is no reason for more than PAGE_SIZE / 8 entries, since that
@@ -281,8 +266,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
 extern int atomic_locks[];
 #endif
 
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 /*
  * All the code that may fault while holding an atomic lock must
  * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code
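For reference, the scheme the header is left with: every 32-bit atomic on tilepro hashes, by address, to one lock word in the page-sized atomic_locks[] table, using address bits [3, 3 + ATOMIC_HASH_SHIFT) so that neighboring 8-byte objects spread across different locks. A minimal userspace sketch of the same pattern, assuming a pthread build; every name below is hypothetical, and only the index computation mirrors the kernel:

#include <stdint.h>
#include <pthread.h>

#define HASH_SHIFT 9                    /* kernel uses PAGE_SHIFT - 3 */
#define HASH_SIZE  (1 << HASH_SHIFT)    /* must stay a power of two */

static pthread_mutex_t locks[HASH_SIZE];

static void locks_init(void)
{
	for (int i = 0; i < HASH_SIZE; i++)
		pthread_mutex_init(&locks[i], NULL);
}

/* Pick the lock for an atomic object: bits [3, 3 + HASH_SHIFT) of
 * its address, so consecutive 8-byte objects use different locks. */
static pthread_mutex_t *hashed_lock(volatile void *v)
{
	uintptr_t idx = ((uintptr_t)v >> 3) & (HASH_SIZE - 1);
	return &locks[idx];
}

static int atomic_add_return_sketch(volatile int *v, int i)
{
	pthread_mutex_t *l = hashed_lock(v);
	pthread_mutex_lock(l);
	int ret = (*v += i);
	pthread_mutex_unlock(l);
	return ret;
}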
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
index 990a217a0b72..a9a73da5865d 100644
--- a/arch/tile/include/asm/barrier.h
+++ b/arch/tile/include/asm/barrier.h
@@ -77,7 +77,6 @@
 
 #define __sync() __insn_mf()
 
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
 #include <hv/syscall_public.h>
 /*
  * Issue an uncacheable load to each memory controller, then
@@ -96,7 +95,6 @@ static inline void __mb_incoherent(void)
 		      "r20", "r21", "r22", "r23", "r24",
 		      "r25", "r26", "r27", "r28", "r29");
 }
-#endif
 
 /* Fence to guarantee visibility of stores to incoherent memory. */
 static inline void
@@ -104,7 +102,6 @@ mb_incoherent(void)
 {
 	__insn_mf();
 
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
 	{
 #if CHIP_HAS_TILE_WRITE_PENDING()
 		const unsigned long WRITE_TIMEOUT_CYCLES = 400;
@@ -116,7 +113,6 @@ mb_incoherent(void)
 #endif /* CHIP_HAS_TILE_WRITE_PENDING() */
 		(void) __mb_incoherent();
 	}
-#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
 }
 
 #define fast_wmb() __sync()
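The mb_incoherent() hunks above keep the tilepro slow path: a plain mf orders accesses but does not wait for victim write-backs, so the code polls a write-pending status for a bounded number of cycles before falling back to uncacheable loads serviced by each memory controller. A sketch of that wait-then-drain shape, with hypothetical stand-ins for the SPR read, the cycle counter, and __mb_incoherent():

extern int writes_pending(void);               /* stand-in: SPR_TILE_WRITE_PENDING != 0 */
extern unsigned long cycles_low(void);         /* stand-in: low word of the cycle counter */
extern void drain_via_uncacheable_loads(void); /* stand-in: __mb_incoherent() */

static inline void wait_for_incoherent_writes(void)
{
	const unsigned long WRITE_TIMEOUT_CYCLES = 400;
	unsigned long start = cycles_low();

	/* Cheap poll first: most fences see the write buffer drain fast. */
	while (writes_pending()) {
		if (cycles_low() - start >= WRITE_TIMEOUT_CYCLES) {
			/* Give up polling and force the writes out. */
			drain_via_uncacheable_loads();
			return;
		}
	}
}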
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index e1da88e8aa9f..41d9878a9686 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t;
 #define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
-#define EM_TILE64 187
 #define EM_TILEPRO 188
 #define EM_TILEGX 191
 
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
index 49d19dfc0630..7ddd1b8d6910 100644
--- a/arch/tile/include/asm/homecache.h
+++ b/arch/tile/include/asm/homecache.h
@@ -33,8 +33,7 @@ struct zone;
 
 /*
  * Is this page immutable (unwritable) and thus able to be cached more
- * widely than would otherwise be possible?  On tile64 this means we
- * mark the PTE to cache locally; on tilepro it means we have "nc" set.
+ * widely than would otherwise be possible?  This means we have "nc" set.
  */
 #define PAGE_HOME_IMMUTABLE -2
 
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 5aa54319d2ef..42323636c459 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -113,18 +113,14 @@ struct thread_struct {
 	unsigned long intctrl_0;
 	/* Is this task currently doing a backtrace? */
 	bool in_backtrace;
-#if CHIP_HAS_PROC_STATUS_SPR()
 	/* Any other miscellaneous processor state bits */
 	unsigned long proc_status;
-#endif
 #if !CHIP_HAS_FIXED_INTVEC_BASE()
 	/* Interrupt base for PL0 interrupts */
 	unsigned long interrupt_vector_base;
 #endif
-#if CHIP_HAS_TILE_RTF_HWM()
 	/* Tile cache retry fifo high-water mark */
 	unsigned long tile_rtf_hwm;
-#endif
#if CHIP_HAS_DSTREAM_PF()
 	/* Data stream prefetch control */
 	unsigned long dstream_pf;
@@ -137,12 +133,6 @@ struct thread_struct {
 	/* Async DMA TLB fault information */
 	struct async_tlb dma_async_tlb;
 #endif
-#if CHIP_HAS_SN_PROC()
-	/* Was static network processor when we were switched out? */
-	int sn_proc_running;
-	/* Async SNI TLB fault information */
-	struct async_tlb sn_async_tlb;
-#endif
 };
 
 #endif /* !__ASSEMBLY__ */
@@ -286,7 +276,6 @@ extern char chip_model[64];
 /* Data on which physical memory controller corresponds to which NUMA node. */
 extern int node_controller[];
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 /* Does the heap allocator return hash-for-home pages by default? */
 extern int hash_default;
 
@@ -296,11 +285,6 @@ extern int kstack_hash;
 /* Does MAP_ANONYMOUS return hash-for-home pages by default? */
 #define uheap_hash hash_default
 
-#else
-#define hash_default 0
-#define kstack_hash 0
-#define uheap_hash 0
-#endif
 
 /* Are we using huge pages in the TLB for kernel data? */
 extern int kdata_huge;
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 1aa759aeb5b3..9a326b64f7ae 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -101,10 +101,8 @@ void print_disabled_cpus(void);
 extern struct cpumask cpu_lotar_map;
 #define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map)
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 /* Which processors are used for hash-for-home mapping */
 extern struct cpumask hash_for_home_map;
-#endif
 
 /* Which cpus can have their cache flushed by hv_flush_remote(). */
 extern struct cpumask cpu_cacheable_map;
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 5f172b2403a6..4b99a1c3aab2 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -21,7 +21,7 @@
 /* mm/fault.c */
 void do_page_fault(struct pt_regs *, int fault_num,
 		   unsigned long address, unsigned long write);
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
 void do_async_page_fault(struct pt_regs *);
 #endif
 
diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild
index 4ebc34f4768d..97dfbecec6b6 100644
--- a/arch/tile/include/uapi/arch/Kbuild
+++ b/arch/tile/include/uapi/arch/Kbuild
@@ -1,7 +1,6 @@
 # UAPI Header export list
 header-y += abi.h
 header-y += chip.h
-header-y += chip_tile64.h
 header-y += chip_tilegx.h
 header-y += chip_tilepro.h
 header-y += icache.h
diff --git a/arch/tile/include/uapi/arch/chip.h b/arch/tile/include/uapi/arch/chip.h
index 926d3db0e91e..4c91f90b9369 100644
--- a/arch/tile/include/uapi/arch/chip.h
+++ b/arch/tile/include/uapi/arch/chip.h
@@ -12,9 +12,7 @@
  * more details.
  */
 
-#if __tile_chip__ == 0
-#include <arch/chip_tile64.h>
-#elif __tile_chip__ == 1
+#if __tile_chip__ == 1
 #include <arch/chip_tilepro.h>
 #elif defined(__tilegx__)
 #include <arch/chip_tilegx.h>
diff --git a/arch/tile/include/uapi/arch/chip_tile64.h b/arch/tile/include/uapi/arch/chip_tile64.h
deleted file mode 100644
index 261aaba092d4..000000000000
--- a/arch/tile/include/uapi/arch/chip_tile64.h
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for
- * more details.
- */
-
-/*
- * @file
- * Global header file.
- * This header file specifies defines for TILE64.
- */
-
-#ifndef __ARCH_CHIP_H__
-#define __ARCH_CHIP_H__
-
-/** Specify chip version.
- * When possible, prefer the CHIP_xxx symbols below for future-proofing.
- * This is intended for cross-compiling; native compilation should
- * use the predefined __tile_chip__ symbol.
- */
-#define TILE_CHIP 0
-
-/** Specify chip revision.
- * This provides for the case of a respin of a particular chip type;
- * the normal value for this symbol is "0".
- * This is intended for cross-compiling; native compilation should
- * use the predefined __tile_chip_rev__ symbol.
- */
-#define TILE_CHIP_REV 0
-
-/** The name of this architecture. */
-#define CHIP_ARCH_NAME "tile64"
-
-/** The ELF e_machine type for binaries for this chip. */
-#define CHIP_ELF_TYPE() EM_TILE64
-
-/** The alternate ELF e_machine type for binaries for this chip. */
-#define CHIP_COMPAT_ELF_TYPE() 0x2506
-
-/** What is the native word size of the machine? */
-#define CHIP_WORD_SIZE() 32
-
-/** How many bits of a virtual address are used.  Extra bits must be
- * the sign extension of the low bits.
- */
-#define CHIP_VA_WIDTH() 32
-
-/** How many bits are in a physical address? */
-#define CHIP_PA_WIDTH() 36
-
-/** Size of the L2 cache, in bytes. */
-#define CHIP_L2_CACHE_SIZE() 65536
-
-/** Log size of an L2 cache line in bytes. */
-#define CHIP_L2_LOG_LINE_SIZE() 6
-
-/** Size of an L2 cache line, in bytes. */
-#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
-
-/** Associativity of the L2 cache. */
-#define CHIP_L2_ASSOC() 2
-
-/** Size of the L1 data cache, in bytes. */
-#define CHIP_L1D_CACHE_SIZE() 8192
-
-/** Log size of an L1 data cache line in bytes. */
-#define CHIP_L1D_LOG_LINE_SIZE() 4
-
-/** Size of an L1 data cache line, in bytes. */
-#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
-
-/** Associativity of the L1 data cache. */
-#define CHIP_L1D_ASSOC() 2
-
-/** Size of the L1 instruction cache, in bytes. */
-#define CHIP_L1I_CACHE_SIZE() 8192
-
-/** Log size of an L1 instruction cache line in bytes. */
-#define CHIP_L1I_LOG_LINE_SIZE() 6
-
-/** Size of an L1 instruction cache line, in bytes. */
-#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
-
-/** Associativity of the L1 instruction cache. */
-#define CHIP_L1I_ASSOC() 1
-
-/** Stride with which flush instructions must be issued. */
-#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
-
-/** Stride with which inv instructions must be issued. */
-#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE()
-
-/** Stride with which finv instructions must be issued. */
-#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE()
-
-/** Can the local cache coherently cache data that is homed elsewhere? */
-#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0
-
-/** How many simultaneous outstanding victims can the L2 cache have? */
-#define CHIP_MAX_OUTSTANDING_VICTIMS() 2
-
-/** Does the TLB support the NC and NOALLOC bits? */
-#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0
-
-/** Does the chip support hash-for-home caching? */
-#define CHIP_HAS_CBOX_HOME_MAP() 0
-
-/** Number of entries in the chip's home map tables. */
-/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */
-
-/** Do uncacheable requests miss in the cache regardless of whether
- * there is matching data? */
-#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0
-
-/** Does the mf instruction wait for victims? */
-#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1
-
-/** Does the chip have an "inv" instruction that doesn't also flush? */
-#define CHIP_HAS_INV() 0
-
-/** Does the chip have a "wh64" instruction? */
-#define CHIP_HAS_WH64() 0
-
-/** Does this chip have a 'dword_align' instruction? */
-#define CHIP_HAS_DWORD_ALIGN() 0
-
-/** Number of performance counters. */
-#define CHIP_PERFORMANCE_COUNTERS() 2
-
-/** Does this chip have auxiliary performance counters? */
-#define CHIP_HAS_AUX_PERF_COUNTERS() 0
-
-/** Is the CBOX_MSR1 SPR supported? */
-#define CHIP_HAS_CBOX_MSR1() 0
-
-/** Is the TILE_RTF_HWM SPR supported? */
-#define CHIP_HAS_TILE_RTF_HWM() 0
-
-/** Is the TILE_WRITE_PENDING SPR supported? */
-#define CHIP_HAS_TILE_WRITE_PENDING() 0
-
-/** Is the PROC_STATUS SPR supported? */
-#define CHIP_HAS_PROC_STATUS_SPR() 0
-
-/** Is the DSTREAM_PF SPR supported? */
-#define CHIP_HAS_DSTREAM_PF() 0
-
-/** Log of the number of mshims we have. */
-#define CHIP_LOG_NUM_MSHIMS() 2
-
-/** Are the bases of the interrupt vector areas fixed? */
-#define CHIP_HAS_FIXED_INTVEC_BASE() 1
-
-/** Are the interrupt masks split up into 2 SPRs? */
-#define CHIP_HAS_SPLIT_INTR_MASK() 1
-
-/** Is the cycle count split up into 2 SPRs? */
-#define CHIP_HAS_SPLIT_CYCLE() 1
-
-/** Does the chip have a static network? */
-#define CHIP_HAS_SN() 1
-
-/** Does the chip have a static network processor? */
-#define CHIP_HAS_SN_PROC() 1
-
-/** Size of the L1 static network processor instruction cache, in bytes. */
-#define CHIP_L1SNI_CACHE_SIZE() 2048
-
-/** Does the chip have DMA support in each tile? */
-#define CHIP_HAS_TILE_DMA() 1
-
-/** Does the chip have the second revision of the directly accessible
- * dynamic networks?  This encapsulates a number of characteristics,
- * including the absence of the catch-all, the absence of inline message
- * tags, the absence of support for network context-switching, and so on.
- */
-#define CHIP_HAS_REV1_XDN() 0
-
-/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
-#define CHIP_HAS_CMPEXCH() 0
-
-/** Does the chip have memory-mapped I/O support? */
-#define CHIP_HAS_MMIO() 0
-
-/** Does the chip have post-completion interrupts? */
-#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
-
-/** Does the chip have native single step support? */
-#define CHIP_HAS_SINGLE_STEP() 0
-
-#ifndef __OPEN_SOURCE__  /* features only relevant to hypervisor-level code */
-
-/** How many entries are present in the instruction TLB? */
-#define CHIP_ITLB_ENTRIES() 8
-
-/** How many entries are present in the data TLB? */
-#define CHIP_DTLB_ENTRIES() 16
-
-/** How many MAF entries does the XAUI shim have? */
-#define CHIP_XAUI_MAF_ENTRIES() 16
-
-/** Does the memory shim have a source-id table? */
-#define CHIP_HAS_MSHIM_SRCID_TABLE() 1
-
-/** Does the L1 instruction cache clear on reset? */
-#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0
-
-/** Does the chip come out of reset with valid coordinates on all tiles?
- * Note that if defined, this also implies that the upper left is 1,1.
- */
-#define CHIP_HAS_VALID_TILE_COORD_RESET() 0
-
-/** Does the chip have unified packet formats? */
-#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0
-
-/** Does the chip support write reordering? */
-#define CHIP_HAS_WRITE_REORDERING() 0
-
-/** Does the chip support Y-X routing as well as X-Y? */
-#define CHIP_HAS_Y_X_ROUTING() 0
-
-/** Is INTCTRL_3 managed with the correct MPL? */
-#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0
-
-/** Is it possible to configure the chip to be big-endian? */
-#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0
-
-/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
-#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
-
-/** Is the DIAG_TRACE_WAY SPR supported? */
-#define CHIP_HAS_DIAG_TRACE_WAY() 0
-
-/** Is the MEM_STRIPE_CONFIG SPR supported? */
-#define CHIP_HAS_MEM_STRIPE_CONFIG() 0
-
-/** Are the TLB_PERF SPRs supported? */
-#define CHIP_HAS_TLB_PERF() 0
-
-/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
-#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
-
-/** Does the chip support rev1 DMA packets? */
-#define CHIP_HAS_REV1_DMA_PACKETS() 0
-
-/** Does the chip have an IPI shim? */
-#define CHIP_HAS_IPI() 0
-
-#endif /* !__OPEN_SOURCE__ */
-#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/uapi/arch/spr_def_32.h b/arch/tile/include/uapi/arch/spr_def_32.h
index c689446e6284..78daa3146d25 100644
--- a/arch/tile/include/uapi/arch/spr_def_32.h
+++ b/arch/tile/include/uapi/arch/spr_def_32.h
@@ -200,8 +200,6 @@
 #define SPR_SIM_CONTROL 0x4e0c
 #define SPR_SNCTL 0x0805
 #define SPR_SNCTL__FRZFABRIC_MASK 0x1
-#define SPR_SNCTL__FRZPROC_MASK 0x2
-#define SPR_SNPC 0x080b
 #define SPR_SNSTATIC 0x080c
 #define SPR_SYSTEM_SAVE_0_0 0x4b00
 #define SPR_SYSTEM_SAVE_0_1 0x4b01
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index f084f1c7afde..088d5c141e68 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -32,12 +32,6 @@
 
 #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
 
-#if !CHIP_HAS_WH64()
-	/* By making this an empty macro, we can use wh64 in the code. */
-	.macro  wh64 reg
-	.endm
-#endif
-
 	.macro  push_reg reg, ptr=sp, delta=-4
 	{
 	 sw     \ptr, \reg
@@ -325,18 +319,14 @@ intvec_\vecname:
 	 movei  r3, -1   /* not used, but set for consistency */
 	}
 	.else
-#if CHIP_HAS_AUX_PERF_COUNTERS()
 	.ifc \c_routine, op_handle_aux_perf_interrupt
 	{
 	 mfspr  r2, AUX_PERF_COUNT_STS
 	 movei  r3, -1   /* not used, but set for consistency */
 	}
 	.else
-#endif
 	movei  r3, 0
-#if CHIP_HAS_AUX_PERF_COUNTERS()
 	.endif
-#endif
 	.endif
 	.endif
 	.endif
@@ -561,7 +551,6 @@ intvec_\vecname:
 	.endif
 	mtspr   INTERRUPT_CRITICAL_SECTION, zero
 
-#if CHIP_HAS_WH64()
 	/*
 	 * Prepare the first 256 stack bytes to be rapidly accessible
 	 * without having to fetch the background data.  We don't really
@@ -582,7 +571,6 @@ intvec_\vecname:
 	 addi   r52, r52, -64
 	}
 	wh64    r52
-#endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 	.ifnc \function,handle_nmi
@@ -1533,12 +1521,10 @@ STD_ENTRY(_sys_clone)
 	__HEAD
 	.align 64
 	/* Align much later jump on the start of a cache line. */
-#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
 	nop
 #if PAGE_SIZE >= 0x10000
 	nop
 #endif
-#endif
 ENTRY(sys_cmpxchg)
 
 	/*
@@ -1572,45 +1558,6 @@ ENTRY(sys_cmpxchg)
 # error Code here assumes PAGE_OFFSET can be loaded with just hi16()
 #endif
 
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	{
-	 /* Check for unaligned input. */
-	 bnz    sp, .Lcmpxchg_badaddr
-	 mm     r25, r0, zero, 3, PAGE_SHIFT-1
-	}
-	{
-	 crc32_32 r25, zero, r25
-	 moveli r21, lo16(atomic_lock_ptr)
-	}
-	{
-	 auli   r21, r21, ha16(atomic_lock_ptr)
-	 auli   r23, zero, hi16(PAGE_OFFSET)  /* hugepage-aligned */
-	}
-	{
-	 shri   r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
-	 slt_u  r23, r0, r23
-	 lw     r26, r0  /* see comment in the "#else" for the "lw r26". */
-	}
-	{
-	 s2a    r21, r20, r21
-	 bbns   r23, .Lcmpxchg_badaddr
-	}
-	{
-	 lw     r21, r21
-	 seqi   r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
-	 andi   r25, r25, ATOMIC_HASH_L2_SIZE - 1
-	}
-	{
-	 /* Branch away at this point if we're doing a 64-bit cmpxchg. */
-	 bbs    r23, .Lcmpxchg64
-	 andi   r23, r0, 7  /* Precompute alignment for cmpxchg64. */
-	}
-	{
-	 s2a    ATOMIC_LOCK_REG_NAME, r25, r21
-	 j      .Lcmpxchg32_tns  /* see comment in the #else for the jump. */
-	}
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 	{
 	 /* Check for unaligned input. */
 	 bnz    sp, .Lcmpxchg_badaddr
@@ -1635,12 +1582,9 @@ ENTRY(sys_cmpxchg)
 
 	 /*
 	  * Ensure that the TLB is loaded before we take out the lock.
-	  * On tilepro, this will start fetching the value all the way
-	  * into our L1 as well (and if it gets modified before we
-	  * grab the lock, it will be invalidated from our cache
-	  * before we reload it).  On tile64, we'll start fetching it
-	  * into our L1 if we're the home, and if we're not, we'll
-	  * still at least start fetching it into the home's L2.
+	  * This will start fetching the value all the way into our L1
+	  * as well (and if it gets modified before we grab the lock,
+	  * it will be invalidated from our cache before we reload it).
 	  */
 	 lw     r26, r0
 	}
@@ -1683,8 +1627,6 @@ ENTRY(sys_cmpxchg)
 	 j      .Lcmpxchg32_tns
 	}
 
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 /* Symbol for do_page_fault_ics() to use to compare against the PC. */
 .global __sys_cmpxchg_grab_lock
 __sys_cmpxchg_grab_lock:
@@ -1822,9 +1764,6 @@ __sys_cmpxchg_grab_lock:
 	.align 64
 .Lcmpxchg64:
 	{
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	 s2a    ATOMIC_LOCK_REG_NAME, r25, r21
-#endif
 	 bzt    r23, .Lcmpxchg64_tns
 	}
 	j       .Lcmpxchg_badaddr
@@ -1959,10 +1898,8 @@ int_unalign:
 	             do_page_fault
 	int_hand     INT_SN_CPL, SN_CPL, bad_intr
 	int_hand     INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
-#if CHIP_HAS_AUX_PERF_COUNTERS()
 	int_hand     INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
 		     op_handle_aux_perf_interrupt, handle_nmi
-#endif
 
 	/* Synthetic interrupt delivered only by the simulator */
 	int_hand     INT_BREAKPOINT, BREAKPOINT, do_breakpoint
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index c3a2335fa6a8..ec755d3f3734 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -511,12 +511,10 @@ intvec_\vecname:
 	.else
 	.ifc \c_routine, op_handle_perf_interrupt
 	mfspr   r2, PERF_COUNT_STS
-#if CHIP_HAS_AUX_PERF_COUNTERS()
 	.else
 	.ifc \c_routine, op_handle_aux_perf_interrupt
 	mfspr   r2, AUX_PERF_COUNT_STS
 	.endif
-#endif
 	.endif
 	.endif
 	.endif
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index 0e6c521b8a89..d8ba06058fd0 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(available_irqs_lock);
 
 /*
  * The interrupt handling path, implemented in terms of HV interrupt
- * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx.
+ * emulation on TILEPro, and IPI hardware on TILE-Gx.
  * Entered with interrupts disabled.
 */
 void tile_dev_intr(struct pt_regs *regs, int intnum)
@@ -235,7 +235,7 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type)
 {
 	/*
 	 * We use handle_level_irq() by default because the pending
-	 * interrupt vector (whether modeled by the HV on TILE64 and
+	 * interrupt vector (whether modeled by the HV on
 	 * TILEPro or implemented in hardware on TILE-Gx) has
 	 * level-style semantics for each bit.  An interrupt fires
 	 * whenever a bit is high, not just at edges.
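The comment retained above carries the design argument: each bit in the pending vector stays high while its source is asserted, so the flow handler must mask the bit before handling and unmask it afterwards, which is what handle_level_irq() does. A self-contained sketch of that discipline; all helpers below are hypothetical stand-ins, not kernel APIs:

extern unsigned long pending_bits(void); /* hypothetical: read the pending vector */
extern void mask_bit(int irq);           /* hypothetical: block re-delivery */
extern void unmask_bit(int irq);
extern void handle_irq(int irq);

static void dispatch_level(void)
{
	unsigned long pend = pending_bits();

	while (pend) {
		int irq = __builtin_ctzl(pend); /* lowest asserted bit */
		pend &= pend - 1;               /* clear it from our copy */
		mask_bit(irq);   /* level semantics: stop it re-firing... */
		handle_irq(irq); /* ...while the handler runs */
		unmask_bit(irq);
	}
}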
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 44cdc4aa59e8..16ed58948757 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -187,16 +187,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb));
 #endif
 
-#if CHIP_HAS_SN_PROC()
-	/* Likewise, the new thread is not running static processor code. */
-	p->thread.sn_proc_running = 0;
-	memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb));
-#endif
-
-#if CHIP_HAS_PROC_STATUS_SPR()
 	/* New thread has its miscellaneous processor state bits clear. */
 	p->thread.proc_status = 0;
-#endif
 
 #ifdef CONFIG_HARDWALL
 	/* New thread does not own any networks. */
@@ -378,15 +370,11 @@ static void save_arch_state(struct thread_struct *t)
 	t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2);
 	t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3);
 	t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS);
-#if CHIP_HAS_PROC_STATUS_SPR()
 	t->proc_status = __insn_mfspr(SPR_PROC_STATUS);
-#endif
 #if !CHIP_HAS_FIXED_INTVEC_BASE()
 	t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0);
 #endif
-#if CHIP_HAS_TILE_RTF_HWM()
 	t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM);
-#endif
 #if CHIP_HAS_DSTREAM_PF()
 	t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
 #endif
@@ -407,15 +395,11 @@ static void restore_arch_state(const struct thread_struct *t)
 	__insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]);
 	__insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]);
 	__insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0);
-#if CHIP_HAS_PROC_STATUS_SPR()
 	__insn_mtspr(SPR_PROC_STATUS, t->proc_status);
-#endif
 #if !CHIP_HAS_FIXED_INTVEC_BASE()
 	__insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base);
 #endif
-#if CHIP_HAS_TILE_RTF_HWM()
 	__insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm);
-#endif
 #if CHIP_HAS_DSTREAM_PF()
 	__insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf);
 #endif
@@ -424,26 +408,11 @@ static void restore_arch_state(const struct thread_struct *t)
 
 void _prepare_arch_switch(struct task_struct *next)
 {
-#if CHIP_HAS_SN_PROC()
-	int snctl;
-#endif
 #if CHIP_HAS_TILE_DMA()
 	struct tile_dma_state *dma = &current->thread.tile_dma_state;
 	if (dma->enabled)
 		save_tile_dma_state(dma);
 #endif
-#if CHIP_HAS_SN_PROC()
-	/*
-	 * Suspend the static network processor if it was running.
-	 * We do not suspend the fabric itself, just like we don't
-	 * try to suspend the UDN.
-	 */
-	snctl = __insn_mfspr(SPR_SNCTL);
-	current->thread.sn_proc_running =
-		(snctl & SPR_SNCTL__FRZPROC_MASK) == 0;
-	if (current->thread.sn_proc_running)
-		__insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK);
-#endif
 }
 
 
@@ -471,17 +440,6 @@ struct task_struct *__sched _switch_to(struct task_struct *prev,
 	/* Restore other arch state. */
 	restore_arch_state(&next->thread);
 
-#if CHIP_HAS_SN_PROC()
-	/*
-	 * Restart static network processor in the new process
-	 * if it was running before.
-	 */
-	if (next->thread.sn_proc_running) {
-		int snctl = __insn_mfspr(SPR_SNCTL);
-		__insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK);
-	}
-#endif
-
 #ifdef CONFIG_HARDWALL
 	/* Enable or disable access to the network registers appropriately. */
 	hardwall_switch_tasks(prev, next);
@@ -523,7 +481,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
 		schedule();
 		return 1;
 	}
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
 	if (thread_info_flags & _TIF_ASYNC_TLB) {
 		do_async_page_fault(regs);
 		return 1;
diff --git a/arch/tile/kernel/relocate_kernel_32.S b/arch/tile/kernel/relocate_kernel_32.S
index f7fd37b64a78..e44fbcf8cbd5 100644
--- a/arch/tile/kernel/relocate_kernel_32.S
+++ b/arch/tile/kernel/relocate_kernel_32.S
@@ -77,7 +77,6 @@ STD_ENTRY(relocate_new_kernel)
 	move	r30, sp
 	addi	sp, sp, -8
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 	/*
 	 * On TILEPro, we need to flush all tiles' caches, since we may
 	 * have been doing hash-for-home caching there.  Note that we
@@ -113,7 +112,6 @@ STD_ENTRY(relocate_new_kernel)
 	}
 
 	jalr	r20
-#endif
 
 	/* r33 is destination pointer, default to zero */
 
diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S
index 02bc44621021..d9d8cf6176e8 100644
--- a/arch/tile/kernel/relocate_kernel_64.S
+++ b/arch/tile/kernel/relocate_kernel_64.S
@@ -78,7 +78,6 @@ STD_ENTRY(relocate_new_kernel)
 	move	r30, sp
 	addi	sp, sp, -16
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 	/*
 	 * On TILE-GX, we need to flush all tiles' caches, since we may
 	 * have been doing hash-for-home caching there.  Note that we
@@ -116,7 +115,6 @@ STD_ENTRY(relocate_new_kernel)
 	shl16insli r20, r20, hw0(hv_flush_remote)
 
 	jalr	r20
-#endif
 
 	/* r33 is destination pointer, default to zero */
 
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index b79c312ca3cb..128a2d0b8650 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -1046,9 +1046,6 @@ void __cpuinit setup_cpu(int boot)
 	arch_local_irq_unmask(INT_DMATLB_MISS);
 	arch_local_irq_unmask(INT_DMATLB_ACCESS);
 #endif
-#if CHIP_HAS_SN_PROC()
-	arch_local_irq_unmask(INT_SNITLB_MISS);
-#endif
 #ifdef __tilegx__
 	arch_local_irq_unmask(INT_SINGLE_STEP_K);
 #endif
@@ -1063,10 +1060,6 @@ void __cpuinit setup_cpu(int boot)
 	/* Static network is not restricted. */
 	__insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1);
 #endif
-#if CHIP_HAS_SN_PROC()
-	__insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1);
-	__insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1);
-#endif
 
 	/*
 	 * Set the MPL for interrupt control 0 & 1 to the corresponding
@@ -1291,7 +1284,6 @@ static void __init validate_va(void)
 struct cpumask __write_once cpu_lotar_map;
 EXPORT_SYMBOL(cpu_lotar_map);
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 /*
 * hash_for_home_map lists all the tiles that hash-for-home data
 * will be cached on.  Note that this may includes tiles that are not
@@ -1301,7 +1293,6 @@ EXPORT_SYMBOL(cpu_lotar_map);
 */
 struct cpumask hash_for_home_map;
 EXPORT_SYMBOL(hash_for_home_map);
-#endif
 
 /*
 * cpu_cacheable_map lists all the cpus whose caches the hypervisor can
@@ -1394,7 +1385,6 @@ static void __init setup_cpu_maps(void)
 		cpu_lotar_map = *cpu_possible_mask;
 	}
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 	/* Retrieve set of CPUs used for hash-for-home caching */
 	rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE,
 			      (HV_VirtAddr) hash_for_home_map.bits,
@@ -1402,9 +1392,6 @@ static void __init setup_cpu_maps(void)
 	if (rc < 0)
 		early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc);
 	cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map);
-#else
-	cpu_cacheable_map = *cpu_possible_mask;
-#endif
 }
 
 
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 5ef2e9eae5c5..de07fa7d1315 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -546,7 +546,6 @@ void single_step_once(struct pt_regs *regs)
 		}
 		break;
 
-#if CHIP_HAS_WH64()
 	/* postincrement operations */
 	case IMM_0_OPCODE_X1:
 		switch (get_ImmOpcodeExtension_X1(bundle)) {
@@ -581,7 +580,6 @@ void single_step_once(struct pt_regs *regs)
 			break;
 		}
 		break;
-#endif /* CHIP_HAS_WH64() */
 	}
 
 	if (state->update) {
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 9adfd76fbdd8..c4211cbb2021 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,7 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
 	strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o
 
 lib-$(CONFIG_TILEGX) += memcpy_user_64.o
-lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o memcpy_tile64.o
+lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o
 lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o
 
 obj-$(CONFIG_MODULES) += exports.o
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 42eacb1f737a..5d91d1860640 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -20,50 +20,12 @@
 #include <linux/atomic.h>
 #include <arch/chip.h>
 
-/* See <asm/atomic_32.h> */
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-/*
- * A block of memory containing locks for atomic ops. Each instance of this
- * struct will be homed on a different CPU.
- */
-struct atomic_locks_on_cpu {
-	int lock[ATOMIC_HASH_L2_SIZE];
-} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4)));
-
-static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool);
-
-/* The locks we'll use until __init_atomic_per_cpu is called. */
-static struct atomic_locks_on_cpu __initdata initial_atomic_locks;
-
-/* Hash into this vector to get a pointer to lock for the given atomic. */
-struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
-	__write_once = {
-	[0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks)
-};
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 /* This page is remapped on startup to be hash-for-home. */
 int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
 
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 int *__atomic_hashed_lock(volatile void *v)
 {
 	/* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	unsigned long i =
-		(unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
-	unsigned long n = __insn_crc32_32(0, i);
-
-	/* Grab high bits for L1 index. */
-	unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT);
-	/* Grab low bits for L2 index. */
-	unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1);
-
-	return &atomic_lock_ptr[l1_index]->lock[l2_index];
-#else
 	/*
 	 * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
 	 * Using mm works here because atomic_locks is page aligned.
@@ -72,26 +34,13 @@ int *__atomic_hashed_lock(volatile void *v)
 			      (unsigned long)atomic_locks,
 			      2, (ATOMIC_HASH_SHIFT + 2) - 1);
 	return (int *)ptr;
-#endif
 }
 
 #ifdef CONFIG_SMP
 /* Return whether the passed pointer is a valid atomic lock pointer. */
 static int is_atomic_lock(int *p)
 {
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	int i;
-	for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
-
-		if (p >= &atomic_lock_ptr[i]->lock[0] &&
-		    p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) {
-			return 1;
-		}
-	}
-	return 0;
-#else
 	return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE];
-#endif
 }
 
 void __atomic_fault_unlock(int *irqlock_word)
@@ -210,43 +159,6 @@ struct __get_user __atomic_bad_address(int __user *addr)
 
 void __init __init_atomic_per_cpu(void)
 {
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-	unsigned int i;
-	int actual_cpu;
-
-	/*
-	 * Before this is called from setup, we just have one lock for
-	 * all atomic objects/operations.  Here we replace the
-	 * elements of atomic_lock_ptr so that they point at per_cpu
-	 * integers.  This seemingly over-complex approach stems from
-	 * the fact that DEFINE_PER_CPU defines an entry for each cpu
-	 * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1.  But
-	 * for efficient hashing of atomics to their locks we want a
-	 * compile time constant power of 2 for the size of this
-	 * table, so we use ATOMIC_HASH_SIZE.
-	 *
-	 * Here we populate atomic_lock_ptr from the per cpu
-	 * atomic_lock_pool, interspersing by actual cpu so that
-	 * subsequent elements are homed on consecutive cpus.
-	 */
-
-	actual_cpu = cpumask_first(cpu_possible_mask);
-
-	for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
-		/*
-		 * Preincrement to slightly bias against using cpu 0,
-		 * which has plenty of stuff homed on it already.
-		 */
-		actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask);
-		if (actual_cpu >= nr_cpu_ids)
-			actual_cpu = cpumask_first(cpu_possible_mask);
-
-		atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu);
-	}
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 	/* Validate power-of-two and "bigger than cpus" assumption */
 	BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
 	BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);
@@ -270,6 +182,4 @@ void __init __init_atomic_per_cpu(void)
 	 * That should not produce more indices than ATOMIC_HASH_SIZE.
 	 */
 	BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
-
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 }
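The three checks kept at the end of __init_atomic_per_cpu() pin down the surviving hash. As a worked instance, assuming the mainline definitions ATOMIC_HASH_SHIFT = PAGE_SHIFT - 3 and ATOMIC_HASH_SIZE = 1 << ATOMIC_HASH_SHIFT with 64 KB pages (PAGE_SHIFT = 16), the same invariants can be spelled out as compile-time asserts:

#include <assert.h>

#define PAGE_SHIFT        16                  /* assumed: 64 KB tilepro pages */
#define PAGE_SIZE         (1UL << PAGE_SHIFT)
#define ATOMIC_HASH_SHIFT (PAGE_SHIFT - 3)    /* assumed mainline value */
#define ATOMIC_HASH_SIZE  (1UL << ATOMIC_HASH_SHIFT)  /* = 8192 locks */

/* Power-of-two size, so "& (ATOMIC_HASH_SIZE - 1)" is a valid mask
 * (the first BUILD_BUG_ON above). */
static_assert((ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE - 1)) == 0,
	      "hash table size must be a power of two");

/* At least one lock per possible cpu (a tilepro grid has at most
 * 64 tiles), matching the BUG_ON against nr_cpu_ids. */
static_assert(ATOMIC_HASH_SIZE >= 64, "need one lock per possible cpu");

/* Hashing bits [3, 3 + ATOMIC_HASH_SHIFT) of an address yields at
 * most PAGE_SIZE >> 3 distinct indices, all inside atomic_locks[]
 * (the second BUILD_BUG_ON above). */
static_assert((PAGE_SIZE >> 3) <= ATOMIC_HASH_SIZE,
	      "8-byte-granular hash must fit the table");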
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index 8ba7626cfeb1..a2771ae5da53 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -22,14 +22,6 @@
 
 #include <linux/linkage.h>
 
-/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
-#define memcpy __memcpy_asm
-#define __copy_to_user_inatomic __copy_to_user_inatomic_asm
-#define __copy_from_user_inatomic __copy_from_user_inatomic_asm
-#define __copy_from_user_zeroing __copy_from_user_zeroing_asm
-#endif
-
 #define IS_MEMCPY 0
 #define IS_COPY_FROM_USER 1
 #define IS_COPY_FROM_USER_ZEROING 2
@@ -159,12 +151,9 @@ EX:	{ sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
 
 	{ addi r3, r1, 60; andi r9, r9, -64 }
 
-#if CHIP_HAS_WH64()
 	/* No need to prefetch dst, we'll just do the wh64
 	 * right before we copy a line.
 	 */
-#endif
-
 EX:	{ lw r5, r3; addi r3, r3, 64; movei r4, 1 }
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bnzt zero, .; move r27, lr }
@@ -172,21 +161,6 @@ EX:	{ lw r6, r3; addi r3, r3, 64 }
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bnzt zero, . }
 EX:	{ lw r7, r3; addi r3, r3, 64 }
-#if !CHIP_HAS_WH64()
-	/* Prefetch the dest */
-	/* Intentionally stall for a few cycles to leave L2 cache alone. */
-	{ bnzt zero, . }
-	/* Use a real load to cause a TLB miss if necessary.  We aren't using
-	 * r28, so this should be fine.
-	 */
-EX:	{ lw r28, r9; addi r9, r9, 64 }
-	/* Intentionally stall for a few cycles to leave L2 cache alone. */
-	{ bnzt zero, . }
-	{ prefetch r9; addi r9, r9, 64 }
-	/* Intentionally stall for a few cycles to leave L2 cache alone. */
-	{ bnzt zero, . }
-	{ prefetch r9; addi r9, r9, 64 }
-#endif
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bz zero, .Lbig_loop2 }
 
@@ -287,13 +261,8 @@ EX:	{ lw r7, r3; addi r3, r3, 64 }
 	/* Fill second L1D line. */
 EX:	{ lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
 
-#if CHIP_HAS_WH64()
 	/* Prepare destination line for writing. */
 EX:	{ wh64 r9; addi r9, r9, 64 }
-#else
-	/* Prefetch dest line */
-	{ prefetch r9; addi r9, r9, 64 }
-#endif
 	/* Load seven words that are L1D hits to cover wh64 L2 usage. */
 
 	/* Load the three remaining words from the last L1D line, which
@@ -331,16 +300,7 @@ EX:	{ lw r18, r1; addi r1, r1, 4 }  /* r18 = WORD_8 */
 EX:	{ sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
 EX:	{ sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
 EX:	{ sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
-#if CHIP_HAS_WH64()
 EX:	{ sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
-#else
-	/* Back up the r9 to a cache line we are already storing to
-	 * if it gets past the end of the dest vector.  Strictly speaking,
-	 * we don't need to back up to the start of a cache line, but it's free
-	 * and tidy, so why not?
-	 */
-EX:	{ sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */
-#endif
 	/* Store second L1D line. */
 EX:	{ sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
 EX:	{ sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */
@@ -404,7 +364,6 @@ EX:	{ sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
 
 .Ldest_is_word_aligned:
 
-#if CHIP_HAS_DWORD_ALIGN()
 EX:	{ andi r8, r0, 63; lwadd_na r6, r1, 4}
 	{ slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned }
 
@@ -512,26 +471,6 @@ EX:	{ swadd r0, r13, 4; addi r2, r2, -32 }
 	/* Move r1 back to the point where it corresponds to r0. */
 	{ addi r1, r1, -4 }
 
-#else /* !CHIP_HAS_DWORD_ALIGN() */
516 | |||
517 | /* Compute right/left shift counts and load initial source words. */ | ||
518 | { andi r5, r1, -4; andi r3, r1, 3 } | ||
519 | EX: { lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 } | ||
520 | EX: { lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 } | ||
521 | |||
522 | /* Load and store one word at a time, using shifts and ORs | ||
523 | * to correct for the misaligned src. | ||
524 | */ | ||
525 | .Lcopy_unaligned_src_loop: | ||
526 | { shr r6, r6, r3; shl r8, r7, r4 } | ||
527 | EX: { lw r7, r5; or r8, r8, r6; move r6, r7 } | ||
528 | EX: { sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 } | ||
529 | { addi r5, r5, 4; slti_u r8, r2, 8 } | ||
530 | { bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 } | ||
531 | |||
532 | { bz r2, .Lcopy_unaligned_done } | ||
533 | #endif /* !CHIP_HAS_DWORD_ALIGN() */ | ||
534 | |||
535 | /* Fall through */ | 474 | /* Fall through */ |
536 | 475 | ||
537 | /* | 476 | /* |
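
The larger deletion above is memcpy_32.S's fallback for chips without dword-align support: the .Lcopy_unaligned_src_loop loads aligned words and knits each adjacent pair together with a right shift, a left shift, and an OR. A C rendering of that technique, assuming little-endian byte order and a genuinely misaligned source (names here are illustrative):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy 'words' 32-bit words from a misaligned 'src' using only
 * aligned word loads, combining adjacent loads with shifts and ORs.
 * Assumes little-endian and (src & 3) != 0, like the deleted path. */
static void copy_unaligned_src(uint32_t *dst, const uint8_t *src, size_t words)
{
	const uint32_t *s = (const uint32_t *)((uintptr_t)src & ~(uintptr_t)3);
	unsigned rshift = ((uintptr_t)src & 3) * 8;
	unsigned lshift = 32 - rshift;
	uint32_t lo = *s++;

	assert(rshift != 0);	/* aligned sources take the fast path */

	while (words--) {
		uint32_t hi = *s++;	/* stays within the last aligned word */
		/* Tail of one aligned word plus head of the next
		 * synthesizes the misaligned word. */
		*dst++ = (lo >> rshift) | (hi << lshift);
		lo = hi;
	}
}

int main(void)
{
	uint8_t buf[16] = "0123456789abcdef";
	uint32_t out[3];

	copy_unaligned_src(out, buf + 1, 3);
	printf("%.12s\n", (char *)out);	/* prints "123456789abc" */
	return 0;
}
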
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c deleted file mode 100644 index 0290c222847b..000000000000 --- a/arch/tile/lib/memcpy_tile64.c +++ /dev/null | |||
@@ -1,280 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/string.h> | ||
16 | #include <linux/smp.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <asm/fixmap.h> | ||
20 | #include <asm/kmap_types.h> | ||
21 | #include <asm/tlbflush.h> | ||
22 | #include <hv/hypervisor.h> | ||
23 | #include <arch/chip.h> | ||
24 | |||
25 | |||
26 | #if !CHIP_HAS_COHERENT_LOCAL_CACHE() | ||
27 | |||
28 | /* Defined in memcpy.S */ | ||
29 | extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n); | ||
30 | extern unsigned long __copy_to_user_inatomic_asm( | ||
31 | void __user *to, const void *from, unsigned long n); | ||
32 | extern unsigned long __copy_from_user_inatomic_asm( | ||
33 | void *to, const void __user *from, unsigned long n); | ||
34 | extern unsigned long __copy_from_user_zeroing_asm( | ||
35 | void *to, const void __user *from, unsigned long n); | ||
36 | |||
37 | typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); | ||
38 | |||
39 | /* Size above which to consider TLB games for performance */ | ||
40 | #define LARGE_COPY_CUTOFF 2048 | ||
41 | |||
42 | /* Communicate to the simulator what we are trying to do. */ | ||
43 | #define sim_allow_multiple_caching(b) \ | ||
44 | __insn_mtspr(SPR_SIM_CONTROL, \ | ||
45 | SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS)) | ||
46 | |||
47 | /* | ||
48 | * Copy memory by briefly enabling incoherent cacheline-at-a-time mode. | ||
49 | * | ||
50 | * We set up our own source and destination PTEs that we fully control. | ||
51 | * This is the only way to guarantee that we don't race with another | ||
52 | * thread that is modifying the PTE; we can't afford to try the | ||
53 | * copy_{to,from}_user() technique of catching the interrupt, since | ||
54 | * we must run with interrupts disabled to avoid the risk of some | ||
55 | * other code seeing the incoherent data in our cache. (Recall that | ||
56 | * our cache is indexed by PA, so even if the other code doesn't use | ||
57 | * our kmap_atomic virtual addresses, they'll still hit in cache using | ||
58 | * the normal VAs that aren't supposed to hit in cache.) | ||
59 | */ | ||
60 | static void memcpy_multicache(void *dest, const void *source, | ||
61 | pte_t dst_pte, pte_t src_pte, int len) | ||
62 | { | ||
63 | int idx; | ||
64 | unsigned long flags, newsrc, newdst; | ||
65 | pmd_t *pmdp; | ||
66 | pte_t *ptep; | ||
67 | int type0, type1; | ||
68 | int cpu = smp_processor_id(); | ||
69 | |||
70 | /* | ||
71 | * Disable interrupts so that we don't recurse into memcpy() | ||
72 | * in an interrupt handler, nor accidentally reference | ||
73 | * the PA of the source from an interrupt routine. Also | ||
74 | * notify the simulator that we're playing games so we don't | ||
75 | * generate spurious coherency warnings. | ||
76 | */ | ||
77 | local_irq_save(flags); | ||
78 | sim_allow_multiple_caching(1); | ||
79 | |||
80 | /* Set up the new dest mapping */ | ||
81 | type0 = kmap_atomic_idx_push(); | ||
82 | idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; | ||
83 | newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); | ||
84 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); | ||
85 | ptep = pte_offset_kernel(pmdp, newdst); | ||
86 | if (pte_val(*ptep) != pte_val(dst_pte)) { | ||
87 | set_pte(ptep, dst_pte); | ||
88 | local_flush_tlb_page(NULL, newdst, PAGE_SIZE); | ||
89 | } | ||
90 | |||
91 | /* Set up the new source mapping */ | ||
92 | type1 = kmap_atomic_idx_push(); | ||
93 | idx += (type0 - type1); | ||
94 | src_pte = hv_pte_set_nc(src_pte); | ||
95 | src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */ | ||
96 | newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); | ||
97 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); | ||
98 | ptep = pte_offset_kernel(pmdp, newsrc); | ||
99 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ | ||
100 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | ||
101 | |||
102 | /* Actually move the data. */ | ||
103 | __memcpy_asm((void *)newdst, (const void *)newsrc, len); | ||
104 | |||
105 | /* | ||
106 | * Remap the source as locally-cached and not OLOC'ed so that | ||
107 | * we can inval without also invaling the remote cpu's cache. | ||
108 | * This also avoids known errata with inv'ing cacheable oloc data. | ||
109 | */ | ||
110 | src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); | ||
111 | src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ | ||
112 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ | ||
113 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | ||
114 | |||
115 | /* | ||
116 | * Do the actual invalidation, covering the full L2 cache line | ||
117 | * at the end since __memcpy_asm() is somewhat aggressive. | ||
118 | */ | ||
119 | __inv_buffer((void *)newsrc, len); | ||
120 | |||
121 | /* | ||
122 | * We're done: notify the simulator that all is back to normal, | ||
123 | * and re-enable interrupts and pre-emption. | ||
124 | */ | ||
125 | kmap_atomic_idx_pop(); | ||
126 | kmap_atomic_idx_pop(); | ||
127 | sim_allow_multiple_caching(0); | ||
128 | local_irq_restore(flags); | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * Identify large copies from remotely-cached memory, and copy them | ||
133 | * via memcpy_multicache() if they look good, otherwise fall back | ||
134 | * to the particular kind of copying passed as the memcpy_t function. | ||
135 | */ | ||
136 | static unsigned long fast_copy(void *dest, const void *source, int len, | ||
137 | memcpy_t func) | ||
138 | { | ||
139 | int cpu = get_cpu(); | ||
140 | unsigned long retval; | ||
141 | |||
142 | /* | ||
143 | * Check if it's big enough to bother with. We may end up doing a | ||
144 | * small copy via TLB manipulation if we're near a page boundary, | ||
145 | * but presumably we'll make it up when we hit the second page. | ||
146 | */ | ||
147 | while (len >= LARGE_COPY_CUTOFF) { | ||
148 | int copy_size, bytes_left_on_page; | ||
149 | pte_t *src_ptep, *dst_ptep; | ||
150 | pte_t src_pte, dst_pte; | ||
151 | struct page *src_page, *dst_page; | ||
152 | |||
153 | /* Is the source page oloc'ed to a remote cpu? */ | ||
154 | retry_source: | ||
155 | src_ptep = virt_to_pte(current->mm, (unsigned long)source); | ||
156 | if (src_ptep == NULL) | ||
157 | break; | ||
158 | src_pte = *src_ptep; | ||
159 | if (!hv_pte_get_present(src_pte) || | ||
160 | !hv_pte_get_readable(src_pte) || | ||
161 | hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3) | ||
162 | break; | ||
163 | if (get_remote_cache_cpu(src_pte) == cpu) | ||
164 | break; | ||
165 | src_page = pfn_to_page(pte_pfn(src_pte)); | ||
166 | get_page(src_page); | ||
167 | if (pte_val(src_pte) != pte_val(*src_ptep)) { | ||
168 | put_page(src_page); | ||
169 | goto retry_source; | ||
170 | } | ||
171 | if (pte_huge(src_pte)) { | ||
172 | /* Adjust the PTE to correspond to a small page */ | ||
173 | int pfn = pte_pfn(src_pte); | ||
174 | pfn += (((unsigned long)source & (HPAGE_SIZE-1)) | ||
175 | >> PAGE_SHIFT); | ||
176 | src_pte = pfn_pte(pfn, src_pte); | ||
177 | src_pte = pte_mksmall(src_pte); | ||
178 | } | ||
179 | |||
180 | /* Is the destination page writable? */ | ||
181 | retry_dest: | ||
182 | dst_ptep = virt_to_pte(current->mm, (unsigned long)dest); | ||
183 | if (dst_ptep == NULL) { | ||
184 | put_page(src_page); | ||
185 | break; | ||
186 | } | ||
187 | dst_pte = *dst_ptep; | ||
188 | if (!hv_pte_get_present(dst_pte) || | ||
189 | !hv_pte_get_writable(dst_pte)) { | ||
190 | put_page(src_page); | ||
191 | break; | ||
192 | } | ||
193 | dst_page = pfn_to_page(pte_pfn(dst_pte)); | ||
194 | if (dst_page == src_page) { | ||
195 | /* | ||
196 | * Source and dest are on the same page; this | ||
197 | * potentially exposes us to incoherence if any | ||
198 | * part of src and dest overlap on a cache line. | ||
199 | * Just give up rather than trying to be precise. | ||
200 | */ | ||
201 | put_page(src_page); | ||
202 | break; | ||
203 | } | ||
204 | get_page(dst_page); | ||
205 | if (pte_val(dst_pte) != pte_val(*dst_ptep)) { | ||
206 | put_page(dst_page); | ||
207 | goto retry_dest; | ||
208 | } | ||
209 | if (pte_huge(dst_pte)) { | ||
210 | /* Adjust the PTE to correspond to a small page */ | ||
211 | int pfn = pte_pfn(dst_pte); | ||
212 | pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) | ||
213 | >> PAGE_SHIFT); | ||
214 | dst_pte = pfn_pte(pfn, dst_pte); | ||
215 | dst_pte = pte_mksmall(dst_pte); | ||
216 | } | ||
217 | |||
218 | /* All looks good: create a cacheable PTE and copy from it */ | ||
219 | copy_size = len; | ||
220 | bytes_left_on_page = | ||
221 | PAGE_SIZE - (((int)source) & (PAGE_SIZE-1)); | ||
222 | if (copy_size > bytes_left_on_page) | ||
223 | copy_size = bytes_left_on_page; | ||
224 | bytes_left_on_page = | ||
225 | PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1)); | ||
226 | if (copy_size > bytes_left_on_page) | ||
227 | copy_size = bytes_left_on_page; | ||
228 | memcpy_multicache(dest, source, dst_pte, src_pte, copy_size); | ||
229 | |||
230 | /* Release the pages */ | ||
231 | put_page(dst_page); | ||
232 | put_page(src_page); | ||
233 | |||
234 | /* Continue on the next page */ | ||
235 | dest += copy_size; | ||
236 | source += copy_size; | ||
237 | len -= copy_size; | ||
238 | } | ||
239 | |||
240 | retval = func(dest, source, len); | ||
241 | put_cpu(); | ||
242 | return retval; | ||
243 | } | ||
244 | |||
245 | void *memcpy(void *to, const void *from, __kernel_size_t n) | ||
246 | { | ||
247 | if (n < LARGE_COPY_CUTOFF) | ||
248 | return (void *)__memcpy_asm(to, from, n); | ||
249 | else | ||
250 | return (void *)fast_copy(to, from, n, __memcpy_asm); | ||
251 | } | ||
252 | |||
253 | unsigned long __copy_to_user_inatomic(void __user *to, const void *from, | ||
254 | unsigned long n) | ||
255 | { | ||
256 | if (n < LARGE_COPY_CUTOFF) | ||
257 | return __copy_to_user_inatomic_asm(to, from, n); | ||
258 | else | ||
259 | return fast_copy(to, from, n, __copy_to_user_inatomic_asm); | ||
260 | } | ||
261 | |||
262 | unsigned long __copy_from_user_inatomic(void *to, const void __user *from, | ||
263 | unsigned long n) | ||
264 | { | ||
265 | if (n < LARGE_COPY_CUTOFF) | ||
266 | return __copy_from_user_inatomic_asm(to, from, n); | ||
267 | else | ||
268 | return fast_copy(to, from, n, __copy_from_user_inatomic_asm); | ||
269 | } | ||
270 | |||
271 | unsigned long __copy_from_user_zeroing(void *to, const void __user *from, | ||
272 | unsigned long n) | ||
273 | { | ||
274 | if (n < LARGE_COPY_CUTOFF) | ||
275 | return __copy_from_user_zeroing_asm(to, from, n); | ||
276 | else | ||
277 | return fast_copy(to, from, n, __copy_from_user_zeroing_asm); | ||
278 | } | ||
279 | |||
280 | #endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */ | ||
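
Two ideas in the deleted memcpy_tile64.c outlive TILE64: gate the expensive path on LARGE_COPY_CUTOFF, and clamp each iteration's chunk to the page boundaries of both source and destination so the PTE games stay within one page mapping at a time. A freestanding sketch of that control flow, with plain memcpy() standing in for the remapped __memcpy_asm() copy:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE		4096
#define LARGE_COPY_CUTOFF	2048	/* size gate from the deleted file */

/* Clamp a chunk so it ends at the first page boundary reached by
 * either the source or the destination, as fast_copy() did. */
static size_t clamp_to_pages(const void *dst, const void *src, size_t len)
{
	size_t left = PAGE_SIZE - ((uintptr_t)src & (PAGE_SIZE - 1));

	if (len > left)
		len = left;
	left = PAGE_SIZE - ((uintptr_t)dst & (PAGE_SIZE - 1));
	if (len > left)
		len = left;
	return len;
}

/* Size-gated dispatch: small copies skip the page walk entirely;
 * large ones proceed one page-bounded chunk at a time. */
static void *sketch_memcpy(void *dst, const void *src, size_t n)
{
	char *d = dst;
	const char *s = src;

	if (n < LARGE_COPY_CUTOFF)
		return memcpy(dst, src, n);

	while (n) {
		size_t chunk = clamp_to_pages(d, s, n);

		memcpy(d, s, chunk);	/* stand-in for the remapped copy */
		d += chunk;
		s += chunk;
		n -= chunk;
	}
	return dst;
}

int main(void)
{
	static char a[8192], b[8192];

	memset(a, 'x', sizeof(a));
	sketch_memcpy(b, a, sizeof(a));
	printf("%s\n", memcmp(a, b, sizeof(a)) == 0 ? "ok" : "mismatch");
	return 0;
}
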
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c index 9a7837d11f7d..2042bfe6595f 100644 --- a/arch/tile/lib/memset_32.c +++ b/arch/tile/lib/memset_32.c | |||
@@ -23,11 +23,7 @@ void *memset(void *s, int c, size_t n) | |||
23 | int n32; | 23 | int n32; |
24 | uint32_t v16, v32; | 24 | uint32_t v16, v32; |
25 | uint8_t *out8 = s; | 25 | uint8_t *out8 = s; |
26 | #if !CHIP_HAS_WH64() | ||
27 | int ahead32; | ||
28 | #else | ||
29 | int to_align32; | 26 | int to_align32; |
30 | #endif | ||
31 | 27 | ||
32 | /* Experimentation shows that a trivial tight loop is a win up until | 28 | /* Experimentation shows that a trivial tight loop is a win up until |
33 | * around a size of 20, where writing a word at a time starts to win. | 29 | * around a size of 20, where writing a word at a time starts to win. |
@@ -58,21 +54,6 @@ void *memset(void *s, int c, size_t n) | |||
58 | return s; | 54 | return s; |
59 | } | 55 | } |
60 | 56 | ||
61 | #if !CHIP_HAS_WH64() | ||
62 | /* Use a spare issue slot to start prefetching the first cache | ||
63 | * line early. This instruction is free as the store can be buried | ||
64 | * in otherwise idle issue slots doing ALU ops. | ||
65 | */ | ||
66 | __insn_prefetch(out8); | ||
67 | |||
68 | /* We prefetch the end so that a short memset that spans two cache | ||
69 | * lines gets some prefetching benefit. Again we believe this is free | ||
70 | * to issue. | ||
71 | */ | ||
72 | __insn_prefetch(&out8[n - 1]); | ||
73 | #endif /* !CHIP_HAS_WH64() */ | ||
74 | |||
75 | |||
76 | /* Align 'out8'. We know n >= 3 so this won't write past the end. */ | 57 | /* Align 'out8'. We know n >= 3 so this won't write past the end. */ |
77 | while (((uintptr_t) out8 & 3) != 0) { | 58 | while (((uintptr_t) out8 & 3) != 0) { |
78 | *out8++ = c; | 59 | *out8++ = c; |
@@ -93,90 +74,6 @@ void *memset(void *s, int c, size_t n) | |||
93 | /* This must be at least 8 or the following loop doesn't work. */ | 74 | /* This must be at least 8 or the following loop doesn't work. */ |
94 | #define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4) | 75 | #define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4) |
95 | 76 | ||
96 | #if !CHIP_HAS_WH64() | ||
97 | |||
98 | ahead32 = CACHE_LINE_SIZE_IN_WORDS; | ||
99 | |||
100 | /* We already prefetched the first and last cache lines, so | ||
101 | * we only need to do more prefetching if we are storing | ||
102 | * to more than two cache lines. | ||
103 | */ | ||
104 | if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) { | ||
105 | int i; | ||
106 | |||
107 | /* Prefetch the next several cache lines. | ||
108 | * This is the setup code for the software-pipelined | ||
109 | * loop below. | ||
110 | */ | ||
111 | #define MAX_PREFETCH 5 | ||
112 | ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS; | ||
113 | if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS) | ||
114 | ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS; | ||
115 | |||
116 | for (i = CACHE_LINE_SIZE_IN_WORDS; | ||
117 | i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS) | ||
118 | __insn_prefetch(&out32[i]); | ||
119 | } | ||
120 | |||
121 | if (n32 > ahead32) { | ||
122 | while (1) { | ||
123 | int j; | ||
124 | |||
125 | /* Prefetch by reading one word several cache lines | ||
126 | * ahead. Since loads are non-blocking this will | ||
127 | * cause the full cache line to be read while we are | ||
128 | * finishing earlier cache lines. Using a store | ||
129 | * here causes microarchitectural performance | ||
130 | * problems where a victimizing store miss goes to | ||
131 | * the head of the retry FIFO and locks the pipe for | ||
132 | * a few cycles. So a few subsequent stores in this | ||
133 | * loop go into the retry FIFO, and then later | ||
134 | * stores see that other stores to the same cache line | ||
135 | * are already in the retry FIFO and themselves go | ||
136 | * into the retry FIFO, filling it up and grinding | ||
137 | * to a halt waiting for the original miss to be | ||
138 | * satisfied. | ||
139 | */ | ||
140 | __insn_prefetch(&out32[ahead32]); | ||
141 | |||
142 | #if CACHE_LINE_SIZE_IN_WORDS % 4 != 0 | ||
143 | #error "Unhandled CACHE_LINE_SIZE_IN_WORDS" | ||
144 | #endif | ||
145 | |||
146 | n32 -= CACHE_LINE_SIZE_IN_WORDS; | ||
147 | |||
148 | /* Save icache space by only partially unrolling | ||
149 | * this loop. | ||
150 | */ | ||
151 | for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) { | ||
152 | *out32++ = v32; | ||
153 | *out32++ = v32; | ||
154 | *out32++ = v32; | ||
155 | *out32++ = v32; | ||
156 | } | ||
157 | |||
158 | /* To save compiled code size, reuse this loop even | ||
159 | * when we run out of prefetching to do by dropping | ||
160 | * ahead32 down. | ||
161 | */ | ||
162 | if (n32 <= ahead32) { | ||
163 | /* Not even a full cache line left, | ||
164 | * so stop now. | ||
165 | */ | ||
166 | if (n32 < CACHE_LINE_SIZE_IN_WORDS) | ||
167 | break; | ||
168 | |||
169 | /* Choose a small enough value that we don't | ||
170 | * prefetch past the end. There's no sense | ||
171 | * in touching cache lines we don't have to. | ||
172 | */ | ||
173 | ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1; | ||
174 | } | ||
175 | } | ||
176 | } | ||
177 | |||
178 | #else /* CHIP_HAS_WH64() */ | ||
179 | |||
180 | /* Determine how many words we need to emit before the 'out32' | 77 | /* Determine how many words we need to emit before the 'out32' |
181 | * pointer becomes aligned modulo the cache line size. | 78 | * pointer becomes aligned modulo the cache line size. |
182 | */ | 79 | */ |
@@ -233,8 +130,6 @@ void *memset(void *s, int c, size_t n) | |||
233 | n32 &= CACHE_LINE_SIZE_IN_WORDS - 1; | 130 | n32 &= CACHE_LINE_SIZE_IN_WORDS - 1; |
234 | } | 131 | } |
235 | 132 | ||
236 | #endif /* CHIP_HAS_WH64() */ | ||
237 | |||
238 | /* Now handle any leftover values. */ | 133 | /* Now handle any leftover values. */ |
239 | if (n32 != 0) { | 134 | if (n32 != 0) { |
240 | do { | 135 | do { |
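
With the prefetch and wh64 variants collapsed, memset_32.c keeps a simple shape: a trivial loop for tiny sizes, byte stores to word-align the pointer, a word-at-a-time body storing a byte replicated across the word, then a byte tail. A portable sketch of that shape (the size-20 threshold is taken from the original's comment):

#include <stdint.h>
#include <stdio.h>

/* Portable sketch of the memset_32.c structure, minus the cache games. */
static void *sketch_memset(void *s, int c, size_t n)
{
	uint8_t *out8 = s;
	uint32_t v32 = (uint8_t)c * 0x01010101u;	/* replicate byte to word */
	uint32_t *out32;

	/* A trivial tight loop wins for small sizes (around 20 bytes
	 * per the measurement cited in the original). */
	if (n < 20) {
		while (n--)
			*out8++ = (uint8_t)c;
		return s;
	}

	/* Align 'out8'; n is large enough that this cannot overrun. */
	while ((uintptr_t)out8 & 3) {
		*out8++ = (uint8_t)c;
		n--;
	}

	/* Word-at-a-time body. */
	out32 = (uint32_t *)out8;
	while (n >= 4) {
		*out32++ = v32;
		n -= 4;
	}

	/* Byte tail for any leftover values. */
	out8 = (uint8_t *)out32;
	while (n--)
		*out8++ = (uint8_t)c;
	return s;
}

int main(void)
{
	char buf[64];

	sketch_memset(buf, 0xab, sizeof(buf));
	printf("%d\n", (uint8_t)buf[63]);	/* prints 171 */
	return 0;
}
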
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 39c48cbe0a96..111d5a9b76f1 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c | |||
@@ -466,28 +466,15 @@ good_area: | |||
466 | } | 466 | } |
467 | } | 467 | } |
468 | 468 | ||
469 | #if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() | ||
470 | /* | ||
471 | * If this was an asynchronous fault, | ||
472 | * restart the appropriate engine. | ||
473 | */ | ||
474 | switch (fault_num) { | ||
475 | #if CHIP_HAS_TILE_DMA() | 469 | #if CHIP_HAS_TILE_DMA() |
470 | /* If this was a DMA TLB fault, restart the DMA engine. */ | ||
471 | switch (fault_num) { | ||
476 | case INT_DMATLB_MISS: | 472 | case INT_DMATLB_MISS: |
477 | case INT_DMATLB_MISS_DWNCL: | 473 | case INT_DMATLB_MISS_DWNCL: |
478 | case INT_DMATLB_ACCESS: | 474 | case INT_DMATLB_ACCESS: |
479 | case INT_DMATLB_ACCESS_DWNCL: | 475 | case INT_DMATLB_ACCESS_DWNCL: |
480 | __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); | 476 | __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); |
481 | break; | 477 | break; |
482 | #endif | ||
483 | #if CHIP_HAS_SN_PROC() | ||
484 | case INT_SNITLB_MISS: | ||
485 | case INT_SNITLB_MISS_DWNCL: | ||
486 | __insn_mtspr(SPR_SNCTL, | ||
487 | __insn_mfspr(SPR_SNCTL) & | ||
488 | ~SPR_SNCTL__FRZPROC_MASK); | ||
489 | break; | ||
490 | #endif | ||
491 | } | 478 | } |
492 | #endif | 479 | #endif |
493 | 480 | ||
@@ -804,10 +791,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num, | |||
804 | case INT_DMATLB_MISS: | 791 | case INT_DMATLB_MISS: |
805 | case INT_DMATLB_MISS_DWNCL: | 792 | case INT_DMATLB_MISS_DWNCL: |
806 | #endif | 793 | #endif |
807 | #if CHIP_HAS_SN_PROC() | ||
808 | case INT_SNITLB_MISS: | ||
809 | case INT_SNITLB_MISS_DWNCL: | ||
810 | #endif | ||
811 | is_page_fault = 1; | 794 | is_page_fault = 1; |
812 | break; | 795 | break; |
813 | 796 | ||
@@ -823,7 +806,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num, | |||
823 | panic("Bad fault number %d in do_page_fault", fault_num); | 806 | panic("Bad fault number %d in do_page_fault", fault_num); |
824 | } | 807 | } |
825 | 808 | ||
826 | #if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() | 809 | #if CHIP_HAS_TILE_DMA() |
827 | if (!user_mode(regs)) { | 810 | if (!user_mode(regs)) { |
828 | struct async_tlb *async; | 811 | struct async_tlb *async; |
829 | switch (fault_num) { | 812 | switch (fault_num) { |
@@ -835,12 +818,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num, | |||
835 | async = ¤t->thread.dma_async_tlb; | 818 | async = ¤t->thread.dma_async_tlb; |
836 | break; | 819 | break; |
837 | #endif | 820 | #endif |
838 | #if CHIP_HAS_SN_PROC() | ||
839 | case INT_SNITLB_MISS: | ||
840 | case INT_SNITLB_MISS_DWNCL: | ||
841 | async = ¤t->thread.sn_async_tlb; | ||
842 | break; | ||
843 | #endif | ||
844 | default: | 821 | default: |
845 | async = NULL; | 822 | async = NULL; |
846 | } | 823 | } |
@@ -873,14 +850,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num, | |||
873 | } | 850 | } |
874 | 851 | ||
875 | 852 | ||
876 | #if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() | 853 | #if CHIP_HAS_TILE_DMA() |
877 | /* | 854 | /* |
878 | * Check an async_tlb structure to see if a deferred fault is waiting, | 855 | * This routine effectively re-issues asynchronous page faults |
879 | * and if so pass it to the page-fault code. | 856 | * when we are returning to user space. |
880 | */ | 857 | */ |
881 | static void handle_async_page_fault(struct pt_regs *regs, | 858 | void do_async_page_fault(struct pt_regs *regs) |
882 | struct async_tlb *async) | ||
883 | { | 859 | { |
860 | struct async_tlb *async = ¤t->thread.dma_async_tlb; | ||
861 | |||
862 | /* | ||
863 | * Clear thread flag early. If we re-interrupt while processing | ||
864 | * code here, we will reset it and recall this routine before | ||
865 | * returning to user space. | ||
866 | */ | ||
867 | clear_thread_flag(TIF_ASYNC_TLB); | ||
868 | |||
884 | if (async->fault_num) { | 869 | if (async->fault_num) { |
885 | /* | 870 | /* |
886 | * Clear async->fault_num before calling the page-fault | 871 | * Clear async->fault_num before calling the page-fault |
@@ -894,28 +879,7 @@ static void handle_async_page_fault(struct pt_regs *regs, | |||
894 | async->address, async->is_write); | 879 | async->address, async->is_write); |
895 | } | 880 | } |
896 | } | 881 | } |
897 | 882 | #endif /* CHIP_HAS_TILE_DMA() */ | |
898 | /* | ||
899 | * This routine effectively re-issues asynchronous page faults | ||
900 | * when we are returning to user space. | ||
901 | */ | ||
902 | void do_async_page_fault(struct pt_regs *regs) | ||
903 | { | ||
904 | /* | ||
905 | * Clear thread flag early. If we re-interrupt while processing | ||
906 | * code here, we will reset it and recall this routine before | ||
907 | * returning to user space. | ||
908 | */ | ||
909 | clear_thread_flag(TIF_ASYNC_TLB); | ||
910 | |||
911 | #if CHIP_HAS_TILE_DMA() | ||
912 | handle_async_page_fault(regs, ¤t->thread.dma_async_tlb); | ||
913 | #endif | ||
914 | #if CHIP_HAS_SN_PROC() | ||
915 | handle_async_page_fault(regs, ¤t->thread.sn_async_tlb); | ||
916 | #endif | ||
917 | } | ||
918 | #endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */ | ||
919 | 883 | ||
920 | 884 | ||
921 | void vmalloc_sync_all(void) | 885 | void vmalloc_sync_all(void) |
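
The merged do_async_page_fault() above relies on two orderings: TIF_ASYNC_TLB is cleared before any work, so a re-interrupt simply re-arms the routine, and fault_num is snapshotted and cleared before handle_page_fault() runs, so a fault re-raised during handling is recorded rather than lost. A freestanding model of that discipline (the thread flag is a plain variable here, not a real TIF bit):

#include <stdio.h>

/* Freestanding model of the async-TLB replay; the struct mirrors the
 * kernel's async_tlb, but everything else is a stand-in. */
struct async_tlb {
	int fault_num;		/* nonzero if a fault is pending */
	unsigned long address;
	int is_write;
};

static int async_tlb_flag;	/* stands in for TIF_ASYNC_TLB */

static void handle_page_fault(int fault_num, unsigned long address,
			      int is_write)
{
	printf("replaying fault %d at %#lx (write=%d)\n",
	       fault_num, address, is_write);
}

static void model_async_page_fault(struct async_tlb *async)
{
	/* Clear the flag first: a re-interrupt while we are in here
	 * sets it again, and we re-run before returning to user space. */
	async_tlb_flag = 0;

	if (async->fault_num) {
		/* Snapshot and clear before handling, so a fault that
		 * arrives during handling is recorded rather than lost. */
		int fault_num = async->fault_num;

		async->fault_num = 0;
		handle_page_fault(fault_num, async->address, async->is_write);
	}
}

int main(void)
{
	struct async_tlb tlb = {
		.fault_num = 23, .address = 0x1000, .is_write = 1,
	};

	async_tlb_flag = 1;
	model_async_page_fault(&tlb);
	return 0;
}
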
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index e3ee55b0327a..004ba568d93f 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c | |||
@@ -43,12 +43,9 @@ | |||
43 | #include "migrate.h" | 43 | #include "migrate.h" |
44 | 44 | ||
45 | 45 | ||
46 | #if CHIP_HAS_COHERENT_LOCAL_CACHE() | ||
47 | |||
48 | /* | 46 | /* |
49 | * The noallocl2 option suppresses all use of the L2 cache to cache | 47 | * The noallocl2 option suppresses all use of the L2 cache to cache |
50 | * locally from a remote home. There's no point in using it if we | 48 | * locally from a remote home. |
51 | * don't have coherent local caching, though. | ||
52 | */ | 49 | */ |
53 | static int __write_once noallocl2; | 50 | static int __write_once noallocl2; |
54 | static int __init set_noallocl2(char *str) | 51 | static int __init set_noallocl2(char *str) |
@@ -58,12 +55,6 @@ static int __init set_noallocl2(char *str) | |||
58 | } | 55 | } |
59 | early_param("noallocl2", set_noallocl2); | 56 | early_param("noallocl2", set_noallocl2); |
60 | 57 | ||
61 | #else | ||
62 | |||
63 | #define noallocl2 0 | ||
64 | |||
65 | #endif | ||
66 | |||
67 | 58 | ||
68 | /* | 59 | /* |
69 | * Update the irq_stat for cpus that we are going to interrupt | 60 | * Update the irq_stat for cpus that we are going to interrupt |
@@ -265,10 +256,8 @@ static int pte_to_home(pte_t pte) | |||
265 | return PAGE_HOME_INCOHERENT; | 256 | return PAGE_HOME_INCOHERENT; |
266 | case HV_PTE_MODE_UNCACHED: | 257 | case HV_PTE_MODE_UNCACHED: |
267 | return PAGE_HOME_UNCACHED; | 258 | return PAGE_HOME_UNCACHED; |
268 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
269 | case HV_PTE_MODE_CACHE_HASH_L3: | 259 | case HV_PTE_MODE_CACHE_HASH_L3: |
270 | return PAGE_HOME_HASH; | 260 | return PAGE_HOME_HASH; |
271 | #endif | ||
272 | } | 261 | } |
273 | panic("Bad PTE %#llx\n", pte.val); | 262 | panic("Bad PTE %#llx\n", pte.val); |
274 | } | 263 | } |
@@ -325,20 +314,16 @@ pte_t pte_set_home(pte_t pte, int home) | |||
325 | HV_PTE_MODE_CACHE_NO_L3); | 314 | HV_PTE_MODE_CACHE_NO_L3); |
326 | } | 315 | } |
327 | } else | 316 | } else |
328 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
329 | if (hash_default) | 317 | if (hash_default) |
330 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); | 318 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); |
331 | else | 319 | else |
332 | #endif | ||
333 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); | 320 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); |
334 | pte = hv_pte_set_nc(pte); | 321 | pte = hv_pte_set_nc(pte); |
335 | break; | 322 | break; |
336 | 323 | ||
337 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
338 | case PAGE_HOME_HASH: | 324 | case PAGE_HOME_HASH: |
339 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); | 325 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); |
340 | break; | 326 | break; |
341 | #endif | ||
342 | 327 | ||
343 | default: | 328 | default: |
344 | BUG_ON(home < 0 || home >= NR_CPUS || | 329 | BUG_ON(home < 0 || home >= NR_CPUS || |
@@ -348,7 +333,6 @@ pte_t pte_set_home(pte_t pte, int home) | |||
348 | break; | 333 | break; |
349 | } | 334 | } |
350 | 335 | ||
351 | #if CHIP_HAS_NC_AND_NOALLOC_BITS() | ||
352 | if (noallocl2) | 336 | if (noallocl2) |
353 | pte = hv_pte_set_no_alloc_l2(pte); | 337 | pte = hv_pte_set_no_alloc_l2(pte); |
354 | 338 | ||
@@ -357,7 +341,6 @@ pte_t pte_set_home(pte_t pte, int home) | |||
357 | hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { | 341 | hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { |
358 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); | 342 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); |
359 | } | 343 | } |
360 | #endif | ||
361 | 344 | ||
362 | /* Checking this case here gives a better panic than from the hv. */ | 345 | /* Checking this case here gives a better panic than from the hv. */ |
363 | BUG_ON(hv_pte_get_mode(pte) == 0); | 346 | BUG_ON(hv_pte_get_mode(pte) == 0); |
@@ -373,16 +356,10 @@ EXPORT_SYMBOL(pte_set_home); | |||
373 | * so they're not suitable for anything but infrequent use. | 356 | * so they're not suitable for anything but infrequent use. |
374 | */ | 357 | */ |
375 | 358 | ||
376 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
377 | static inline int initial_page_home(void) { return PAGE_HOME_HASH; } | ||
378 | #else | ||
379 | static inline int initial_page_home(void) { return 0; } | ||
380 | #endif | ||
381 | |||
382 | int page_home(struct page *page) | 359 | int page_home(struct page *page) |
383 | { | 360 | { |
384 | if (PageHighMem(page)) { | 361 | if (PageHighMem(page)) { |
385 | return initial_page_home(); | 362 | return PAGE_HOME_HASH; |
386 | } else { | 363 | } else { |
387 | unsigned long kva = (unsigned long)page_address(page); | 364 | unsigned long kva = (unsigned long)page_address(page); |
388 | return pte_to_home(*virt_to_kpte(kva)); | 365 | return pte_to_home(*virt_to_kpte(kva)); |
@@ -438,7 +415,7 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, | |||
438 | void __homecache_free_pages(struct page *page, unsigned int order) | 415 | void __homecache_free_pages(struct page *page, unsigned int order) |
439 | { | 416 | { |
440 | if (put_page_testzero(page)) { | 417 | if (put_page_testzero(page)) { |
441 | homecache_change_page_home(page, order, initial_page_home()); | 418 | homecache_change_page_home(page, order, PAGE_HOME_HASH); |
442 | if (order == 0) { | 419 | if (order == 0) { |
443 | free_hot_cold_page(page, 0); | 420 | free_hot_cold_page(page, 0); |
444 | } else { | 421 | } else { |
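
With CHIP_HAS_CBOX_HOME_MAP() gone, pte_to_home() reduces to a plain mode-to-home mapping in which the hash-for-home case is always compiled in. A sketch of that mapping with stand-in constants (the real values come from the hypervisor and homecache headers and are not reproduced here):

#include <stdio.h>

/* Stand-in values only; the real constants live in <hv/hypervisor.h>
 * and the kernel's homecache definitions. */
enum hv_pte_mode {
	HV_PTE_MODE_UNCACHED = 1,
	HV_PTE_MODE_CACHE_NO_L3,
	HV_PTE_MODE_CACHE_TILE_L3,
	HV_PTE_MODE_CACHE_HASH_L3,
};

enum page_home {
	PAGE_HOME_UNCACHED = -1,
	PAGE_HOME_INCOHERENT = -2,
	PAGE_HOME_HASH = -3,
	PAGE_HOME_SINGLE_CPU = 0,	/* placeholder for "some cpu id" */
};

static int mode_to_home(enum hv_pte_mode mode)
{
	switch (mode) {
	case HV_PTE_MODE_UNCACHED:
		return PAGE_HOME_UNCACHED;
	case HV_PTE_MODE_CACHE_NO_L3:
		return PAGE_HOME_INCOHERENT;
	case HV_PTE_MODE_CACHE_HASH_L3:
		return PAGE_HOME_HASH;	/* unconditional after this patch */
	case HV_PTE_MODE_CACHE_TILE_L3:
		return PAGE_HOME_SINGLE_CPU;	/* really the caching cpu */
	}
	return PAGE_HOME_INCOHERENT;
}

int main(void)
{
	printf("%d\n", mode_to_home(HV_PTE_MODE_CACHE_HASH_L3));
	return 0;
}
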
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index c8f58c12866d..22e41cf5a2a9 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c | |||
@@ -106,10 +106,8 @@ pte_t *get_prealloc_pte(unsigned long pfn) | |||
106 | */ | 106 | */ |
107 | static int initial_heap_home(void) | 107 | static int initial_heap_home(void) |
108 | { | 108 | { |
109 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
110 | if (hash_default) | 109 | if (hash_default) |
111 | return PAGE_HOME_HASH; | 110 | return PAGE_HOME_HASH; |
112 | #endif | ||
113 | return smp_processor_id(); | 111 | return smp_processor_id(); |
114 | } | 112 | } |
115 | 113 | ||
@@ -190,14 +188,11 @@ static void __init page_table_range_init(unsigned long start, | |||
190 | } | 188 | } |
191 | 189 | ||
192 | 190 | ||
193 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
194 | |||
195 | static int __initdata ktext_hash = 1; /* .text pages */ | 191 | static int __initdata ktext_hash = 1; /* .text pages */ |
196 | static int __initdata kdata_hash = 1; /* .data and .bss pages */ | 192 | static int __initdata kdata_hash = 1; /* .data and .bss pages */ |
197 | int __write_once hash_default = 1; /* kernel allocator pages */ | 193 | int __write_once hash_default = 1; /* kernel allocator pages */ |
198 | EXPORT_SYMBOL(hash_default); | 194 | EXPORT_SYMBOL(hash_default); |
199 | int __write_once kstack_hash = 1; /* if no homecaching, use h4h */ | 195 | int __write_once kstack_hash = 1; /* if no homecaching, use h4h */ |
200 | #endif /* CHIP_HAS_CBOX_HOME_MAP */ | ||
201 | 196 | ||
202 | /* | 197 | /* |
203 | * CPUs to use for striping the pages of kernel data. If hash-for-home | 198 | * CPUs to use for striping the pages of kernel data. If hash-for-home |
@@ -215,14 +210,12 @@ int __write_once kdata_huge; /* if no homecaching, small pages */ | |||
215 | static pgprot_t __init construct_pgprot(pgprot_t prot, int home) | 210 | static pgprot_t __init construct_pgprot(pgprot_t prot, int home) |
216 | { | 211 | { |
217 | prot = pte_set_home(prot, home); | 212 | prot = pte_set_home(prot, home); |
218 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
219 | if (home == PAGE_HOME_IMMUTABLE) { | 213 | if (home == PAGE_HOME_IMMUTABLE) { |
220 | if (ktext_hash) | 214 | if (ktext_hash) |
221 | prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); | 215 | prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); |
222 | else | 216 | else |
223 | prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); | 217 | prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); |
224 | } | 218 | } |
225 | #endif | ||
226 | return prot; | 219 | return prot; |
227 | } | 220 | } |
228 | 221 | ||
@@ -236,20 +229,15 @@ static pgprot_t __init init_pgprot(ulong address) | |||
236 | unsigned long page; | 229 | unsigned long page; |
237 | enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; | 230 | enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; |
238 | 231 | ||
239 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
240 | /* For kdata=huge, everything is just hash-for-home. */ | 232 | /* For kdata=huge, everything is just hash-for-home. */ |
241 | if (kdata_huge) | 233 | if (kdata_huge) |
242 | return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); | 234 | return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); |
243 | #endif | ||
244 | 235 | ||
245 | /* We map the aliased pages of permanent text inaccessible. */ | 236 | /* We map the aliased pages of permanent text inaccessible. */ |
246 | if (address < (ulong) _sinittext - CODE_DELTA) | 237 | if (address < (ulong) _sinittext - CODE_DELTA) |
247 | return PAGE_NONE; | 238 | return PAGE_NONE; |
248 | 239 | ||
249 | /* | 240 | /* We map read-only data non-coherent for performance. */ |
250 | * We map read-only data non-coherent for performance. We could | ||
251 | * use neighborhood caching on TILE64, but it's not clear it's a win. | ||
252 | */ | ||
253 | if ((address >= (ulong) __start_rodata && | 241 | if ((address >= (ulong) __start_rodata && |
254 | address < (ulong) __end_rodata) || | 242 | address < (ulong) __end_rodata) || |
255 | address == (ulong) empty_zero_page) { | 243 | address == (ulong) empty_zero_page) { |
@@ -257,12 +245,10 @@ static pgprot_t __init init_pgprot(ulong address) | |||
257 | } | 245 | } |
258 | 246 | ||
259 | #ifndef __tilegx__ | 247 | #ifndef __tilegx__ |
260 | #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() | ||
261 | /* Force the atomic_locks[] array page to be hash-for-home. */ | 248 | /* Force the atomic_locks[] array page to be hash-for-home. */ |
262 | if (address == (ulong) atomic_locks) | 249 | if (address == (ulong) atomic_locks) |
263 | return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); | 250 | return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); |
264 | #endif | 251 | #endif |
265 | #endif | ||
266 | 252 | ||
267 | /* | 253 | /* |
268 | * Everything else that isn't data or bss is heap, so mark it | 254 | * Everything else that isn't data or bss is heap, so mark it |
@@ -280,11 +266,9 @@ static pgprot_t __init init_pgprot(ulong address) | |||
280 | if (address >= (ulong) _end || address < (ulong) _einitdata) | 266 | if (address >= (ulong) _end || address < (ulong) _einitdata) |
281 | return construct_pgprot(PAGE_KERNEL, initial_heap_home()); | 267 | return construct_pgprot(PAGE_KERNEL, initial_heap_home()); |
282 | 268 | ||
283 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
284 | /* Use hash-for-home if requested for data/bss. */ | 269 | /* Use hash-for-home if requested for data/bss. */ |
285 | if (kdata_hash) | 270 | if (kdata_hash) |
286 | return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); | 271 | return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); |
287 | #endif | ||
288 | 272 | ||
289 | /* | 273 | /* |
290 | * Make the w1data homed like heap to start with, to avoid | 274 | * Make the w1data homed like heap to start with, to avoid |
@@ -311,11 +295,9 @@ static pgprot_t __init init_pgprot(ulong address) | |||
311 | if (page == (ulong)empty_zero_page) | 295 | if (page == (ulong)empty_zero_page) |
312 | continue; | 296 | continue; |
313 | #ifndef __tilegx__ | 297 | #ifndef __tilegx__ |
314 | #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() | ||
315 | if (page == (ulong)atomic_locks) | 298 | if (page == (ulong)atomic_locks) |
316 | continue; | 299 | continue; |
317 | #endif | 300 | #endif |
318 | #endif | ||
319 | cpu = cpumask_next(cpu, &kdata_mask); | 301 | cpu = cpumask_next(cpu, &kdata_mask); |
320 | if (cpu == NR_CPUS) | 302 | if (cpu == NR_CPUS) |
321 | cpu = cpumask_first(&kdata_mask); | 303 | cpu = cpumask_first(&kdata_mask); |
@@ -358,7 +340,7 @@ static int __init setup_ktext(char *str) | |||
358 | 340 | ||
359 | ktext_arg_seen = 1; | 341 | ktext_arg_seen = 1; |
360 | 342 | ||
361 | /* Default setting on Tile64: use a huge page */ | 343 | /* Default setting: use a huge page */ |
362 | if (strcmp(str, "huge") == 0) | 344 | if (strcmp(str, "huge") == 0) |
363 | pr_info("ktext: using one huge locally cached page\n"); | 345 | pr_info("ktext: using one huge locally cached page\n"); |
364 | 346 | ||
@@ -404,10 +386,8 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot) | |||
404 | { | 386 | { |
405 | if (!ktext_nocache) | 387 | if (!ktext_nocache) |
406 | prot = hv_pte_set_nc(prot); | 388 | prot = hv_pte_set_nc(prot); |
407 | #if CHIP_HAS_NC_AND_NOALLOC_BITS() | ||
408 | else | 389 | else |
409 | prot = hv_pte_set_no_alloc_l2(prot); | 390 | prot = hv_pte_set_no_alloc_l2(prot); |
410 | #endif | ||
411 | return prot; | 391 | return prot; |
412 | } | 392 | } |
413 | 393 | ||
@@ -440,7 +420,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
440 | struct cpumask kstripe_mask; | 420 | struct cpumask kstripe_mask; |
441 | int rc, i; | 421 | int rc, i; |
442 | 422 | ||
443 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
444 | if (ktext_arg_seen && ktext_hash) { | 423 | if (ktext_arg_seen && ktext_hash) { |
445 | pr_warning("warning: \"ktext\" boot argument ignored" | 424 | pr_warning("warning: \"ktext\" boot argument ignored" |
446 | " if \"kcache_hash\" sets up text hash-for-home\n"); | 425 | " if \"kcache_hash\" sets up text hash-for-home\n"); |
@@ -457,7 +436,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
457 | " kcache_hash=all or =allbutstack\n"); | 436 | " kcache_hash=all or =allbutstack\n"); |
458 | kdata_huge = 0; | 437 | kdata_huge = 0; |
459 | } | 438 | } |
460 | #endif | ||
461 | 439 | ||
462 | /* | 440 | /* |
463 | * Set up a mask for cpus to use for kernel striping. | 441 | * Set up a mask for cpus to use for kernel striping. |
@@ -585,13 +563,11 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
585 | } else { | 563 | } else { |
586 | pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); | 564 | pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); |
587 | pteval = pte_mkhuge(pteval); | 565 | pteval = pte_mkhuge(pteval); |
588 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
589 | if (ktext_hash) { | 566 | if (ktext_hash) { |
590 | pteval = hv_pte_set_mode(pteval, | 567 | pteval = hv_pte_set_mode(pteval, |
591 | HV_PTE_MODE_CACHE_HASH_L3); | 568 | HV_PTE_MODE_CACHE_HASH_L3); |
592 | pteval = ktext_set_nocache(pteval); | 569 | pteval = ktext_set_nocache(pteval); |
593 | } else | 570 | } else |
594 | #endif /* CHIP_HAS_CBOX_HOME_MAP() */ | ||
595 | if (cpumask_weight(&ktext_mask) == 1) { | 571 | if (cpumask_weight(&ktext_mask) == 1) { |
596 | pteval = set_remote_cache_cpu(pteval, | 572 | pteval = set_remote_cache_cpu(pteval, |
597 | cpumask_first(&ktext_mask)); | 573 | cpumask_first(&ktext_mask)); |
@@ -938,26 +914,6 @@ void __init pgtable_cache_init(void) | |||
938 | panic("pgtable_cache_init(): Cannot create pgd cache"); | 914 | panic("pgtable_cache_init(): Cannot create pgd cache"); |
939 | } | 915 | } |
940 | 916 | ||
941 | #if !CHIP_HAS_COHERENT_LOCAL_CACHE() | ||
942 | /* | ||
943 | * The __w1data area holds data that is only written during initialization, | ||
944 | * and is read-only and thus freely cacheable thereafter. Fix the page | ||
945 | * table entries that cover that region accordingly. | ||
946 | */ | ||
947 | static void mark_w1data_ro(void) | ||
948 | { | ||
949 | /* Loop over page table entries */ | ||
950 | unsigned long addr = (unsigned long)__w1data_begin; | ||
951 | BUG_ON((addr & (PAGE_SIZE-1)) != 0); | ||
952 | for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) { | ||
953 | unsigned long pfn = kaddr_to_pfn((void *)addr); | ||
954 | pte_t *ptep = virt_to_kpte(addr); | ||
955 | BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */ | ||
956 | set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO)); | ||
957 | } | ||
958 | } | ||
959 | #endif | ||
960 | |||
961 | #ifdef CONFIG_DEBUG_PAGEALLOC | 917 | #ifdef CONFIG_DEBUG_PAGEALLOC |
962 | static long __write_once initfree; | 918 | static long __write_once initfree; |
963 | #else | 919 | #else |
@@ -1026,10 +982,7 @@ void free_initmem(void) | |||
1026 | /* | 982 | /* |
1027 | * Evict the dirty initdata on the boot cpu, evict the w1data | 983 | * Evict the dirty initdata on the boot cpu, evict the w1data |
1028 | * wherever it's homed, and evict all the init code everywhere. | 984 | * wherever it's homed, and evict all the init code everywhere. |
1029 | * We are guaranteed that no one will touch the init pages any | 985 | * We are guaranteed that no one will touch the init pages any more. |
1030 | * more, and although other cpus may be touching the w1data, | ||
1031 | * we only actually change the caching on tile64, which won't | ||
1032 | * be keeping local copies in the other tiles' caches anyway. | ||
1033 | */ | 986 | */ |
1034 | homecache_evict(&cpu_cacheable_map); | 987 | homecache_evict(&cpu_cacheable_map); |
1035 | 988 | ||
@@ -1045,21 +998,6 @@ void free_initmem(void) | |||
1045 | free_init_pages("unused kernel text", | 998 | free_init_pages("unused kernel text", |
1046 | (unsigned long)_sinittext - text_delta, | 999 | (unsigned long)_sinittext - text_delta, |
1047 | (unsigned long)_einittext - text_delta); | 1000 | (unsigned long)_einittext - text_delta); |
1048 | |||
1049 | #if !CHIP_HAS_COHERENT_LOCAL_CACHE() | ||
1050 | /* | ||
1051 | * Upgrade the .w1data section to globally cached. | ||
1052 | * We don't do this on tilepro, since the cache architecture | ||
1053 | * pretty much makes it irrelevant, and in any case we end | ||
1054 | * up having racing issues with other tiles that may touch | ||
1055 | * the data after we flush the cache but before we update | ||
1056 | * the PTEs and flush the TLBs, causing sharer shootdowns | ||
1057 | * later. Even though this is to clean data, it seems like | ||
1058 | * an unnecessary complication. | ||
1059 | */ | ||
1060 | mark_w1data_ro(); | ||
1061 | #endif | ||
1062 | |||
1063 | /* Do a global TLB flush so everyone sees the changes. */ | 1001 | /* Do a global TLB flush so everyone sees the changes. */ |
1064 | flush_tlb_all(); | 1002 | flush_tlb_all(); |
1065 | } | 1003 | } |
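
Taken together, the init.c hunks leave init_pgprot() as a straight-line address classifier: kdata=huge short-circuits to hash-for-home, then aliased text, read-only data, heap, and finally kdata_hash decide the home. A compressed sketch of that decision order, with made-up boundaries standing in for the linker symbols:

#include <stdint.h>
#include <stdio.h>

/* Made-up stand-ins for linker symbols and tunables; only the
 * decision order mirrors init_pgprot() after this patch. */
#define TEXT_ALIAS_END	0x10000000ul	/* _sinittext - CODE_DELTA */
#define RODATA_START	0x10100000ul	/* __start_rodata */
#define RODATA_END	0x10200000ul	/* __end_rodata */
#define DATA_END	0x10400000ul	/* _end */

static int kdata_huge;		/* boot argument in the kernel; 0 here */
static int kdata_hash = 1;	/* default 1, as in the patched file */

enum home { HOME_NONE, HOME_IMMUTABLE, HOME_HASH, HOME_HEAP, HOME_STRIPED };

static enum home classify(uintptr_t address)
{
	if (kdata_huge)
		return HOME_HASH;		/* everything hash-for-home */
	if (address < TEXT_ALIAS_END)
		return HOME_NONE;		/* aliased text: inaccessible */
	if (address >= RODATA_START && address < RODATA_END)
		return HOME_IMMUTABLE;		/* read-only, non-coherent */
	if (address >= DATA_END)
		return HOME_HEAP;		/* homed per initial_heap_home() */
	if (kdata_hash)
		return HOME_HASH;		/* data/bss hash-for-home */
	return HOME_STRIPED;			/* else striped across kdata_mask */
}

int main(void)
{
	printf("%d\n", classify(0x10150000ul));	/* HOME_IMMUTABLE */
	return 0;
}
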