-rw-r--r--  arch/tile/Kconfig | 2
-rw-r--r--  arch/tile/include/asm/atomic_32.h | 17
-rw-r--r--  arch/tile/include/asm/barrier.h | 4
-rw-r--r--  arch/tile/include/asm/elf.h | 1
-rw-r--r--  arch/tile/include/asm/homecache.h | 3
-rw-r--r--  arch/tile/include/asm/processor.h | 16
-rw-r--r--  arch/tile/include/asm/smp.h | 2
-rw-r--r--  arch/tile/include/asm/traps.h | 2
-rw-r--r--  arch/tile/include/uapi/arch/Kbuild | 1
-rw-r--r--  arch/tile/include/uapi/arch/chip.h | 4
-rw-r--r--  arch/tile/include/uapi/arch/chip_tile64.h | 258
-rw-r--r--  arch/tile/include/uapi/arch/spr_def_32.h | 2
-rw-r--r--  arch/tile/kernel/intvec_32.S | 69
-rw-r--r--  arch/tile/kernel/intvec_64.S | 2
-rw-r--r--  arch/tile/kernel/irq.c | 4
-rw-r--r--  arch/tile/kernel/process.c | 44
-rw-r--r--  arch/tile/kernel/relocate_kernel_32.S | 2
-rw-r--r--  arch/tile/kernel/relocate_kernel_64.S | 2
-rw-r--r--  arch/tile/kernel/setup.c | 13
-rw-r--r--  arch/tile/kernel/single_step.c | 2
-rw-r--r--  arch/tile/lib/Makefile | 2
-rw-r--r--  arch/tile/lib/atomic_32.c | 90
-rw-r--r--  arch/tile/lib/memcpy_32.S | 61
-rw-r--r--  arch/tile/lib/memcpy_tile64.c | 280
-rw-r--r--  arch/tile/lib/memset_32.c | 105
-rw-r--r--  arch/tile/mm/fault.c | 70
-rw-r--r--  arch/tile/mm/homecache.c | 29
-rw-r--r--  arch/tile/mm/init.c | 68
28 files changed, 34 insertions(+), 1121 deletions(-)
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index b2be42524483..6e1ed55f6cfc 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -207,7 +207,7 @@ config SYSVIPC_COMPAT
 	def_bool y
 	depends on COMPAT && SYSVIPC
 
-# We do not currently support disabling HIGHMEM on tile64 and tilepro.
+# We do not currently support disabling HIGHMEM on tilepro.
 config HIGHMEM
 	bool # "Support for more than 512 MB of RAM"
 	default !TILEGX
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index e7fb5cfb9597..96156f5ba640 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -252,21 +252,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
  * Internal definitions only beyond this point.
  */
 
-#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \
-	(!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP))
-
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-/* Number of entries in atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L1_SHIFT 6
-#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT)
-
-/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2)
-#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT)
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 /*
  * Number of atomic locks in atomic_locks[]. Must be a power of two.
  * There is no reason for more than PAGE_SIZE / 8 entries, since that
@@ -281,8 +266,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
 extern int atomic_locks[];
 #endif
 
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 /*
  * All the code that may fault while holding an atomic lock must
  * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
index 990a217a0b72..a9a73da5865d 100644
--- a/arch/tile/include/asm/barrier.h
+++ b/arch/tile/include/asm/barrier.h
@@ -77,7 +77,6 @@
 
 #define __sync() __insn_mf()
 
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
 #include <hv/syscall_public.h>
 /*
  * Issue an uncacheable load to each memory controller, then
@@ -96,7 +95,6 @@ static inline void __mb_incoherent(void)
 		"r20", "r21", "r22", "r23", "r24",
 		"r25", "r26", "r27", "r28", "r29");
 }
-#endif
 
 /* Fence to guarantee visibility of stores to incoherent memory. */
 static inline void
@@ -104,7 +102,6 @@ mb_incoherent(void)
 {
 	__insn_mf();
 
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
 	{
 #if CHIP_HAS_TILE_WRITE_PENDING()
 		const unsigned long WRITE_TIMEOUT_CYCLES = 400;
@@ -116,7 +113,6 @@ mb_incoherent(void)
 #endif /* CHIP_HAS_TILE_WRITE_PENDING() */
 		(void) __mb_incoherent();
 	}
-#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
 }
 
 #define fast_wmb() __sync()
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index e1da88e8aa9f..41d9878a9686 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t;
 #define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
-#define EM_TILE64 187
 #define EM_TILEPRO 188
 #define EM_TILEGX 191
 
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
index 49d19dfc0630..7ddd1b8d6910 100644
--- a/arch/tile/include/asm/homecache.h
+++ b/arch/tile/include/asm/homecache.h
@@ -33,8 +33,7 @@ struct zone;
 
 /*
  * Is this page immutable (unwritable) and thus able to be cached more
- * widely than would otherwise be possible? On tile64 this means we
- * mark the PTE to cache locally; on tilepro it means we have "nc" set.
+ * widely than would otherwise be possible? This means we have "nc" set.
  */
 #define PAGE_HOME_IMMUTABLE -2
 
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 5aa54319d2ef..42323636c459 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -113,18 +113,14 @@ struct thread_struct {
 	unsigned long intctrl_0;
 	/* Is this task currently doing a backtrace? */
 	bool in_backtrace;
-#if CHIP_HAS_PROC_STATUS_SPR()
 	/* Any other miscellaneous processor state bits */
 	unsigned long proc_status;
-#endif
 #if !CHIP_HAS_FIXED_INTVEC_BASE()
 	/* Interrupt base for PL0 interrupts */
 	unsigned long interrupt_vector_base;
 #endif
-#if CHIP_HAS_TILE_RTF_HWM()
 	/* Tile cache retry fifo high-water mark */
 	unsigned long tile_rtf_hwm;
-#endif
 #if CHIP_HAS_DSTREAM_PF()
 	/* Data stream prefetch control */
 	unsigned long dstream_pf;
@@ -137,12 +133,6 @@ struct thread_struct {
 	/* Async DMA TLB fault information */
 	struct async_tlb dma_async_tlb;
 #endif
-#if CHIP_HAS_SN_PROC()
-	/* Was static network processor when we were switched out? */
-	int sn_proc_running;
-	/* Async SNI TLB fault information */
-	struct async_tlb sn_async_tlb;
-#endif
 };
 
 #endif /* !__ASSEMBLY__ */
@@ -286,7 +276,6 @@ extern char chip_model[64];
 /* Data on which physical memory controller corresponds to which NUMA node. */
 extern int node_controller[];
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 /* Does the heap allocator return hash-for-home pages by default? */
 extern int hash_default;
 
@@ -296,11 +285,6 @@ extern int kstack_hash;
 /* Does MAP_ANONYMOUS return hash-for-home pages by default? */
 #define uheap_hash hash_default
 
-#else
-#define hash_default 0
-#define kstack_hash 0
-#define uheap_hash 0
-#endif
 
 /* Are we using huge pages in the TLB for kernel data? */
 extern int kdata_huge;
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 1aa759aeb5b3..9a326b64f7ae 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -101,10 +101,8 @@ void print_disabled_cpus(void);
 extern struct cpumask cpu_lotar_map;
 #define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map)
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 /* Which processors are used for hash-for-home mapping */
 extern struct cpumask hash_for_home_map;
-#endif
 
 /* Which cpus can have their cache flushed by hv_flush_remote(). */
 extern struct cpumask cpu_cacheable_map;
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 5f172b2403a6..4b99a1c3aab2 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -21,7 +21,7 @@
 /* mm/fault.c */
 void do_page_fault(struct pt_regs *, int fault_num,
 		   unsigned long address, unsigned long write);
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
 void do_async_page_fault(struct pt_regs *);
 #endif
 
diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild
index 4ebc34f4768d..97dfbecec6b6 100644
--- a/arch/tile/include/uapi/arch/Kbuild
+++ b/arch/tile/include/uapi/arch/Kbuild
@@ -1,7 +1,6 @@
 # UAPI Header export list
 header-y += abi.h
 header-y += chip.h
-header-y += chip_tile64.h
 header-y += chip_tilegx.h
 header-y += chip_tilepro.h
 header-y += icache.h
diff --git a/arch/tile/include/uapi/arch/chip.h b/arch/tile/include/uapi/arch/chip.h
index 926d3db0e91e..4c91f90b9369 100644
--- a/arch/tile/include/uapi/arch/chip.h
+++ b/arch/tile/include/uapi/arch/chip.h
@@ -12,9 +12,7 @@
  * more details.
  */
 
-#if __tile_chip__ == 0
-#include <arch/chip_tile64.h>
-#elif __tile_chip__ == 1
+#if __tile_chip__ == 1
 #include <arch/chip_tilepro.h>
 #elif defined(__tilegx__)
 #include <arch/chip_tilegx.h>
diff --git a/arch/tile/include/uapi/arch/chip_tile64.h b/arch/tile/include/uapi/arch/chip_tile64.h
deleted file mode 100644
index 261aaba092d4..000000000000
--- a/arch/tile/include/uapi/arch/chip_tile64.h
+++ /dev/null
@@ -1,258 +0,0 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/*
16 * @file
17 * Global header file.
18 * This header file specifies defines for TILE64.
19 */
20
21#ifndef __ARCH_CHIP_H__
22#define __ARCH_CHIP_H__
23
24/** Specify chip version.
25 * When possible, prefer the CHIP_xxx symbols below for future-proofing.
26 * This is intended for cross-compiling; native compilation should
27 * use the predefined __tile_chip__ symbol.
28 */
29#define TILE_CHIP 0
30
31/** Specify chip revision.
32 * This provides for the case of a respin of a particular chip type;
33 * the normal value for this symbol is "0".
34 * This is intended for cross-compiling; native compilation should
35 * use the predefined __tile_chip_rev__ symbol.
36 */
37#define TILE_CHIP_REV 0
38
39/** The name of this architecture. */
40#define CHIP_ARCH_NAME "tile64"
41
42/** The ELF e_machine type for binaries for this chip. */
43#define CHIP_ELF_TYPE() EM_TILE64
44
45/** The alternate ELF e_machine type for binaries for this chip. */
46#define CHIP_COMPAT_ELF_TYPE() 0x2506
47
48/** What is the native word size of the machine? */
49#define CHIP_WORD_SIZE() 32
50
51/** How many bits of a virtual address are used. Extra bits must be
52 * the sign extension of the low bits.
53 */
54#define CHIP_VA_WIDTH() 32
55
56/** How many bits are in a physical address? */
57#define CHIP_PA_WIDTH() 36
58
59/** Size of the L2 cache, in bytes. */
60#define CHIP_L2_CACHE_SIZE() 65536
61
62/** Log size of an L2 cache line in bytes. */
63#define CHIP_L2_LOG_LINE_SIZE() 6
64
65/** Size of an L2 cache line, in bytes. */
66#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
67
68/** Associativity of the L2 cache. */
69#define CHIP_L2_ASSOC() 2
70
71/** Size of the L1 data cache, in bytes. */
72#define CHIP_L1D_CACHE_SIZE() 8192
73
74/** Log size of an L1 data cache line in bytes. */
75#define CHIP_L1D_LOG_LINE_SIZE() 4
76
77/** Size of an L1 data cache line, in bytes. */
78#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
79
80/** Associativity of the L1 data cache. */
81#define CHIP_L1D_ASSOC() 2
82
83/** Size of the L1 instruction cache, in bytes. */
84#define CHIP_L1I_CACHE_SIZE() 8192
85
86/** Log size of an L1 instruction cache line in bytes. */
87#define CHIP_L1I_LOG_LINE_SIZE() 6
88
89/** Size of an L1 instruction cache line, in bytes. */
90#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
91
92/** Associativity of the L1 instruction cache. */
93#define CHIP_L1I_ASSOC() 1
94
95/** Stride with which flush instructions must be issued. */
96#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
97
98/** Stride with which inv instructions must be issued. */
99#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE()
100
101/** Stride with which finv instructions must be issued. */
102#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE()
103
104/** Can the local cache coherently cache data that is homed elsewhere? */
105#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0
106
107/** How many simultaneous outstanding victims can the L2 cache have? */
108#define CHIP_MAX_OUTSTANDING_VICTIMS() 2
109
110/** Does the TLB support the NC and NOALLOC bits? */
111#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0
112
113/** Does the chip support hash-for-home caching? */
114#define CHIP_HAS_CBOX_HOME_MAP() 0
115
116/** Number of entries in the chip's home map tables. */
117/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */
118
119/** Do uncacheable requests miss in the cache regardless of whether
120 * there is matching data? */
121#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0
122
123/** Does the mf instruction wait for victims? */
124#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1
125
126/** Does the chip have an "inv" instruction that doesn't also flush? */
127#define CHIP_HAS_INV() 0
128
129/** Does the chip have a "wh64" instruction? */
130#define CHIP_HAS_WH64() 0
131
132/** Does this chip have a 'dword_align' instruction? */
133#define CHIP_HAS_DWORD_ALIGN() 0
134
135/** Number of performance counters. */
136#define CHIP_PERFORMANCE_COUNTERS() 2
137
138/** Does this chip have auxiliary performance counters? */
139#define CHIP_HAS_AUX_PERF_COUNTERS() 0
140
141/** Is the CBOX_MSR1 SPR supported? */
142#define CHIP_HAS_CBOX_MSR1() 0
143
144/** Is the TILE_RTF_HWM SPR supported? */
145#define CHIP_HAS_TILE_RTF_HWM() 0
146
147/** Is the TILE_WRITE_PENDING SPR supported? */
148#define CHIP_HAS_TILE_WRITE_PENDING() 0
149
150/** Is the PROC_STATUS SPR supported? */
151#define CHIP_HAS_PROC_STATUS_SPR() 0
152
153/** Is the DSTREAM_PF SPR supported? */
154#define CHIP_HAS_DSTREAM_PF() 0
155
156/** Log of the number of mshims we have. */
157#define CHIP_LOG_NUM_MSHIMS() 2
158
159/** Are the bases of the interrupt vector areas fixed? */
160#define CHIP_HAS_FIXED_INTVEC_BASE() 1
161
162/** Are the interrupt masks split up into 2 SPRs? */
163#define CHIP_HAS_SPLIT_INTR_MASK() 1
164
165/** Is the cycle count split up into 2 SPRs? */
166#define CHIP_HAS_SPLIT_CYCLE() 1
167
168/** Does the chip have a static network? */
169#define CHIP_HAS_SN() 1
170
171/** Does the chip have a static network processor? */
172#define CHIP_HAS_SN_PROC() 1
173
174/** Size of the L1 static network processor instruction cache, in bytes. */
175#define CHIP_L1SNI_CACHE_SIZE() 2048
176
177/** Does the chip have DMA support in each tile? */
178#define CHIP_HAS_TILE_DMA() 1
179
180/** Does the chip have the second revision of the directly accessible
181 * dynamic networks? This encapsulates a number of characteristics,
182 * including the absence of the catch-all, the absence of inline message
183 * tags, the absence of support for network context-switching, and so on.
184 */
185#define CHIP_HAS_REV1_XDN() 0
186
187/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
188#define CHIP_HAS_CMPEXCH() 0
189
190/** Does the chip have memory-mapped I/O support? */
191#define CHIP_HAS_MMIO() 0
192
193/** Does the chip have post-completion interrupts? */
194#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
195
196/** Does the chip have native single step support? */
197#define CHIP_HAS_SINGLE_STEP() 0
198
199#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */
200
201/** How many entries are present in the instruction TLB? */
202#define CHIP_ITLB_ENTRIES() 8
203
204/** How many entries are present in the data TLB? */
205#define CHIP_DTLB_ENTRIES() 16
206
207/** How many MAF entries does the XAUI shim have? */
208#define CHIP_XAUI_MAF_ENTRIES() 16
209
210/** Does the memory shim have a source-id table? */
211#define CHIP_HAS_MSHIM_SRCID_TABLE() 1
212
213/** Does the L1 instruction cache clear on reset? */
214#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0
215
216/** Does the chip come out of reset with valid coordinates on all tiles?
217 * Note that if defined, this also implies that the upper left is 1,1.
218 */
219#define CHIP_HAS_VALID_TILE_COORD_RESET() 0
220
221/** Does the chip have unified packet formats? */
222#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0
223
224/** Does the chip support write reordering? */
225#define CHIP_HAS_WRITE_REORDERING() 0
226
227/** Does the chip support Y-X routing as well as X-Y? */
228#define CHIP_HAS_Y_X_ROUTING() 0
229
230/** Is INTCTRL_3 managed with the correct MPL? */
231#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0
232
233/** Is it possible to configure the chip to be big-endian? */
234#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0
235
236/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
237#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
238
239/** Is the DIAG_TRACE_WAY SPR supported? */
240#define CHIP_HAS_DIAG_TRACE_WAY() 0
241
242/** Is the MEM_STRIPE_CONFIG SPR supported? */
243#define CHIP_HAS_MEM_STRIPE_CONFIG() 0
244
245/** Are the TLB_PERF SPRs supported? */
246#define CHIP_HAS_TLB_PERF() 0
247
248/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
249#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
250
251/** Does the chip support rev1 DMA packets? */
252#define CHIP_HAS_REV1_DMA_PACKETS() 0
253
254/** Does the chip have an IPI shim? */
255#define CHIP_HAS_IPI() 0
256
257#endif /* !__OPEN_SOURCE__ */
258#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/uapi/arch/spr_def_32.h b/arch/tile/include/uapi/arch/spr_def_32.h
index c689446e6284..78daa3146d25 100644
--- a/arch/tile/include/uapi/arch/spr_def_32.h
+++ b/arch/tile/include/uapi/arch/spr_def_32.h
@@ -200,8 +200,6 @@
 #define SPR_SIM_CONTROL 0x4e0c
 #define SPR_SNCTL 0x0805
 #define SPR_SNCTL__FRZFABRIC_MASK 0x1
-#define SPR_SNCTL__FRZPROC_MASK 0x2
-#define SPR_SNPC 0x080b
 #define SPR_SNSTATIC 0x080c
 #define SPR_SYSTEM_SAVE_0_0 0x4b00
 #define SPR_SYSTEM_SAVE_0_1 0x4b01
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index f084f1c7afde..088d5c141e68 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -32,12 +32,6 @@
 
 #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
 
-#if !CHIP_HAS_WH64()
-	/* By making this an empty macro, we can use wh64 in the code. */
-	.macro wh64 reg
-	.endm
-#endif
-
 	.macro push_reg reg, ptr=sp, delta=-4
 	{
 	sw \ptr, \reg
@@ -325,18 +319,14 @@ intvec_\vecname:
 	movei r3, -1 /* not used, but set for consistency */
 	}
 	.else
-#if CHIP_HAS_AUX_PERF_COUNTERS()
 	.ifc \c_routine, op_handle_aux_perf_interrupt
 	{
 	mfspr r2, AUX_PERF_COUNT_STS
 	movei r3, -1 /* not used, but set for consistency */
 	}
 	.else
-#endif
 	movei r3, 0
-#if CHIP_HAS_AUX_PERF_COUNTERS()
 	.endif
-#endif
 	.endif
 	.endif
 	.endif
@@ -561,7 +551,6 @@ intvec_\vecname:
 	.endif
 	mtspr INTERRUPT_CRITICAL_SECTION, zero
 
-#if CHIP_HAS_WH64()
 	/*
 	 * Prepare the first 256 stack bytes to be rapidly accessible
 	 * without having to fetch the background data. We don't really
@@ -582,7 +571,6 @@ intvec_\vecname:
 	addi r52, r52, -64
 	}
 	wh64 r52
-#endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 	.ifnc \function,handle_nmi
@@ -1533,12 +1521,10 @@ STD_ENTRY(_sys_clone)
 	__HEAD
 	.align 64
 	/* Align much later jump on the start of a cache line. */
-#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
 	nop
 #if PAGE_SIZE >= 0x10000
 	nop
 #endif
-#endif
 ENTRY(sys_cmpxchg)
 
 	/*
@@ -1572,45 +1558,6 @@ ENTRY(sys_cmpxchg)
 # error Code here assumes PAGE_OFFSET can be loaded with just hi16()
 #endif
 
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	{
-	/* Check for unaligned input. */
-	bnz sp, .Lcmpxchg_badaddr
-	mm r25, r0, zero, 3, PAGE_SHIFT-1
-	}
-	{
-	crc32_32 r25, zero, r25
-	moveli r21, lo16(atomic_lock_ptr)
-	}
-	{
-	auli r21, r21, ha16(atomic_lock_ptr)
-	auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */
-	}
-	{
-	shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
-	slt_u r23, r0, r23
-	lw r26, r0 /* see comment in the "#else" for the "lw r26". */
-	}
-	{
-	s2a r21, r20, r21
-	bbns r23, .Lcmpxchg_badaddr
-	}
-	{
-	lw r21, r21
-	seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
-	andi r25, r25, ATOMIC_HASH_L2_SIZE - 1
-	}
-	{
-	/* Branch away at this point if we're doing a 64-bit cmpxchg. */
-	bbs r23, .Lcmpxchg64
-	andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */
-	}
-	{
-	s2a ATOMIC_LOCK_REG_NAME, r25, r21
-	j .Lcmpxchg32_tns /* see comment in the #else for the jump. */
-	}
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 	{
 	/* Check for unaligned input. */
 	bnz sp, .Lcmpxchg_badaddr
@@ -1635,12 +1582,9 @@ ENTRY(sys_cmpxchg)
 
 	/*
 	 * Ensure that the TLB is loaded before we take out the lock.
-	 * On tilepro, this will start fetching the value all the way
-	 * into our L1 as well (and if it gets modified before we
-	 * grab the lock, it will be invalidated from our cache
-	 * before we reload it). On tile64, we'll start fetching it
-	 * into our L1 if we're the home, and if we're not, we'll
-	 * still at least start fetching it into the home's L2.
+	 * This will start fetching the value all the way into our L1
+	 * as well (and if it gets modified before we grab the lock,
+	 * it will be invalidated from our cache before we reload it).
 	 */
 	lw r26, r0
 	}
@@ -1683,8 +1627,6 @@ ENTRY(sys_cmpxchg)
 	j .Lcmpxchg32_tns
 	}
 
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 /* Symbol for do_page_fault_ics() to use to compare against the PC. */
 .global __sys_cmpxchg_grab_lock
 __sys_cmpxchg_grab_lock:
@@ -1822,9 +1764,6 @@ __sys_cmpxchg_grab_lock:
 	.align 64
 .Lcmpxchg64:
 	{
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	s2a ATOMIC_LOCK_REG_NAME, r25, r21
-#endif
 	bzt r23, .Lcmpxchg64_tns
 	}
 	j .Lcmpxchg_badaddr
@@ -1959,10 +1898,8 @@ int_unalign:
 	do_page_fault
 	int_hand INT_SN_CPL, SN_CPL, bad_intr
 	int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
-#if CHIP_HAS_AUX_PERF_COUNTERS()
 	int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
 		op_handle_aux_perf_interrupt, handle_nmi
-#endif
 
 	/* Synthetic interrupt delivered only by the simulator */
 	int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index c3a2335fa6a8..ec755d3f3734 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -511,12 +511,10 @@ intvec_\vecname:
 	.else
 	.ifc \c_routine, op_handle_perf_interrupt
 	mfspr r2, PERF_COUNT_STS
-#if CHIP_HAS_AUX_PERF_COUNTERS()
 	.else
 	.ifc \c_routine, op_handle_aux_perf_interrupt
 	mfspr r2, AUX_PERF_COUNT_STS
 	.endif
-#endif
 	.endif
 	.endif
 	.endif
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index 0e6c521b8a89..d8ba06058fd0 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(available_irqs_lock);
 
 /*
  * The interrupt handling path, implemented in terms of HV interrupt
- * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx.
+ * emulation on TILEPro, and IPI hardware on TILE-Gx.
  * Entered with interrupts disabled.
  */
 void tile_dev_intr(struct pt_regs *regs, int intnum)
@@ -235,7 +235,7 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type)
 {
 	/*
 	 * We use handle_level_irq() by default because the pending
-	 * interrupt vector (whether modeled by the HV on TILE64 and
+	 * interrupt vector (whether modeled by the HV on
 	 * TILEPro or implemented in hardware on TILE-Gx) has
 	 * level-style semantics for each bit. An interrupt fires
 	 * whenever a bit is high, not just at edges.
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 44cdc4aa59e8..16ed58948757 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -187,16 +187,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb));
 #endif
 
-#if CHIP_HAS_SN_PROC()
-	/* Likewise, the new thread is not running static processor code. */
-	p->thread.sn_proc_running = 0;
-	memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb));
-#endif
-
-#if CHIP_HAS_PROC_STATUS_SPR()
 	/* New thread has its miscellaneous processor state bits clear. */
 	p->thread.proc_status = 0;
-#endif
 
 #ifdef CONFIG_HARDWALL
 	/* New thread does not own any networks. */
@@ -378,15 +370,11 @@ static void save_arch_state(struct thread_struct *t)
 	t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2);
 	t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3);
 	t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS);
-#if CHIP_HAS_PROC_STATUS_SPR()
 	t->proc_status = __insn_mfspr(SPR_PROC_STATUS);
-#endif
 #if !CHIP_HAS_FIXED_INTVEC_BASE()
 	t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0);
 #endif
-#if CHIP_HAS_TILE_RTF_HWM()
 	t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM);
-#endif
 #if CHIP_HAS_DSTREAM_PF()
 	t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
 #endif
@@ -407,15 +395,11 @@ static void restore_arch_state(const struct thread_struct *t)
 	__insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]);
 	__insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]);
 	__insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0);
-#if CHIP_HAS_PROC_STATUS_SPR()
 	__insn_mtspr(SPR_PROC_STATUS, t->proc_status);
-#endif
 #if !CHIP_HAS_FIXED_INTVEC_BASE()
 	__insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base);
 #endif
-#if CHIP_HAS_TILE_RTF_HWM()
 	__insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm);
-#endif
 #if CHIP_HAS_DSTREAM_PF()
 	__insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf);
 #endif
@@ -424,26 +408,11 @@ static void restore_arch_state(const struct thread_struct *t)
 
 void _prepare_arch_switch(struct task_struct *next)
 {
-#if CHIP_HAS_SN_PROC()
-	int snctl;
-#endif
 #if CHIP_HAS_TILE_DMA()
 	struct tile_dma_state *dma = &current->thread.tile_dma_state;
 	if (dma->enabled)
 		save_tile_dma_state(dma);
 #endif
-#if CHIP_HAS_SN_PROC()
-	/*
-	 * Suspend the static network processor if it was running.
-	 * We do not suspend the fabric itself, just like we don't
-	 * try to suspend the UDN.
-	 */
-	snctl = __insn_mfspr(SPR_SNCTL);
-	current->thread.sn_proc_running =
-		(snctl & SPR_SNCTL__FRZPROC_MASK) == 0;
-	if (current->thread.sn_proc_running)
-		__insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK);
-#endif
 }
 
 
@@ -471,17 +440,6 @@ struct task_struct *__sched _switch_to(struct task_struct *prev,
 	/* Restore other arch state. */
 	restore_arch_state(&next->thread);
 
-#if CHIP_HAS_SN_PROC()
-	/*
-	 * Restart static network processor in the new process
-	 * if it was running before.
-	 */
-	if (next->thread.sn_proc_running) {
-		int snctl = __insn_mfspr(SPR_SNCTL);
-		__insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK);
-	}
-#endif
-
 #ifdef CONFIG_HARDWALL
 	/* Enable or disable access to the network registers appropriately. */
 	hardwall_switch_tasks(prev, next);
@@ -523,7 +481,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
 		schedule();
 		return 1;
 	}
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
 	if (thread_info_flags & _TIF_ASYNC_TLB) {
 		do_async_page_fault(regs);
 		return 1;
diff --git a/arch/tile/kernel/relocate_kernel_32.S b/arch/tile/kernel/relocate_kernel_32.S
index f7fd37b64a78..e44fbcf8cbd5 100644
--- a/arch/tile/kernel/relocate_kernel_32.S
+++ b/arch/tile/kernel/relocate_kernel_32.S
@@ -77,7 +77,6 @@ STD_ENTRY(relocate_new_kernel)
 	move r30, sp
 	addi sp, sp, -8
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 	/*
 	 * On TILEPro, we need to flush all tiles' caches, since we may
 	 * have been doing hash-for-home caching there. Note that we
@@ -113,7 +112,6 @@ STD_ENTRY(relocate_new_kernel)
 	}
 
 	jalr r20
-#endif
 
 	/* r33 is destination pointer, default to zero */
 
diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S
index 02bc44621021..d9d8cf6176e8 100644
--- a/arch/tile/kernel/relocate_kernel_64.S
+++ b/arch/tile/kernel/relocate_kernel_64.S
@@ -78,7 +78,6 @@ STD_ENTRY(relocate_new_kernel)
 	move r30, sp
 	addi sp, sp, -16
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 	/*
 	 * On TILE-GX, we need to flush all tiles' caches, since we may
 	 * have been doing hash-for-home caching there. Note that we
@@ -116,7 +115,6 @@ STD_ENTRY(relocate_new_kernel)
 	shl16insli r20, r20, hw0(hv_flush_remote)
 
 	jalr r20
-#endif
 
 	/* r33 is destination pointer, default to zero */
 
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index b79c312ca3cb..128a2d0b8650 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -1046,9 +1046,6 @@ void __cpuinit setup_cpu(int boot)
 	arch_local_irq_unmask(INT_DMATLB_MISS);
 	arch_local_irq_unmask(INT_DMATLB_ACCESS);
 #endif
-#if CHIP_HAS_SN_PROC()
-	arch_local_irq_unmask(INT_SNITLB_MISS);
-#endif
 #ifdef __tilegx__
 	arch_local_irq_unmask(INT_SINGLE_STEP_K);
 #endif
@@ -1063,10 +1060,6 @@ void __cpuinit setup_cpu(int boot)
 	/* Static network is not restricted. */
 	__insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1);
 #endif
-#if CHIP_HAS_SN_PROC()
-	__insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1);
-	__insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1);
-#endif
 
 	/*
 	 * Set the MPL for interrupt control 0 & 1 to the corresponding
@@ -1291,7 +1284,6 @@ static void __init validate_va(void)
 struct cpumask __write_once cpu_lotar_map;
 EXPORT_SYMBOL(cpu_lotar_map);
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 /*
  * hash_for_home_map lists all the tiles that hash-for-home data
  * will be cached on. Note that this may includes tiles that are not
@@ -1301,7 +1293,6 @@ EXPORT_SYMBOL(cpu_lotar_map);
  */
 struct cpumask hash_for_home_map;
 EXPORT_SYMBOL(hash_for_home_map);
-#endif
 
 /*
  * cpu_cacheable_map lists all the cpus whose caches the hypervisor can
@@ -1394,7 +1385,6 @@ static void __init setup_cpu_maps(void)
 		cpu_lotar_map = *cpu_possible_mask;
 	}
 
-#if CHIP_HAS_CBOX_HOME_MAP()
 	/* Retrieve set of CPUs used for hash-for-home caching */
 	rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE,
 			      (HV_VirtAddr) hash_for_home_map.bits,
@@ -1402,9 +1392,6 @@ static void __init setup_cpu_maps(void)
 	if (rc < 0)
 		early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc);
 	cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map);
-#else
-	cpu_cacheable_map = *cpu_possible_mask;
-#endif
 }
 
 
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 5ef2e9eae5c5..de07fa7d1315 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -546,7 +546,6 @@ void single_step_once(struct pt_regs *regs)
 		}
 		break;
 
-#if CHIP_HAS_WH64()
 	/* postincrement operations */
 	case IMM_0_OPCODE_X1:
 		switch (get_ImmOpcodeExtension_X1(bundle)) {
@@ -581,7 +580,6 @@ void single_step_once(struct pt_regs *regs)
 			break;
 		}
 		break;
-#endif /* CHIP_HAS_WH64() */
 	}
 
 	if (state->update) {
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 9adfd76fbdd8..c4211cbb2021 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,7 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
 	strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o
 
 lib-$(CONFIG_TILEGX) += memcpy_user_64.o
-lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o memcpy_tile64.o
+lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o
 lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o
 
 obj-$(CONFIG_MODULES) += exports.o
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 42eacb1f737a..5d91d1860640 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -20,50 +20,12 @@
 #include <linux/atomic.h>
 #include <arch/chip.h>
 
-/* See <asm/atomic_32.h> */
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-/*
- * A block of memory containing locks for atomic ops. Each instance of this
- * struct will be homed on a different CPU.
- */
-struct atomic_locks_on_cpu {
-	int lock[ATOMIC_HASH_L2_SIZE];
-} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4)));
-
-static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool);
-
-/* The locks we'll use until __init_atomic_per_cpu is called. */
-static struct atomic_locks_on_cpu __initdata initial_atomic_locks;
-
-/* Hash into this vector to get a pointer to lock for the given atomic. */
-struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
-	__write_once = {
-	[0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks)
-};
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 /* This page is remapped on startup to be hash-for-home. */
 int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
 
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 int *__atomic_hashed_lock(volatile void *v)
 {
 	/* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	unsigned long i =
-		(unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
-	unsigned long n = __insn_crc32_32(0, i);
-
-	/* Grab high bits for L1 index. */
-	unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT);
-	/* Grab low bits for L2 index. */
-	unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1);
-
-	return &atomic_lock_ptr[l1_index]->lock[l2_index];
-#else
 	/*
 	 * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
 	 * Using mm works here because atomic_locks is page aligned.
@@ -72,26 +34,13 @@ int *__atomic_hashed_lock(volatile void *v)
 			(unsigned long)atomic_locks,
 			2, (ATOMIC_HASH_SHIFT + 2) - 1);
 	return (int *)ptr;
-#endif
 }
 
 #ifdef CONFIG_SMP
 /* Return whether the passed pointer is a valid atomic lock pointer. */
 static int is_atomic_lock(int *p)
 {
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	int i;
-	for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
-
-		if (p >= &atomic_lock_ptr[i]->lock[0] &&
-		    p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) {
-			return 1;
-		}
-	}
-	return 0;
-#else
 	return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE];
-#endif
 }
 
 void __atomic_fault_unlock(int *irqlock_word)
@@ -210,43 +159,6 @@ struct __get_user __atomic_bad_address(int __user *addr)
 
 void __init __init_atomic_per_cpu(void)
 {
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-	unsigned int i;
-	int actual_cpu;
-
-	/*
-	 * Before this is called from setup, we just have one lock for
-	 * all atomic objects/operations. Here we replace the
-	 * elements of atomic_lock_ptr so that they point at per_cpu
-	 * integers. This seemingly over-complex approach stems from
-	 * the fact that DEFINE_PER_CPU defines an entry for each cpu
-	 * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1. But
-	 * for efficient hashing of atomics to their locks we want a
-	 * compile time constant power of 2 for the size of this
-	 * table, so we use ATOMIC_HASH_SIZE.
-	 *
-	 * Here we populate atomic_lock_ptr from the per cpu
-	 * atomic_lock_pool, interspersing by actual cpu so that
-	 * subsequent elements are homed on consecutive cpus.
-	 */
-
-	actual_cpu = cpumask_first(cpu_possible_mask);
-
-	for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
-		/*
-		 * Preincrement to slightly bias against using cpu 0,
-		 * which has plenty of stuff homed on it already.
-		 */
-		actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask);
-		if (actual_cpu >= nr_cpu_ids)
-			actual_cpu = cpumask_first(cpu_possible_mask);
-
-		atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu);
-	}
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
 	/* Validate power-of-two and "bigger than cpus" assumption */
 	BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
 	BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);
@@ -270,6 +182,4 @@ void __init __init_atomic_per_cpu(void)
 	 * That should not produce more indices than ATOMIC_HASH_SIZE.
 	 */
 	BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
-
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 }
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index 8ba7626cfeb1..a2771ae5da53 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -22,14 +22,6 @@
 
 #include <linux/linkage.h>
 
-/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
-#define memcpy __memcpy_asm
-#define __copy_to_user_inatomic __copy_to_user_inatomic_asm
-#define __copy_from_user_inatomic __copy_from_user_inatomic_asm
-#define __copy_from_user_zeroing __copy_from_user_zeroing_asm
-#endif
-
 #define IS_MEMCPY 0
 #define IS_COPY_FROM_USER 1
 #define IS_COPY_FROM_USER_ZEROING 2
@@ -159,12 +151,9 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
 
 	{ addi r3, r1, 60; andi r9, r9, -64 }
 
-#if CHIP_HAS_WH64()
 	/* No need to prefetch dst, we'll just do the wh64
 	 * right before we copy a line.
 	 */
-#endif
-
 EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 }
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bnzt zero, .; move r27, lr }
@@ -172,21 +161,6 @@ EX: { lw r6, r3; addi r3, r3, 64 }
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bnzt zero, . }
 EX: { lw r7, r3; addi r3, r3, 64 }
-#if !CHIP_HAS_WH64()
-	/* Prefetch the dest */
-	/* Intentionally stall for a few cycles to leave L2 cache alone. */
-	{ bnzt zero, . }
-	/* Use a real load to cause a TLB miss if necessary. We aren't using
-	 * r28, so this should be fine.
-	 */
-EX: { lw r28, r9; addi r9, r9, 64 }
-	/* Intentionally stall for a few cycles to leave L2 cache alone. */
-	{ bnzt zero, . }
-	{ prefetch r9; addi r9, r9, 64 }
-	/* Intentionally stall for a few cycles to leave L2 cache alone. */
-	{ bnzt zero, . }
-	{ prefetch r9; addi r9, r9, 64 }
-#endif
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bz zero, .Lbig_loop2 }
 
@@ -287,13 +261,8 @@ EX: { lw r7, r3; addi r3, r3, 64 }
 	/* Fill second L1D line. */
 EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
 
-#if CHIP_HAS_WH64()
 	/* Prepare destination line for writing. */
 EX: { wh64 r9; addi r9, r9, 64 }
-#else
-	/* Prefetch dest line */
-	{ prefetch r9; addi r9, r9, 64 }
-#endif
 	/* Load seven words that are L1D hits to cover wh64 L2 usage. */
 
 	/* Load the three remaining words from the last L1D line, which
@@ -331,16 +300,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
 EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
 EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
 EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
-#if CHIP_HAS_WH64()
 EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
-#else
-	/* Back up the r9 to a cache line we are already storing to
-	 * if it gets past the end of the dest vector. Strictly speaking,
-	 * we don't need to back up to the start of a cache line, but it's free
-	 * and tidy, so why not?
-	 */
-EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */
-#endif
 	/* Store second L1D line. */
 EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
 EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */
@@ -404,7 +364,6 @@ EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
 
 .Ldest_is_word_aligned:
 
-#if CHIP_HAS_DWORD_ALIGN()
 EX: { andi r8, r0, 63; lwadd_na r6, r1, 4}
 	{ slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned }
 
@@ -512,26 +471,6 @@ EX: { swadd r0, r13, 4; addi r2, r2, -32 }
 	/* Move r1 back to the point where it corresponds to r0. */
 	{ addi r1, r1, -4 }
 
-#else /* !CHIP_HAS_DWORD_ALIGN() */
-
-	/* Compute right/left shift counts and load initial source words. */
-	{ andi r5, r1, -4; andi r3, r1, 3 }
-EX: { lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 }
-EX: { lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 }
-
-	/* Load and store one word at a time, using shifts and ORs
-	 * to correct for the misaligned src.
-	 */
-.Lcopy_unaligned_src_loop:
-	{ shr r6, r6, r3; shl r8, r7, r4 }
-EX: { lw r7, r5; or r8, r8, r6; move r6, r7 }
-EX: { sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 }
-	{ addi r5, r5, 4; slti_u r8, r2, 8 }
-	{ bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 }
-
-	{ bz r2, .Lcopy_unaligned_done }
-#endif /* !CHIP_HAS_DWORD_ALIGN() */
-
 	/* Fall through */
 
 /*
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
deleted file mode 100644
index 0290c222847b..000000000000
--- a/arch/tile/lib/memcpy_tile64.c
+++ /dev/null
@@ -1,280 +0,0 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/string.h>
16#include <linux/smp.h>
17#include <linux/module.h>
18#include <linux/uaccess.h>
19#include <asm/fixmap.h>
20#include <asm/kmap_types.h>
21#include <asm/tlbflush.h>
22#include <hv/hypervisor.h>
23#include <arch/chip.h>
24
25
26#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
27
28/* Defined in memcpy.S */
29extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
30extern unsigned long __copy_to_user_inatomic_asm(
31 void __user *to, const void *from, unsigned long n);
32extern unsigned long __copy_from_user_inatomic_asm(
33 void *to, const void __user *from, unsigned long n);
34extern unsigned long __copy_from_user_zeroing_asm(
35 void *to, const void __user *from, unsigned long n);
36
37typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
38
39/* Size above which to consider TLB games for performance */
40#define LARGE_COPY_CUTOFF 2048
41
42/* Communicate to the simulator what we are trying to do. */
43#define sim_allow_multiple_caching(b) \
44 __insn_mtspr(SPR_SIM_CONTROL, \
45 SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
46
47/*
48 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
49 *
50 * We set up our own source and destination PTEs that we fully control.
51 * This is the only way to guarantee that we don't race with another
52 * thread that is modifying the PTE; we can't afford to try the
53 * copy_{to,from}_user() technique of catching the interrupt, since
54 * we must run with interrupts disabled to avoid the risk of some
55 * other code seeing the incoherent data in our cache. (Recall that
56 * our cache is indexed by PA, so even if the other code doesn't use
57 * our kmap_atomic virtual addresses, they'll still hit in cache using
58 * the normal VAs that aren't supposed to hit in cache.)
59 */
60static void memcpy_multicache(void *dest, const void *source,
61 pte_t dst_pte, pte_t src_pte, int len)
62{
63 int idx;
64 unsigned long flags, newsrc, newdst;
65 pmd_t *pmdp;
66 pte_t *ptep;
67 int type0, type1;
68 int cpu = smp_processor_id();
69
70 /*
71 * Disable interrupts so that we don't recurse into memcpy()
72 * in an interrupt handler, nor accidentally reference
73 * the PA of the source from an interrupt routine. Also
74 * notify the simulator that we're playing games so we don't
75 * generate spurious coherency warnings.
76 */
77 local_irq_save(flags);
78 sim_allow_multiple_caching(1);
79
80 /* Set up the new dest mapping */
81 type0 = kmap_atomic_idx_push();
82 idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
83 newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
84 pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
85 ptep = pte_offset_kernel(pmdp, newdst);
86 if (pte_val(*ptep) != pte_val(dst_pte)) {
87 set_pte(ptep, dst_pte);
88 local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
89 }
90
91 /* Set up the new source mapping */
92 type1 = kmap_atomic_idx_push();
93 idx += (type0 - type1);
94 src_pte = hv_pte_set_nc(src_pte);
95 src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */
96 newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
97 pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
98 ptep = pte_offset_kernel(pmdp, newsrc);
99 __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
100 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
101
102 /* Actually move the data. */
103 __memcpy_asm((void *)newdst, (const void *)newsrc, len);
104
105 /*
106 * Remap the source as locally-cached and not OLOC'ed so that
107 * we can inval without also invaling the remote cpu's cache.
108 * This also avoids known errata with inv'ing cacheable oloc data.
109 */
110 src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
111 src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
112 __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
113 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
114
115 /*
116 * Do the actual invalidation, covering the full L2 cache line
117 * at the end since __memcpy_asm() is somewhat aggressive.
118 */
119 __inv_buffer((void *)newsrc, len);
120
121 /*
122 * We're done: notify the simulator that all is back to normal,
123 * and re-enable interrupts and pre-emption.
124 */
125 kmap_atomic_idx_pop();
126 kmap_atomic_idx_pop();
127 sim_allow_multiple_caching(0);
128 local_irq_restore(flags);
129}
130
131/*
132 * Identify large copies from remotely-cached memory, and copy them
133 * via memcpy_multicache() if they look good, otherwise fall back
134 * to the particular kind of copying passed as the memcpy_t function.
135 */
136static unsigned long fast_copy(void *dest, const void *source, int len,
137 memcpy_t func)
138{
139 int cpu = get_cpu();
140 unsigned long retval;
141
142 /*
143 * Check if it's big enough to bother with. We may end up doing a
144 * small copy via TLB manipulation if we're near a page boundary,
145 * but presumably we'll make it up when we hit the second page.
146 */
147 while (len >= LARGE_COPY_CUTOFF) {
148 int copy_size, bytes_left_on_page;
149 pte_t *src_ptep, *dst_ptep;
150 pte_t src_pte, dst_pte;
151 struct page *src_page, *dst_page;
152
153 /* Is the source page oloc'ed to a remote cpu? */
154retry_source:
155 src_ptep = virt_to_pte(current->mm, (unsigned long)source);
156 if (src_ptep == NULL)
157 break;
158 src_pte = *src_ptep;
159 if (!hv_pte_get_present(src_pte) ||
160 !hv_pte_get_readable(src_pte) ||
161 hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
162 break;
163 if (get_remote_cache_cpu(src_pte) == cpu)
164 break;
165 src_page = pfn_to_page(pte_pfn(src_pte));
166 get_page(src_page);
167 if (pte_val(src_pte) != pte_val(*src_ptep)) {
168 put_page(src_page);
169 goto retry_source;
170 }
171 if (pte_huge(src_pte)) {
172 /* Adjust the PTE to correspond to a small page */
173 int pfn = pte_pfn(src_pte);
174 pfn += (((unsigned long)source & (HPAGE_SIZE-1))
175 >> PAGE_SHIFT);
176 src_pte = pfn_pte(pfn, src_pte);
177 src_pte = pte_mksmall(src_pte);
178 }
179
180 /* Is the destination page writable? */
181retry_dest:
182 dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
183 if (dst_ptep == NULL) {
184 put_page(src_page);
185 break;
186 }
187 dst_pte = *dst_ptep;
188 if (!hv_pte_get_present(dst_pte) ||
189 !hv_pte_get_writable(dst_pte)) {
190 put_page(src_page);
191 break;
192 }
193 dst_page = pfn_to_page(pte_pfn(dst_pte));
194 if (dst_page == src_page) {
195 /*
196 * Source and dest are on the same page; this
197 * potentially exposes us to incoherence if any
198 * part of src and dest overlap on a cache line.
199 * Just give up rather than trying to be precise.
200 */
201 put_page(src_page);
202 break;
203 }
204 get_page(dst_page);
205 if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
206 put_page(dst_page);
207 goto retry_dest;
208 }
209 if (pte_huge(dst_pte)) {
210 /* Adjust the PTE to correspond to a small page */
211 int pfn = pte_pfn(dst_pte);
212 pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
213 >> PAGE_SHIFT);
214 dst_pte = pfn_pte(pfn, dst_pte);
215 dst_pte = pte_mksmall(dst_pte);
216 }
217
218 /* All looks good: create a cachable PTE and copy from it */
219 copy_size = len;
220 bytes_left_on_page =
221 PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
222 if (copy_size > bytes_left_on_page)
223 copy_size = bytes_left_on_page;
224 bytes_left_on_page =
225 PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
226 if (copy_size > bytes_left_on_page)
227 copy_size = bytes_left_on_page;
228 memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);
229
230 /* Release the pages */
231 put_page(dst_page);
232 put_page(src_page);
233
234 /* Continue on the next page */
235 dest += copy_size;
236 source += copy_size;
237 len -= copy_size;
238 }
239
240 retval = func(dest, source, len);
241 put_cpu();
242 return retval;
243}
244
245void *memcpy(void *to, const void *from, __kernel_size_t n)
246{
247 if (n < LARGE_COPY_CUTOFF)
248 return (void *)__memcpy_asm(to, from, n);
249 else
250 return (void *)fast_copy(to, from, n, __memcpy_asm);
251}
252
253unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
254 unsigned long n)
255{
256 if (n < LARGE_COPY_CUTOFF)
257 return __copy_to_user_inatomic_asm(to, from, n);
258 else
259 return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
260}
261
262unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
263 unsigned long n)
264{
265 if (n < LARGE_COPY_CUTOFF)
266 return __copy_from_user_inatomic_asm(to, from, n);
267 else
268 return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
269}
270
271unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
272 unsigned long n)
273{
274 if (n < LARGE_COPY_CUTOFF)
275 return __copy_from_user_zeroing_asm(to, from, n);
276 else
277 return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
278}
279
280#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */
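
The deleted memcpy_tile64.c wraps the assembly copy helpers: copies under LARGE_COPY_CUTOFF go straight to the low-level routine, while larger ones are walked page by page so each piece can be remapped and copied through the private incoherent mappings that memcpy_multicache() sets up. The following is a hedged user-space sketch of just that dispatch-and-chunking skeleton, with the PTE/TLB manipulation left out; slow_copy and PAGE_SIZE_SK are stand-ins, not kernel symbols.

#include <stdint.h>
#include <string.h>

#define LARGE_COPY_CUTOFF 2048      /* same threshold the removed code used */
#define PAGE_SIZE_SK      4096      /* assumed page size for this sketch */

typedef unsigned long (*memcpy_fn)(void *, const void *, unsigned long);

static unsigned long slow_copy(void *to, const void *from, unsigned long n)
{
	memcpy(to, from, n);
	return 0;                       /* 0 bytes left uncopied */
}

/* Walk a large copy so each chunk stays within one source and one dest page. */
static unsigned long chunked_copy(void *to, const void *from, unsigned long n,
				  memcpy_fn fn)
{
	while (n >= LARGE_COPY_CUTOFF) {
		unsigned long chunk = n, left;

		left = PAGE_SIZE_SK - ((uintptr_t)from & (PAGE_SIZE_SK - 1));
		if (chunk > left)
			chunk = left;
		left = PAGE_SIZE_SK - ((uintptr_t)to & (PAGE_SIZE_SK - 1));
		if (chunk > left)
			chunk = left;

		fn(to, from, chunk);    /* the removed code called memcpy_multicache() here */
		to = (char *)to + chunk;
		from = (const char *)from + chunk;
		n -= chunk;
	}
	return fn(to, from, n);         /* small copy, or the tail of a large one */
}

/* Usage: chunked_copy(dst, src, len, slow_copy); */

In the removed code the per-page loop also pinned the source and destination pages and re-read their PTEs to detect concurrent changes before handing them to memcpy_multicache(); that retry logic is omitted here.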
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
index 9a7837d11f7d..2042bfe6595f 100644
--- a/arch/tile/lib/memset_32.c
+++ b/arch/tile/lib/memset_32.c
@@ -23,11 +23,7 @@ void *memset(void *s, int c, size_t n)
23 int n32; 23 int n32;
24 uint32_t v16, v32; 24 uint32_t v16, v32;
25 uint8_t *out8 = s; 25 uint8_t *out8 = s;
26#if !CHIP_HAS_WH64()
27 int ahead32;
28#else
29 int to_align32; 26 int to_align32;
30#endif
31 27
32 /* Experimentation shows that a trivial tight loop is a win up until 28 /* Experimentation shows that a trivial tight loop is a win up until
33 * around a size of 20, where writing a word at a time starts to win. 29 * around a size of 20, where writing a word at a time starts to win.
@@ -58,21 +54,6 @@ void *memset(void *s, int c, size_t n)
58 return s; 54 return s;
59 } 55 }
60 56
61#if !CHIP_HAS_WH64()
62 /* Use a spare issue slot to start prefetching the first cache
63 * line early. This instruction is free as the store can be buried
64 * in otherwise idle issue slots doing ALU ops.
65 */
66 __insn_prefetch(out8);
67
68 /* We prefetch the end so that a short memset that spans two cache
69 * lines gets some prefetching benefit. Again we believe this is free
70 * to issue.
71 */
72 __insn_prefetch(&out8[n - 1]);
73#endif /* !CHIP_HAS_WH64() */
74
75
76 /* Align 'out8'. We know n >= 3 so this won't write past the end. */ 57 /* Align 'out8'. We know n >= 3 so this won't write past the end. */
77 while (((uintptr_t) out8 & 3) != 0) { 58 while (((uintptr_t) out8 & 3) != 0) {
78 *out8++ = c; 59 *out8++ = c;
@@ -93,90 +74,6 @@ void *memset(void *s, int c, size_t n)
93 /* This must be at least 8 or the following loop doesn't work. */ 74 /* This must be at least 8 or the following loop doesn't work. */
94#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4) 75#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4)
95 76
96#if !CHIP_HAS_WH64()
97
98 ahead32 = CACHE_LINE_SIZE_IN_WORDS;
99
100 /* We already prefetched the first and last cache lines, so
101 * we only need to do more prefetching if we are storing
102 * to more than two cache lines.
103 */
104 if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) {
105 int i;
106
107 /* Prefetch the next several cache lines.
108 * This is the setup code for the software-pipelined
109 * loop below.
110 */
111#define MAX_PREFETCH 5
112 ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS;
113 if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS)
114 ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS;
115
116 for (i = CACHE_LINE_SIZE_IN_WORDS;
117 i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS)
118 __insn_prefetch(&out32[i]);
119 }
120
121 if (n32 > ahead32) {
122 while (1) {
123 int j;
124
125 /* Prefetch by reading one word several cache lines
126 * ahead. Since loads are non-blocking this will
127 * cause the full cache line to be read while we are
128 * finishing earlier cache lines. Using a store
129 * here causes microarchitectural performance
130 * problems where a victimizing store miss goes to
131 * the head of the retry FIFO and locks the pipe for
132 * a few cycles. So a few subsequent stores in this
133 * loop go into the retry FIFO, and then later
134 * stores see other stores to the same cache line
135 * are already in the retry FIFO and themselves go
136 * into the retry FIFO, filling it up and grinding
137 * to a halt waiting for the original miss to be
138 * satisfied.
139 */
140 __insn_prefetch(&out32[ahead32]);
141
142#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
143#error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
144#endif
145
146 n32 -= CACHE_LINE_SIZE_IN_WORDS;
147
148 /* Save icache space by only partially unrolling
149 * this loop.
150 */
151 for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) {
152 *out32++ = v32;
153 *out32++ = v32;
154 *out32++ = v32;
155 *out32++ = v32;
156 }
157
158 /* To save compiled code size, reuse this loop even
159 * when we run out of prefetching to do by dropping
160 * ahead32 down.
161 */
162 if (n32 <= ahead32) {
163 /* Not even a full cache line left,
164 * so stop now.
165 */
166 if (n32 < CACHE_LINE_SIZE_IN_WORDS)
167 break;
168
169 /* Choose a small enough value that we don't
170 * prefetch past the end. There's no sense
171 * in touching cache lines we don't have to.
172 */
173 ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1;
174 }
175 }
176 }
177
178#else /* CHIP_HAS_WH64() */
179
180 /* Determine how many words we need to emit before the 'out32' 77 /* Determine how many words we need to emit before the 'out32'
181 * pointer becomes aligned modulo the cache line size. 78 * pointer becomes aligned modulo the cache line size.
182 */ 79 */
@@ -233,8 +130,6 @@ void *memset(void *s, int c, size_t n)
233 n32 &= CACHE_LINE_SIZE_IN_WORDS - 1; 130 n32 &= CACHE_LINE_SIZE_IN_WORDS - 1;
234 } 131 }
235 132
236#endif /* CHIP_HAS_WH64() */
237
238 /* Now handle any leftover values. */ 133 /* Now handle any leftover values. */
239 if (n32 != 0) { 134 if (n32 != 0) {
240 do { 135 do {
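
The removed !CHIP_HAS_WH64() path is a software-pipelined memset: it prefetches a few cache lines ahead (by loading, to avoid the victimizing-store-miss problem described in the deleted comment) while filling the current line. Here is a rough user-space sketch of the same prefetch-ahead structure, assuming 64-byte lines and using GCC/Clang __builtin_prefetch in place of the tile __insn_prefetch(); it is an illustration, not the kernel code.

#include <stdint.h>
#include <stddef.h>

#define LINE_WORDS     (64 / sizeof(uint32_t))  /* assumed 64-byte cache line */
#define PREFETCH_AHEAD 4                        /* lines to stay ahead; tunable */

static void memset32_prefetch(uint32_t *out, uint32_t v, size_t nwords)
{
	/* Fill whole cache lines, keeping a prefetch a few lines ahead of the stores. */
	while (nwords >= LINE_WORDS) {
		if (nwords > PREFETCH_AHEAD * LINE_WORDS)
			__builtin_prefetch(out + PREFETCH_AHEAD * LINE_WORDS, 1);
		for (size_t i = 0; i < LINE_WORDS; i++)
			out[i] = v;
		out += LINE_WORDS;
		nwords -= LINE_WORDS;
	}
	/* Leftover partial line. */
	while (nwords--)
		*out++ = v;
}

With WH64 (the remaining tilepro/tilegx path) the whole-cache-line store instruction allocates the line without fetching it, so the read-ahead pipeline above is unnecessary and the code that survives only needs to align to a line boundary.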
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 39c48cbe0a96..111d5a9b76f1 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -466,28 +466,15 @@ good_area:
466 } 466 }
467 } 467 }
468 468
469#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
470 /*
471 * If this was an asynchronous fault,
472 * restart the appropriate engine.
473 */
474 switch (fault_num) {
475#if CHIP_HAS_TILE_DMA() 469#if CHIP_HAS_TILE_DMA()
470 /* If this was a DMA TLB fault, restart the DMA engine. */
471 switch (fault_num) {
476 case INT_DMATLB_MISS: 472 case INT_DMATLB_MISS:
477 case INT_DMATLB_MISS_DWNCL: 473 case INT_DMATLB_MISS_DWNCL:
478 case INT_DMATLB_ACCESS: 474 case INT_DMATLB_ACCESS:
479 case INT_DMATLB_ACCESS_DWNCL: 475 case INT_DMATLB_ACCESS_DWNCL:
480 __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); 476 __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
481 break; 477 break;
482#endif
483#if CHIP_HAS_SN_PROC()
484 case INT_SNITLB_MISS:
485 case INT_SNITLB_MISS_DWNCL:
486 __insn_mtspr(SPR_SNCTL,
487 __insn_mfspr(SPR_SNCTL) &
488 ~SPR_SNCTL__FRZPROC_MASK);
489 break;
490#endif
491 } 478 }
492#endif 479#endif
493 480
@@ -804,10 +791,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
804 case INT_DMATLB_MISS: 791 case INT_DMATLB_MISS:
805 case INT_DMATLB_MISS_DWNCL: 792 case INT_DMATLB_MISS_DWNCL:
806#endif 793#endif
807#if CHIP_HAS_SN_PROC()
808 case INT_SNITLB_MISS:
809 case INT_SNITLB_MISS_DWNCL:
810#endif
811 is_page_fault = 1; 794 is_page_fault = 1;
812 break; 795 break;
813 796
@@ -823,7 +806,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
823 panic("Bad fault number %d in do_page_fault", fault_num); 806 panic("Bad fault number %d in do_page_fault", fault_num);
824 } 807 }
825 808
826#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() 809#if CHIP_HAS_TILE_DMA()
827 if (!user_mode(regs)) { 810 if (!user_mode(regs)) {
828 struct async_tlb *async; 811 struct async_tlb *async;
829 switch (fault_num) { 812 switch (fault_num) {
@@ -835,12 +818,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
835 async = &current->thread.dma_async_tlb; 818 async = &current->thread.dma_async_tlb;
836 break; 819 break;
837#endif 820#endif
838#if CHIP_HAS_SN_PROC()
839 case INT_SNITLB_MISS:
840 case INT_SNITLB_MISS_DWNCL:
841 async = &current->thread.sn_async_tlb;
842 break;
843#endif
844 default: 821 default:
845 async = NULL; 822 async = NULL;
846 } 823 }
@@ -873,14 +850,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
873} 850}
874 851
875 852
876#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() 853#if CHIP_HAS_TILE_DMA()
877/* 854/*
878 * Check an async_tlb structure to see if a deferred fault is waiting, 855 * This routine effectively re-issues asynchronous page faults
879 * and if so pass it to the page-fault code. 856 * when we are returning to user space.
880 */ 857 */
881static void handle_async_page_fault(struct pt_regs *regs, 858void do_async_page_fault(struct pt_regs *regs)
882 struct async_tlb *async)
883{ 859{
860 struct async_tlb *async = &current->thread.dma_async_tlb;
861
862 /*
863 * Clear thread flag early. If we re-interrupt while processing
864 * code here, we will reset it and recall this routine before
865 * returning to user space.
866 */
867 clear_thread_flag(TIF_ASYNC_TLB);
868
884 if (async->fault_num) { 869 if (async->fault_num) {
885 /* 870 /*
886 * Clear async->fault_num before calling the page-fault 871 * Clear async->fault_num before calling the page-fault
@@ -894,28 +879,7 @@ static void handle_async_page_fault(struct pt_regs *regs,
894 async->address, async->is_write); 879 async->address, async->is_write);
895 } 880 }
896} 881}
897 882#endif /* CHIP_HAS_TILE_DMA() */
898/*
899 * This routine effectively re-issues asynchronous page faults
900 * when we are returning to user space.
901 */
902void do_async_page_fault(struct pt_regs *regs)
903{
904 /*
905 * Clear thread flag early. If we re-interrupt while processing
906 * code here, we will reset it and recall this routine before
907 * returning to user space.
908 */
909 clear_thread_flag(TIF_ASYNC_TLB);
910
911#if CHIP_HAS_TILE_DMA()
912 handle_async_page_fault(regs, &current->thread.dma_async_tlb);
913#endif
914#if CHIP_HAS_SN_PROC()
915 handle_async_page_fault(regs, &current->thread.sn_async_tlb);
916#endif
917}
918#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
919 883
920 884
921void vmalloc_sync_all(void) 885void vmalloc_sync_all(void)
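
What remains in fault.c after the SN_PROC cases go away is the deferred-fault replay for the DMA engine: a TLB fault taken on behalf of the engine is recorded in thread state, and do_async_page_fault() re-issues it on the way back to user space, clearing fault_num before the replay so a new asynchronous fault is not lost. A simplified sketch of that pattern follows; struct async_tlb_sk and replay_fault() are hypothetical stand-ins for the kernel's struct async_tlb and handle_page_fault().

#include <stdbool.h>

struct async_tlb_sk {
	int fault_num;               /* 0 means "nothing pending" */
	unsigned long address;
	bool is_write;
};

static void replay_fault(int fault_num, unsigned long address, bool is_write)
{
	/* stand-in for handle_page_fault(regs, fault_num, address, is_write) */
	(void)fault_num; (void)address; (void)is_write;
}

static void do_async_fault_sk(struct async_tlb_sk *async)
{
	if (async->fault_num) {
		/*
		 * Clear fault_num before replaying: if the replay itself takes
		 * another asynchronous fault, the new fault is recorded instead
		 * of overwriting one that is still being processed.
		 */
		int fault_num = async->fault_num;

		async->fault_num = 0;
		replay_fault(fault_num, async->address, async->is_write);
	}
}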
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index e3ee55b0327a..004ba568d93f 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -43,12 +43,9 @@
43#include "migrate.h" 43#include "migrate.h"
44 44
45 45
46#if CHIP_HAS_COHERENT_LOCAL_CACHE()
47
48/* 46/*
49 * The noallocl2 option suppresses all use of the L2 cache to cache 47 * The noallocl2 option suppresses all use of the L2 cache to cache
50 * locally from a remote home. There's no point in using it if we 48 * locally from a remote home.
51 * don't have coherent local caching, though.
52 */ 49 */
53static int __write_once noallocl2; 50static int __write_once noallocl2;
54static int __init set_noallocl2(char *str) 51static int __init set_noallocl2(char *str)
@@ -58,12 +55,6 @@ static int __init set_noallocl2(char *str)
58} 55}
59early_param("noallocl2", set_noallocl2); 56early_param("noallocl2", set_noallocl2);
60 57
61#else
62
63#define noallocl2 0
64
65#endif
66
67 58
68/* 59/*
69 * Update the irq_stat for cpus that we are going to interrupt 60 * Update the irq_stat for cpus that we are going to interrupt
@@ -265,10 +256,8 @@ static int pte_to_home(pte_t pte)
265 return PAGE_HOME_INCOHERENT; 256 return PAGE_HOME_INCOHERENT;
266 case HV_PTE_MODE_UNCACHED: 257 case HV_PTE_MODE_UNCACHED:
267 return PAGE_HOME_UNCACHED; 258 return PAGE_HOME_UNCACHED;
268#if CHIP_HAS_CBOX_HOME_MAP()
269 case HV_PTE_MODE_CACHE_HASH_L3: 259 case HV_PTE_MODE_CACHE_HASH_L3:
270 return PAGE_HOME_HASH; 260 return PAGE_HOME_HASH;
271#endif
272 } 261 }
273 panic("Bad PTE %#llx\n", pte.val); 262 panic("Bad PTE %#llx\n", pte.val);
274} 263}
@@ -325,20 +314,16 @@ pte_t pte_set_home(pte_t pte, int home)
325 HV_PTE_MODE_CACHE_NO_L3); 314 HV_PTE_MODE_CACHE_NO_L3);
326 } 315 }
327 } else 316 } else
328#if CHIP_HAS_CBOX_HOME_MAP()
329 if (hash_default) 317 if (hash_default)
330 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); 318 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
331 else 319 else
332#endif
333 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); 320 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
334 pte = hv_pte_set_nc(pte); 321 pte = hv_pte_set_nc(pte);
335 break; 322 break;
336 323
337#if CHIP_HAS_CBOX_HOME_MAP()
338 case PAGE_HOME_HASH: 324 case PAGE_HOME_HASH:
339 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); 325 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
340 break; 326 break;
341#endif
342 327
343 default: 328 default:
344 BUG_ON(home < 0 || home >= NR_CPUS || 329 BUG_ON(home < 0 || home >= NR_CPUS ||
@@ -348,7 +333,6 @@ pte_t pte_set_home(pte_t pte, int home)
348 break; 333 break;
349 } 334 }
350 335
351#if CHIP_HAS_NC_AND_NOALLOC_BITS()
352 if (noallocl2) 336 if (noallocl2)
353 pte = hv_pte_set_no_alloc_l2(pte); 337 pte = hv_pte_set_no_alloc_l2(pte);
354 338
@@ -357,7 +341,6 @@ pte_t pte_set_home(pte_t pte, int home)
357 hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { 341 hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) {
358 pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); 342 pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
359 } 343 }
360#endif
361 344
362 /* Checking this case here gives a better panic than from the hv. */ 345 /* Checking this case here gives a better panic than from the hv. */
363 BUG_ON(hv_pte_get_mode(pte) == 0); 346 BUG_ON(hv_pte_get_mode(pte) == 0);
@@ -373,16 +356,10 @@ EXPORT_SYMBOL(pte_set_home);
373 * so they're not suitable for anything but infrequent use. 356 * so they're not suitable for anything but infrequent use.
374 */ 357 */
375 358
376#if CHIP_HAS_CBOX_HOME_MAP()
377static inline int initial_page_home(void) { return PAGE_HOME_HASH; }
378#else
379static inline int initial_page_home(void) { return 0; }
380#endif
381
382int page_home(struct page *page) 359int page_home(struct page *page)
383{ 360{
384 if (PageHighMem(page)) { 361 if (PageHighMem(page)) {
385 return initial_page_home(); 362 return PAGE_HOME_HASH;
386 } else { 363 } else {
387 unsigned long kva = (unsigned long)page_address(page); 364 unsigned long kva = (unsigned long)page_address(page);
388 return pte_to_home(*virt_to_kpte(kva)); 365 return pte_to_home(*virt_to_kpte(kva));
@@ -438,7 +415,7 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
438void __homecache_free_pages(struct page *page, unsigned int order) 415void __homecache_free_pages(struct page *page, unsigned int order)
439{ 416{
440 if (put_page_testzero(page)) { 417 if (put_page_testzero(page)) {
441 homecache_change_page_home(page, order, initial_page_home()); 418 homecache_change_page_home(page, order, PAGE_HOME_HASH);
442 if (order == 0) { 419 if (order == 0) {
443 free_hot_cold_page(page, 0); 420 free_hot_cold_page(page, 0);
444 } else { 421 } else {
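
With the CHIP_HAS_CBOX_HOME_MAP() guards gone, hash-for-home is always available, so pte_to_home()/page_home() reduce to a direct mapping from a PTE's cache mode to a home value. The sketch below only illustrates that mapping; the enum values are made up and are not the kernel's PAGE_HOME_* or HV_PTE_MODE_* constants.

enum pte_mode_sk  { MODE_UNCACHED, MODE_CACHE_NO_L3, MODE_CACHE_TILE_L3,
		    MODE_CACHE_HASH_L3 };
enum page_home_sk { HOME_UNCACHED = -2, HOME_INCOHERENT = -3, HOME_HASH = -4 };

static int pte_to_home_sk(enum pte_mode_sk mode, int remote_cpu)
{
	switch (mode) {
	case MODE_UNCACHED:      return HOME_UNCACHED;
	case MODE_CACHE_NO_L3:   return HOME_INCOHERENT;  /* cached only in local L1/L2 */
	case MODE_CACHE_TILE_L3: return remote_cpu;       /* homed on one specific tile */
	case MODE_CACHE_HASH_L3: return HOME_HASH;        /* hashed across all tiles */
	}
	return HOME_UNCACHED;                             /* unreachable for valid modes */
}

The same simplification is why page_home() and __homecache_free_pages() can now use PAGE_HOME_HASH directly instead of the removed initial_page_home() helper.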
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index c8f58c12866d..22e41cf5a2a9 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -106,10 +106,8 @@ pte_t *get_prealloc_pte(unsigned long pfn)
106 */ 106 */
107static int initial_heap_home(void) 107static int initial_heap_home(void)
108{ 108{
109#if CHIP_HAS_CBOX_HOME_MAP()
110 if (hash_default) 109 if (hash_default)
111 return PAGE_HOME_HASH; 110 return PAGE_HOME_HASH;
112#endif
113 return smp_processor_id(); 111 return smp_processor_id();
114} 112}
115 113
@@ -190,14 +188,11 @@ static void __init page_table_range_init(unsigned long start,
190} 188}
191 189
192 190
193#if CHIP_HAS_CBOX_HOME_MAP()
194
195static int __initdata ktext_hash = 1; /* .text pages */ 191static int __initdata ktext_hash = 1; /* .text pages */
196static int __initdata kdata_hash = 1; /* .data and .bss pages */ 192static int __initdata kdata_hash = 1; /* .data and .bss pages */
197int __write_once hash_default = 1; /* kernel allocator pages */ 193int __write_once hash_default = 1; /* kernel allocator pages */
198EXPORT_SYMBOL(hash_default); 194EXPORT_SYMBOL(hash_default);
199int __write_once kstack_hash = 1; /* if no homecaching, use h4h */ 195int __write_once kstack_hash = 1; /* if no homecaching, use h4h */
200#endif /* CHIP_HAS_CBOX_HOME_MAP */
201 196
202/* 197/*
203 * CPUs to use to for striping the pages of kernel data. If hash-for-home 198 * CPUs to use to for striping the pages of kernel data. If hash-for-home
@@ -215,14 +210,12 @@ int __write_once kdata_huge; /* if no homecaching, small pages */
215static pgprot_t __init construct_pgprot(pgprot_t prot, int home) 210static pgprot_t __init construct_pgprot(pgprot_t prot, int home)
216{ 211{
217 prot = pte_set_home(prot, home); 212 prot = pte_set_home(prot, home);
218#if CHIP_HAS_CBOX_HOME_MAP()
219 if (home == PAGE_HOME_IMMUTABLE) { 213 if (home == PAGE_HOME_IMMUTABLE) {
220 if (ktext_hash) 214 if (ktext_hash)
221 prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); 215 prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3);
222 else 216 else
223 prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); 217 prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3);
224 } 218 }
225#endif
226 return prot; 219 return prot;
227} 220}
228 221
@@ -236,20 +229,15 @@ static pgprot_t __init init_pgprot(ulong address)
236 unsigned long page; 229 unsigned long page;
237 enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; 230 enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET };
238 231
239#if CHIP_HAS_CBOX_HOME_MAP()
240 /* For kdata=huge, everything is just hash-for-home. */ 232 /* For kdata=huge, everything is just hash-for-home. */
241 if (kdata_huge) 233 if (kdata_huge)
242 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); 234 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
243#endif
244 235
245 /* We map the aliased pages of permanent text inaccessible. */ 236 /* We map the aliased pages of permanent text inaccessible. */
246 if (address < (ulong) _sinittext - CODE_DELTA) 237 if (address < (ulong) _sinittext - CODE_DELTA)
247 return PAGE_NONE; 238 return PAGE_NONE;
248 239
249 /* 240 /* We map read-only data non-coherent for performance. */
250 * We map read-only data non-coherent for performance. We could
251 * use neighborhood caching on TILE64, but it's not clear it's a win.
252 */
253 if ((address >= (ulong) __start_rodata && 241 if ((address >= (ulong) __start_rodata &&
254 address < (ulong) __end_rodata) || 242 address < (ulong) __end_rodata) ||
255 address == (ulong) empty_zero_page) { 243 address == (ulong) empty_zero_page) {
@@ -257,12 +245,10 @@ static pgprot_t __init init_pgprot(ulong address)
257 } 245 }
258 246
259#ifndef __tilegx__ 247#ifndef __tilegx__
260#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
261 /* Force the atomic_locks[] array page to be hash-for-home. */ 248 /* Force the atomic_locks[] array page to be hash-for-home. */
262 if (address == (ulong) atomic_locks) 249 if (address == (ulong) atomic_locks)
263 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); 250 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
264#endif 251#endif
265#endif
266 252
267 /* 253 /*
268 * Everything else that isn't data or bss is heap, so mark it 254 * Everything else that isn't data or bss is heap, so mark it
@@ -280,11 +266,9 @@ static pgprot_t __init init_pgprot(ulong address)
280 if (address >= (ulong) _end || address < (ulong) _einitdata) 266 if (address >= (ulong) _end || address < (ulong) _einitdata)
281 return construct_pgprot(PAGE_KERNEL, initial_heap_home()); 267 return construct_pgprot(PAGE_KERNEL, initial_heap_home());
282 268
283#if CHIP_HAS_CBOX_HOME_MAP()
284 /* Use hash-for-home if requested for data/bss. */ 269 /* Use hash-for-home if requested for data/bss. */
285 if (kdata_hash) 270 if (kdata_hash)
286 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); 271 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
287#endif
288 272
289 /* 273 /*
290 * Make the w1data homed like heap to start with, to avoid 274 * Make the w1data homed like heap to start with, to avoid
@@ -311,11 +295,9 @@ static pgprot_t __init init_pgprot(ulong address)
311 if (page == (ulong)empty_zero_page) 295 if (page == (ulong)empty_zero_page)
312 continue; 296 continue;
313#ifndef __tilegx__ 297#ifndef __tilegx__
314#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
315 if (page == (ulong)atomic_locks) 298 if (page == (ulong)atomic_locks)
316 continue; 299 continue;
317#endif 300#endif
318#endif
319 cpu = cpumask_next(cpu, &kdata_mask); 301 cpu = cpumask_next(cpu, &kdata_mask);
320 if (cpu == NR_CPUS) 302 if (cpu == NR_CPUS)
321 cpu = cpumask_first(&kdata_mask); 303 cpu = cpumask_first(&kdata_mask);
@@ -358,7 +340,7 @@ static int __init setup_ktext(char *str)
358 340
359 ktext_arg_seen = 1; 341 ktext_arg_seen = 1;
360 342
361 /* Default setting on Tile64: use a huge page */ 343 /* Default setting: use a huge page */
362 if (strcmp(str, "huge") == 0) 344 if (strcmp(str, "huge") == 0)
363 pr_info("ktext: using one huge locally cached page\n"); 345 pr_info("ktext: using one huge locally cached page\n");
364 346
@@ -404,10 +386,8 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot)
404{ 386{
405 if (!ktext_nocache) 387 if (!ktext_nocache)
406 prot = hv_pte_set_nc(prot); 388 prot = hv_pte_set_nc(prot);
407#if CHIP_HAS_NC_AND_NOALLOC_BITS()
408 else 389 else
409 prot = hv_pte_set_no_alloc_l2(prot); 390 prot = hv_pte_set_no_alloc_l2(prot);
410#endif
411 return prot; 391 return prot;
412} 392}
413 393
@@ -440,7 +420,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
440 struct cpumask kstripe_mask; 420 struct cpumask kstripe_mask;
441 int rc, i; 421 int rc, i;
442 422
443#if CHIP_HAS_CBOX_HOME_MAP()
444 if (ktext_arg_seen && ktext_hash) { 423 if (ktext_arg_seen && ktext_hash) {
445 pr_warning("warning: \"ktext\" boot argument ignored" 424 pr_warning("warning: \"ktext\" boot argument ignored"
446 " if \"kcache_hash\" sets up text hash-for-home\n"); 425 " if \"kcache_hash\" sets up text hash-for-home\n");
@@ -457,7 +436,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
457 " kcache_hash=all or =allbutstack\n"); 436 " kcache_hash=all or =allbutstack\n");
458 kdata_huge = 0; 437 kdata_huge = 0;
459 } 438 }
460#endif
461 439
462 /* 440 /*
463 * Set up a mask for cpus to use for kernel striping. 441 * Set up a mask for cpus to use for kernel striping.
@@ -585,13 +563,11 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
585 } else { 563 } else {
586 pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); 564 pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
587 pteval = pte_mkhuge(pteval); 565 pteval = pte_mkhuge(pteval);
588#if CHIP_HAS_CBOX_HOME_MAP()
589 if (ktext_hash) { 566 if (ktext_hash) {
590 pteval = hv_pte_set_mode(pteval, 567 pteval = hv_pte_set_mode(pteval,
591 HV_PTE_MODE_CACHE_HASH_L3); 568 HV_PTE_MODE_CACHE_HASH_L3);
592 pteval = ktext_set_nocache(pteval); 569 pteval = ktext_set_nocache(pteval);
593 } else 570 } else
594#endif /* CHIP_HAS_CBOX_HOME_MAP() */
595 if (cpumask_weight(&ktext_mask) == 1) { 571 if (cpumask_weight(&ktext_mask) == 1) {
596 pteval = set_remote_cache_cpu(pteval, 572 pteval = set_remote_cache_cpu(pteval,
597 cpumask_first(&ktext_mask)); 573 cpumask_first(&ktext_mask));
@@ -938,26 +914,6 @@ void __init pgtable_cache_init(void)
938 panic("pgtable_cache_init(): Cannot create pgd cache"); 914 panic("pgtable_cache_init(): Cannot create pgd cache");
939} 915}
940 916
941#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
942/*
943 * The __w1data area holds data that is only written during initialization,
944 * and is read-only and thus freely cacheable thereafter. Fix the page
945 * table entries that cover that region accordingly.
946 */
947static void mark_w1data_ro(void)
948{
949 /* Loop over page table entries */
950 unsigned long addr = (unsigned long)__w1data_begin;
951 BUG_ON((addr & (PAGE_SIZE-1)) != 0);
952 for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) {
953 unsigned long pfn = kaddr_to_pfn((void *)addr);
954 pte_t *ptep = virt_to_kpte(addr);
955 BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */
956 set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO));
957 }
958}
959#endif
960
961#ifdef CONFIG_DEBUG_PAGEALLOC 917#ifdef CONFIG_DEBUG_PAGEALLOC
962static long __write_once initfree; 918static long __write_once initfree;
963#else 919#else
@@ -1026,10 +982,7 @@ void free_initmem(void)
1026 /* 982 /*
1027 * Evict the dirty initdata on the boot cpu, evict the w1data 983 * Evict the dirty initdata on the boot cpu, evict the w1data
1028 * wherever it's homed, and evict all the init code everywhere. 984 * wherever it's homed, and evict all the init code everywhere.
1029 * We are guaranteed that no one will touch the init pages any 985 * We are guaranteed that no one will touch the init pages any more.
1030 * more, and although other cpus may be touching the w1data,
1031 * we only actually change the caching on tile64, which won't
1032 * be keeping local copies in the other tiles' caches anyway.
1033 */ 986 */
1034 homecache_evict(&cpu_cacheable_map); 987 homecache_evict(&cpu_cacheable_map);
1035 988
@@ -1045,21 +998,6 @@ void free_initmem(void)
1045 free_init_pages("unused kernel text", 998 free_init_pages("unused kernel text",
1046 (unsigned long)_sinittext - text_delta, 999 (unsigned long)_sinittext - text_delta,
1047 (unsigned long)_einittext - text_delta); 1000 (unsigned long)_einittext - text_delta);
1048
1049#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
1050 /*
1051 * Upgrade the .w1data section to globally cached.
1052 * We don't do this on tilepro, since the cache architecture
1053 * pretty much makes it irrelevant, and in any case we end
1054 * up having racing issues with other tiles that may touch
1055 * the data after we flush the cache but before we update
1056 * the PTEs and flush the TLBs, causing sharer shootdowns
1057 * later. Even though this is to clean data, it seems like
1058 * an unnecessary complication.
1059 */
1060 mark_w1data_ro();
1061#endif
1062
1063 /* Do a global TLB flush so everyone sees the changes. */ 1001 /* Do a global TLB flush so everyone sees the changes. */
1064 flush_tlb_all(); 1002 flush_tlb_all();
1065} 1003}
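
Across init.c the pattern repeats: the hash-for-home branches become unconditional, so init_pgprot() is essentially an address-range policy table. The condensed sketch below shows only that decision flow under stated assumptions; struct layout_sk and the enum are hypothetical stand-ins for the kernel's linker symbols and boot flags, and the heap and atomic_locks special cases are omitted.

enum home_sk { HOME_HASH_SK, HOME_IMMUTABLE_SK, HOME_CPU_STRIPE_SK, HOME_NONE_SK };

struct layout_sk {
	unsigned long text_alias_end;            /* end of the aliased permanent text */
	unsigned long rodata_start, rodata_end;  /* read-only data section bounds */
	int kdata_hash;                          /* hash-for-home .data/.bss (default on) */
	int kdata_huge;                          /* "kdata=huge": hash everything */
};

static enum home_sk init_home_sk(const struct layout_sk *l, unsigned long addr)
{
	if (l->kdata_huge)
		return HOME_HASH_SK;             /* everything hash-for-home */
	if (addr < l->text_alias_end)
		return HOME_NONE_SK;             /* aliased text: mapped inaccessible */
	if (addr >= l->rodata_start && addr < l->rodata_end)
		return HOME_IMMUTABLE_SK;        /* read-only data: non-coherent for speed */
	if (l->kdata_hash)
		return HOME_HASH_SK;             /* data/bss hashed by default */
	return HOME_CPU_STRIPE_SK;               /* otherwise striped across kdata_mask cpus */
}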