Diffstat (limited to 'drivers/misc/sgi-gru')
-rw-r--r-- | drivers/misc/sgi-gru/Makefile | 3
-rw-r--r-- | drivers/misc/sgi-gru/gru.h | 67
-rw-r--r-- | drivers/misc/sgi-gru/gru_instructions.h | 669
-rw-r--r-- | drivers/misc/sgi-gru/grufault.c | 633
-rw-r--r-- | drivers/misc/sgi-gru/grufile.c | 485
-rw-r--r-- | drivers/misc/sgi-gru/gruhandles.h | 663
-rw-r--r-- | drivers/misc/sgi-gru/grukservices.c | 679
-rw-r--r-- | drivers/misc/sgi-gru/grukservices.h | 134
-rw-r--r-- | drivers/misc/sgi-gru/grulib.h | 97
-rw-r--r-- | drivers/misc/sgi-gru/grumain.c | 802
-rw-r--r-- | drivers/misc/sgi-gru/gruprocfs.c | 336
-rw-r--r-- | drivers/misc/sgi-gru/grutables.h | 609
-rw-r--r-- | drivers/misc/sgi-gru/grutlbpurge.c | 371
13 files changed, 5548 insertions, 0 deletions
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
new file mode 100644
index 000000000000..d03597a521b0
--- /dev/null
+++ b/drivers/misc/sgi-gru/Makefile
@@ -0,0 +1,3 @@
1 | obj-$(CONFIG_SGI_GRU) := gru.o | ||
2 | gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o | ||
3 | |||
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
new file mode 100644
index 000000000000..40df7cb3f0a5
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru.h
@@ -0,0 +1,67 @@
1 | /* | ||
2 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU Lesser General Public License as published by | ||
6 | * the Free Software Foundation; either version 2.1 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU Lesser General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU Lesser General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #ifndef __GRU_H__ | ||
20 | #define __GRU_H__ | ||
21 | |||
22 | /* | ||
23 | * GRU architectural definitions | ||
24 | */ | ||
25 | #define GRU_CACHE_LINE_BYTES 64 | ||
26 | #define GRU_HANDLE_STRIDE 256 | ||
27 | #define GRU_CB_BASE 0 | ||
28 | #define GRU_DS_BASE 0x20000 | ||
29 | |||
30 | /* | ||
31 | * Size used to map GRU GSeg | ||
32 | */ | ||
33 | #if defined CONFIG_IA64 | ||
34 | #define GRU_GSEG_PAGESIZE (256 * 1024UL) | ||
35 | #elif defined CONFIG_X86_64 | ||
36 | #define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */ | ||
37 | #else | ||
38 | #error "Unsupported architecture" | ||
39 | #endif | ||
40 | |||
41 | /* | ||
42 | * Structure for obtaining GRU resource information | ||
43 | */ | ||
44 | struct gru_chiplet_info { | ||
45 | int node; | ||
46 | int chiplet; | ||
47 | int blade; | ||
48 | int total_dsr_bytes; | ||
49 | int total_cbr; | ||
50 | int total_user_dsr_bytes; | ||
51 | int total_user_cbr; | ||
52 | int free_user_dsr_bytes; | ||
53 | int free_user_cbr; | ||
54 | }; | ||
55 | |||
56 | /* Flags for GRU options on the gru_create_context() call */ | ||
57 | /* Select one of the following 4 options to specify how TLB misses are handled */ | ||
58 | #define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */ | ||
59 | #define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */ | ||
60 | #define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to | ||
61 | handle fault */ | ||
62 | #define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */ | ||
63 | #define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */ | ||
64 | |||
65 | |||
66 | |||
67 | #endif /* __GRU_H__ */ | ||
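
To illustrate the option flags above, here is a minimal sketch (not part of the patch; the function and its caller are hypothetical): the low two bits of the context-creation options word, masked with GRU_OPT_MISS_MASK, select the TLB miss-handling mode.

	#include "gru.h"

	/* Sketch: decode the miss-handling mode from an options word. */
	static const char *miss_mode_name(unsigned int options)
	{
		switch (options & GRU_OPT_MISS_MASK) {
		case GRU_OPT_MISS_USER_POLL:
			return "user polls CB";
		case GRU_OPT_MISS_FMM_INTR:
			return "cpu interrupt on fault";
		case GRU_OPT_MISS_FMM_POLL:
			return "system polling thread";
		default:
			return "default";	/* GRU_OPT_MISS_DEFAULT */
		}
	}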
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
new file mode 100644
index 000000000000..0dc36225c7c6
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -0,0 +1,669 @@
1 | /* | ||
2 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU Lesser General Public License as published by | ||
6 | * the Free Software Foundation; either version 2.1 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU Lesser General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU Lesser General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #ifndef __GRU_INSTRUCTIONS_H__ | ||
20 | #define __GRU_INSTRUCTIONS_H__ | ||
21 | |||
22 | #define gru_flush_cache_hook(p) | ||
23 | #define gru_emulator_wait_hook(p, w) | ||
24 | |||
25 | /* | ||
26 | * Architecture dependent functions | ||
27 | */ | ||
28 | |||
29 | #if defined CONFIG_IA64 | ||
30 | #include <linux/compiler.h> | ||
31 | #include <asm/intrinsics.h> | ||
32 | #define __flush_cache(p) ia64_fc(p) | ||
33 | /* Use volatile on IA64 to ensure ordering via st4.rel */ | ||
34 | #define gru_ordered_store_int(p,v) \ | ||
35 | do { \ | ||
36 | barrier(); \ | ||
37 | *((volatile int *)(p)) = v; /* force st.rel */ \ | ||
38 | } while (0) | ||
39 | #elif defined CONFIG_X86_64 | ||
40 | #define __flush_cache(p) clflush(p) | ||
41 | #define gru_ordered_store_int(p,v) \ | ||
42 | do { \ | ||
43 | barrier(); \ | ||
44 | *(int *)(p) = v; \ | ||
45 | } while (0) | ||
46 | #else | ||
47 | #error "Unsupported architecture" | ||
48 | #endif | ||
49 | |||
50 | /* | ||
51 | * Control block status and exception codes | ||
52 | */ | ||
53 | #define CBS_IDLE 0 | ||
54 | #define CBS_EXCEPTION 1 | ||
55 | #define CBS_ACTIVE 2 | ||
56 | #define CBS_CALL_OS 3 | ||
57 | |||
58 | /* CB substatus bitmasks */ | ||
59 | #define CBSS_MSG_QUEUE_MASK 7 | ||
60 | #define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8 | ||
61 | |||
62 | /* CB substatus message queue values (low 3 bits of substatus) */ | ||
63 | #define CBSS_NO_ERROR 0 | ||
64 | #define CBSS_LB_OVERFLOWED 1 | ||
65 | #define CBSS_QLIMIT_REACHED 2 | ||
66 | #define CBSS_PAGE_OVERFLOW 3 | ||
67 | #define CBSS_AMO_NACKED 4 | ||
68 | #define CBSS_PUT_NACKED 5 | ||
69 | |||
70 | /* | ||
71 | * Structure used to fetch exception detail for CBs that terminate with | ||
72 | * CBS_EXCEPTION | ||
73 | */ | ||
74 | struct control_block_extended_exc_detail { | ||
75 | unsigned long cb; | ||
76 | int opc; | ||
77 | int ecause; | ||
78 | int exopc; | ||
79 | long exceptdet0; | ||
80 | int exceptdet1; | ||
81 | }; | ||
82 | |||
83 | /* | ||
84 | * Instruction formats | ||
85 | */ | ||
86 | |||
87 | /* | ||
88 | * Generic instruction format. | ||
89 | * This definition has precise bit field definitions. | ||
90 | */ | ||
91 | struct gru_instruction_bits { | ||
92 | /* DW 0 - low */ | ||
93 | unsigned int icmd: 1; | ||
94 | unsigned char ima: 3; /* CB_DelRep, unmapped mode */ | ||
95 | unsigned char reserved0: 4; | ||
96 | unsigned int xtype: 3; | ||
97 | unsigned int iaa0: 2; | ||
98 | unsigned int iaa1: 2; | ||
99 | unsigned char reserved1: 1; | ||
100 | unsigned char opc: 8; /* opcode */ | ||
101 | unsigned char exopc: 8; /* extended opcode */ | ||
102 | /* DW 0 - high */ | ||
103 | unsigned int idef2: 22; /* TRi0 */ | ||
104 | unsigned char reserved2: 2; | ||
105 | unsigned char istatus: 2; | ||
106 | unsigned char isubstatus:4; | ||
107 | unsigned char reserved3: 2; | ||
108 | /* DW 1 */ | ||
109 | unsigned long idef4; /* 42 bits: TRi1, BufSize */ | ||
110 | /* DW 2-6 */ | ||
111 | unsigned long idef1; /* BAddr0 */ | ||
112 | unsigned long idef5; /* Nelem */ | ||
113 | unsigned long idef6; /* Stride, Operand1 */ | ||
114 | unsigned long idef3; /* BAddr1, Value, Operand2 */ | ||
115 | unsigned long reserved4; | ||
116 | /* DW 7 */ | ||
117 | unsigned long avalue; /* AValue */ | ||
118 | }; | ||
119 | |||
120 | /* | ||
121 | * Generic instruction with friendlier names. This format is used | ||
122 | * for inline instructions. | ||
123 | */ | ||
124 | struct gru_instruction { | ||
125 | /* DW 0 */ | ||
126 | unsigned int op32; /* icmd,xtype,iaa0,ima,opc */ | ||
127 | unsigned int tri0; | ||
128 | unsigned long tri1_bufsize; /* DW 1 */ | ||
129 | unsigned long baddr0; /* DW 2 */ | ||
130 | unsigned long nelem; /* DW 3 */ | ||
131 | unsigned long op1_stride; /* DW 4 */ | ||
132 | unsigned long op2_value_baddr1; /* DW 5 */ | ||
133 | unsigned long reserved0; /* DW 6 */ | ||
134 | unsigned long avalue; /* DW 7 */ | ||
135 | }; | ||
136 | |||
137 | /* Some shifts and masks for the low 32 bits of a GRU command */ | ||
138 | #define GRU_CB_ICMD_SHFT 0 | ||
139 | #define GRU_CB_ICMD_MASK 0x1 | ||
140 | #define GRU_CB_XTYPE_SHFT 8 | ||
141 | #define GRU_CB_XTYPE_MASK 0x7 | ||
142 | #define GRU_CB_IAA0_SHFT 11 | ||
143 | #define GRU_CB_IAA0_MASK 0x3 | ||
144 | #define GRU_CB_IAA1_SHFT 13 | ||
145 | #define GRU_CB_IAA1_MASK 0x3 | ||
146 | #define GRU_CB_IMA_SHFT 1 | ||
147 | #define GRU_CB_IMA_MASK 0x3 | ||
148 | #define GRU_CB_OPC_SHFT 16 | ||
149 | #define GRU_CB_OPC_MASK 0xff | ||
150 | #define GRU_CB_EXOPC_SHFT 24 | ||
151 | #define GRU_CB_EXOPC_MASK 0xff | ||
152 | |||
153 | /* GRU instruction opcodes (opc field) */ | ||
154 | #define OP_NOP 0x00 | ||
155 | #define OP_BCOPY 0x01 | ||
156 | #define OP_VLOAD 0x02 | ||
157 | #define OP_IVLOAD 0x03 | ||
158 | #define OP_VSTORE 0x04 | ||
159 | #define OP_IVSTORE 0x05 | ||
160 | #define OP_VSET 0x06 | ||
161 | #define OP_IVSET 0x07 | ||
162 | #define OP_MESQ 0x08 | ||
163 | #define OP_GAMXR 0x09 | ||
164 | #define OP_GAMIR 0x0a | ||
165 | #define OP_GAMIRR 0x0b | ||
166 | #define OP_GAMER 0x0c | ||
167 | #define OP_GAMERR 0x0d | ||
168 | #define OP_BSTORE 0x0e | ||
169 | #define OP_VFLUSH 0x0f | ||
170 | |||
171 | |||
172 | /* Extended opcodes values (exopc field) */ | ||
173 | |||
174 | /* GAMIR - AMOs with implicit operands */ | ||
175 | #define EOP_IR_FETCH 0x01 /* Plain fetch of memory */ | ||
176 | #define EOP_IR_CLR 0x02 /* Fetch and clear */ | ||
177 | #define EOP_IR_INC 0x05 /* Fetch and increment */ | ||
178 | #define EOP_IR_DEC 0x07 /* Fetch and decrement */ | ||
179 | #define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */ | ||
180 | #define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */ | ||
181 | |||
182 | /* GAMIRR - Registered AMOs with implicit operands */ | ||
183 | #define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */ | ||
184 | #define EOP_IRR_CLR 0x02 /* Registered fetch and clear */ | ||
185 | #define EOP_IRR_INC 0x05 /* Registered fetch and increment */ | ||
186 | #define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */ | ||
187 | #define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/ | ||
188 | |||
189 | /* GAMER - AMOs with explicit operands */ | ||
190 | #define EOP_ER_SWAP 0x00 /* Exchange argument and memory */ | ||
191 | #define EOP_ER_OR 0x01 /* Logical OR with memory */ | ||
192 | #define EOP_ER_AND 0x02 /* Logical AND with memory */ | ||
193 | #define EOP_ER_XOR 0x03 /* Logical XOR with memory */ | ||
194 | #define EOP_ER_ADD 0x04 /* Add value to memory */ | ||
195 | #define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ | ||
196 | #define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */ | ||
197 | |||
198 | /* GAMERR - Registered AMOs with explicit operands */ | ||
199 | #define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */ | ||
200 | #define EOP_ERR_OR 0x01 /* Logical OR with memory */ | ||
201 | #define EOP_ERR_AND 0x02 /* Logical AND with memory */ | ||
202 | #define EOP_ERR_XOR 0x03 /* Logical XOR with memory */ | ||
203 | #define EOP_ERR_ADD 0x04 /* Add value to memory */ | ||
204 | #define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ | ||
205 | #define EOP_ERR_EPOLL 0x09 /* Poll for equality */ | ||
206 | #define EOP_ERR_NPOLL 0x0a /* Poll for inequality */ | ||
207 | |||
208 | /* GAMXR - SGI Arithmetic unit */ | ||
209 | #define EOP_XR_CSWAP 0x0b /* Masked compare exchange */ | ||
210 | |||
211 | |||
212 | /* Transfer types (xtype field) */ | ||
213 | #define XTYPE_B 0x0 /* byte */ | ||
214 | #define XTYPE_S 0x1 /* short (2-byte) */ | ||
215 | #define XTYPE_W 0x2 /* word (4-byte) */ | ||
216 | #define XTYPE_DW 0x3 /* doubleword (8-byte) */ | ||
217 | #define XTYPE_CL 0x6 /* cacheline (64-byte) */ | ||
218 | |||
219 | |||
220 | /* Instruction access attributes (iaa0, iaa1 fields) */ | ||
221 | #define IAA_RAM 0x0 /* normal cached RAM access */ | ||
222 | #define IAA_NCRAM 0x2 /* noncoherent RAM access */ | ||
223 | #define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */ | ||
224 | #define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */ | ||
225 | |||
226 | |||
227 | /* Instruction mode attributes (ima field) */ | ||
228 | #define IMA_MAPPED 0x0 /* Virtual mode */ | ||
229 | #define IMA_CB_DELAY 0x1 /* hold read responses until status changes */ | ||
230 | #define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */ | ||
231 | #define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */ | ||
232 | |||
233 | /* CBE ecause bits */ | ||
234 | #define CBE_CAUSE_RI (1 << 0) | ||
235 | #define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1) | ||
236 | #define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2) | ||
237 | #define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3) | ||
238 | #define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4) | ||
239 | #define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5) | ||
240 | #define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6) | ||
241 | #define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7) | ||
242 | #define CBE_CAUSE_TLBHW_ERROR (1 << 8) | ||
243 | #define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9) | ||
244 | #define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10) | ||
245 | #define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11) | ||
246 | #define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12) | ||
247 | #define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13) | ||
248 | #define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14) | ||
249 | #define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15) | ||
250 | #define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16) | ||
251 | #define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17) | ||
252 | |||
253 | /* | ||
254 | * Exceptions are retried for the following cases. If any OTHER bits are set | ||
255 | * in ecause, the exception is not retryable. | ||
256 | */ | ||
257 | #define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \ | ||
258 | CBE_CAUSE_RA_REQUEST_TIMEOUT | \ | ||
259 | CBE_CAUSE_TLBHW_ERROR | \ | ||
260 | CBE_CAUSE_HA_REQUEST_TIMEOUT) | ||
261 | |||
262 | /* Message queue head structure */ | ||
263 | union gru_mesqhead { | ||
264 | unsigned long val; | ||
265 | struct { | ||
266 | unsigned int head; | ||
267 | unsigned int limit; | ||
268 | }; | ||
269 | }; | ||
270 | |||
271 | |||
272 | /* Generate the low word of a GRU instruction */ | ||
273 | static inline unsigned int | ||
274 | __opword(unsigned char opcode, unsigned char exopc, unsigned char xtype, | ||
275 | unsigned char iaa0, unsigned char iaa1, | ||
276 | unsigned char ima) | ||
277 | { | ||
278 | return (1 << GRU_CB_ICMD_SHFT) | | ||
279 | (iaa0 << GRU_CB_IAA0_SHFT) | | ||
280 | (iaa1 << GRU_CB_IAA1_SHFT) | | ||
281 | (ima << GRU_CB_IMA_SHFT) | | ||
282 | (xtype << GRU_CB_XTYPE_SHFT) | | ||
283 | (opcode << GRU_CB_OPC_SHFT) | | ||
284 | (exopc << GRU_CB_EXOPC_SHFT); | ||
285 | } | ||
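
As a worked example (ours, not the author's): for a doubleword virtual load from cached RAM in unmapped mode, the shifts and masks above combine as follows.

	/* __opword(OP_VLOAD, 0, XTYPE_DW, IAA_RAM, 0, IMA_UNMAPPED):
	 *   icmd  = 1            << 0  -> 0x00000001
	 *   ima   = IMA_UNMAPPED << 1  -> 0x00000004
	 *   xtype = XTYPE_DW     << 8  -> 0x00000300
	 *   iaa0  = IAA_RAM (0)  << 11 -> 0x00000000
	 *   opc   = OP_VLOAD     << 16 -> 0x00020000
	 *   exopc = 0            << 24 -> 0x00000000
	 * yielding op32 == 0x00020305.
	 */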
286 | |||
287 | /* | ||
288 | * Architecture specific intrinsics | ||
289 | */ | ||
290 | static inline void gru_flush_cache(void *p) | ||
291 | { | ||
292 | __flush_cache(p); | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * Store the lower 32 bits of the command including the "start" bit. Then | ||
297 | * start the instruction executing. | ||
298 | */ | ||
299 | static inline void gru_start_instruction(struct gru_instruction *ins, int op32) | ||
300 | { | ||
301 | gru_ordered_store_int(ins, op32); | ||
302 | } | ||
303 | |||
304 | |||
305 | /* Convert "hints" to IMA */ | ||
306 | #define CB_IMA(h) ((h) | IMA_UNMAPPED) | ||
307 | |||
308 | /* Convert data segment cache line index into TRI0 / TRI1 value */ | ||
309 | #define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES) | ||
310 | |||
311 | /* Inline functions for GRU instructions. | ||
312 | * Note: | ||
313 | * - nelem and stride are in elements | ||
314 | * - tri0/tri1 are byte offsets from the beginning of the data segment. | ||
315 | */ | ||
316 | static inline void gru_vload(void *cb, unsigned long mem_addr, | ||
317 | unsigned int tri0, unsigned char xtype, unsigned long nelem, | ||
318 | unsigned long stride, unsigned long hints) | ||
319 | { | ||
320 | struct gru_instruction *ins = (struct gru_instruction *)cb; | ||
321 | |||
322 | ins->baddr0 = (long)mem_addr; | ||
323 | ins->nelem = nelem; | ||
324 | ins->tri0 = tri0; | ||
325 | ins->op1_stride = stride; | ||
326 | gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0, | ||
327 | CB_IMA(hints))); | ||
328 | } | ||
329 | |||
330 | static inline void gru_vstore(void *cb, unsigned long mem_addr, | ||
331 | unsigned int tri0, unsigned char xtype, unsigned long nelem, | ||
332 | unsigned long stride, unsigned long hints) | ||
333 | { | ||
334 | struct gru_instruction *ins = (void *)cb; | ||
335 | |||
336 | ins->baddr0 = (long)mem_addr; | ||
337 | ins->nelem = nelem; | ||
338 | ins->tri0 = tri0; | ||
339 | ins->op1_stride = stride; | ||
340 | gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0, | ||
341 | CB_IMA(hints))); | ||
342 | } | ||
343 | |||
344 | static inline void gru_ivload(void *cb, unsigned long mem_addr, | ||
345 | unsigned int tri0, unsigned int tri1, unsigned char xtype, | ||
346 | unsigned long nelem, unsigned long hints) | ||
347 | { | ||
348 | struct gru_instruction *ins = (void *)cb; | ||
349 | |||
350 | ins->baddr0 = (long)mem_addr; | ||
351 | ins->nelem = nelem; | ||
352 | ins->tri0 = tri0; | ||
353 | ins->tri1_bufsize = tri1; | ||
354 | gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, | ||
355 | CB_IMA(hints))); | ||
356 | } | ||
357 | |||
358 | static inline void gru_ivstore(void *cb, unsigned long mem_addr, | ||
359 | unsigned int tri0, unsigned int tri1, | ||
360 | unsigned char xtype, unsigned long nelem, unsigned long hints) | ||
361 | { | ||
362 | struct gru_instruction *ins = (void *)cb; | ||
363 | |||
364 | ins->baddr0 = (long)mem_addr; | ||
365 | ins->nelem = nelem; | ||
366 | ins->tri0 = tri0; | ||
367 | ins->tri1_bufsize = tri1; | ||
368 | gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, | ||
369 | CB_IMA(hints))); | ||
370 | } | ||
371 | |||
372 | static inline void gru_vset(void *cb, unsigned long mem_addr, | ||
373 | unsigned long value, unsigned char xtype, unsigned long nelem, | ||
374 | unsigned long stride, unsigned long hints) | ||
375 | { | ||
376 | struct gru_instruction *ins = (void *)cb; | ||
377 | |||
378 | ins->baddr0 = (long)mem_addr; | ||
379 | ins->op2_value_baddr1 = value; | ||
380 | ins->nelem = nelem; | ||
381 | ins->op1_stride = stride; | ||
382 | gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0, | ||
383 | CB_IMA(hints))); | ||
384 | } | ||
385 | |||
386 | static inline void gru_ivset(void *cb, unsigned long mem_addr, | ||
387 | unsigned int tri1, unsigned long value, unsigned char xtype, | ||
388 | unsigned long nelem, unsigned long hints) | ||
389 | { | ||
390 | struct gru_instruction *ins = (void *)cb; | ||
391 | |||
392 | ins->baddr0 = (long)mem_addr; | ||
393 | ins->op2_value_baddr1 = value; | ||
394 | ins->nelem = nelem; | ||
395 | ins->tri1_bufsize = tri1; | ||
396 | gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0, | ||
397 | CB_IMA(hints))); | ||
398 | } | ||
399 | |||
400 | static inline void gru_vflush(void *cb, unsigned long mem_addr, | ||
401 | unsigned long nelem, unsigned char xtype, unsigned long stride, | ||
402 | unsigned long hints) | ||
403 | { | ||
404 | struct gru_instruction *ins = (void *)cb; | ||
405 | |||
406 | ins->baddr0 = (long)mem_addr; | ||
407 | ins->op1_stride = stride; | ||
408 | ins->nelem = nelem; | ||
409 | gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, | ||
410 | CB_IMA(hints))); | ||
411 | } | ||
412 | |||
413 | static inline void gru_nop(void *cb, int hints) | ||
414 | { | ||
415 | struct gru_instruction *ins = (void *)cb; | ||
416 | |||
417 | gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints))); | ||
418 | } | ||
419 | |||
420 | |||
421 | static inline void gru_bcopy(void *cb, const unsigned long src, | ||
422 | unsigned long dest, | ||
423 | unsigned int tri0, unsigned int xtype, unsigned long nelem, | ||
424 | unsigned int bufsize, unsigned long hints) | ||
425 | { | ||
426 | struct gru_instruction *ins = (void *)cb; | ||
427 | |||
428 | ins->baddr0 = (long)src; | ||
429 | ins->op2_value_baddr1 = (long)dest; | ||
430 | ins->nelem = nelem; | ||
431 | ins->tri0 = tri0; | ||
432 | ins->tri1_bufsize = bufsize; | ||
433 | gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM, | ||
434 | IAA_RAM, CB_IMA(hints))); | ||
435 | } | ||
436 | |||
437 | static inline void gru_bstore(void *cb, const unsigned long src, | ||
438 | unsigned long dest, unsigned int tri0, unsigned int xtype, | ||
439 | unsigned long nelem, unsigned long hints) | ||
440 | { | ||
441 | struct gru_instruction *ins = (void *)cb; | ||
442 | |||
443 | ins->baddr0 = (long)src; | ||
444 | ins->op2_value_baddr1 = (long)dest; | ||
445 | ins->nelem = nelem; | ||
446 | ins->tri0 = tri0; | ||
447 | gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM, | ||
448 | CB_IMA(hints))); | ||
449 | } | ||
450 | |||
451 | static inline void gru_gamir(void *cb, int exopc, unsigned long src, | ||
452 | unsigned int xtype, unsigned long hints) | ||
453 | { | ||
454 | struct gru_instruction *ins = (void *)cb; | ||
455 | |||
456 | ins->baddr0 = (long)src; | ||
457 | gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, | ||
458 | CB_IMA(hints))); | ||
459 | } | ||
460 | |||
461 | static inline void gru_gamirr(void *cb, int exopc, unsigned long src, | ||
462 | unsigned int xtype, unsigned long hints) | ||
463 | { | ||
464 | struct gru_instruction *ins = (void *)cb; | ||
465 | |||
466 | ins->baddr0 = (long)src; | ||
467 | gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, | ||
468 | CB_IMA(hints))); | ||
469 | } | ||
470 | |||
471 | static inline void gru_gamer(void *cb, int exopc, unsigned long src, | ||
472 | unsigned int xtype, | ||
473 | unsigned long operand1, unsigned long operand2, | ||
474 | unsigned long hints) | ||
475 | { | ||
476 | struct gru_instruction *ins = (void *)cb; | ||
477 | |||
478 | ins->baddr0 = (long)src; | ||
479 | ins->op1_stride = operand1; | ||
480 | ins->op2_value_baddr1 = operand2; | ||
481 | gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0, | ||
482 | CB_IMA(hints))); | ||
483 | } | ||
484 | |||
485 | static inline void gru_gamerr(void *cb, int exopc, unsigned long src, | ||
486 | unsigned int xtype, unsigned long operand1, | ||
487 | unsigned long operand2, unsigned long hints) | ||
488 | { | ||
489 | struct gru_instruction *ins = (void *)cb; | ||
490 | |||
491 | ins->baddr0 = (long)src; | ||
492 | ins->op1_stride = operand1; | ||
493 | ins->op2_value_baddr1 = operand2; | ||
494 | gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, | ||
495 | CB_IMA(hints))); | ||
496 | } | ||
497 | |||
498 | static inline void gru_gamxr(void *cb, unsigned long src, | ||
499 | unsigned int tri0, unsigned long hints) | ||
500 | { | ||
501 | struct gru_instruction *ins = (void *)cb; | ||
502 | |||
503 | ins->baddr0 = (long)src; | ||
504 | ins->nelem = 4; | ||
505 | gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, | ||
506 | IAA_RAM, 0, CB_IMA(hints))); | ||
507 | } | ||
508 | |||
509 | static inline void gru_mesq(void *cb, unsigned long queue, | ||
510 | unsigned long tri0, unsigned long nelem, | ||
511 | unsigned long hints) | ||
512 | { | ||
513 | struct gru_instruction *ins = (void *)cb; | ||
514 | |||
515 | ins->baddr0 = (long)queue; | ||
516 | ins->nelem = nelem; | ||
517 | ins->tri0 = tri0; | ||
518 | gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, | ||
519 | CB_IMA(hints))); | ||
520 | } | ||
521 | |||
522 | static inline unsigned long gru_get_amo_value(void *cb) | ||
523 | { | ||
524 | struct gru_instruction *ins = (void *)cb; | ||
525 | |||
526 | return ins->avalue; | ||
527 | } | ||
528 | |||
529 | static inline int gru_get_amo_value_head(void *cb) | ||
530 | { | ||
531 | struct gru_instruction *ins = (void *)cb; | ||
532 | |||
533 | return ins->avalue & 0xffffffff; | ||
534 | } | ||
535 | |||
536 | static inline int gru_get_amo_value_limit(void *cb) | ||
537 | { | ||
538 | struct gru_instruction *ins = (void *)cb; | ||
539 | |||
540 | return ins->avalue >> 32; | ||
541 | } | ||
542 | |||
543 | static inline union gru_mesqhead gru_mesq_head(int head, int limit) | ||
544 | { | ||
545 | union gru_mesqhead mqh; | ||
546 | |||
547 | mqh.head = head; | ||
548 | mqh.limit = limit; | ||
549 | return mqh; | ||
550 | } | ||
551 | |||
552 | /* | ||
553 | * Get struct control_block_extended_exc_detail for CB. | ||
554 | */ | ||
555 | extern int gru_get_cb_exception_detail(void *cb, | ||
556 | struct control_block_extended_exc_detail *excdet); | ||
557 | |||
558 | #define GRU_EXC_STR_SIZE 256 | ||
559 | |||
560 | extern int gru_check_status_proc(void *cb); | ||
561 | extern int gru_wait_proc(void *cb); | ||
562 | extern void gru_wait_abort_proc(void *cb); | ||
563 | |||
564 | /* | ||
565 | * Control block definition for checking status | ||
566 | */ | ||
567 | struct gru_control_block_status { | ||
568 | unsigned int icmd :1; | ||
569 | unsigned int unused1 :31; | ||
570 | unsigned int unused2 :24; | ||
571 | unsigned int istatus :2; | ||
572 | unsigned int isubstatus :4; | ||
573 | unsigned int unused3 :2; | ||
574 | }; | ||
575 | |||
576 | /* Get CB status */ | ||
577 | static inline int gru_get_cb_status(void *cb) | ||
578 | { | ||
579 | struct gru_control_block_status *cbs = (void *)cb; | ||
580 | |||
581 | return cbs->istatus; | ||
582 | } | ||
583 | |||
584 | /* Get CB message queue substatus */ | ||
585 | static inline int gru_get_cb_message_queue_substatus(void *cb) | ||
586 | { | ||
587 | struct gru_control_block_status *cbs = (void *)cb; | ||
588 | |||
589 | return cbs->isubstatus & CBSS_MSG_QUEUE_MASK; | ||
590 | } | ||
591 | |||
592 | /* Get CB substatus */ | ||
593 | static inline int gru_get_cb_substatus(void *cb) | ||
594 | { | ||
595 | struct gru_control_block_status *cbs = (void *)cb; | ||
596 | |||
597 | return cbs->isubstatus; | ||
598 | } | ||
599 | |||
600 | /* Check the status of a CB. If the CB is in UPM mode, call the | ||
601 | * OS to handle the UPM status. | ||
602 | * Returns the CB status field value (0 for normal completion) | ||
603 | */ | ||
604 | static inline int gru_check_status(void *cb) | ||
605 | { | ||
606 | struct gru_control_block_status *cbs = (void *)cb; | ||
607 | int ret = cbs->istatus; | ||
608 | |||
609 | if (ret == CBS_CALL_OS) | ||
610 | ret = gru_check_status_proc(cb); | ||
611 | return ret; | ||
612 | } | ||
613 | |||
614 | /* Wait for CB to complete. | ||
615 | * Returns the CB status field value (0 for normal completion) | ||
616 | */ | ||
617 | static inline int gru_wait(void *cb) | ||
618 | { | ||
619 | struct gru_control_block_status *cbs = (void *)cb; | ||
620 | int ret = cbs->istatus; | ||
621 | |||
622 | if (ret != CBS_IDLE) | ||
623 | ret = gru_wait_proc(cb); | ||
624 | return ret; | ||
625 | } | ||
626 | |||
627 | /* Wait for CB to complete. Aborts program if error. (Note: error does NOT | ||
628 | * mean TLB miss - only fatal errors such as memory parity error or user | ||
629 | * bugs will cause termination.) | ||
630 | */ | ||
631 | static inline void gru_wait_abort(void *cb) | ||
632 | { | ||
633 | struct gru_control_block_status *cbs = (void *)cb; | ||
634 | |||
635 | if (cbs->istatus != CBS_IDLE) | ||
636 | gru_wait_abort_proc(cb); | ||
637 | } | ||
638 | |||
639 | |||
640 | /* | ||
641 | * Get a pointer to a control block | ||
642 | * gseg - GSeg address returned from gru_get_thread_gru_segment() | ||
643 | * index - index of desired CB | ||
644 | */ | ||
645 | static inline void *gru_get_cb_pointer(void *gseg, | ||
646 | int index) | ||
647 | { | ||
648 | return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE; | ||
649 | } | ||
650 | |||
651 | /* | ||
652 | * Get a pointer to a cacheline in the data segment portion of a GSeg | ||
653 | * gseg - GSeg address returned from gru_get_thread_gru_segment() | ||
654 | * index - index of desired cache line | ||
655 | */ | ||
656 | static inline void *gru_get_data_pointer(void *gseg, int index) | ||
657 | { | ||
658 | return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES; | ||
659 | } | ||
660 | |||
661 | /* | ||
662 | * Convert a vaddr into the tri index within the GSEG | ||
663 | * vaddr - virtual address within the gseg | ||
664 | */ | ||
665 | static inline int gru_get_tri(void *vaddr) | ||
666 | { | ||
667 | return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE; | ||
668 | } | ||
669 | #endif /* __GRU_INSTRUCTIONS_H__ */ | ||
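
Putting the header's pieces together, a minimal usage sketch (assuming "gseg" is a valid GSeg address from the user library's gru_get_thread_gru_segment(), which is outside this patch; error handling omitted):

	#include "gru.h"
	#include "gru_instructions.h"

	/* Copy one cache line from mem_addr into the data segment, then read it. */
	static long example_fetch(void *gseg, void *mem_addr)
	{
		void *cb  = gru_get_cb_pointer(gseg, 0);	/* first CB */
		void *dsr = gru_get_data_pointer(gseg, 0);	/* first DS cache line */

		gru_vload(cb, (unsigned long)mem_addr, gru_get_tri(dsr),
			  XTYPE_CL, 1, 1, 0);			/* starts asynchronously */
		if (gru_wait(cb) != CBS_IDLE)			/* 0 == normal completion */
			return -1;
		return *(long *)dsr;				/* data landed in the DSR */
	}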
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
new file mode 100644
index 000000000000..3d33015bbf31
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -0,0 +1,633 @@
1 | /* | ||
2 | * SN Platform GRU Driver | ||
3 | * | ||
4 | * FAULT HANDLER FOR GRU DETECTED TLB MISSES | ||
5 | * | ||
6 | * This file contains code that handles TLB misses within the GRU. | ||
7 | * These misses are reported either via interrupts or user polling of | ||
8 | * the user CB. | ||
9 | * | ||
10 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License as published by | ||
14 | * the Free Software Foundation; either version 2 of the License, or | ||
15 | * (at your option) any later version. | ||
16 | * | ||
17 | * This program is distributed in the hope that it will be useful, | ||
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
20 | * GNU General Public License for more details. | ||
21 | * | ||
22 | * You should have received a copy of the GNU General Public License | ||
23 | * along with this program; if not, write to the Free Software | ||
24 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
25 | */ | ||
26 | |||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/errno.h> | ||
29 | #include <linux/spinlock.h> | ||
30 | #include <linux/mm.h> | ||
31 | #include <linux/hugetlb.h> | ||
32 | #include <linux/device.h> | ||
33 | #include <linux/io.h> | ||
34 | #include <linux/uaccess.h> | ||
35 | #include <asm/pgtable.h> | ||
36 | #include "gru.h" | ||
37 | #include "grutables.h" | ||
38 | #include "grulib.h" | ||
39 | #include "gru_instructions.h" | ||
40 | #include <asm/uv/uv_hub.h> | ||
41 | |||
42 | /* | ||
43 | * Test if a physical address is a valid GRU GSEG address | ||
44 | */ | ||
45 | static inline int is_gru_paddr(unsigned long paddr) | ||
46 | { | ||
47 | return paddr >= gru_start_paddr && paddr < gru_end_paddr; | ||
48 | } | ||
49 | |||
50 | /* | ||
51 | * Find the vma of a GRU segment. Caller must hold mmap_sem. | ||
52 | */ | ||
53 | struct vm_area_struct *gru_find_vma(unsigned long vaddr) | ||
54 | { | ||
55 | struct vm_area_struct *vma; | ||
56 | |||
57 | vma = find_vma(current->mm, vaddr); | ||
58 | if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops) | ||
59 | return vma; | ||
60 | return NULL; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * Find and lock the gts that contains the specified user vaddr. | ||
65 | * | ||
66 | * Returns: | ||
67 | * - *gts with the mmap_sem locked for read and the GTS locked. | ||
68 | * - NULL if vaddr invalid OR is not a valid GSEG vaddr. | ||
69 | */ | ||
70 | |||
71 | static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr) | ||
72 | { | ||
73 | struct mm_struct *mm = current->mm; | ||
74 | struct vm_area_struct *vma; | ||
75 | struct gru_thread_state *gts = NULL; | ||
76 | |||
77 | down_read(&mm->mmap_sem); | ||
78 | vma = gru_find_vma(vaddr); | ||
79 | if (vma) | ||
80 | gts = gru_find_thread_state(vma, TSID(vaddr, vma)); | ||
81 | if (gts) | ||
82 | mutex_lock(&gts->ts_ctxlock); | ||
83 | else | ||
84 | up_read(&mm->mmap_sem); | ||
85 | return gts; | ||
86 | } | ||
87 | |||
88 | static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) | ||
89 | { | ||
90 | struct mm_struct *mm = current->mm; | ||
91 | struct vm_area_struct *vma; | ||
92 | struct gru_thread_state *gts = NULL; | ||
93 | |||
94 | down_write(&mm->mmap_sem); | ||
95 | vma = gru_find_vma(vaddr); | ||
96 | if (vma) | ||
97 | gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); | ||
98 | if (gts) { | ||
99 | mutex_lock(&gts->ts_ctxlock); | ||
100 | downgrade_write(&mm->mmap_sem); | ||
101 | } else { | ||
102 | up_write(&mm->mmap_sem); | ||
103 | } | ||
104 | |||
105 | return gts; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Unlock a GTS that was previously locked with gru_find_lock_gts(). | ||
110 | */ | ||
111 | static void gru_unlock_gts(struct gru_thread_state *gts) | ||
112 | { | ||
113 | mutex_unlock(&gts->ts_ctxlock); | ||
114 | up_read(&current->mm->mmap_sem); | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * Set a CB.istatus to active using a user virtual address. This must be done | ||
119 | * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY. | ||
120 | * If the line is evicted, the status may be lost. The in-cache update | ||
121 | * is necessary to prevent the user from seeing a stale cb.istatus that will | ||
122 | * change as soon as the TFH restart is complete. Races may cause an | ||
123 | * occasional failure to clear the cb.istatus, but that is ok. | ||
124 | * | ||
125 | * If the cb address is not valid (should not happen, but...), nothing | ||
126 | * bad will happen. The get_user()/put_user() will fail but there | ||
127 | * are no bad side-effects. | ||
128 | */ | ||
129 | static void gru_cb_set_istatus_active(unsigned long __user *cb) | ||
130 | { | ||
131 | union { | ||
132 | struct gru_instruction_bits bits; | ||
133 | unsigned long dw; | ||
134 | } u; | ||
135 | |||
136 | if (cb) { | ||
137 | get_user(u.dw, cb); | ||
138 | u.bits.istatus = CBS_ACTIVE; | ||
139 | put_user(u.dw, cb); | ||
140 | } | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Convert an interrupt IRQ to a pointer to the GRU state that caused the | ||
145 | * interrupt. Interrupts are always sent to a cpu on the blade that contains the | ||
146 | * GRU (except for headless blades which are not currently supported). A blade | ||
147 | * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ | ||
148 | * number uniquely identifies the GRU chiplet on the local blade that caused the | ||
149 | * interrupt. Always called in interrupt context. | ||
150 | */ | ||
151 | static inline struct gru_state *irq_to_gru(int irq) | ||
152 | { | ||
153 | return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU]; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * Read & clear a TFM | ||
158 | * | ||
159 | * The GRU has an array of fault maps. A map is private to a cpu. | ||
160 | * Only one cpu will be accessing a cpu's fault map. | ||
161 | * | ||
162 | * This function scans the cpu-private fault map & clears all bits that | ||
163 | * are set. The function returns a bitmap that indicates the bits that | ||
164 | * were cleared. Note that since the maps may be updated asynchronously by | ||
165 | * the GRU, atomic operations must be used to clear bits. | ||
166 | */ | ||
167 | static void get_clear_fault_map(struct gru_state *gru, | ||
168 | struct gru_tlb_fault_map *map) | ||
169 | { | ||
170 | unsigned long i, k; | ||
171 | struct gru_tlb_fault_map *tfm; | ||
172 | |||
173 | tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id()); | ||
174 | prefetchw(tfm); /* Helps on hardware, required for emulator */ | ||
175 | for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) { | ||
176 | k = tfm->fault_bits[i]; | ||
177 | if (k) | ||
178 | k = xchg(&tfm->fault_bits[i], 0UL); | ||
179 | map->fault_bits[i] = k; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Not functionally required but helps performance. (Required | ||
184 | * on emulator) | ||
185 | */ | ||
186 | gru_flush_cache(tfm); | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Atomic (interrupt context) & non-atomic (user context) functions to | ||
191 | * convert a vaddr into a physical address. The size of the page | ||
192 | * is returned in pageshift. | ||
193 | * returns: | ||
194 | * 0 - successful | ||
195 | * < 0 - error code | ||
196 | * 1 - (atomic only) try again in non-atomic context | ||
197 | */ | ||
198 | static int non_atomic_pte_lookup(struct vm_area_struct *vma, | ||
199 | unsigned long vaddr, int write, | ||
200 | unsigned long *paddr, int *pageshift) | ||
201 | { | ||
202 | struct page *page; | ||
203 | |||
204 | /* ZZZ Need to handle HUGE pages */ | ||
205 | if (is_vm_hugetlb_page(vma)) | ||
206 | return -EFAULT; | ||
207 | *pageshift = PAGE_SHIFT; | ||
208 | if (get_user_pages | ||
209 | (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) | ||
210 | return -EFAULT; | ||
211 | *paddr = page_to_phys(page); | ||
212 | put_page(page); | ||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * | ||
218 | * atomic_pte_lookup | ||
219 | * | ||
220 | * Convert a user virtual address to a physical address | ||
221 | * Only supports Intel large pages (2MB only) on x86_64. | ||
222 | * ZZZ - hugepage support is incomplete | ||
223 | */ | ||
224 | static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, | ||
225 | int write, unsigned long *paddr, int *pageshift) | ||
226 | { | ||
227 | pgd_t *pgdp; | ||
228 | pmd_t *pmdp; | ||
229 | pud_t *pudp; | ||
230 | pte_t pte; | ||
231 | |||
232 | WARN_ON(irqs_disabled()); /* ZZZ debug */ | ||
233 | |||
234 | local_irq_disable(); | ||
235 | pgdp = pgd_offset(vma->vm_mm, vaddr); | ||
236 | if (unlikely(pgd_none(*pgdp))) | ||
237 | goto err; | ||
238 | |||
239 | pudp = pud_offset(pgdp, vaddr); | ||
240 | if (unlikely(pud_none(*pudp))) | ||
241 | goto err; | ||
242 | |||
243 | pmdp = pmd_offset(pudp, vaddr); | ||
244 | if (unlikely(pmd_none(*pmdp))) | ||
245 | goto err; | ||
246 | #ifdef CONFIG_X86_64 | ||
247 | if (unlikely(pmd_large(*pmdp))) | ||
248 | pte = *(pte_t *) pmdp; | ||
249 | else | ||
250 | #endif | ||
251 | pte = *pte_offset_kernel(pmdp, vaddr); | ||
252 | |||
253 | local_irq_enable(); | ||
254 | |||
255 | if (unlikely(!pte_present(pte) || | ||
256 | (write && (!pte_write(pte) || !pte_dirty(pte))))) | ||
257 | return 1; | ||
258 | |||
259 | *paddr = pte_pfn(pte) << PAGE_SHIFT; | ||
260 | *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; | ||
261 | return 0; | ||
262 | |||
263 | err: | ||
264 | local_irq_enable(); | ||
265 | return 1; | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * Drop a TLB entry into the GRU. The fault is described by info in an TFH. | ||
270 | * Input: | ||
271 | * cb Address of user CBR. Null if not running in user context | ||
272 | * Return: | ||
273 | * 0 = dropin, exception, or switch to UPM successful | ||
274 | * 1 = range invalidate active | ||
275 | * < 0 = error code | ||
276 | * | ||
277 | */ | ||
278 | static int gru_try_dropin(struct gru_thread_state *gts, | ||
279 | struct gru_tlb_fault_handle *tfh, | ||
280 | unsigned long __user *cb) | ||
281 | { | ||
282 | struct mm_struct *mm = gts->ts_mm; | ||
283 | struct vm_area_struct *vma; | ||
284 | int pageshift, asid, write, ret; | ||
285 | unsigned long paddr, gpa, vaddr; | ||
286 | |||
287 | /* | ||
288 | * NOTE: The GRU contains magic hardware that eliminates races between | ||
289 | * TLB invalidates and TLB dropins. If an invalidate occurs | ||
290 | * in the window between reading the TFH and the subsequent TLB dropin, | ||
291 | * the dropin is ignored. This eliminates the need for additional locks. | ||
292 | */ | ||
293 | |||
294 | /* | ||
295 | * Error if TFH state is IDLE or FMM mode & the user is issuing a UPM call. | ||
296 | * Might be a hardware race OR a stupid user. Ignore FMM because FMM | ||
297 | * is a transient state. | ||
298 | */ | ||
299 | if (tfh->state == TFHSTATE_IDLE) | ||
300 | goto failidle; | ||
301 | if (tfh->state == TFHSTATE_MISS_FMM && cb) | ||
302 | goto failfmm; | ||
303 | |||
304 | write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; | ||
305 | vaddr = tfh->missvaddr; | ||
306 | asid = tfh->missasid; | ||
307 | if (asid == 0) | ||
308 | goto failnoasid; | ||
309 | |||
310 | rmb(); /* TFH must be cache resident before reading ms_range_active */ | ||
311 | |||
312 | /* | ||
313 | * TFH is cache resident - at least briefly. Fail the dropin | ||
314 | * if a range invalidate is active. | ||
315 | */ | ||
316 | if (atomic_read(&gts->ts_gms->ms_range_active)) | ||
317 | goto failactive; | ||
318 | |||
319 | vma = find_vma(mm, vaddr); | ||
320 | if (!vma) | ||
321 | goto failinval; | ||
322 | |||
323 | /* | ||
324 | * Atomic lookup is faster & usually works even if called in non-atomic | ||
325 | * context. | ||
326 | */ | ||
327 | ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift); | ||
328 | if (ret) { | ||
329 | if (!cb) | ||
330 | goto failupm; | ||
331 | if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, | ||
332 | &pageshift)) | ||
333 | goto failinval; | ||
334 | } | ||
335 | if (is_gru_paddr(paddr)) | ||
336 | goto failinval; | ||
337 | |||
338 | paddr = paddr & ~((1UL << pageshift) - 1); | ||
339 | gpa = uv_soc_phys_ram_to_gpa(paddr); | ||
340 | gru_cb_set_istatus_active(cb); | ||
341 | tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, | ||
342 | GRU_PAGESIZE(pageshift)); | ||
343 | STAT(tlb_dropin); | ||
344 | gru_dbg(grudev, | ||
345 | "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n", | ||
346 | ret ? "non-atomic" : "atomic", tfh, vaddr, asid, | ||
347 | pageshift, gpa); | ||
348 | return 0; | ||
349 | |||
350 | failnoasid: | ||
351 | /* No asid (delayed unload). */ | ||
352 | STAT(tlb_dropin_fail_no_asid); | ||
353 | gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); | ||
354 | if (!cb) | ||
355 | tfh_user_polling_mode(tfh); | ||
356 | else | ||
357 | gru_flush_cache(tfh); | ||
358 | return -EAGAIN; | ||
359 | |||
360 | failupm: | ||
361 | /* Atomic failure switch CBR to UPM */ | ||
362 | tfh_user_polling_mode(tfh); | ||
363 | STAT(tlb_dropin_fail_upm); | ||
364 | gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); | ||
365 | return 1; | ||
366 | |||
367 | failfmm: | ||
368 | /* FMM state on UPM call */ | ||
369 | STAT(tlb_dropin_fail_fmm); | ||
370 | gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); | ||
371 | return 0; | ||
372 | |||
373 | failidle: | ||
374 | /* TFH was idle - no miss pending */ | ||
375 | gru_flush_cache(tfh); | ||
376 | if (cb) | ||
377 | gru_flush_cache(cb); | ||
378 | STAT(tlb_dropin_fail_idle); | ||
379 | gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); | ||
380 | return 0; | ||
381 | |||
382 | failinval: | ||
383 | /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ | ||
384 | tfh_exception(tfh); | ||
385 | STAT(tlb_dropin_fail_invalid); | ||
386 | gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); | ||
387 | return -EFAULT; | ||
388 | |||
389 | failactive: | ||
390 | /* Range invalidate active. Switch to UPM iff atomic */ | ||
391 | if (!cb) | ||
392 | tfh_user_polling_mode(tfh); | ||
393 | else | ||
394 | gru_flush_cache(tfh); | ||
395 | STAT(tlb_dropin_fail_range_active); | ||
396 | gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", | ||
397 | tfh, vaddr); | ||
398 | return 1; | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * Process an external interrupt from the GRU. This interrupt is | ||
403 | * caused by a TLB miss. | ||
404 | * Note that this is the interrupt handler that is registered with the linux | ||
405 | * interrupt subsystem. | ||
406 | */ | ||
407 | irqreturn_t gru_intr(int irq, void *dev_id) | ||
408 | { | ||
409 | struct gru_state *gru; | ||
410 | struct gru_tlb_fault_map map; | ||
411 | struct gru_thread_state *gts; | ||
412 | struct gru_tlb_fault_handle *tfh = NULL; | ||
413 | int cbrnum, ctxnum; | ||
414 | |||
415 | STAT(intr); | ||
416 | |||
417 | gru = irq_to_gru(irq); | ||
418 | if (!gru) { | ||
419 | dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n", | ||
420 | raw_smp_processor_id(), irq); | ||
421 | return IRQ_NONE; | ||
422 | } | ||
423 | get_clear_fault_map(gru, &map); | ||
424 | gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid, | ||
425 | map.fault_bits[0]); | ||
426 | |||
427 | for_each_cbr_in_tfm(cbrnum, map.fault_bits) { | ||
428 | tfh = get_tfh_by_index(gru, cbrnum); | ||
429 | prefetchw(tfh); /* Helps on hdw, required for emulator */ | ||
430 | |||
431 | /* | ||
432 | * When hardware sets a bit in the faultmap, it implicitly | ||
433 | * locks the GRU context so that it cannot be unloaded. | ||
434 | * The gts cannot change until a TFH start/writestart command | ||
435 | * is issued. | ||
436 | */ | ||
437 | ctxnum = tfh->ctxnum; | ||
438 | gts = gru->gs_gts[ctxnum]; | ||
439 | |||
440 | /* | ||
441 | * This is running in interrupt context. Trylock the mmap_sem. | ||
442 | * If it fails, retry the fault in user context. | ||
443 | */ | ||
444 | if (down_read_trylock(&gts->ts_mm->mmap_sem)) { | ||
445 | gru_try_dropin(gts, tfh, NULL); | ||
446 | up_read(&gts->ts_mm->mmap_sem); | ||
447 | } else { | ||
448 | tfh_user_polling_mode(tfh); | ||
449 | } | ||
450 | } | ||
451 | return IRQ_HANDLED; | ||
452 | } | ||
453 | |||
454 | |||
455 | static int gru_user_dropin(struct gru_thread_state *gts, | ||
456 | struct gru_tlb_fault_handle *tfh, | ||
457 | unsigned long __user *cb) | ||
458 | { | ||
459 | struct gru_mm_struct *gms = gts->ts_gms; | ||
460 | int ret; | ||
461 | |||
462 | while (1) { | ||
463 | wait_event(gms->ms_wait_queue, | ||
464 | atomic_read(&gms->ms_range_active) == 0); | ||
465 | prefetchw(tfh); /* Helps on hdw, required for emulator */ | ||
466 | ret = gru_try_dropin(gts, tfh, cb); | ||
467 | if (ret <= 0) | ||
468 | return ret; | ||
469 | STAT(call_os_wait_queue); | ||
470 | } | ||
471 | } | ||
472 | |||
473 | /* | ||
474 | * This interface is called as a result of a user detecting a "call OS" bit | ||
475 | * in a user CB. Normally means that a TLB fault has occurred. | ||
476 | * cb - user virtual address of the CB | ||
477 | */ | ||
478 | int gru_handle_user_call_os(unsigned long cb) | ||
479 | { | ||
480 | struct gru_tlb_fault_handle *tfh; | ||
481 | struct gru_thread_state *gts; | ||
482 | unsigned long __user *cbp; | ||
483 | int ucbnum, cbrnum, ret = -EINVAL; | ||
484 | |||
485 | STAT(call_os); | ||
486 | gru_dbg(grudev, "address 0x%lx\n", cb); | ||
487 | |||
488 | /* sanity check the cb pointer */ | ||
489 | ucbnum = get_cb_number((void *)cb); | ||
490 | if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) | ||
491 | return -EINVAL; | ||
492 | cbp = (unsigned long *)cb; | ||
493 | |||
494 | gts = gru_find_lock_gts(cb); | ||
495 | if (!gts) | ||
496 | return -EINVAL; | ||
497 | |||
498 | if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { | ||
499 | ret = -EINVAL; | ||
500 | goto exit; | ||
501 | } | ||
502 | |||
503 | /* | ||
504 | * If force_unload is set, the UPM TLB fault is phony. The task | ||
505 | * has migrated to another node and the GSEG must be moved. Just | ||
506 | * unload the context. The task will page fault and assign a new | ||
507 | * context. | ||
508 | */ | ||
509 | ret = -EAGAIN; | ||
510 | cbrnum = thread_cbr_number(gts, ucbnum); | ||
511 | if (gts->ts_force_unload) { | ||
512 | gru_unload_context(gts, 1); | ||
513 | } else if (gts->ts_gru) { | ||
514 | tfh = get_tfh_by_index(gts->ts_gru, cbrnum); | ||
515 | ret = gru_user_dropin(gts, tfh, cbp); | ||
516 | } | ||
517 | exit: | ||
518 | gru_unlock_gts(gts); | ||
519 | return ret; | ||
520 | } | ||
521 | |||
522 | /* | ||
523 | * Fetch the exception detail information for a CB that terminated with | ||
524 | * an exception. | ||
525 | */ | ||
526 | int gru_get_exception_detail(unsigned long arg) | ||
527 | { | ||
528 | struct control_block_extended_exc_detail excdet; | ||
529 | struct gru_control_block_extended *cbe; | ||
530 | struct gru_thread_state *gts; | ||
531 | int ucbnum, cbrnum, ret; | ||
532 | |||
533 | STAT(user_exception); | ||
534 | if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) | ||
535 | return -EFAULT; | ||
536 | |||
537 | gru_dbg(grudev, "address 0x%lx\n", excdet.cb); | ||
538 | gts = gru_find_lock_gts(excdet.cb); | ||
539 | if (!gts) | ||
540 | return -EINVAL; | ||
541 | |||
542 | if (gts->ts_gru) { | ||
543 | ucbnum = get_cb_number((void *)excdet.cb); | ||
544 | cbrnum = thread_cbr_number(gts, ucbnum); | ||
545 | cbe = get_cbe_by_index(gts->ts_gru, cbrnum); | ||
546 | excdet.opc = cbe->opccpy; | ||
547 | excdet.exopc = cbe->exopccpy; | ||
548 | excdet.ecause = cbe->ecause; | ||
549 | excdet.exceptdet0 = cbe->idef1upd; | ||
550 | excdet.exceptdet1 = cbe->idef3upd; | ||
551 | ret = 0; | ||
552 | } else { | ||
553 | ret = -EAGAIN; | ||
554 | } | ||
555 | gru_unlock_gts(gts); | ||
556 | |||
557 | gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb, | ||
558 | excdet.ecause); | ||
559 | if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet))) | ||
560 | ret = -EFAULT; | ||
561 | return ret; | ||
562 | } | ||
563 | |||
564 | /* | ||
565 | * User request to unload a context. Content is saved for possible reload. | ||
566 | */ | ||
567 | int gru_user_unload_context(unsigned long arg) | ||
568 | { | ||
569 | struct gru_thread_state *gts; | ||
570 | struct gru_unload_context_req req; | ||
571 | |||
572 | STAT(user_unload_context); | ||
573 | if (copy_from_user(&req, (void __user *)arg, sizeof(req))) | ||
574 | return -EFAULT; | ||
575 | |||
576 | gru_dbg(grudev, "gseg 0x%lx\n", req.gseg); | ||
577 | |||
578 | gts = gru_find_lock_gts(req.gseg); | ||
579 | if (!gts) | ||
580 | return -EINVAL; | ||
581 | |||
582 | if (gts->ts_gru) | ||
583 | gru_unload_context(gts, 1); | ||
584 | gru_unlock_gts(gts); | ||
585 | |||
586 | return 0; | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * User request to flush a range of virtual addresses from the GRU TLB | ||
591 | * (Mainly for testing). | ||
592 | */ | ||
593 | int gru_user_flush_tlb(unsigned long arg) | ||
594 | { | ||
595 | struct gru_thread_state *gts; | ||
596 | struct gru_flush_tlb_req req; | ||
597 | |||
598 | STAT(user_flush_tlb); | ||
599 | if (copy_from_user(&req, (void __user *)arg, sizeof(req))) | ||
600 | return -EFAULT; | ||
601 | |||
602 | gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg, | ||
603 | req.vaddr, req.len); | ||
604 | |||
605 | gts = gru_find_lock_gts(req.gseg); | ||
606 | if (!gts) | ||
607 | return -EINVAL; | ||
608 | |||
609 | gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.vaddr + req.len); | ||
610 | gru_unlock_gts(gts); | ||
611 | |||
612 | return 0; | ||
613 | } | ||
614 | |||
615 | /* | ||
616 | * Register the current task as the user of the GSEG slice. | ||
617 | * Needed for TLB fault interrupt targeting. | ||
618 | */ | ||
619 | int gru_set_task_slice(long address) | ||
620 | { | ||
621 | struct gru_thread_state *gts; | ||
622 | |||
623 | STAT(set_task_slice); | ||
624 | gru_dbg(grudev, "address 0x%lx\n", address); | ||
625 | gts = gru_alloc_locked_gts(address); | ||
626 | if (!gts) | ||
627 | return -EINVAL; | ||
628 | |||
629 | gts->ts_tgid_owner = current->tgid; | ||
630 | gru_unlock_gts(gts); | ||
631 | |||
632 | return 0; | ||
633 | } | ||
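
For orientation, an illustrative sketch (not part of the patch) of the user-side loop that ends up in gru_handle_user_call_os(): gru_check_status() from gru_instructions.h reads cb.istatus, and on CBS_CALL_OS the library-supplied gru_check_status_proc() traps into the driver, which resolves the fault via gru_user_dropin() above.

	#include "gru_instructions.h"

	/* Hypothetical user-library poll loop for one CB. */
	static int example_poll(void *cb)
	{
		struct control_block_extended_exc_detail excdet;
		int status;

		do {
			status = gru_check_status(cb);	/* may call into the OS */
		} while (status == CBS_ACTIVE);

		if (status == CBS_EXCEPTION)		/* fetch exception detail */
			gru_get_cb_exception_detail(cb, &excdet);
		return status;
	}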
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
new file mode 100644
index 000000000000..23c91f5f6b61
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -0,0 +1,485 @@
1 | /* | ||
2 | * SN Platform GRU Driver | ||
3 | * | ||
4 | * FILE OPERATIONS & DRIVER INITIALIZATION | ||
5 | * | ||
6 | * This file supports the user system calls for file open, close, mmap, etc. | ||
7 | * It also includes the driver initialization code. | ||
8 | * | ||
9 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/errno.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <linux/mm.h> | ||
31 | #include <linux/io.h> | ||
32 | #include <linux/smp_lock.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/device.h> | ||
35 | #include <linux/miscdevice.h> | ||
36 | #include <linux/interrupt.h> | ||
37 | #include <linux/proc_fs.h> | ||
38 | #include <linux/uaccess.h> | ||
39 | #include "gru.h" | ||
40 | #include "grulib.h" | ||
41 | #include "grutables.h" | ||
42 | |||
43 | #if defined CONFIG_X86_64 | ||
44 | #include <asm/genapic.h> | ||
45 | #include <asm/irq.h> | ||
46 | #define IS_UV() is_uv_system() | ||
47 | #elif defined CONFIG_IA64 | ||
48 | #include <asm/system.h> | ||
49 | #include <asm/sn/simulator.h> | ||
50 | /* temp support for running on hardware simulator */ | ||
51 | #define IS_UV() (IS_MEDUSA() || ia64_platform_is("uv")) | ||
52 | #else | ||
53 | #define IS_UV() 0 | ||
54 | #endif | ||
55 | |||
56 | #include <asm/uv/uv_hub.h> | ||
57 | #include <asm/uv/uv_mmrs.h> | ||
58 | |||
59 | struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly; | ||
60 | unsigned long gru_start_paddr, gru_end_paddr __read_mostly; | ||
61 | struct gru_stats_s gru_stats; | ||
62 | |||
63 | /* Guaranteed user available resources on each node */ | ||
64 | static int max_user_cbrs, max_user_dsr_bytes; | ||
65 | |||
66 | static struct file_operations gru_fops; | ||
67 | static struct miscdevice gru_miscdev; | ||
68 | |||
69 | |||
70 | /* | ||
71 | * gru_vma_close | ||
72 | * | ||
73 | * Called when unmapping a device mapping. Frees all gru resources | ||
74 | * and tables belonging to the vma. | ||
75 | */ | ||
76 | static void gru_vma_close(struct vm_area_struct *vma) | ||
77 | { | ||
78 | struct gru_vma_data *vdata; | ||
79 | struct gru_thread_state *gts; | ||
80 | struct list_head *entry, *next; | ||
81 | |||
82 | if (!vma->vm_private_data) | ||
83 | return; | ||
84 | |||
85 | vdata = vma->vm_private_data; | ||
86 | vma->vm_private_data = NULL; | ||
87 | gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file, | ||
88 | vdata); | ||
89 | list_for_each_safe(entry, next, &vdata->vd_head) { | ||
90 | gts = | ||
91 | list_entry(entry, struct gru_thread_state, ts_next); | ||
92 | list_del(&gts->ts_next); | ||
93 | mutex_lock(&gts->ts_ctxlock); | ||
94 | if (gts->ts_gru) | ||
95 | gru_unload_context(gts, 0); | ||
96 | mutex_unlock(&gts->ts_ctxlock); | ||
97 | gts_drop(gts); | ||
98 | } | ||
99 | kfree(vdata); | ||
100 | STAT(vdata_free); | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * gru_file_mmap | ||
105 | * | ||
106 | * Called when mmapping the device. Initializes the vma with a fault handler | ||
107 | * and private data structure necessary to allocate, track, and free the | ||
108 | * underlying pages. | ||
109 | */ | ||
110 | static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
111 | { | ||
112 | if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE)) | ||
113 | return -EPERM; | ||
114 | |||
115 | if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) || | ||
116 | vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) | ||
117 | return -EINVAL; | ||
118 | |||
119 | vma->vm_flags |= | ||
120 | (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP | | ||
121 | VM_RESERVED); | ||
122 | vma->vm_page_prot = PAGE_SHARED; | ||
123 | vma->vm_ops = &gru_vm_ops; | ||
124 | |||
125 | vma->vm_private_data = gru_alloc_vma_data(vma, 0); | ||
126 | if (!vma->vm_private_data) | ||
127 | return -ENOMEM; | ||
128 | |||
129 | gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n", | ||
130 | file, vma->vm_start, vma, vma->vm_private_data); | ||
131 | return 0; | ||
132 | } | ||
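The checks above define the user-side contract: the mapping must be shared and writable, and sized and aligned to a GSEG. A minimal user-space sketch follows, assuming the device registers as /dev/gru (per the miscdevice registration later in this file) and that the user-visible GRU_GSEG_PAGESIZE matches the kernel's 256KB value; both are assumptions for illustration.

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>

	#define GRU_GSEG_PAGESIZE (256 * 1024UL)	/* must match the kernel value */

	int main(void)
	{
		void *hint, *gseg;
		int fd;

		fd = open("/dev/gru", O_RDWR);
		if (fd < 0) {
			perror("open /dev/gru");
			return 1;
		}
		/*
		 * gru_file_mmap() rejects mappings that are not shared+writable
		 * or whose start/end are not GSEG-aligned, so pass an aligned
		 * hint; the hint value is an arbitrary illustrative choice.
		 */
		hint = (void *)(1024UL * GRU_GSEG_PAGESIZE);
		gseg = mmap(hint, GRU_GSEG_PAGESIZE, PROT_READ | PROT_WRITE,
			    MAP_SHARED, fd, 0);
		if (gseg == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		printf("GSEG mapped at %p\n", gseg);
		return 0;
	}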
133 | |||
134 | /* | ||
135 | * Create a new GRU context | ||
136 | */ | ||
137 | static int gru_create_new_context(unsigned long arg) | ||
138 | { | ||
139 | struct gru_create_context_req req; | ||
140 | struct vm_area_struct *vma; | ||
141 | struct gru_vma_data *vdata; | ||
142 | int ret = -EINVAL; | ||
143 | |||
144 | |||
145 | if (copy_from_user(&req, (void __user *)arg, sizeof(req))) | ||
146 | return -EFAULT; | ||
147 | |||
148 | if (req.data_segment_bytes == 0 || | ||
149 | req.data_segment_bytes > max_user_dsr_bytes) | ||
150 | return -EINVAL; | ||
151 | if (!req.control_blocks || !req.maximum_thread_count || | ||
152 | req.control_blocks > max_user_cbrs) | ||
153 | return -EINVAL; | ||
154 | |||
155 | if (!(req.options & GRU_OPT_MISS_MASK)) | ||
156 | req.options |= GRU_OPT_MISS_FMM_INTR; | ||
157 | |||
158 | down_write(&current->mm->mmap_sem); | ||
159 | vma = gru_find_vma(req.gseg); | ||
160 | if (vma) { | ||
161 | vdata = vma->vm_private_data; | ||
162 | vdata->vd_user_options = req.options; | ||
163 | vdata->vd_dsr_au_count = | ||
164 | GRU_DS_BYTES_TO_AU(req.data_segment_bytes); | ||
165 | vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks); | ||
166 | ret = 0; | ||
167 | } | ||
168 | up_write(&current->mm->mmap_sem); | ||
169 | |||
170 | return ret; | ||
171 | } | ||
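This implies the user-side sequence: mmap a GSEG, then issue GRU_CREATE_CONTEXT against the mapped address. A hedged sketch follows; the field names mirror the req.* accesses above, but the exact struct layout and the ioctl number live in grulib.h and are assumptions here.

	#include <sys/ioctl.h>

	/* Assumed mirror of the grulib.h request; layout is illustrative only. */
	struct gru_create_context_req {
		unsigned long gseg;
		unsigned int data_segment_bytes;
		unsigned int control_blocks;
		unsigned int maximum_thread_count;
		unsigned int options;
	};

	static int create_gru_context(int fd, void *gseg)
	{
		struct gru_create_context_req req = {
			.gseg = (unsigned long)gseg,	/* from the earlier mmap */
			.data_segment_bytes = 1024,	/* nonzero, <= max_user_dsr_bytes */
			.control_blocks = 4,		/* nonzero, <= max_user_cbrs */
			.maximum_thread_count = 1,
			.options = 0,	/* 0 defaults to GRU_OPT_MISS_FMM_INTR */
		};

		return ioctl(fd, GRU_CREATE_CONTEXT, &req);	/* nr from grulib.h */
	}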
172 | |||
173 | /* | ||
174 | * Get GRU configuration info (temp - for emulator testing) | ||
175 | */ | ||
176 | static long gru_get_config_info(unsigned long arg) | ||
177 | { | ||
178 | struct gru_config_info info; | ||
179 | int nodesperblade; | ||
180 | |||
181 | if (num_online_nodes() > 1 && | ||
182 | (uv_node_to_blade_id(1) == uv_node_to_blade_id(0))) | ||
183 | nodesperblade = 2; | ||
184 | else | ||
185 | nodesperblade = 1; | ||
186 | info.cpus = num_online_cpus(); | ||
187 | info.nodes = num_online_nodes(); | ||
188 | info.blades = info.nodes / nodesperblade; | ||
189 | info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades; | ||
190 | |||
191 | if (copy_to_user((void __user *)arg, &info, sizeof(info))) | ||
192 | return -EFAULT; | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * Get GRU chiplet status | ||
198 | */ | ||
199 | static long gru_get_chiplet_status(unsigned long arg) | ||
200 | { | ||
201 | struct gru_state *gru; | ||
202 | struct gru_chiplet_info info; | ||
203 | |||
204 | if (copy_from_user(&info, (void __user *)arg, sizeof(info))) | ||
205 | return -EFAULT; | ||
206 | |||
207 | if (info.node == -1) | ||
208 | info.node = numa_node_id(); | ||
209 | if (info.node >= num_possible_nodes() || | ||
210 | info.chiplet >= GRU_CHIPLETS_PER_HUB || | ||
211 | info.node < 0 || info.chiplet < 0) | ||
212 | return -EINVAL; | ||
213 | |||
214 | info.blade = uv_node_to_blade_id(info.node); | ||
215 | gru = get_gru(info.blade, info.chiplet); | ||
216 | |||
217 | info.total_dsr_bytes = GRU_NUM_DSR_BYTES; | ||
218 | info.total_cbr = GRU_NUM_CB; | ||
219 | info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES - | ||
220 | gru->gs_reserved_dsr_bytes; | ||
221 | info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs; | ||
222 | info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) * | ||
223 | GRU_DSR_AU_BYTES; | ||
224 | info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; | ||
225 | |||
226 | if (copy_to_user((void __user *)arg, &info, sizeof(info))) | ||
227 | return -EFAULT; | ||
228 | return 0; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * gru_file_unlocked_ioctl | ||
233 | * | ||
234 | * Called to update file attributes via IOCTL calls. | ||
235 | */ | ||
236 | static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, | ||
237 | unsigned long arg) | ||
238 | { | ||
239 | int err = -EBADRQC; | ||
240 | |||
241 | gru_dbg(grudev, "file %p\n", file); | ||
242 | |||
243 | switch (req) { | ||
244 | case GRU_CREATE_CONTEXT: | ||
245 | err = gru_create_new_context(arg); | ||
246 | break; | ||
247 | case GRU_SET_TASK_SLICE: | ||
248 | err = gru_set_task_slice(arg); | ||
249 | break; | ||
250 | case GRU_USER_GET_EXCEPTION_DETAIL: | ||
251 | err = gru_get_exception_detail(arg); | ||
252 | break; | ||
253 | case GRU_USER_UNLOAD_CONTEXT: | ||
254 | err = gru_user_unload_context(arg); | ||
255 | break; | ||
256 | case GRU_GET_CHIPLET_STATUS: | ||
257 | err = gru_get_chiplet_status(arg); | ||
258 | break; | ||
259 | case GRU_USER_FLUSH_TLB: | ||
260 | err = gru_user_flush_tlb(arg); | ||
261 | break; | ||
262 | case GRU_USER_CALL_OS: | ||
263 | err = gru_handle_user_call_os(arg); | ||
264 | break; | ||
265 | case GRU_GET_CONFIG_INFO: | ||
266 | err = gru_get_config_info(arg); | ||
267 | break; | ||
268 | } | ||
269 | return err; | ||
270 | } | ||
271 | |||
272 | /* | ||
273 | * Called at init time to build tables for all GRUs that are present in the | ||
274 | * system. | ||
275 | */ | ||
276 | static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, | ||
277 | void *vaddr, int nid, int bid, int grunum) | ||
278 | { | ||
279 | spin_lock_init(&gru->gs_lock); | ||
280 | spin_lock_init(&gru->gs_asid_lock); | ||
281 | gru->gs_gru_base_paddr = paddr; | ||
282 | gru->gs_gru_base_vaddr = vaddr; | ||
283 | gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum; | ||
284 | gru->gs_blade = gru_base[bid]; | ||
285 | gru->gs_blade_id = bid; | ||
286 | gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1; | ||
287 | gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; | ||
288 | gru_tgh_flush_init(gru); | ||
289 | gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n", | ||
290 | bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr, | ||
291 | gru->gs_gru_base_paddr); | ||
292 | gru_kservices_init(gru); | ||
293 | } | ||
294 | |||
295 | static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) | ||
296 | { | ||
297 | int pnode, nid, bid, chip; | ||
298 | int cbrs, dsrbytes, n; | ||
299 | int order = get_order(sizeof(struct gru_blade_state)); | ||
300 | struct page *page; | ||
301 | struct gru_state *gru; | ||
302 | unsigned long paddr; | ||
303 | void *vaddr; | ||
304 | |||
305 | max_user_cbrs = GRU_NUM_CB; | ||
306 | max_user_dsr_bytes = GRU_NUM_DSR_BYTES; | ||
307 | for_each_online_node(nid) { | ||
308 | bid = uv_node_to_blade_id(nid); | ||
309 | pnode = uv_node_to_pnode(nid); | ||
310 | if (gru_base[bid]) | ||
311 | continue; | ||
312 | page = alloc_pages_node(nid, GFP_KERNEL, order); | ||
313 | if (!page) | ||
314 | goto fail; | ||
315 | gru_base[bid] = page_address(page); | ||
316 | memset(gru_base[bid], 0, sizeof(struct gru_blade_state)); | ||
317 | gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0]; | ||
318 | spin_lock_init(&gru_base[bid]->bs_lock); | ||
319 | |||
320 | dsrbytes = 0; | ||
321 | cbrs = 0; | ||
322 | for (gru = gru_base[bid]->bs_grus, chip = 0; | ||
323 | chip < GRU_CHIPLETS_PER_BLADE; | ||
324 | chip++, gru++) { | ||
325 | paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); | ||
326 | vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); | ||
327 | gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip); | ||
328 | n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; | ||
329 | cbrs = max(cbrs, n); | ||
330 | n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; | ||
331 | dsrbytes = max(dsrbytes, n); | ||
332 | } | ||
333 | max_user_cbrs = min(max_user_cbrs, cbrs); | ||
334 | max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes); | ||
335 | } | ||
336 | |||
337 | return 0; | ||
338 | |||
339 | fail: | ||
340 | for (nid--; nid >= 0; nid--) | ||
341 | free_pages((unsigned long)gru_base[nid], order); | ||
342 | return -ENOMEM; | ||
343 | } | ||
344 | |||
345 | #ifdef CONFIG_IA64 | ||
346 | |||
347 | static int get_base_irq(void) | ||
348 | { | ||
349 | return IRQ_GRU; | ||
350 | } | ||
351 | |||
352 | #elif defined CONFIG_X86_64 | ||
353 | |||
354 | static void noop(unsigned int irq) | ||
355 | { | ||
356 | } | ||
357 | |||
358 | static struct irq_chip gru_chip = { | ||
359 | .name = "gru", | ||
360 | .mask = noop, | ||
361 | .unmask = noop, | ||
362 | .ack = noop, | ||
363 | }; | ||
364 | |||
365 | static int get_base_irq(void) | ||
366 | { | ||
367 | set_irq_chip(IRQ_GRU, &gru_chip); | ||
368 | set_irq_chip(IRQ_GRU + 1, &gru_chip); | ||
369 | return IRQ_GRU; | ||
370 | } | ||
371 | #endif | ||
372 | |||
373 | /* | ||
374 | * gru_init | ||
375 | * | ||
376 | * Called at boot or module load time to initialize the GRUs. | ||
377 | */ | ||
378 | static int __init gru_init(void) | ||
379 | { | ||
380 | int ret, irq, chip; | ||
381 | char id[10] = "gru"; /* name shown for the GRU irqs */ | ||
382 | void *gru_start_vaddr; | ||
383 | |||
384 | if (!IS_UV()) | ||
385 | return 0; | ||
386 | |||
387 | #if defined CONFIG_IA64 | ||
388 | gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */ | ||
389 | #else | ||
390 | gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) & | ||
391 | 0x7fffffffffffUL; | ||
392 | |||
393 | #endif | ||
394 | gru_start_vaddr = __va(gru_start_paddr); | ||
395 | gru_end_paddr = gru_start_paddr + MAX_NUMNODES * GRU_SIZE; | ||
396 | printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", | ||
397 | gru_start_paddr, gru_end_paddr); | ||
398 | irq = get_base_irq(); | ||
399 | for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) { | ||
400 | ret = request_irq(irq + chip, gru_intr, 0, id, NULL); | ||
401 | if (ret) { | ||
402 | printk(KERN_ERR "%s: request_irq failed\n", | ||
403 | GRU_DRIVER_ID_STR); | ||
404 | goto exit1; | ||
405 | } | ||
406 | } | ||
407 | |||
408 | ret = misc_register(&gru_miscdev); | ||
409 | if (ret) { | ||
410 | printk(KERN_ERR "%s: misc_register failed\n", | ||
411 | GRU_DRIVER_ID_STR); | ||
412 | goto exit1; | ||
413 | } | ||
414 | |||
415 | ret = gru_proc_init(); | ||
416 | if (ret) { | ||
417 | printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); | ||
418 | goto exit2; | ||
419 | } | ||
420 | |||
421 | ret = gru_init_tables(gru_start_paddr, gru_start_vaddr); | ||
422 | if (ret) { | ||
423 | printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); | ||
424 | goto exit3; | ||
425 | } | ||
426 | |||
427 | printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, | ||
428 | GRU_DRIVER_VERSION_STR); | ||
429 | return 0; | ||
430 | |||
431 | exit3: | ||
432 | gru_proc_exit(); | ||
433 | exit2: | ||
434 | misc_deregister(&gru_miscdev); | ||
435 | exit1: | ||
436 | for (--chip; chip >= 0; chip--) | ||
437 | free_irq(irq + chip, NULL); | ||
438 | return ret; | ||
439 | |||
440 | } | ||
441 | |||
442 | static void __exit gru_exit(void) | ||
443 | { | ||
444 | int i, bid; | ||
445 | int order = get_order(sizeof(struct gru_state) * | ||
446 | GRU_CHIPLETS_PER_BLADE); | ||
447 | |||
448 | for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) | ||
449 | free_irq(IRQ_GRU + i, NULL); | ||
450 | |||
451 | for (bid = 0; bid < GRU_MAX_BLADES; bid++) | ||
452 | free_pages((unsigned long)gru_base[bid], order); | ||
453 | |||
454 | misc_deregister(&gru_miscdev); | ||
455 | gru_proc_exit(); | ||
456 | } | ||
457 | |||
458 | static struct file_operations gru_fops = { | ||
459 | .owner = THIS_MODULE, | ||
460 | .unlocked_ioctl = gru_file_unlocked_ioctl, | ||
461 | .mmap = gru_file_mmap, | ||
462 | }; | ||
463 | |||
464 | static struct miscdevice gru_miscdev = { | ||
465 | .minor = MISC_DYNAMIC_MINOR, | ||
466 | .name = "gru", | ||
467 | .fops = &gru_fops, | ||
468 | }; | ||
469 | |||
470 | struct vm_operations_struct gru_vm_ops = { | ||
471 | .close = gru_vma_close, | ||
472 | .fault = gru_fault, | ||
473 | }; | ||
474 | |||
475 | module_init(gru_init); | ||
476 | module_exit(gru_exit); | ||
477 | |||
478 | module_param(gru_options, ulong, 0644); | ||
479 | MODULE_PARM_DESC(gru_options, "Various debug options"); | ||
480 | |||
481 | MODULE_AUTHOR("Silicon Graphics, Inc."); | ||
482 | MODULE_LICENSE("GPL"); | ||
483 | MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR); | ||
484 | MODULE_VERSION(GRU_DRIVER_VERSION_STR); | ||
485 | |||
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h new file mode 100644 index 000000000000..d16031d62673 --- /dev/null +++ b/drivers/misc/sgi-gru/gruhandles.h | |||
@@ -0,0 +1,663 @@ | |||
1 | /* | ||
2 | * SN Platform GRU Driver | ||
3 | * | ||
4 | * GRU HANDLE DEFINITION | ||
5 | * | ||
6 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef __GRUHANDLES_H__ | ||
24 | #define __GRUHANDLES_H__ | ||
25 | #include "gru_instructions.h" | ||
26 | |||
27 | /* | ||
28 | * Manifest constants for GRU Memory Map | ||
29 | */ | ||
30 | #define GRU_GSEG0_BASE 0 | ||
31 | #define GRU_MCS_BASE (64 * 1024 * 1024) | ||
32 | #define GRU_SIZE (128UL * 1024 * 1024) | ||
33 | |||
34 | /* Handle & resource counts */ | ||
35 | #define GRU_NUM_CB 128 | ||
36 | #define GRU_NUM_DSR_BYTES (32 * 1024) | ||
37 | #define GRU_NUM_TFM 16 | ||
38 | #define GRU_NUM_TGH 24 | ||
39 | #define GRU_NUM_CBE 128 | ||
40 | #define GRU_NUM_TFH 128 | ||
41 | #define GRU_NUM_CCH 16 | ||
42 | #define GRU_NUM_GSH 1 | ||
43 | |||
44 | /* Maximum resource counts that can be reserved by user programs */ | ||
45 | #define GRU_NUM_USER_CBR GRU_NUM_CBE | ||
46 | #define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES | ||
47 | |||
48 | /* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles | ||
49 | * are the same */ | ||
50 | #define GRU_HANDLE_BYTES 64 | ||
51 | #define GRU_HANDLE_STRIDE 256 | ||
52 | |||
53 | /* Base addresses of handles */ | ||
54 | #define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000) | ||
55 | #define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000) | ||
56 | #define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000) | ||
57 | #define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000) | ||
58 | #define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000) | ||
59 | #define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000) | ||
60 | |||
61 | /* User gseg constants */ | ||
62 | #define GRU_GSEG_STRIDE (4 * 1024 * 1024) | ||
63 | #define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1)) | ||
64 | |||
65 | /* Data segment constants */ | ||
66 | #define GRU_DSR_AU_BYTES 1024 | ||
67 | #define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES) | ||
68 | #define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES) | ||
69 | #define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES) | ||
70 | |||
71 | /* Control block constants */ | ||
72 | #define GRU_CBR_AU_SIZE 2 | ||
73 | #define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE) | ||
74 | |||
75 | /* Convert resource counts to the number of AU */ | ||
76 | #define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES) | ||
77 | #define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE) | ||
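Both conversions round up, so any nonzero request consumes at least one allocation unit. A small runnable check that restates only the constants defined above:

	#include <assert.h>

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define GRU_DSR_AU_BYTES	1024
	#define GRU_CBR_AU_SIZE		2
	#define GRU_DS_BYTES_TO_AU(n)	DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
	#define GRU_CB_COUNT_TO_AU(n)	DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)

	int main(void)
	{
		assert(GRU_DS_BYTES_TO_AU(1) == 1);	/* any nonzero request costs an AU */
		assert(GRU_DS_BYTES_TO_AU(1025) == 2);	/* rounds up, never down */
		assert(GRU_CB_COUNT_TO_AU(3) == 2);	/* 3 CBs occupy 2 AUs of 2 CBs each */
		return 0;
	}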
78 | |||
79 | /* UV limits */ | ||
80 | #define GRU_CHIPLETS_PER_HUB 2 | ||
81 | #define GRU_HUBS_PER_BLADE 1 | ||
82 | #define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB) | ||
83 | |||
84 | /* User GRU Gseg offsets */ | ||
85 | #define GRU_CB_BASE 0 | ||
86 | #define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE) | ||
87 | #define GRU_DS_BASE 0x20000 | ||
88 | #define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES) | ||
89 | |||
90 | /* Convert a GRU physical address to the chiplet offset */ | ||
91 | #define GSEGPOFF(h) ((h) & (GRU_SIZE - 1)) | ||
92 | |||
93 | /* Convert an arbitrary handle address to the beginning of the GRU segment */ | ||
94 | #ifndef __PLUGIN__ | ||
95 | #define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) | ||
96 | #else | ||
97 | extern void *gmu_grubase(void *h); | ||
98 | #define GRUBASE(h) gmu_grubase(h) | ||
99 | #endif | ||
100 | |||
101 | /* General addressing macros. */ | ||
102 | static inline void *get_gseg_base_address(void *base, int ctxnum) | ||
103 | { | ||
104 | return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum); | ||
105 | } | ||
106 | |||
107 | static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line) | ||
108 | { | ||
109 | return (void *)(get_gseg_base_address(base, ctxnum) + | ||
110 | GRU_CB_BASE + GRU_HANDLE_STRIDE * line); | ||
111 | } | ||
112 | |||
113 | static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line) | ||
114 | { | ||
115 | return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE + | ||
116 | GRU_CACHE_LINE_BYTES * line); | ||
117 | } | ||
118 | |||
119 | static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum) | ||
120 | { | ||
121 | return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE + | ||
122 | ctxnum * GRU_HANDLE_STRIDE); | ||
123 | } | ||
124 | |||
125 | static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum) | ||
126 | { | ||
127 | return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE + | ||
128 | ctxnum * GRU_HANDLE_STRIDE); | ||
129 | } | ||
130 | |||
131 | static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum) | ||
132 | { | ||
133 | return (struct gru_control_block_extended *)(base + GRU_CBE_BASE + | ||
134 | ctxnum * GRU_HANDLE_STRIDE); | ||
135 | } | ||
136 | |||
137 | static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum) | ||
138 | { | ||
139 | return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE + | ||
140 | ctxnum * GRU_HANDLE_STRIDE); | ||
141 | } | ||
142 | |||
143 | static inline struct gru_context_configuration_handle *get_cch(void *base, | ||
144 | int ctxnum) | ||
145 | { | ||
146 | return (struct gru_context_configuration_handle *)(base + | ||
147 | GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE); | ||
148 | } | ||
149 | |||
150 | static inline unsigned long get_cb_number(void *cb) | ||
151 | { | ||
152 | return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) / | ||
153 | GRU_HANDLE_STRIDE; | ||
154 | } | ||
155 | |||
156 | /* Byte offset to a specific GRU chiplet (pnode=NUMA pnode, chiplet=0 or 1) */ | ||
157 | static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode, | ||
158 | int chiplet) | ||
159 | { | ||
160 | return paddr + GRU_SIZE * (2 * pnode + chiplet); | ||
161 | } | ||
162 | |||
163 | static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet) | ||
164 | { | ||
165 | return vaddr + GRU_SIZE * (2 * pnode + chiplet); | ||
166 | } | ||
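Worked example: chiplets are packed two per pnode at GRU_SIZE (128MB) strides, so pnode 3, chiplet 1 sits 7 strides (896MB) past the base. A runnable restatement of that arithmetic:

	#include <assert.h>

	#define GRU_SIZE (128UL * 1024 * 1024)

	int main(void)
	{
		/* pnode 3, chiplet 1 => stride index 2*3 + 1 = 7 */
		assert(GRU_SIZE * (2 * 3 + 1) == 896UL * 1024 * 1024);
		return 0;
	}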
167 | |||
168 | |||
169 | |||
170 | /* | ||
171 | * Global TLB Fault Map | ||
172 | * Bitmap of outstanding TLB misses needing interrupt/polling service. | ||
173 | * | ||
174 | */ | ||
175 | struct gru_tlb_fault_map { | ||
176 | unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; | ||
177 | unsigned long fill0[2]; | ||
178 | unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; | ||
179 | unsigned long fill1[2]; | ||
180 | }; | ||
181 | |||
182 | /* | ||
183 | * TGH - TLB Global Handle | ||
184 | * Used for TLB flushing. | ||
185 | * | ||
186 | */ | ||
187 | struct gru_tlb_global_handle { | ||
188 | unsigned int cmd:1; /* DW 0 */ | ||
189 | unsigned int delresp:1; | ||
190 | unsigned int opc:1; | ||
191 | unsigned int fill1:5; | ||
192 | |||
193 | unsigned int fill2:8; | ||
194 | |||
195 | unsigned int status:2; | ||
196 | unsigned long fill3:2; | ||
197 | unsigned int state:3; | ||
198 | unsigned long fill4:1; | ||
199 | |||
200 | unsigned int cause:3; | ||
201 | unsigned long fill5:37; | ||
202 | |||
203 | unsigned long vaddr:64; /* DW 1 */ | ||
204 | |||
205 | unsigned int asid:24; /* DW 2 */ | ||
206 | unsigned int fill6:8; | ||
207 | |||
208 | unsigned int pagesize:5; | ||
209 | unsigned int fill7:11; | ||
210 | |||
211 | unsigned int global:1; | ||
212 | unsigned int fill8:15; | ||
213 | |||
214 | unsigned long vaddrmask:39; /* DW 3 */ | ||
215 | unsigned int fill9:9; | ||
216 | unsigned int n:10; | ||
217 | unsigned int fill10:6; | ||
218 | |||
219 | unsigned int ctxbitmap:16; /* DW4 */ | ||
220 | unsigned long fill11[3]; | ||
221 | }; | ||
222 | |||
223 | enum gru_tgh_cmd { | ||
224 | TGHCMD_START | ||
225 | }; | ||
226 | |||
227 | enum gru_tgh_opc { | ||
228 | TGHOP_TLBNOP, | ||
229 | TGHOP_TLBINV | ||
230 | }; | ||
231 | |||
232 | enum gru_tgh_status { | ||
233 | TGHSTATUS_IDLE, | ||
234 | TGHSTATUS_EXCEPTION, | ||
235 | TGHSTATUS_ACTIVE | ||
236 | }; | ||
237 | |||
238 | enum gru_tgh_state { | ||
239 | TGHSTATE_IDLE, | ||
240 | TGHSTATE_PE_INVAL, | ||
241 | TGHSTATE_INTERRUPT_INVAL, | ||
242 | TGHSTATE_WAITDONE, | ||
243 | TGHSTATE_RESTART_CTX, | ||
244 | }; | ||
245 | |||
246 | /* | ||
247 | * TFH - TLB Fault Handle | ||
248 | * Used for TLB dropins into the GRU TLB. | ||
249 | * | ||
250 | */ | ||
251 | struct gru_tlb_fault_handle { | ||
252 | unsigned int cmd:1; /* DW 0 - low 32*/ | ||
253 | unsigned int delresp:1; | ||
254 | unsigned int fill0:2; | ||
255 | unsigned int opc:3; | ||
256 | unsigned int fill1:9; | ||
257 | |||
258 | unsigned int status:2; | ||
259 | unsigned int fill2:1; | ||
260 | unsigned int color:1; | ||
261 | unsigned int state:3; | ||
262 | unsigned int fill3:1; | ||
263 | |||
264 | unsigned int cause:7; /* DW 0 - high 32 */ | ||
265 | unsigned int fill4:1; | ||
266 | |||
267 | unsigned int indexway:12; | ||
268 | unsigned int fill5:4; | ||
269 | |||
270 | unsigned int ctxnum:4; | ||
271 | unsigned int fill6:12; | ||
272 | |||
273 | unsigned long missvaddr:64; /* DW 1 */ | ||
274 | |||
275 | unsigned int missasid:24; /* DW 2 */ | ||
276 | unsigned int fill7:8; | ||
277 | unsigned int fillasid:24; | ||
278 | unsigned int dirty:1; | ||
279 | unsigned int gaa:2; | ||
280 | unsigned long fill8:5; | ||
281 | |||
282 | unsigned long pfn:41; /* DW 3 */ | ||
283 | unsigned int fill9:7; | ||
284 | unsigned int pagesize:5; | ||
285 | unsigned int fill10:11; | ||
286 | |||
287 | unsigned long fillvaddr:64; /* DW 4 */ | ||
288 | |||
289 | unsigned long fill11[3]; | ||
290 | }; | ||
291 | |||
292 | enum gru_tfh_opc { | ||
293 | TFHOP_NOOP, | ||
294 | TFHOP_RESTART, | ||
295 | TFHOP_WRITE_ONLY, | ||
296 | TFHOP_WRITE_RESTART, | ||
297 | TFHOP_EXCEPTION, | ||
298 | TFHOP_USER_POLLING_MODE = 7, | ||
299 | }; | ||
300 | |||
301 | enum tfh_status { | ||
302 | TFHSTATUS_IDLE, | ||
303 | TFHSTATUS_EXCEPTION, | ||
304 | TFHSTATUS_ACTIVE, | ||
305 | }; | ||
306 | |||
307 | enum tfh_state { | ||
308 | TFHSTATE_INACTIVE, | ||
309 | TFHSTATE_IDLE, | ||
310 | TFHSTATE_MISS_UPM, | ||
311 | TFHSTATE_MISS_FMM, | ||
312 | TFHSTATE_HW_ERR, | ||
313 | TFHSTATE_WRITE_TLB, | ||
314 | TFHSTATE_RESTART_CBR, | ||
315 | }; | ||
316 | |||
317 | /* TFH cause bits */ | ||
318 | enum tfh_cause { | ||
319 | TFHCAUSE_NONE, | ||
320 | TFHCAUSE_TLB_MISS, | ||
321 | TFHCAUSE_TLB_MOD, | ||
322 | TFHCAUSE_HW_ERROR_RR, | ||
323 | TFHCAUSE_HW_ERROR_MAIN_ARRAY, | ||
324 | TFHCAUSE_HW_ERROR_VALID, | ||
325 | TFHCAUSE_HW_ERROR_PAGESIZE, | ||
326 | TFHCAUSE_INSTRUCTION_EXCEPTION, | ||
327 | TFHCAUSE_UNCORRECTIBLE_ERROR, | ||
328 | }; | ||
329 | |||
330 | /* GAA values */ | ||
331 | #define GAA_RAM 0x0 | ||
332 | #define GAA_NCRAM 0x2 | ||
333 | #define GAA_MMIO 0x1 | ||
334 | #define GAA_REGISTER 0x3 | ||
335 | |||
336 | /* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */ | ||
337 | #define GRU_PADDR_SHIFT 12 | ||
338 | |||
339 | /* | ||
340 | * Context Configuration handle | ||
341 | * Used to allocate resources to a GSEG context. | ||
342 | * | ||
343 | */ | ||
344 | struct gru_context_configuration_handle { | ||
345 | unsigned int cmd:1; /* DW0 */ | ||
346 | unsigned int delresp:1; | ||
347 | unsigned int opc:3; | ||
348 | unsigned int unmap_enable:1; | ||
349 | unsigned int req_slice_set_enable:1; | ||
350 | unsigned int req_slice:2; | ||
351 | unsigned int cb_int_enable:1; | ||
352 | unsigned int tlb_int_enable:1; | ||
353 | unsigned int tfm_fault_bit_enable:1; | ||
354 | unsigned int tlb_int_select:4; | ||
355 | |||
356 | unsigned int status:2; | ||
357 | unsigned int state:2; | ||
358 | unsigned int reserved2:4; | ||
359 | |||
360 | unsigned int cause:4; | ||
361 | unsigned int tfm_done_bit_enable:1; | ||
362 | unsigned int unused:3; | ||
363 | |||
364 | unsigned int dsr_allocation_map; | ||
365 | |||
366 | unsigned long cbr_allocation_map; /* DW1 */ | ||
367 | |||
368 | unsigned int asid[8]; /* DW 2 - 5 */ | ||
369 | unsigned short sizeavail[8]; /* DW 6 - 7 */ | ||
370 | } __attribute__ ((packed)); | ||
371 | |||
372 | enum gru_cch_opc { | ||
373 | CCHOP_START = 1, | ||
374 | CCHOP_ALLOCATE, | ||
375 | CCHOP_INTERRUPT, | ||
376 | CCHOP_DEALLOCATE, | ||
377 | CCHOP_INTERRUPT_SYNC, | ||
378 | }; | ||
379 | |||
380 | enum gru_cch_status { | ||
381 | CCHSTATUS_IDLE, | ||
382 | CCHSTATUS_EXCEPTION, | ||
383 | CCHSTATUS_ACTIVE, | ||
384 | }; | ||
385 | |||
386 | enum gru_cch_state { | ||
387 | CCHSTATE_INACTIVE, | ||
388 | CCHSTATE_MAPPED, | ||
389 | CCHSTATE_ACTIVE, | ||
390 | CCHSTATE_INTERRUPTED, | ||
391 | }; | ||
392 | |||
393 | /* CCH Exception cause */ | ||
394 | enum gru_cch_cause { | ||
395 | CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1, | ||
396 | CCHCAUSE_ILLEGAL_OPCODE = 2, | ||
397 | CCHCAUSE_INVALID_START_REQUEST = 3, | ||
398 | CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4, | ||
399 | CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5, | ||
400 | CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6, | ||
401 | CCHCAUSE_CCH_BUSY = 7, | ||
402 | CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8, | ||
403 | CCHCAUSE_BAD_TFM_CONFIG = 9, | ||
404 | CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10, | ||
405 | CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11, | ||
406 | CCHCAUSE_CBR_DEALLOCATION_ERROR = 12, | ||
407 | }; | ||
408 | /* | ||
409 | * CBE - Control Block Extended | ||
410 | * Maintains internal GRU state for active CBs. | ||
411 | * | ||
412 | */ | ||
413 | struct gru_control_block_extended { | ||
414 | unsigned int reserved0:1; /* DW 0 - low */ | ||
415 | unsigned int imacpy:3; | ||
416 | unsigned int reserved1:4; | ||
417 | unsigned int xtypecpy:3; | ||
418 | unsigned int iaa0cpy:2; | ||
419 | unsigned int iaa1cpy:2; | ||
420 | unsigned int reserved2:1; | ||
421 | unsigned int opccpy:8; | ||
422 | unsigned int exopccpy:8; | ||
423 | |||
424 | unsigned int idef2cpy:22; /* DW 0 - high */ | ||
425 | unsigned int reserved3:10; | ||
426 | |||
427 | unsigned int idef4cpy:22; /* DW 1 */ | ||
428 | unsigned int reserved4:10; | ||
429 | unsigned int idef4upd:22; | ||
430 | unsigned int reserved5:10; | ||
431 | |||
432 | unsigned long idef1upd:64; /* DW 2 */ | ||
433 | |||
434 | unsigned long idef5cpy:64; /* DW 3 */ | ||
435 | |||
436 | unsigned long idef6cpy:64; /* DW 4 */ | ||
437 | |||
438 | unsigned long idef3upd:64; /* DW 5 */ | ||
439 | |||
440 | unsigned long idef5upd:64; /* DW 6 */ | ||
441 | |||
442 | unsigned int idef2upd:22; /* DW 7 */ | ||
443 | unsigned int reserved6:10; | ||
444 | |||
445 | unsigned int ecause:20; | ||
446 | unsigned int cbrstate:4; | ||
447 | unsigned int cbrexecstatus:8; | ||
448 | }; | ||
449 | |||
450 | enum gru_cbr_state { | ||
451 | CBRSTATE_INACTIVE, | ||
452 | CBRSTATE_IDLE, | ||
453 | CBRSTATE_PE_CHECK, | ||
454 | CBRSTATE_QUEUED, | ||
455 | CBRSTATE_WAIT_RESPONSE, | ||
456 | CBRSTATE_INTERRUPTED, | ||
457 | CBRSTATE_INTERRUPTED_MISS_FMM, | ||
458 | CBRSTATE_BUSY_INTERRUPT_MISS_FMM, | ||
459 | CBRSTATE_INTERRUPTED_MISS_UPM, | ||
460 | CBRSTATE_BUSY_INTERRUPTED_MISS_UPM, | ||
461 | CBRSTATE_REQUEST_ISSUE, | ||
462 | CBRSTATE_BUSY_INTERRUPT, | ||
463 | }; | ||
464 | |||
465 | /* CBE cbrexecstatus bits */ | ||
466 | #define CBR_EXS_ABORT_OCC_BIT 0 | ||
467 | #define CBR_EXS_INT_OCC_BIT 1 | ||
468 | #define CBR_EXS_PENDING_BIT 2 | ||
469 | #define CBR_EXS_QUEUED_BIT 3 | ||
470 | #define CBR_EXS_TLBHW_BIT 4 | ||
471 | #define CBR_EXS_EXCEPTION_BIT 5 | ||
472 | |||
473 | #define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) | ||
474 | #define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) | ||
475 | #define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) | ||
476 | #define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) | ||
477 | #define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT) | ||
478 | #define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) | ||
479 | |||
480 | /* CBE ecause bits - defined in gru_instructions.h */ | ||
481 | |||
482 | /* | ||
483 | * Convert a processor pagesize into the strange encoded pagesize used by the | ||
484 | * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT) | ||
485 | * pagesize log pagesize grupagesize | ||
486 | * 4k 12 0 | ||
487 | * 16k 14 1 | ||
488 | * 64k 16 2 | ||
489 | * 256k 18 3 | ||
490 | * 1m 20 4 | ||
491 | * 2m 21 5 | ||
492 | * 4m 22 6 | ||
493 | * 16m 24 7 | ||
494 | * 64m 26 8 | ||
495 | * ... | ||
496 | */ | ||
497 | #define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6) | ||
498 | #define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh)) | ||
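The encoding is non-obvious: shifts above 20 are bumped by 2 before halving, so the 2MB and 4MB sizes land on consecutive codes. A runnable check against the table above:

	#include <assert.h>

	#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6)

	int main(void)
	{
		assert(GRU_PAGESIZE(12) == 0);	/* 4k */
		assert(GRU_PAGESIZE(14) == 1);	/* 16k */
		assert(GRU_PAGESIZE(16) == 2);	/* 64k */
		assert(GRU_PAGESIZE(18) == 3);	/* 256k */
		assert(GRU_PAGESIZE(20) == 4);	/* 1m */
		assert(GRU_PAGESIZE(21) == 5);	/* 2m */
		assert(GRU_PAGESIZE(22) == 6);	/* 4m */
		assert(GRU_PAGESIZE(24) == 7);	/* 16m */
		assert(GRU_PAGESIZE(26) == 8);	/* 64m */
		return 0;
	}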
499 | |||
500 | /* minimum TLB purge count to ensure a full purge */ | ||
501 | #define GRUMAXINVAL 1024UL | ||
502 | |||
503 | |||
504 | /* Extract the status field from a kernel handle */ | ||
505 | #define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3) | ||
506 | |||
507 | static inline void start_instruction(void *h) | ||
508 | { | ||
509 | unsigned long *w0 = h; | ||
510 | |||
511 | wmb(); /* setting CMD bit must be last */ | ||
512 | *w0 = *w0 | 1; | ||
513 | gru_flush_cache(h); | ||
514 | } | ||
515 | |||
516 | static inline int wait_instruction_complete(void *h) | ||
517 | { | ||
518 | int status; | ||
519 | |||
520 | do { | ||
521 | cpu_relax(); | ||
522 | barrier(); | ||
523 | status = GET_MSEG_HANDLE_STATUS(h); | ||
524 | } while (status == CCHSTATUS_ACTIVE); | ||
525 | return status; | ||
526 | } | ||
527 | |||
528 | #if defined CONFIG_IA64 | ||
529 | static inline void cch_allocate_set_asids( | ||
530 | struct gru_context_configuration_handle *cch, int asidval) | ||
531 | { | ||
532 | int i; | ||
533 | |||
534 | for (i = 0; i <= RGN_HPAGE; i++) { /* assume HPAGE is last region */ | ||
535 | cch->asid[i] = (asidval++); | ||
536 | #if 0 | ||
537 | /* ZZZ hugepages not supported yet */ | ||
538 | if (i == RGN_HPAGE) | ||
539 | cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift); | ||
540 | else | ||
541 | #endif | ||
542 | cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT); | ||
543 | } | ||
544 | } | ||
545 | #elif defined CONFIG_X86_64 | ||
546 | static inline void cch_allocate_set_asids( | ||
547 | struct gru_context_configuration_handle *cch, int asidval) | ||
548 | { | ||
549 | int i; | ||
550 | |||
551 | for (i = 0; i < 8; i++) { | ||
552 | cch->asid[i] = asidval++; | ||
553 | cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT) | | ||
554 | GRU_SIZEAVAIL(21); | ||
555 | } | ||
556 | } | ||
557 | #endif | ||
558 | |||
559 | static inline int cch_allocate(struct gru_context_configuration_handle *cch, | ||
560 | int asidval, unsigned long cbrmap, | ||
561 | unsigned long dsrmap) | ||
562 | { | ||
563 | cch_allocate_set_asids(cch, asidval); | ||
564 | cch->dsr_allocation_map = dsrmap; | ||
565 | cch->cbr_allocation_map = cbrmap; | ||
566 | cch->opc = CCHOP_ALLOCATE; | ||
567 | start_instruction(cch); | ||
568 | return wait_instruction_complete(cch); | ||
569 | } | ||
570 | |||
571 | static inline int cch_start(struct gru_context_configuration_handle *cch) | ||
572 | { | ||
573 | cch->opc = CCHOP_START; | ||
574 | start_instruction(cch); | ||
575 | return wait_instruction_complete(cch); | ||
576 | } | ||
577 | |||
578 | static inline int cch_interrupt(struct gru_context_configuration_handle *cch) | ||
579 | { | ||
580 | cch->opc = CCHOP_INTERRUPT; | ||
581 | start_instruction(cch); | ||
582 | return wait_instruction_complete(cch); | ||
583 | } | ||
584 | |||
585 | static inline int cch_deallocate(struct gru_context_configuration_handle *cch) | ||
586 | { | ||
587 | cch->opc = CCHOP_DEALLOCATE; | ||
588 | start_instruction(cch); | ||
589 | return wait_instruction_complete(cch); | ||
590 | } | ||
591 | |||
592 | static inline int cch_interrupt_sync(struct gru_context_configuration_handle | ||
593 | *cch) | ||
594 | { | ||
595 | cch->opc = CCHOP_INTERRUPT_SYNC; | ||
596 | start_instruction(cch); | ||
597 | return wait_instruction_complete(cch); | ||
598 | } | ||
599 | |||
600 | static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh, | ||
601 | unsigned long vaddr, unsigned long vaddrmask, | ||
602 | int asid, int pagesize, int global, int n, | ||
603 | unsigned short ctxbitmap) | ||
604 | { | ||
605 | tgh->vaddr = vaddr; | ||
606 | tgh->asid = asid; | ||
607 | tgh->pagesize = pagesize; | ||
608 | tgh->n = n; | ||
609 | tgh->global = global; | ||
610 | tgh->vaddrmask = vaddrmask; | ||
611 | tgh->ctxbitmap = ctxbitmap; | ||
612 | tgh->opc = TGHOP_TLBINV; | ||
613 | start_instruction(tgh); | ||
614 | return wait_instruction_complete(tgh); | ||
615 | } | ||
616 | |||
617 | static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh, | ||
618 | unsigned long pfn, unsigned long vaddr, | ||
619 | int asid, int dirty, int pagesize) | ||
620 | { | ||
621 | tfh->fillasid = asid; | ||
622 | tfh->fillvaddr = vaddr; | ||
623 | tfh->pfn = pfn; | ||
624 | tfh->dirty = dirty; | ||
625 | tfh->pagesize = pagesize; | ||
626 | tfh->opc = TFHOP_WRITE_ONLY; | ||
627 | start_instruction(tfh); | ||
628 | } | ||
629 | |||
630 | static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh, | ||
631 | unsigned long paddr, int gaa, | ||
632 | unsigned long vaddr, int asid, int dirty, | ||
633 | int pagesize) | ||
634 | { | ||
635 | tfh->fillasid = asid; | ||
636 | tfh->fillvaddr = vaddr; | ||
637 | tfh->pfn = paddr >> GRU_PADDR_SHIFT; | ||
638 | tfh->gaa = gaa; | ||
639 | tfh->dirty = dirty; | ||
640 | tfh->pagesize = pagesize; | ||
641 | tfh->opc = TFHOP_WRITE_RESTART; | ||
642 | start_instruction(tfh); | ||
643 | } | ||
644 | |||
645 | static inline void tfh_restart(struct gru_tlb_fault_handle *tfh) | ||
646 | { | ||
647 | tfh->opc = TFHOP_RESTART; | ||
648 | start_instruction(tfh); | ||
649 | } | ||
650 | |||
651 | static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) | ||
652 | { | ||
653 | tfh->opc = TFHOP_USER_POLLING_MODE; | ||
654 | start_instruction(tfh); | ||
655 | } | ||
656 | |||
657 | static inline void tfh_exception(struct gru_tlb_fault_handle *tfh) | ||
658 | { | ||
659 | tfh->opc = TFHOP_EXCEPTION; | ||
660 | start_instruction(tfh); | ||
661 | } | ||
662 | |||
663 | #endif /* __GRUHANDLES_H__ */ | ||
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c new file mode 100644 index 000000000000..dfd49af0fe18 --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.c | |||
@@ -0,0 +1,679 @@ | |||
1 | /* | ||
2 | * SN Platform GRU Driver | ||
3 | * | ||
4 | * KERNEL SERVICES THAT USE THE GRU | ||
5 | * | ||
6 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/errno.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/mm.h> | ||
27 | #include <linux/smp_lock.h> | ||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/device.h> | ||
30 | #include <linux/miscdevice.h> | ||
31 | #include <linux/proc_fs.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/uaccess.h> | ||
34 | #include "gru.h" | ||
35 | #include "grulib.h" | ||
36 | #include "grutables.h" | ||
37 | #include "grukservices.h" | ||
38 | #include "gru_instructions.h" | ||
39 | #include <asm/uv/uv_hub.h> | ||
40 | |||
41 | /* | ||
42 | * Kernel GRU Usage | ||
43 | * | ||
44 | * The following is an interim algorithm for management of kernel GRU | ||
45 | * resources. This will likely be replaced when we better understand the | ||
46 | * kernel/user requirements. | ||
47 | * | ||
48 | * At boot time, the kernel permanently reserves a fixed number of | ||
49 | * CBRs/DSRs for each cpu to use. The resources are all taken from | ||
50 | * the GRU chiplet 1 on the blade. This leaves the full set of resources | ||
51 | * of chiplet 0 available to be allocated to a single user. | ||
52 | */ | ||
53 | |||
54 | /* Blade percpu resources PERMANENTLY reserved for kernel use */ | ||
55 | #define GRU_NUM_KERNEL_CBR 1 | ||
56 | #define GRU_NUM_KERNEL_DSR_BYTES 256 | ||
57 | #define KERNEL_CTXNUM 15 | ||
58 | |||
59 | /* GRU instruction attributes for all instructions */ | ||
60 | #define IMA IMA_CB_DELAY | ||
61 | |||
62 | /* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ | ||
63 | #define __gru_cacheline_aligned__ \ | ||
64 | __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) | ||
65 | |||
66 | #define MAGIC 0x1234567887654321UL | ||
67 | |||
68 | /* Default retry count for GRU errors on kernel instructions */ | ||
69 | #define EXCEPTION_RETRY_LIMIT 3 | ||
70 | |||
71 | /* Status of message queue sections */ | ||
72 | #define MQS_EMPTY 0 | ||
73 | #define MQS_FULL 1 | ||
74 | #define MQS_NOOP 2 | ||
75 | |||
76 | /*----------------- RESOURCE MANAGEMENT -------------------------------------*/ | ||
77 | /* optimized for x86_64 */ | ||
78 | struct message_queue { | ||
79 | union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ | ||
80 | int qlines; /* DW 1 */ | ||
81 | long hstatus[2]; | ||
82 | void *next __gru_cacheline_aligned__;/* CL 1 */ | ||
83 | void *limit; | ||
84 | void *start; | ||
85 | void *start2; | ||
86 | char data ____cacheline_aligned; /* CL 2 */ | ||
87 | }; | ||
88 | |||
89 | /* First word in every message - used by mesq interface */ | ||
90 | struct message_header { | ||
91 | char present; | ||
92 | char present2; | ||
93 | char lines; | ||
94 | char fill; | ||
95 | }; | ||
96 | |||
97 | #define QLINES(mq) ((mq) + offsetof(struct message_queue, qlines)) | ||
98 | #define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) | ||
99 | |||
100 | static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) | ||
101 | { | ||
102 | struct gru_blade_state *bs; | ||
103 | int lcpu; | ||
104 | |||
105 | BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); | ||
106 | preempt_disable(); | ||
107 | bs = gru_base[uv_numa_blade_id()]; | ||
108 | lcpu = uv_blade_processor_id(); | ||
109 | *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; | ||
110 | *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; | ||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | static void gru_free_cpu_resources(void *cb, void *dsr) | ||
115 | { | ||
116 | preempt_enable(); | ||
117 | } | ||
118 | |||
119 | int gru_get_cb_exception_detail(void *cb, | ||
120 | struct control_block_extended_exc_detail *excdet) | ||
121 | { | ||
122 | struct gru_control_block_extended *cbe; | ||
123 | |||
124 | cbe = get_cbe(GRUBASE(cb), get_cb_number(cb)); | ||
125 | excdet->opc = cbe->opccpy; | ||
126 | excdet->exopc = cbe->exopccpy; | ||
127 | excdet->ecause = cbe->ecause; | ||
128 | excdet->exceptdet0 = cbe->idef1upd; | ||
129 | excdet->exceptdet1 = cbe->idef3upd; | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | char *gru_get_cb_exception_detail_str(int ret, void *cb, | ||
134 | char *buf, int size) | ||
135 | { | ||
136 | struct gru_control_block_status *gen = (void *)cb; | ||
137 | struct control_block_extended_exc_detail excdet; | ||
138 | |||
139 | if (ret > 0 && gen->istatus == CBS_EXCEPTION) { | ||
140 | gru_get_cb_exception_detail(cb, &excdet); | ||
141 | snprintf(buf, size, | ||
142 | "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x," | ||
143 | "excdet0 0x%lx, excdet1 0x%x", | ||
144 | gen, excdet.opc, excdet.exopc, excdet.ecause, | ||
145 | excdet.exceptdet0, excdet.exceptdet1); | ||
146 | } else { | ||
147 | snprintf(buf, size, "No exception"); | ||
148 | } | ||
149 | return buf; | ||
150 | } | ||
151 | |||
152 | static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) | ||
153 | { | ||
154 | while (gen->istatus >= CBS_ACTIVE) { | ||
155 | cpu_relax(); | ||
156 | barrier(); | ||
157 | } | ||
158 | return gen->istatus; | ||
159 | } | ||
160 | |||
161 | static int gru_retry_exception(void *cb) | ||
162 | { | ||
163 | struct gru_control_block_status *gen = (void *)cb; | ||
164 | struct control_block_extended_exc_detail excdet; | ||
165 | int retry = EXCEPTION_RETRY_LIMIT; | ||
166 | |||
167 | while (1) { | ||
168 | if (gru_get_cb_message_queue_substatus(cb)) | ||
169 | break; | ||
170 | if (gru_wait_idle_or_exception(gen) == CBS_IDLE) | ||
171 | return CBS_IDLE; | ||
172 | |||
173 | gru_get_cb_exception_detail(cb, &excdet); | ||
174 | if (excdet.ecause & ~EXCEPTION_RETRY_BITS) | ||
175 | break; | ||
176 | if (retry-- == 0) | ||
177 | break; | ||
178 | gen->icmd = 1; | ||
179 | gru_flush_cache(gen); | ||
180 | } | ||
181 | return CBS_EXCEPTION; | ||
182 | } | ||
183 | |||
184 | int gru_check_status_proc(void *cb) | ||
185 | { | ||
186 | struct gru_control_block_status *gen = (void *)cb; | ||
187 | int ret; | ||
188 | |||
189 | ret = gen->istatus; | ||
190 | if (ret != CBS_EXCEPTION) | ||
191 | return ret; | ||
192 | return gru_retry_exception(cb); | ||
193 | |||
194 | } | ||
195 | |||
196 | int gru_wait_proc(void *cb) | ||
197 | { | ||
198 | struct gru_control_block_status *gen = (void *)cb; | ||
199 | int ret; | ||
200 | |||
201 | ret = gru_wait_idle_or_exception(gen); | ||
202 | if (ret == CBS_EXCEPTION) | ||
203 | ret = gru_retry_exception(cb); | ||
204 | |||
205 | return ret; | ||
206 | } | ||
207 | |||
208 | void gru_abort(int ret, void *cb, char *str) | ||
209 | { | ||
210 | char buf[GRU_EXC_STR_SIZE]; | ||
211 | |||
212 | panic("GRU FATAL ERROR: %s - %s\n", str, | ||
213 | gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); | ||
214 | } | ||
215 | |||
216 | void gru_wait_abort_proc(void *cb) | ||
217 | { | ||
218 | int ret; | ||
219 | |||
220 | ret = gru_wait_proc(cb); | ||
221 | if (ret) | ||
222 | gru_abort(ret, cb, "gru_wait_abort"); | ||
223 | } | ||
224 | |||
225 | |||
226 | /*------------------------------ MESSAGE QUEUES -----------------------------*/ | ||
227 | |||
228 | /* Internal status. These are NOT returned to the user. */ | ||
229 | #define MQIE_AGAIN -1 /* try again */ | ||
230 | |||
231 | |||
232 | /* | ||
233 | * Save/restore the "present" flag that is in the second line of 2-line | ||
234 | * messages | ||
235 | */ | ||
236 | static inline int get_present2(void *p) | ||
237 | { | ||
238 | struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; | ||
239 | return mhdr->present; | ||
240 | } | ||
241 | |||
242 | static inline void restore_present2(void *p, int val) | ||
243 | { | ||
244 | struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; | ||
245 | mhdr->present = val; | ||
246 | } | ||
247 | |||
248 | /* | ||
249 | * Create a message queue. | ||
250 |  * bytes - size of the message queue in bytes; includes the 2-cache-line header. | ||
251 | */ | ||
252 | int gru_create_message_queue(void *p, unsigned int bytes) | ||
253 | { | ||
254 | struct message_queue *mq = p; | ||
255 | unsigned int qlines; | ||
256 | |||
257 | qlines = bytes / GRU_CACHE_LINE_BYTES - 2; | ||
258 | memset(mq, 0, bytes); | ||
259 | mq->start = &mq->data; | ||
260 | mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; | ||
261 | mq->next = &mq->data; | ||
262 | mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; | ||
263 | mq->qlines = qlines; | ||
264 | mq->hstatus[0] = 0; | ||
265 | mq->hstatus[1] = 1; | ||
266 | mq->head = gru_mesq_head(2, qlines / 2 + 1); | ||
267 | return 0; | ||
268 | } | ||
269 | EXPORT_SYMBOL_GPL(gru_create_message_queue); | ||
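Worked layout for a 1KB (16 cache line) queue, following the arithmetic above: 14 message lines remain after the 2-line header, start2 lands 6 lines into the data area, and limit lands 12 lines in. A runnable restatement:

	#include <assert.h>
	#include <stdio.h>

	#define GRU_CACHE_LINE_BYTES 64

	int main(void)
	{
		unsigned int bytes = 1024;
		unsigned int qlines = bytes / GRU_CACHE_LINE_BYTES - 2;

		assert(qlines == 14);
		/* Offsets from the start of the data area (cache line 2): */
		printf("start2 offset: %u\n", (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES); /* 384 */
		printf("limit  offset: %u\n", (qlines - 2) * GRU_CACHE_LINE_BYTES);     /* 768 */
		return 0;
	}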
270 | |||
271 | /* | ||
272 | * Send a NOOP message to a message queue | ||
273 | * Returns: | ||
274 | * 0 - if queue is full after the send. This is the normal case | ||
275 | * but various races can change this. | ||
276 | * -1 - if mesq sent successfully but queue not full | ||
277 | * >0 - unexpected error. MQE_xxx returned | ||
278 | */ | ||
279 | static int send_noop_message(void *cb, | ||
280 | unsigned long mq, void *mesg) | ||
281 | { | ||
282 | const struct message_header noop_header = { | ||
283 | .present = MQS_NOOP, .lines = 1}; | ||
284 | unsigned long m; | ||
285 | int substatus, ret; | ||
286 | struct message_header save_mhdr, *mhdr = mesg; | ||
287 | |||
288 | STAT(mesq_noop); | ||
289 | save_mhdr = *mhdr; | ||
290 | *mhdr = noop_header; | ||
291 | gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA); | ||
292 | ret = gru_wait(cb); | ||
293 | |||
294 | if (ret) { | ||
295 | substatus = gru_get_cb_message_queue_substatus(cb); | ||
296 | switch (substatus) { | ||
297 | case CBSS_NO_ERROR: | ||
298 | STAT(mesq_noop_unexpected_error); | ||
299 | ret = MQE_UNEXPECTED_CB_ERR; | ||
300 | break; | ||
301 | case CBSS_LB_OVERFLOWED: | ||
302 | STAT(mesq_noop_lb_overflow); | ||
303 | ret = MQE_CONGESTION; | ||
304 | break; | ||
305 | case CBSS_QLIMIT_REACHED: | ||
306 | STAT(mesq_noop_qlimit_reached); | ||
307 | ret = 0; | ||
308 | break; | ||
309 | case CBSS_AMO_NACKED: | ||
310 | STAT(mesq_noop_amo_nacked); | ||
311 | ret = MQE_CONGESTION; | ||
312 | break; | ||
313 | case CBSS_PUT_NACKED: | ||
314 | STAT(mesq_noop_put_nacked); | ||
315 | m = mq + (gru_get_amo_value_head(cb) << 6); | ||
316 | gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, | ||
317 | IMA); | ||
318 | if (gru_wait(cb) == CBS_IDLE) | ||
319 | ret = MQIE_AGAIN; | ||
320 | else | ||
321 | ret = MQE_UNEXPECTED_CB_ERR; | ||
322 | break; | ||
323 | case CBSS_PAGE_OVERFLOW: | ||
324 | default: | ||
325 | BUG(); | ||
326 | } | ||
327 | } | ||
328 | *mhdr = save_mhdr; | ||
329 | return ret; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * Handle a gru_mesq full. | ||
334 | */ | ||
335 | static int send_message_queue_full(void *cb, | ||
336 | unsigned long mq, void *mesg, int lines) | ||
337 | { | ||
338 | union gru_mesqhead mqh; | ||
339 | unsigned int limit, head; | ||
340 | unsigned long avalue; | ||
341 | int half, qlines, save; | ||
342 | |||
343 | /* Determine if switching to first/second half of q */ | ||
344 | avalue = gru_get_amo_value(cb); | ||
345 | head = gru_get_amo_value_head(cb); | ||
346 | limit = gru_get_amo_value_limit(cb); | ||
347 | |||
348 | /* | ||
349 | * Fetch "qlines" from the queue header. Since the queue may be | ||
350 | * in memory that can't be accessed using socket addresses, use | ||
351 | * the GRU to access the data. Use DSR space from the message. | ||
352 | */ | ||
353 | save = *(int *)mesg; | ||
354 | gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA); | ||
355 | if (gru_wait(cb) != CBS_IDLE) | ||
356 | goto cberr; | ||
357 | qlines = *(int *)mesg; | ||
358 | *(int *)mesg = save; | ||
359 | half = (limit != qlines); | ||
360 | |||
361 | if (half) | ||
362 | mqh = gru_mesq_head(qlines / 2 + 1, qlines); | ||
363 | else | ||
364 | mqh = gru_mesq_head(2, qlines / 2 + 1); | ||
365 | |||
366 | /* Try to get lock for switching head pointer */ | ||
367 | gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA); | ||
368 | if (gru_wait(cb) != CBS_IDLE) | ||
369 | goto cberr; | ||
370 | if (!gru_get_amo_value(cb)) { | ||
371 | STAT(mesq_qf_locked); | ||
372 | return MQE_QUEUE_FULL; | ||
373 | } | ||
374 | |||
375 | /* Got the lock. Send optional NOP if queue not full, */ | ||
376 | if (head != limit) { | ||
377 | if (send_noop_message(cb, mq, mesg)) { | ||
378 | gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), | ||
379 | XTYPE_DW, IMA); | ||
380 | if (gru_wait(cb) != CBS_IDLE) | ||
381 | goto cberr; | ||
382 | STAT(mesq_qf_noop_not_full); | ||
383 | return MQIE_AGAIN; | ||
384 | } | ||
385 | avalue++; | ||
386 | } | ||
387 | |||
388 | /* Then flip queuehead to other half of queue. */ | ||
389 | gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA); | ||
390 | if (gru_wait(cb) != CBS_IDLE) | ||
391 | goto cberr; | ||
392 | |||
393 | /* If the queue head swap was not successful, clear the hstatus lock */ | ||
394 | if (gru_get_amo_value(cb) != avalue) { | ||
395 | STAT(mesq_qf_switch_head_failed); | ||
396 | gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA); | ||
397 | if (gru_wait(cb) != CBS_IDLE) | ||
398 | goto cberr; | ||
399 | } | ||
400 | return MQIE_AGAIN; | ||
401 | cberr: | ||
402 | STAT(mesq_qf_unexpected_error); | ||
403 | return MQE_UNEXPECTED_CB_ERR; | ||
404 | } | ||
405 | |||
406 | |||
407 | /* | ||
408 | * Handle a gru_mesq failure. Some of these failures are software recoverable | ||
409 | * or retryable. | ||
410 | */ | ||
411 | static int send_message_failure(void *cb, | ||
412 | unsigned long mq, | ||
413 | void *mesg, | ||
414 | int lines) | ||
415 | { | ||
416 | int substatus, ret = 0; | ||
417 | unsigned long m; | ||
418 | |||
419 | substatus = gru_get_cb_message_queue_substatus(cb); | ||
420 | switch (substatus) { | ||
421 | case CBSS_NO_ERROR: | ||
422 | STAT(mesq_send_unexpected_error); | ||
423 | ret = MQE_UNEXPECTED_CB_ERR; | ||
424 | break; | ||
425 | case CBSS_LB_OVERFLOWED: | ||
426 | STAT(mesq_send_lb_overflow); | ||
427 | ret = MQE_CONGESTION; | ||
428 | break; | ||
429 | case CBSS_QLIMIT_REACHED: | ||
430 | STAT(mesq_send_qlimit_reached); | ||
431 | ret = send_message_queue_full(cb, mq, mesg, lines); | ||
432 | break; | ||
433 | case CBSS_AMO_NACKED: | ||
434 | STAT(mesq_send_amo_nacked); | ||
435 | ret = MQE_CONGESTION; | ||
436 | break; | ||
437 | case CBSS_PUT_NACKED: | ||
438 | STAT(mesq_send_put_nacked); | ||
439 | m = mq + (gru_get_amo_value_head(cb) << 6); | ||
440 | gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); | ||
441 | if (gru_wait(cb) == CBS_IDLE) | ||
442 | ret = MQE_OK; | ||
443 | else | ||
444 | ret = MQE_UNEXPECTED_CB_ERR; | ||
445 | break; | ||
446 | default: | ||
447 | BUG(); | ||
448 | } | ||
449 | return ret; | ||
450 | } | ||
451 | |||
452 | /* | ||
453 | * Send a message to a message queue | ||
454 | * cb GRU control block to use to send message | ||
455 | * mq message queue | ||
456 |  * mesg message. Must be vaddr within a GSEG | ||
457 | * bytes message size (<= 2 CL) | ||
458 | */ | ||
459 | int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes) | ||
460 | { | ||
461 | struct message_header *mhdr; | ||
462 | void *cb; | ||
463 | void *dsr; | ||
464 | int istatus, clines, ret; | ||
465 | |||
466 | STAT(mesq_send); | ||
467 | BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); | ||
468 | |||
469 | clines = (bytes + GRU_CACHE_LINE_BYTES - 1) / GRU_CACHE_LINE_BYTES; | ||
470 | if (gru_get_cpu_resources(bytes, &cb, &dsr)) | ||
471 | return MQE_BUG_NO_RESOURCES; | ||
472 | memcpy(dsr, mesg, bytes); | ||
473 | mhdr = dsr; | ||
474 | mhdr->present = MQS_FULL; | ||
475 | mhdr->lines = clines; | ||
476 | if (clines == 2) { | ||
477 | mhdr->present2 = get_present2(mhdr); | ||
478 | restore_present2(mhdr, MQS_FULL); | ||
479 | } | ||
480 | |||
481 | do { | ||
482 | ret = MQE_OK; | ||
483 | gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA); | ||
484 | istatus = gru_wait(cb); | ||
485 | if (istatus != CBS_IDLE) | ||
486 | ret = send_message_failure(cb, mq, dsr, clines); | ||
487 | } while (ret == MQIE_AGAIN); | ||
488 | gru_free_cpu_resources(cb, dsr); | ||
489 | |||
490 | if (ret) | ||
491 | STAT(mesq_send_failed); | ||
492 | return ret; | ||
493 | } | ||
494 | EXPORT_SYMBOL_GPL(gru_send_message_gpa); | ||
495 | |||
496 | /* | ||
497 | * Advance the receive pointer for the queue to the next message. | ||
498 | */ | ||
499 | void gru_free_message(void *rmq, void *mesg) | ||
500 | { | ||
501 | struct message_queue *mq = rmq; | ||
502 | struct message_header *mhdr = mq->next; | ||
503 | void *next, *pnext; | ||
504 | int half = -1; | ||
505 | int lines = mhdr->lines; | ||
506 | |||
507 | if (lines == 2) | ||
508 | restore_present2(mhdr, MQS_EMPTY); | ||
509 | mhdr->present = MQS_EMPTY; | ||
510 | |||
511 | pnext = mq->next; | ||
512 | next = pnext + GRU_CACHE_LINE_BYTES * lines; | ||
513 | if (next == mq->limit) { | ||
514 | next = mq->start; | ||
515 | half = 1; | ||
516 | } else if (pnext < mq->start2 && next >= mq->start2) { | ||
517 | half = 0; | ||
518 | } | ||
519 | |||
520 | if (half >= 0) | ||
521 | mq->hstatus[half] = 1; | ||
522 | mq->next = next; | ||
523 | } | ||
524 | EXPORT_SYMBOL_GPL(gru_free_message); | ||
525 | |||
526 | /* | ||
527 | * Get next message from message queue. Return NULL if no message | ||
528 |  * present. User must call gru_free_message() to move to the next message. | ||
529 | * rmq message queue | ||
530 | */ | ||
531 | void *gru_get_next_message(void *rmq) | ||
532 | { | ||
533 | struct message_queue *mq = rmq; | ||
534 | struct message_header *mhdr = mq->next; | ||
535 | int present = mhdr->present; | ||
536 | |||
537 | /* skip NOOP messages */ | ||
538 | STAT(mesq_receive); | ||
539 | while (present == MQS_NOOP) { | ||
540 | gru_free_message(rmq, mhdr); | ||
541 | mhdr = mq->next; | ||
542 | present = mhdr->present; | ||
543 | } | ||
544 | |||
545 | /* Wait for both halves of 2 line messages */ | ||
546 | if (present == MQS_FULL && mhdr->lines == 2 && | ||
547 | get_present2(mhdr) == MQS_EMPTY) | ||
548 | present = MQS_EMPTY; | ||
549 | |||
550 | if (!present) { | ||
551 | STAT(mesq_receive_none); | ||
552 | return NULL; | ||
553 | } | ||
554 | |||
555 | if (mhdr->lines == 2) | ||
556 | restore_present2(mhdr, mhdr->present2); | ||
557 | |||
558 | return mhdr; | ||
559 | } | ||
560 | EXPORT_SYMBOL_GPL(gru_get_next_message); | ||
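Taken together, a kernel client drives the queue roughly as sketched below (hedged: error handling is abbreviated, the 4KB queue and one-line message are arbitrary choices, and qbuf is assumed to be cache-line-aligned kernel memory):

	static int mesq_selftest(void *qbuf)
	{
		char msg[GRU_CACHE_LINE_BYTES] = "hello";
		void *m;
		int ret;

		/* qbuf: cache-line-aligned kernel memory, e.g. one 4KB page */
		gru_create_message_queue(qbuf, 4096);

		/* Sender side addresses the queue by global physical address */
		ret = gru_send_message_gpa(uv_gpa(qbuf), msg, sizeof(msg));
		if (ret != MQE_OK)
			return ret;

		/* Receiver side: poll, consume, then advance the queue */
		m = gru_get_next_message(qbuf);
		if (m)
			gru_free_message(qbuf, m);
		return 0;
	}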
561 | |||
562 | /* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ | ||
563 | |||
564 | /* | ||
565 | * Copy a block of data using the GRU resources | ||
566 | */ | ||
567 | int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, | ||
568 | unsigned int bytes) | ||
569 | { | ||
570 | void *cb; | ||
571 | void *dsr; | ||
572 | int ret; | ||
573 | |||
574 | STAT(copy_gpa); | ||
575 | if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) | ||
576 | return MQE_BUG_NO_RESOURCES; | ||
577 | gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), | ||
578 | XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA); | ||
579 | ret = gru_wait(cb); | ||
580 | gru_free_cpu_resources(cb, dsr); | ||
581 | return ret; | ||
582 | } | ||
583 | EXPORT_SYMBOL_GPL(gru_copy_gpa); | ||
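A short usage sketch (hedged: assumes dst and src are ordinary kernel buffers for which uv_gpa() yields valid global physical addresses):

	static int gru_copy_page(void *dst, void *src)
	{
		return gru_copy_gpa(uv_gpa(dst), uv_gpa(src), PAGE_SIZE);
	}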
584 | |||
585 | /* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ | ||
586 | /* Temp - will delete after we gain confidence in the GRU */ | ||
587 | static __cacheline_aligned unsigned long word0; | ||
588 | static __cacheline_aligned unsigned long word1; | ||
589 | |||
590 | static int quicktest(struct gru_state *gru) | ||
591 | { | ||
592 | void *cb; | ||
593 | void *ds; | ||
594 | unsigned long *p; | ||
595 | |||
596 | cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0); | ||
597 | ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0); | ||
598 | p = ds; | ||
599 | word0 = MAGIC; | ||
600 | |||
601 | gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA); | ||
602 | if (gru_wait(cb) != CBS_IDLE) | ||
603 | BUG(); | ||
604 | |||
605 | if (*(unsigned long *)ds != MAGIC) | ||
606 | BUG(); | ||
607 | gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA); | ||
608 | if (gru_wait(cb) != CBS_IDLE) | ||
609 | BUG(); | ||
610 | |||
611 | if (word0 != word1 || word0 != MAGIC) { | ||
612 | printk(KERN_ERR | ||
613 | "GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n", | ||
614 | gru->gs_gid, word1, MAGIC); | ||
615 | BUG(); /* ZZZ should not be fatal */ | ||
616 | } | ||
617 | |||
618 | return 0; | ||
619 | } | ||
620 | |||
621 | |||
622 | int gru_kservices_init(struct gru_state *gru) | ||
623 | { | ||
624 | struct gru_blade_state *bs; | ||
625 | struct gru_context_configuration_handle *cch; | ||
626 | unsigned long cbr_map, dsr_map; | ||
627 | int err, num, cpus_possible; | ||
628 | |||
629 | /* | ||
630 | * Currently, resources are reserved ONLY on the second chiplet | ||
631 | * on each blade. This leaves ALL resources on chiplet 0 available | ||
632 | * for user code. | ||
633 | */ | ||
634 | bs = gru->gs_blade; | ||
635 | if (gru != &bs->bs_grus[1]) | ||
636 | return 0; | ||
637 | |||
638 | cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id); | ||
639 | |||
640 | num = GRU_NUM_KERNEL_CBR * cpus_possible; | ||
641 | cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL); | ||
642 | gru->gs_reserved_cbrs += num; | ||
643 | |||
644 | num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible; | ||
645 | dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL); | ||
646 | gru->gs_reserved_dsr_bytes += num; | ||
647 | |||
648 | gru->gs_active_contexts++; | ||
649 | __set_bit(KERNEL_CTXNUM, &gru->gs_context_map); | ||
650 | cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM); | ||
651 | |||
652 | bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, | ||
653 | KERNEL_CTXNUM, 0); | ||
654 | bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, | ||
655 | KERNEL_CTXNUM, 0); | ||
656 | |||
657 | lock_cch_handle(cch); | ||
658 | cch->tfm_fault_bit_enable = 0; | ||
659 | cch->tlb_int_enable = 0; | ||
660 | cch->tfm_done_bit_enable = 0; | ||
661 | cch->unmap_enable = 1; | ||
662 | err = cch_allocate(cch, 0, cbr_map, dsr_map); | ||
663 | if (err) { | ||
664 | gru_dbg(grudev, | ||
665 | "Unable to allocate kernel CCH: gru %d, err %d\n", | ||
666 | gru->gs_gid, err); | ||
667 | BUG(); | ||
668 | } | ||
669 | err = cch_start(cch); | ||
670 | if (err) { | ||
671 | gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n", gru->gs_gid, err); | ||
672 | BUG(); | ||
673 | } | ||
674 | unlock_cch_handle(cch); | ||
675 | |||
676 | if (gru_options & GRU_QUICKLOOK) | ||
677 | quicktest(gru); | ||
678 | return 0; | ||
679 | } | ||
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h new file mode 100644 index 000000000000..eb17e0a3ac61 --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.h | |||
@@ -0,0 +1,134 @@ | |||
1 | |||
2 | /* | ||
3 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | #ifndef __GRU_KSERVICES_H_ | ||
20 | #define __GRU_KSERVICES_H_ | ||
21 | |||
22 | |||
23 | /* | ||
24 | * Message queues using the GRU to send/receive messages. | ||
25 | * | ||
26 | * These functions allow the user to create a message queue for | ||
27 | * sending/receiving 1 or 2 cacheline messages using the GRU. | ||
28 | * | ||
29 | * Processes SENDING messages will use a kernel CBR/DSR to send | ||
30 | * the message. This is transparent to the caller. | ||
31 | * | ||
32 | * The receiver does not use any GRU resources. | ||
33 | * | ||
34 | * The functions support: | ||
35 | * - single receiver | ||
36 | * - multiple senders | ||
37 | * - cross partition messages | ||
38 | * | ||
39 | * Missing features ZZZ: | ||
40 | * - user options for dealing with timeouts, queue full, etc. | ||
41 | * - gru_create_message_queue() needs interrupt vector info | ||
42 | */ | ||
43 | |||
44 | /* | ||
45 | * Initialize a user allocated chunk of memory to be used as | ||
46 | * a message queue. The caller must ensure that the queue is | ||
47 | * in contiguous physical memory and is cacheline aligned. | ||
48 | * | ||
49 | * Message queue size is the total number of bytes allocated | ||
50 | * to the queue including a 2 cacheline header that is used | ||
51 | * to manage the queue. | ||
52 | * | ||
53 | * Input: | ||
54 | * p pointer to user allocated memory. | ||
55 | * bytes size of message queue in bytes | ||
56 | * | ||
57 | * Errors: | ||
58 | * 0 OK | ||
59 | * >0 error | ||
60 | */ | ||
61 | extern int gru_create_message_queue(void *p, unsigned int bytes); | ||
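For illustration, a minimal sketch of creating a queue from kernel code (the helper name, 4KB size, and error handling are hypothetical; kmalloc() memory of this size is physically contiguous and cacheline aligned):

	#include <linux/slab.h>
	#include "grukservices.h"

	static void *example_mq_create(void)
	{
		void *mq = kmalloc(4096, GFP_KERNEL);	/* contiguous, aligned */

		if (!mq)
			return NULL;
		/* the 4096 bytes include the 2 cacheline queue header */
		if (gru_create_message_queue(mq, 4096)) {
			kfree(mq);
			return NULL;
		}
		return mq;
	}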
62 | |||
63 | /* | ||
64 | * Send a message to a message queue. | ||
65 | * | ||
66 | * Note: The message queue transport mechanism uses the first 32 | ||
67 | * bits of the message. Users should avoid using these bits. | ||
68 | * | ||
69 | * | ||
70 | * Input: | ||
71 | * mq_gpa message queue - must be a UV global physical address | ||
72 | * mesg pointer to message. Must be 64-bit aligned | ||
73 | * bytes size of message in bytes | ||
74 | * | ||
75 | * Output: | ||
76 | * 0 message sent | ||
77 | * >0 Send failure - see error codes below | ||
78 | * | ||
79 | */ | ||
80 | extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg, | ||
81 | unsigned int bytes); | ||
82 | |||
83 | /* Status values for gru_send_message_gpa() */ | ||
84 | #define MQE_OK 0 /* message sent successfully */ | ||
85 | #define MQE_CONGESTION 1 /* temporary congestion, try again */ | ||
86 | #define MQE_QUEUE_FULL 2 /* queue is full */ | ||
87 | #define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */ | ||
88 | #define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */ | ||
89 | #define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */ | ||
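A hedged sketch of a sender using these codes (the helper name and retry policy are assumptions; uv_gpa() from <asm/uv/uv_hub.h> converts a kernel virtual address to a UV global physical address; mesg must be 64-bit aligned):

	static int example_send(void *mq, void *mesg, unsigned int bytes)
	{
		int ret;

		do {	/* retry only transient congestion */
			ret = gru_send_message_gpa(uv_gpa(mq), mesg, bytes);
		} while (ret == MQE_CONGESTION);
		return ret;	/* MQE_OK on success */
	}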
90 | |||
91 | /* | ||
92 | * Advance the receive pointer for the message queue to the next message. | ||
93 | * Note: current API requires messages to be gotten & freed in order. Future | ||
94 | * API extensions may allow for out-of-order freeing. | ||
95 | * | ||
96 | * Input | ||
97 | * mq message queue | ||
98 | * mesq message being freed | ||
99 | */ | ||
100 | extern void gru_free_message(void *mq, void *mesq); | ||
101 | |||
102 | /* | ||
103 | * Get next message from message queue. Returns pointer to | ||
104 | * message OR NULL if no message present. | ||
105 | * User must call gru_free_message() after message is processed | ||
106 | * in order to move the queue pointers to next message. | ||
107 | * | ||
108 | * Input | ||
109 | * mq message queue | ||
110 | * | ||
111 | * Output: | ||
112 | * p pointer to message | ||
113 | * NULL no message available | ||
114 | */ | ||
115 | extern void *gru_get_next_message(void *mq); | ||
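A sketch of the single receiver draining its queue (helper name hypothetical; recall the transport reserves the first 32 bits of each message):

	static void example_drain(void *mq)
	{
		void *mesg;

		while ((mesg = gru_get_next_message(mq)) != NULL) {
			/* ... process payload ... */
			gru_free_message(mq, mesg);	/* free in order */
		}
	}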
116 | |||
117 | |||
118 | /* | ||
119 | * Copy data using the GRU. Source or destination can be located in a remote | ||
120 | * partition. | ||
121 | * | ||
122 | * Input: | ||
123 | * dest_gpa destination global physical address | ||
124 | * src_gpa source global physical address | ||
125 | * bytes number of bytes to copy | ||
126 | * | ||
127 | * Output: | ||
128 | * 0 OK | ||
129 | * >0 error | ||
130 | */ | ||
131 | extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, | ||
132 | unsigned int bytes); | ||
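For example, a single page copy might look like this sketch (dst and src are hypothetical kernel buffers; either side may reside in a remote partition):

	int err = gru_copy_gpa(uv_gpa(dst), uv_gpa(src), PAGE_SIZE);
	if (err)
		return err;	/* >0 is a CB error status */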
133 | |||
134 | #endif /* __GRU_KSERVICES_H_ */ | ||
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h new file mode 100644 index 000000000000..e56e196a6998 --- /dev/null +++ b/drivers/misc/sgi-gru/grulib.h | |||
@@ -0,0 +1,97 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU Lesser General Public License as published by | ||
6 | * the Free Software Foundation; either version 2.1 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU Lesser General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU Lesser General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #ifndef __GRULIB_H__ | ||
20 | #define __GRULIB_H__ | ||
21 | |||
22 | #define GRU_BASENAME "gru" | ||
23 | #define GRU_FULLNAME "/dev/gru" | ||
24 | #define GRU_IOCTL_NUM 'G' | ||
25 | |||
26 | /* | ||
27 | * Maximum number of GRU segments that a user can have open | ||
28 | * ZZZ temp - set high for testing. Revisit. | ||
29 | */ | ||
30 | #define GRU_MAX_OPEN_CONTEXTS 32 | ||
31 | |||
32 | /* Set Number of Request Blocks */ | ||
33 | #define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *) | ||
34 | |||
35 | /* Register task as using the slice */ | ||
36 | #define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *) | ||
37 | |||
38 | /* Fetch exception detail */ | ||
39 | #define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *) | ||
40 | |||
41 | /* For user call_os handling - normally a TLB fault */ | ||
42 | #define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *) | ||
43 | |||
44 | /* For user unload context */ | ||
45 | #define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *) | ||
46 | |||
47 | /* For fetching GRU chiplet status */ | ||
48 | #define GRU_GET_CHIPLET_STATUS _IOWR(GRU_IOCTL_NUM, 10, void *) | ||
49 | |||
50 | /* For user TLB flushing (primarily for tests) */ | ||
51 | #define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *) | ||
52 | |||
53 | /* Get some config options (primarily for tests & emulator) */ | ||
54 | #define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *) | ||
55 | |||
56 | #define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th)) | ||
57 | #define THREAD_POINTER(p, th) ((p) + GRU_GSEG_PAGESIZE * (th)) | ||
58 | |||
59 | /* | ||
60 | * Structure used to pass context creation parameters to the driver | ||
61 | */ | ||
62 | struct gru_create_context_req { | ||
63 | unsigned long gseg; | ||
64 | unsigned int data_segment_bytes; | ||
65 | unsigned int control_blocks; | ||
66 | unsigned int maximum_thread_count; | ||
67 | unsigned int options; | ||
68 | }; | ||
69 | |||
70 | /* | ||
71 | * Structure used to pass unload context parameters to the driver | ||
72 | */ | ||
73 | struct gru_unload_context_req { | ||
74 | unsigned long gseg; | ||
75 | }; | ||
76 | |||
77 | /* | ||
78 | * Structure used to pass TLB flush parameters to the driver | ||
79 | */ | ||
80 | struct gru_flush_tlb_req { | ||
81 | unsigned long gseg; | ||
82 | unsigned long vaddr; | ||
83 | size_t len; | ||
84 | }; | ||
85 | |||
86 | /* | ||
87 | * GRU configuration info (temp - for testing) | ||
88 | */ | ||
89 | struct gru_config_info { | ||
90 | int cpus; | ||
91 | int blades; | ||
92 | int nodes; | ||
93 | int chiplets; | ||
94 | int fill[16]; | ||
95 | }; | ||
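Primarily for tests, a user-space sketch of fetching this structure; passing a pointer to the struct as the ioctl argument is an assumption based on the _IOWR definitions above:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include "grulib.h"

	int main(void)
	{
		struct gru_config_info info;
		int fd = open(GRU_FULLNAME, O_RDWR);

		if (fd < 0)
			return 1;
		if (ioctl(fd, GRU_GET_CONFIG_INFO, &info) == 0)
			printf("cpus %d, blades %d, nodes %d, chiplets %d\n",
			       info.cpus, info.blades, info.nodes, info.chiplets);
		close(fd);
		return 0;
	}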
96 | |||
97 | #endif /* __GRULIB_H__ */ | ||
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c new file mode 100644 index 000000000000..0eeb8dddd2f5 --- /dev/null +++ b/drivers/misc/sgi-gru/grumain.c | |||
@@ -0,0 +1,802 @@ | |||
1 | /* | ||
2 | * SN Platform GRU Driver | ||
3 | * | ||
4 | * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD | ||
5 | * | ||
6 | * This file is subject to the terms and conditions of the GNU General Public | ||
7 | * License. See the file "COPYING" in the main directory of this archive | ||
8 | * for more details. | ||
9 | * | ||
10 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
11 | */ | ||
12 | |||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/mm.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/sched.h> | ||
18 | #include <linux/device.h> | ||
19 | #include <linux/list.h> | ||
20 | #include <asm/uv/uv_hub.h> | ||
21 | #include "gru.h" | ||
22 | #include "grutables.h" | ||
23 | #include "gruhandles.h" | ||
24 | |||
25 | unsigned long gru_options __read_mostly; | ||
26 | |||
27 | static struct device_driver gru_driver = { | ||
28 | .name = "gru" | ||
29 | }; | ||
30 | |||
31 | static struct device gru_device = { | ||
32 | .bus_id = {0}, | ||
33 | .driver = &gru_driver, | ||
34 | }; | ||
35 | |||
36 | struct device *grudev = &gru_device; | ||
37 | |||
38 | /* | ||
39 | * Select a gru fault map to be used by the current cpu. Note that | ||
40 | * multiple cpus may be using the same map. | ||
41 | * ZZZ should "shift" be used?? Depends on HT cpu numbering | ||
42 | * ZZZ should be inline but did not work on emulator | ||
43 | */ | ||
44 | int gru_cpu_fault_map_id(void) | ||
45 | { | ||
46 | return uv_blade_processor_id() % GRU_NUM_TFM; | ||
47 | } | ||
48 | |||
49 | /*--------- ASID Management ------------------------------------------- | ||
50 | * | ||
51 | * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID. | ||
52 | * Once MAX is reached, flush the TLB & start over. However, | ||
53 | * some asids may still be in use. There won't be many (percentage wise) still | ||
54 | * in use. Search active contexts & determine the value of the first | ||
55 | * asid in use ("x"s below). Set "limit" to this value. | ||
56 | * This defines a block of assignable asids. | ||
57 | * | ||
58 | * When "limit" is reached, search forward from limit+1 and determine the | ||
59 | * next block of assignable asids. | ||
60 | * | ||
61 | * Repeat until MAX_ASID is reached, then start over again. | ||
62 | * | ||
63 | * Each time MAX_ASID is reached, increment the asid generation. Since | ||
64 | * the search for in-use asids only checks contexts with GRUs currently | ||
65 | * assigned, asids in some contexts will be missed. Prior to loading | ||
66 | * a context, the asid generation of the GTS asid is rechecked. If it | ||
67 | * doesn't match the current generation, a new asid will be assigned. | ||
68 | * | ||
69 | * 0---------------x------------x---------------------x----| | ||
70 | * ^-next ^-limit ^-MAX_ASID | ||
71 | * | ||
72 | * All asid manipulation & context loading/unloading is protected by the | ||
73 | * gs_lock. | ||
74 | */ | ||
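/*
 * For example (asid values hypothetical): after a wrap, if the scan finds
 * in-use asids 0x100 and 0x380, "limit" becomes 0x100 and asids below it
 * are handed out without rescanning; at 0x100 the next scan skips past the
 * in-use asid and sets "limit" to 0x380, and so on until MAX_ASID forces
 * another wrap and a generation increment.
 */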
75 | |||
76 | /* Hit the asid limit. Start over */ | ||
77 | static int gru_wrap_asid(struct gru_state *gru) | ||
78 | { | ||
79 | gru_dbg(grudev, "gru %p\n", gru); | ||
80 | STAT(asid_wrap); | ||
81 | gru->gs_asid_gen++; | ||
82 | gru_flush_all_tlb(gru); | ||
83 | return MIN_ASID; | ||
84 | } | ||
85 | |||
86 | /* Find the next chunk of unused asids */ | ||
87 | static int gru_reset_asid_limit(struct gru_state *gru, int asid) | ||
88 | { | ||
89 | int i, gid, inuse_asid, limit; | ||
90 | |||
91 | gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid); | ||
92 | STAT(asid_next); | ||
93 | limit = MAX_ASID; | ||
94 | if (asid >= limit) | ||
95 | asid = gru_wrap_asid(gru); | ||
96 | gid = gru->gs_gid; | ||
97 | again: | ||
98 | for (i = 0; i < GRU_NUM_CCH; i++) { | ||
99 | if (!gru->gs_gts[i]) | ||
100 | continue; | ||
101 | inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid; | ||
102 | gru_dbg(grudev, "gru %p, inuse_asid 0x%x, cxtnum %d, gts %p\n", | ||
103 | gru, inuse_asid, i, gru->gs_gts[i]); | ||
104 | if (inuse_asid == asid) { | ||
105 | asid += ASID_INC; | ||
106 | if (asid >= limit) { | ||
107 | /* | ||
108 | * empty range: reset the range limit and | ||
109 | * start over | ||
110 | */ | ||
111 | limit = MAX_ASID; | ||
112 | if (asid >= MAX_ASID) | ||
113 | asid = gru_wrap_asid(gru); | ||
114 | goto again; | ||
115 | } | ||
116 | } | ||
117 | |||
118 | if ((inuse_asid > asid) && (inuse_asid < limit)) | ||
119 | limit = inuse_asid; | ||
120 | } | ||
121 | gru->gs_asid_limit = limit; | ||
122 | gru->gs_asid = asid; | ||
123 | gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid, | ||
124 | limit); | ||
125 | return asid; | ||
126 | } | ||
127 | |||
128 | /* Assign a new ASID to a thread context. */ | ||
129 | static int gru_assign_asid(struct gru_state *gru) | ||
130 | { | ||
131 | int asid; | ||
132 | |||
133 | spin_lock(&gru->gs_asid_lock); | ||
134 | gru->gs_asid += ASID_INC; | ||
135 | asid = gru->gs_asid; | ||
136 | if (asid >= gru->gs_asid_limit) | ||
137 | asid = gru_reset_asid_limit(gru, asid); | ||
138 | spin_unlock(&gru->gs_asid_lock); | ||
139 | |||
140 | gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid); | ||
141 | return asid; | ||
142 | } | ||
143 | |||
144 | /* | ||
145 | * Clear n bits in a word. Return a word indicating the bits that were cleared. | ||
146 | * Optionally, build an array of chars that contain the bit numbers allocated. | ||
147 | */ | ||
148 | static unsigned long reserve_resources(unsigned long *p, int n, int mmax, | ||
149 | char *idx) | ||
150 | { | ||
151 | unsigned long bits = 0; | ||
152 | int i; | ||
153 | |||
154 | do { | ||
155 | i = find_first_bit(p, mmax); | ||
156 | if (i == mmax) | ||
157 | BUG(); | ||
158 | __clear_bit(i, p); | ||
159 | __set_bit(i, &bits); | ||
160 | if (idx) | ||
161 | *idx++ = i; | ||
162 | } while (--n); | ||
163 | return bits; | ||
164 | } | ||
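/*
 * For illustration: with *p == 0x2b (bits 0, 1, 3, 5 set) and n == 2, the
 * loop clears bits 0 and 1, stores {0, 1} in idx, leaves *p == 0x28, and
 * returns 0x3.
 */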
165 | |||
166 | unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, | ||
167 | char *cbmap) | ||
168 | { | ||
169 | return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU, | ||
170 | cbmap); | ||
171 | } | ||
172 | |||
173 | unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count, | ||
174 | char *dsmap) | ||
175 | { | ||
176 | return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU, | ||
177 | dsmap); | ||
178 | } | ||
179 | |||
180 | static void reserve_gru_resources(struct gru_state *gru, | ||
181 | struct gru_thread_state *gts) | ||
182 | { | ||
183 | gru->gs_active_contexts++; | ||
184 | gts->ts_cbr_map = | ||
185 | gru_reserve_cb_resources(gru, gts->ts_cbr_au_count, | ||
186 | gts->ts_cbr_idx); | ||
187 | gts->ts_dsr_map = | ||
188 | gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL); | ||
189 | } | ||
190 | |||
191 | static void free_gru_resources(struct gru_state *gru, | ||
192 | struct gru_thread_state *gts) | ||
193 | { | ||
194 | gru->gs_active_contexts--; | ||
195 | gru->gs_cbr_map |= gts->ts_cbr_map; | ||
196 | gru->gs_dsr_map |= gts->ts_dsr_map; | ||
197 | } | ||
198 | |||
199 | /* | ||
200 | * Check if a GRU has sufficient free resources to satisfy an allocation | ||
201 | * request. Note: GRU locks may or may not be held when this is called. If | ||
202 | * not held, recheck after acquiring the appropriate locks. | ||
203 | * | ||
204 | * Returns 1 if sufficient resources, 0 if not | ||
205 | */ | ||
206 | static int check_gru_resources(struct gru_state *gru, int cbr_au_count, | ||
207 | int dsr_au_count, int max_active_contexts) | ||
208 | { | ||
209 | return hweight64(gru->gs_cbr_map) >= cbr_au_count | ||
210 | && hweight64(gru->gs_dsr_map) >= dsr_au_count | ||
211 | && gru->gs_active_contexts < max_active_contexts; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | * TLB management requires tracking all GRU chiplets that have loaded a GSEG | ||
216 | * context. | ||
217 | */ | ||
218 | static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms, | ||
219 | int ctxnum) | ||
220 | { | ||
221 | struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid]; | ||
222 | unsigned short ctxbitmap = (1 << ctxnum); | ||
223 | int asid; | ||
224 | |||
225 | spin_lock(&gms->ms_asid_lock); | ||
226 | asid = asids->mt_asid; | ||
227 | |||
228 | if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) { | ||
229 | asid = gru_assign_asid(gru); | ||
230 | asids->mt_asid = asid; | ||
231 | asids->mt_asid_gen = gru->gs_asid_gen; | ||
232 | STAT(asid_new); | ||
233 | } else { | ||
234 | STAT(asid_reuse); | ||
235 | } | ||
236 | |||
237 | BUG_ON(asids->mt_ctxbitmap & ctxbitmap); | ||
238 | asids->mt_ctxbitmap |= ctxbitmap; | ||
239 | if (!test_bit(gru->gs_gid, gms->ms_asidmap)) | ||
240 | __set_bit(gru->gs_gid, gms->ms_asidmap); | ||
241 | spin_unlock(&gms->ms_asid_lock); | ||
242 | |||
243 | gru_dbg(grudev, | ||
244 | "gru %x, gms %p, ctxnum 0x%d, asid 0x%x, asidmap 0x%lx\n", | ||
245 | gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]); | ||
246 | return asid; | ||
247 | } | ||
248 | |||
249 | static void gru_unload_mm_tracker(struct gru_state *gru, | ||
250 | struct gru_mm_struct *gms, int ctxnum) | ||
251 | { | ||
252 | struct gru_mm_tracker *asids; | ||
253 | unsigned short ctxbitmap; | ||
254 | |||
255 | asids = &gms->ms_asids[gru->gs_gid]; | ||
256 | ctxbitmap = (1 << ctxnum); | ||
257 | spin_lock(&gms->ms_asid_lock); | ||
258 | BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap); | ||
259 | asids->mt_ctxbitmap ^= ctxbitmap; | ||
260 | gru_dbg(grudev, "gru %x, gms %p, ctxnum 0x%d, asidmap 0x%lx\n", | ||
261 | gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]); | ||
262 | spin_unlock(&gms->ms_asid_lock); | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Decrement the reference count on a GTS structure. Free the structure | ||
267 | * if the reference count goes to zero. | ||
268 | */ | ||
269 | void gts_drop(struct gru_thread_state *gts) | ||
270 | { | ||
271 | if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) { | ||
272 | gru_drop_mmu_notifier(gts->ts_gms); | ||
273 | kfree(gts); | ||
274 | STAT(gts_free); | ||
275 | } | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * Locate the GTS structure for the current thread. | ||
280 | */ | ||
281 | static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data | ||
282 | *vdata, int tsid) | ||
283 | { | ||
284 | struct gru_thread_state *gts; | ||
285 | |||
286 | list_for_each_entry(gts, &vdata->vd_head, ts_next) | ||
287 | if (gts->ts_tsid == tsid) | ||
288 | return gts; | ||
289 | return NULL; | ||
290 | } | ||
291 | |||
292 | /* | ||
293 | * Allocate a thread state structure. | ||
294 | */ | ||
295 | static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, | ||
296 | struct gru_vma_data *vdata, | ||
297 | int tsid) | ||
298 | { | ||
299 | struct gru_thread_state *gts; | ||
300 | int bytes; | ||
301 | |||
302 | bytes = DSR_BYTES(vdata->vd_dsr_au_count) + | ||
303 | CBR_BYTES(vdata->vd_cbr_au_count); | ||
304 | bytes += sizeof(struct gru_thread_state); | ||
305 | gts = kzalloc(bytes, GFP_KERNEL); | ||
306 | if (!gts) | ||
307 | return NULL; | ||
308 | |||
309 | STAT(gts_alloc); | ||
310 | atomic_set(&gts->ts_refcnt, 1); | ||
311 | mutex_init(&gts->ts_ctxlock); | ||
312 | gts->ts_cbr_au_count = vdata->vd_cbr_au_count; | ||
313 | gts->ts_dsr_au_count = vdata->vd_dsr_au_count; | ||
314 | gts->ts_user_options = vdata->vd_user_options; | ||
315 | gts->ts_tsid = tsid; | ||
317 | gts->ts_ctxnum = NULLCTX; | ||
318 | gts->ts_mm = current->mm; | ||
319 | gts->ts_vma = vma; | ||
320 | gts->ts_tlb_int_select = -1; | ||
321 | gts->ts_gms = gru_register_mmu_notifier(); | ||
322 | if (!gts->ts_gms) | ||
323 | goto err; | ||
324 | |||
325 | gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts); | ||
326 | return gts; | ||
327 | |||
328 | err: | ||
329 | gts_drop(gts); | ||
330 | return NULL; | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * Allocate a vma private data structure. | ||
335 | */ | ||
336 | struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid) | ||
337 | { | ||
338 | struct gru_vma_data *vdata = NULL; | ||
339 | |||
340 | vdata = kmalloc(sizeof(*vdata), GFP_KERNEL); | ||
341 | if (!vdata) | ||
342 | return NULL; | ||
343 | |||
344 | INIT_LIST_HEAD(&vdata->vd_head); | ||
345 | spin_lock_init(&vdata->vd_lock); | ||
346 | gru_dbg(grudev, "alloc vdata %p\n", vdata); | ||
347 | return vdata; | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * Find the thread state structure for the current thread. | ||
352 | */ | ||
353 | struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, | ||
354 | int tsid) | ||
355 | { | ||
356 | struct gru_vma_data *vdata = vma->vm_private_data; | ||
357 | struct gru_thread_state *gts; | ||
358 | |||
359 | spin_lock(&vdata->vd_lock); | ||
360 | gts = gru_find_current_gts_nolock(vdata, tsid); | ||
361 | spin_unlock(&vdata->vd_lock); | ||
362 | gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); | ||
363 | return gts; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * Allocate a new thread state for a GSEG. Note that races may allow | ||
368 | * another thread to race to create a gts. | ||
369 | */ | ||
370 | struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, | ||
371 | int tsid) | ||
372 | { | ||
373 | struct gru_vma_data *vdata = vma->vm_private_data; | ||
374 | struct gru_thread_state *gts, *ngts; | ||
375 | |||
376 | gts = gru_alloc_gts(vma, vdata, tsid); | ||
377 | if (!gts) | ||
378 | return NULL; | ||
379 | |||
380 | spin_lock(&vdata->vd_lock); | ||
381 | ngts = gru_find_current_gts_nolock(vdata, tsid); | ||
382 | if (ngts) { | ||
383 | gts_drop(gts); | ||
384 | gts = ngts; | ||
385 | STAT(gts_double_allocate); | ||
386 | } else { | ||
387 | list_add(&gts->ts_next, &vdata->vd_head); | ||
388 | } | ||
389 | spin_unlock(&vdata->vd_lock); | ||
390 | gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); | ||
391 | return gts; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Free the GRU context assigned to the thread state. | ||
396 | */ | ||
397 | static void gru_free_gru_context(struct gru_thread_state *gts) | ||
398 | { | ||
399 | struct gru_state *gru; | ||
400 | |||
401 | gru = gts->ts_gru; | ||
402 | gru_dbg(grudev, "gts %p, gru %p\n", gts, gru); | ||
403 | |||
404 | spin_lock(&gru->gs_lock); | ||
405 | gru->gs_gts[gts->ts_ctxnum] = NULL; | ||
406 | free_gru_resources(gru, gts); | ||
407 | BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0); | ||
408 | __clear_bit(gts->ts_ctxnum, &gru->gs_context_map); | ||
409 | gts->ts_ctxnum = NULLCTX; | ||
410 | gts->ts_gru = NULL; | ||
411 | spin_unlock(&gru->gs_lock); | ||
412 | |||
413 | gts_drop(gts); | ||
414 | STAT(free_context); | ||
415 | } | ||
416 | |||
417 | /* | ||
418 | * Prefetching cachelines helps hardware performance. | ||
419 | * (Strictly a performance enhancement. Not functionally required). | ||
420 | */ | ||
421 | static void prefetch_data(void *p, int num, int stride) | ||
422 | { | ||
423 | while (num-- > 0) { | ||
424 | prefetchw(p); | ||
425 | p += stride; | ||
426 | } | ||
427 | } | ||
428 | |||
429 | static inline long gru_copy_handle(void *d, void *s) | ||
430 | { | ||
431 | memcpy(d, s, GRU_HANDLE_BYTES); | ||
432 | return GRU_HANDLE_BYTES; | ||
433 | } | ||
434 | |||
435 | /* rewrite in assembly & use lots of prefetch */ | ||
436 | static void gru_load_context_data(void *save, void *grubase, int ctxnum, | ||
437 | unsigned long cbrmap, unsigned long dsrmap) | ||
438 | { | ||
439 | void *gseg, *cb, *cbe; | ||
440 | unsigned long length; | ||
441 | int i, scr; | ||
442 | |||
443 | gseg = grubase + ctxnum * GRU_GSEG_STRIDE; | ||
444 | length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; | ||
445 | prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES, | ||
446 | GRU_CACHE_LINE_BYTES); | ||
447 | |||
448 | cb = gseg + GRU_CB_BASE; | ||
449 | cbe = grubase + GRU_CBE_BASE; | ||
450 | for_each_cbr_in_allocation_map(i, &cbrmap, scr) { | ||
451 | prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES); | ||
452 | prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1, | ||
453 | GRU_CACHE_LINE_BYTES); | ||
454 | cb += GRU_HANDLE_STRIDE; | ||
455 | } | ||
456 | |||
457 | cb = gseg + GRU_CB_BASE; | ||
458 | for_each_cbr_in_allocation_map(i, &cbrmap, scr) { | ||
459 | save += gru_copy_handle(cb, save); | ||
460 | save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save); | ||
461 | cb += GRU_HANDLE_STRIDE; | ||
462 | } | ||
463 | |||
464 | memcpy(gseg + GRU_DS_BASE, save, length); | ||
465 | } | ||
466 | |||
467 | static void gru_unload_context_data(void *save, void *grubase, int ctxnum, | ||
468 | unsigned long cbrmap, unsigned long dsrmap) | ||
469 | { | ||
470 | void *gseg, *cb, *cbe; | ||
471 | unsigned long length; | ||
472 | int i, scr; | ||
473 | |||
474 | gseg = grubase + ctxnum * GRU_GSEG_STRIDE; | ||
475 | |||
476 | cb = gseg + GRU_CB_BASE; | ||
477 | cbe = grubase + GRU_CBE_BASE; | ||
478 | for_each_cbr_in_allocation_map(i, &cbrmap, scr) { | ||
479 | save += gru_copy_handle(save, cb); | ||
480 | save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE); | ||
481 | cb += GRU_HANDLE_STRIDE; | ||
482 | } | ||
483 | length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; | ||
484 | memcpy(save, gseg + GRU_DS_BASE, length); | ||
485 | } | ||
486 | |||
487 | void gru_unload_context(struct gru_thread_state *gts, int savestate) | ||
488 | { | ||
489 | struct gru_state *gru = gts->ts_gru; | ||
490 | struct gru_context_configuration_handle *cch; | ||
491 | int ctxnum = gts->ts_ctxnum; | ||
492 | |||
493 | zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); | ||
494 | cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); | ||
495 | |||
496 | lock_cch_handle(cch); | ||
497 | if (cch_interrupt_sync(cch)) | ||
498 | BUG(); | ||
499 | gru_dbg(grudev, "gts %p\n", gts); | ||
500 | |||
501 | gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum); | ||
502 | if (savestate) | ||
503 | gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, | ||
504 | ctxnum, gts->ts_cbr_map, | ||
505 | gts->ts_dsr_map); | ||
506 | |||
507 | if (cch_deallocate(cch)) | ||
508 | BUG(); | ||
509 | gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */ | ||
510 | unlock_cch_handle(cch); | ||
511 | |||
512 | gru_free_gru_context(gts); | ||
513 | STAT(unload_context); | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * Load a GRU context by copying it from the thread data structure in memory | ||
518 | * to the GRU. | ||
519 | */ | ||
520 | static void gru_load_context(struct gru_thread_state *gts) | ||
521 | { | ||
522 | struct gru_state *gru = gts->ts_gru; | ||
523 | struct gru_context_configuration_handle *cch; | ||
524 | int err, asid, ctxnum = gts->ts_ctxnum; | ||
525 | |||
526 | gru_dbg(grudev, "gts %p\n", gts); | ||
527 | cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); | ||
528 | |||
529 | lock_cch_handle(cch); | ||
530 | asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum); | ||
531 | cch->tfm_fault_bit_enable = | ||
532 | (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL | ||
533 | || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); | ||
534 | cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); | ||
535 | if (cch->tlb_int_enable) { | ||
536 | gts->ts_tlb_int_select = gru_cpu_fault_map_id(); | ||
537 | cch->tlb_int_select = gts->ts_tlb_int_select; | ||
538 | } | ||
539 | cch->tfm_done_bit_enable = 0; | ||
540 | err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map); | ||
541 | if (err) { | ||
542 | gru_dbg(grudev, | ||
543 | "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n", | ||
544 | err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map); | ||
545 | BUG(); | ||
546 | } | ||
547 | |||
548 | gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, | ||
549 | gts->ts_cbr_map, gts->ts_dsr_map); | ||
550 | |||
551 | if (cch_start(cch)) | ||
552 | BUG(); | ||
553 | unlock_cch_handle(cch); | ||
554 | |||
555 | STAT(load_context); | ||
556 | } | ||
557 | |||
558 | /* | ||
559 | * Update fields in an active CCH: | ||
560 | * - retarget interrupts on local blade | ||
561 | * - force a delayed context unload by clearing the CCH asids. This | ||
562 | * forces TLB misses for new GRU instructions. The context is unloaded | ||
563 | * when the next TLB miss occurs. | ||
564 | */ | ||
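/*
 * For example, gru_retarget_intr() below passes the new fault map id to
 * retarget interrupts; a negative int_select instead takes the unload
 * path: the asids are zeroed and ts_force_unload is set, so the context
 * is unloaded on its next TLB miss.
 */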
565 | static int gru_update_cch(struct gru_thread_state *gts, int int_select) | ||
566 | { | ||
567 | struct gru_context_configuration_handle *cch; | ||
568 | struct gru_state *gru = gts->ts_gru; | ||
569 | int i, ctxnum = gts->ts_ctxnum, ret = 0; | ||
570 | |||
571 | cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); | ||
572 | |||
573 | lock_cch_handle(cch); | ||
574 | if (cch->state == CCHSTATE_ACTIVE) { | ||
575 | if (gru->gs_gts[gts->ts_ctxnum] != gts) | ||
576 | goto exit; | ||
577 | if (cch_interrupt(cch)) | ||
578 | BUG(); | ||
579 | if (int_select >= 0) { | ||
580 | gts->ts_tlb_int_select = int_select; | ||
581 | cch->tlb_int_select = int_select; | ||
582 | } else { | ||
583 | for (i = 0; i < 8; i++) | ||
584 | cch->asid[i] = 0; | ||
585 | cch->tfm_fault_bit_enable = 0; | ||
586 | cch->tlb_int_enable = 0; | ||
587 | gts->ts_force_unload = 1; | ||
588 | } | ||
589 | if (cch_start(cch)) | ||
590 | BUG(); | ||
591 | ret = 1; | ||
592 | } | ||
593 | exit: | ||
594 | unlock_cch_handle(cch); | ||
595 | return ret; | ||
596 | } | ||
597 | |||
598 | /* | ||
599 | * Update CCH tlb interrupt select. Required when all the following is true: | ||
600 | * - task's GRU context is loaded into a GRU | ||
601 | * - task is using interrupt notification for TLB faults | ||
602 | * - task has migrated to a different cpu on the same blade where | ||
603 | * it was previously running. | ||
604 | */ | ||
605 | static int gru_retarget_intr(struct gru_thread_state *gts) | ||
606 | { | ||
607 | if (gts->ts_tlb_int_select < 0 | ||
608 | || gts->ts_tlb_int_select == gru_cpu_fault_map_id()) | ||
609 | return 0; | ||
610 | |||
611 | gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, | ||
612 | gru_cpu_fault_map_id()); | ||
613 | return gru_update_cch(gts, gru_cpu_fault_map_id()); | ||
614 | } | ||
615 | |||
616 | |||
617 | /* | ||
618 | * Insufficient GRU resources available on the local blade. Steal a context from | ||
619 | * a process. This is a hack until a _real_ resource scheduler is written.... | ||
620 | */ | ||
621 | #define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0) | ||
622 | #define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \ | ||
623 | ((g)+1) : &(b)->bs_grus[0]) | ||
624 | |||
625 | static void gru_steal_context(struct gru_thread_state *gts) | ||
626 | { | ||
627 | struct gru_blade_state *blade; | ||
628 | struct gru_state *gru, *gru0; | ||
629 | struct gru_thread_state *ngts = NULL; | ||
630 | int ctxnum, ctxnum0, flag = 0, cbr, dsr; | ||
631 | |||
632 | cbr = gts->ts_cbr_au_count; | ||
633 | dsr = gts->ts_dsr_au_count; | ||
634 | |||
635 | preempt_disable(); | ||
636 | blade = gru_base[uv_numa_blade_id()]; | ||
637 | spin_lock(&blade->bs_lock); | ||
638 | |||
639 | ctxnum = next_ctxnum(blade->bs_lru_ctxnum); | ||
640 | gru = blade->bs_lru_gru; | ||
641 | if (ctxnum == 0) | ||
642 | gru = next_gru(blade, gru); | ||
643 | ctxnum0 = ctxnum; | ||
644 | gru0 = gru; | ||
645 | while (1) { | ||
646 | if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) | ||
647 | break; | ||
648 | spin_lock(&gru->gs_lock); | ||
649 | for (; ctxnum < GRU_NUM_CCH; ctxnum++) { | ||
650 | if (flag && gru == gru0 && ctxnum == ctxnum0) | ||
651 | break; | ||
652 | ngts = gru->gs_gts[ctxnum]; | ||
653 | /* | ||
654 | * We are grabbing locks out of order, so trylock is | ||
655 | * needed. GTSs are usually not locked, so the odds of | ||
656 | * success are high. If trylock fails, try to steal a | ||
657 | * different GSEG. | ||
658 | */ | ||
659 | if (ngts && mutex_trylock(&ngts->ts_ctxlock)) | ||
660 | break; | ||
661 | ngts = NULL; | ||
662 | flag = 1; | ||
663 | } | ||
664 | spin_unlock(&gru->gs_lock); | ||
665 | if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) | ||
666 | break; | ||
667 | ctxnum = 0; | ||
668 | gru = next_gru(blade, gru); | ||
669 | } | ||
670 | blade->bs_lru_gru = gru; | ||
671 | blade->bs_lru_ctxnum = ctxnum; | ||
672 | spin_unlock(&blade->bs_lock); | ||
673 | preempt_enable(); | ||
674 | |||
675 | if (ngts) { | ||
676 | STAT(steal_context); | ||
677 | ngts->ts_steal_jiffies = jiffies; | ||
678 | gru_unload_context(ngts, 1); | ||
679 | mutex_unlock(&ngts->ts_ctxlock); | ||
680 | } else { | ||
681 | STAT(steal_context_failed); | ||
682 | } | ||
683 | gru_dbg(grudev, | ||
684 | "stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;" | ||
685 | " avail cb %ld, ds %ld\n", | ||
686 | gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map), | ||
687 | hweight64(gru->gs_dsr_map)); | ||
688 | } | ||
689 | |||
690 | /* | ||
691 | * Scan the GRUs on the local blade & assign a GRU context. | ||
692 | */ | ||
693 | static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) | ||
694 | { | ||
695 | struct gru_state *gru, *grux; | ||
696 | int i, max_active_contexts; | ||
697 | |||
698 | preempt_disable(); | ||
699 | |||
700 | again: | ||
701 | gru = NULL; | ||
702 | max_active_contexts = GRU_NUM_CCH; | ||
703 | for_each_gru_on_blade(grux, uv_numa_blade_id(), i) { | ||
704 | if (check_gru_resources(grux, gts->ts_cbr_au_count, | ||
705 | gts->ts_dsr_au_count, | ||
706 | max_active_contexts)) { | ||
707 | gru = grux; | ||
708 | max_active_contexts = grux->gs_active_contexts; | ||
709 | if (max_active_contexts == 0) | ||
710 | break; | ||
711 | } | ||
712 | } | ||
713 | |||
714 | if (gru) { | ||
715 | spin_lock(&gru->gs_lock); | ||
716 | if (!check_gru_resources(gru, gts->ts_cbr_au_count, | ||
717 | gts->ts_dsr_au_count, GRU_NUM_CCH)) { | ||
718 | spin_unlock(&gru->gs_lock); | ||
719 | goto again; | ||
720 | } | ||
721 | reserve_gru_resources(gru, gts); | ||
722 | gts->ts_gru = gru; | ||
723 | gts->ts_ctxnum = | ||
724 | find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); | ||
725 | BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH); | ||
726 | atomic_inc(&gts->ts_refcnt); | ||
727 | gru->gs_gts[gts->ts_ctxnum] = gts; | ||
728 | __set_bit(gts->ts_ctxnum, &gru->gs_context_map); | ||
729 | spin_unlock(&gru->gs_lock); | ||
730 | |||
731 | STAT(assign_context); | ||
732 | gru_dbg(grudev, | ||
733 | "gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n", | ||
734 | gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts, | ||
735 | gts->ts_gru->gs_gid, gts->ts_ctxnum, | ||
736 | gts->ts_cbr_au_count, gts->ts_dsr_au_count); | ||
737 | } else { | ||
738 | gru_dbg(grudev, "failed to allocate a GTS %s\n", ""); | ||
739 | STAT(assign_context_failed); | ||
740 | } | ||
741 | |||
742 | preempt_enable(); | ||
743 | return gru; | ||
744 | } | ||
745 | |||
746 | /* | ||
747 | * gru_nopage | ||
748 | * | ||
749 | * Map the user's GRU segment | ||
750 | * | ||
751 | * Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries. | ||
752 | */ | ||
753 | int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
754 | { | ||
755 | struct gru_thread_state *gts; | ||
756 | unsigned long paddr, vaddr; | ||
757 | |||
758 | vaddr = (unsigned long)vmf->virtual_address; | ||
759 | gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", | ||
760 | vma, vaddr, GSEG_BASE(vaddr)); | ||
761 | STAT(nopfn); | ||
762 | |||
763 | /* The following check ensures vaddr is a valid address in the VMA */ | ||
764 | gts = gru_find_thread_state(vma, TSID(vaddr, vma)); | ||
765 | if (!gts) | ||
766 | return VM_FAULT_SIGBUS; | ||
767 | |||
768 | again: | ||
769 | preempt_disable(); | ||
770 | mutex_lock(&gts->ts_ctxlock); | ||
771 | if (gts->ts_gru) { | ||
772 | if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) { | ||
773 | STAT(migrated_nopfn_unload); | ||
774 | gru_unload_context(gts, 1); | ||
775 | } else { | ||
776 | if (gru_retarget_intr(gts)) | ||
777 | STAT(migrated_nopfn_retarget); | ||
778 | } | ||
779 | } | ||
780 | |||
781 | if (!gts->ts_gru) { | ||
782 | if (!gru_assign_gru_context(gts)) { | ||
783 | mutex_unlock(&gts->ts_ctxlock); | ||
784 | preempt_enable(); | ||
785 | schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ | ||
786 | if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) | ||
787 | gru_steal_context(gts); | ||
788 | goto again; | ||
789 | } | ||
790 | gru_load_context(gts); | ||
791 | paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum); | ||
792 | remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1), | ||
793 | paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE, | ||
794 | vma->vm_page_prot); | ||
795 | } | ||
796 | |||
797 | mutex_unlock(&gts->ts_ctxlock); | ||
798 | preempt_enable(); | ||
799 | |||
800 | return VM_FAULT_NOPAGE; | ||
801 | } | ||
802 | |||
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c new file mode 100644 index 000000000000..533923f83f1a --- /dev/null +++ b/drivers/misc/sgi-gru/gruprocfs.c | |||
@@ -0,0 +1,336 @@ | |||
1 | /* | ||
2 | * SN Platform GRU Driver | ||
3 | * | ||
4 | * PROC INTERFACES | ||
5 | * | ||
6 | * This file supports the /proc interfaces for the GRU driver | ||
7 | * | ||
8 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | #include <linux/proc_fs.h> | ||
26 | #include <linux/device.h> | ||
27 | #include <linux/seq_file.h> | ||
28 | #include <linux/uaccess.h> | ||
29 | #include "gru.h" | ||
30 | #include "grulib.h" | ||
31 | #include "grutables.h" | ||
32 | |||
33 | #define printstat(s, f) printstat_val(s, &gru_stats.f, #f) | ||
34 | |||
35 | static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id) | ||
36 | { | ||
37 | unsigned long val = atomic_long_read(v); | ||
38 | |||
39 | if (val) | ||
40 | seq_printf(s, "%16lu %s\n", val, id); | ||
41 | } | ||
42 | |||
43 | static int statistics_show(struct seq_file *s, void *p) | ||
44 | { | ||
45 | printstat(s, vdata_alloc); | ||
46 | printstat(s, vdata_free); | ||
47 | printstat(s, gts_alloc); | ||
48 | printstat(s, gts_free); | ||
49 | printstat(s, vdata_double_alloc); | ||
50 | printstat(s, gts_double_allocate); | ||
51 | printstat(s, assign_context); | ||
52 | printstat(s, assign_context_failed); | ||
53 | printstat(s, free_context); | ||
54 | printstat(s, load_context); | ||
55 | printstat(s, unload_context); | ||
56 | printstat(s, steal_context); | ||
57 | printstat(s, steal_context_failed); | ||
58 | printstat(s, nopfn); | ||
59 | printstat(s, break_cow); | ||
60 | printstat(s, asid_new); | ||
61 | printstat(s, asid_next); | ||
62 | printstat(s, asid_wrap); | ||
63 | printstat(s, asid_reuse); | ||
64 | printstat(s, intr); | ||
65 | printstat(s, call_os); | ||
66 | printstat(s, call_os_check_for_bug); | ||
67 | printstat(s, call_os_wait_queue); | ||
68 | printstat(s, user_flush_tlb); | ||
69 | printstat(s, user_unload_context); | ||
70 | printstat(s, user_exception); | ||
71 | printstat(s, set_task_slice); | ||
72 | printstat(s, migrate_check); | ||
73 | printstat(s, migrated_retarget); | ||
74 | printstat(s, migrated_unload); | ||
75 | printstat(s, migrated_unload_delay); | ||
76 | printstat(s, migrated_nopfn_retarget); | ||
77 | printstat(s, migrated_nopfn_unload); | ||
78 | printstat(s, tlb_dropin); | ||
79 | printstat(s, tlb_dropin_fail_no_asid); | ||
80 | printstat(s, tlb_dropin_fail_upm); | ||
81 | printstat(s, tlb_dropin_fail_invalid); | ||
82 | printstat(s, tlb_dropin_fail_range_active); | ||
83 | printstat(s, tlb_dropin_fail_idle); | ||
84 | printstat(s, tlb_dropin_fail_fmm); | ||
85 | printstat(s, mmu_invalidate_range); | ||
86 | printstat(s, mmu_invalidate_page); | ||
87 | printstat(s, mmu_clear_flush_young); | ||
88 | printstat(s, flush_tlb); | ||
89 | printstat(s, flush_tlb_gru); | ||
90 | printstat(s, flush_tlb_gru_tgh); | ||
91 | printstat(s, flush_tlb_gru_zero_asid); | ||
92 | printstat(s, copy_gpa); | ||
93 | printstat(s, mesq_receive); | ||
94 | printstat(s, mesq_receive_none); | ||
95 | printstat(s, mesq_send); | ||
96 | printstat(s, mesq_send_failed); | ||
97 | printstat(s, mesq_noop); | ||
98 | printstat(s, mesq_send_unexpected_error); | ||
99 | printstat(s, mesq_send_lb_overflow); | ||
100 | printstat(s, mesq_send_qlimit_reached); | ||
101 | printstat(s, mesq_send_amo_nacked); | ||
102 | printstat(s, mesq_send_put_nacked); | ||
103 | printstat(s, mesq_qf_not_full); | ||
104 | printstat(s, mesq_qf_locked); | ||
105 | printstat(s, mesq_qf_noop_not_full); | ||
106 | printstat(s, mesq_qf_switch_head_failed); | ||
107 | printstat(s, mesq_qf_unexpected_error); | ||
108 | printstat(s, mesq_noop_unexpected_error); | ||
109 | printstat(s, mesq_noop_lb_overflow); | ||
110 | printstat(s, mesq_noop_qlimit_reached); | ||
111 | printstat(s, mesq_noop_amo_nacked); | ||
112 | printstat(s, mesq_noop_put_nacked); | ||
113 | return 0; | ||
114 | } | ||
115 | |||
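/*
 * A write of any value resets every counter above, e.g. from a shell:
 * echo 1 > /proc/sgi_uv/gru/statistics
 */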
116 | static ssize_t statistics_write(struct file *file, const char __user *userbuf, | ||
117 | size_t count, loff_t *data) | ||
118 | { | ||
119 | memset(&gru_stats, 0, sizeof(gru_stats)); | ||
120 | return count; | ||
121 | } | ||
122 | |||
123 | static int options_show(struct seq_file *s, void *p) | ||
124 | { | ||
125 | seq_printf(s, "0x%lx\n", gru_options); | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | static ssize_t options_write(struct file *file, const char __user *userbuf, | ||
130 | size_t count, loff_t *data) | ||
131 | { | ||
132 | unsigned long val; | ||
133 | char buf[80]; | ||
134 | |||
135 | if (count >= sizeof(buf) || copy_from_user(buf, userbuf, count)) | ||
136 | return -EFAULT; | ||
137 | buf[count] = '\0'; | ||
138 | if (!strict_strtoul(buf, 0, &val)) | ||
139 | gru_options = val; | ||
140 | |||
141 | return count; | ||
142 | } | ||
143 | |||
144 | static int cch_seq_show(struct seq_file *file, void *data) | ||
145 | { | ||
146 | long gid = *(long *)data; | ||
147 | int i; | ||
148 | struct gru_state *gru = GID_TO_GRU(gid); | ||
149 | struct gru_thread_state *ts; | ||
150 | const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; | ||
151 | |||
152 | if (gid == 0) | ||
153 | seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid", | ||
154 | "ctx#", "pid", "cbrs", "dsbytes", "mode"); | ||
155 | if (gru) | ||
156 | for (i = 0; i < GRU_NUM_CCH; i++) { | ||
157 | ts = gru->gs_gts[i]; | ||
158 | if (!ts) | ||
159 | continue; | ||
160 | seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n", | ||
161 | gru->gs_gid, gru->gs_blade_id, i, | ||
162 | ts->ts_tgid_owner, | ||
163 | ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, | ||
164 | ts->ts_dsr_au_count * GRU_DSR_AU_BYTES, | ||
165 | mode[ts->ts_user_options & | ||
166 | GRU_OPT_MISS_MASK]); | ||
167 | } | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | static int gru_seq_show(struct seq_file *file, void *data) | ||
173 | { | ||
174 | long gid = *(long *)data, ctxfree, cbrfree, dsrfree; | ||
175 | struct gru_state *gru = GID_TO_GRU(gid); | ||
176 | |||
177 | if (gid == 0) { | ||
178 | seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid", | ||
179 | "ctx", "cbr", "dsr", "ctx", "cbr", "dsr"); | ||
180 | seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy", | ||
181 | "busy", "busy", "free", "free", "free"); | ||
182 | } | ||
183 | if (gru) { | ||
184 | ctxfree = GRU_NUM_CCH - gru->gs_active_contexts; | ||
185 | cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; | ||
186 | dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; | ||
187 | seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n", | ||
188 | gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree, | ||
189 | GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree, | ||
190 | ctxfree, cbrfree, dsrfree); | ||
191 | } | ||
192 | |||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | static void seq_stop(struct seq_file *file, void *data) | ||
197 | { | ||
198 | } | ||
199 | |||
200 | static void *seq_start(struct seq_file *file, loff_t *gid) | ||
201 | { | ||
202 | if (*gid < GRU_MAX_GRUS) | ||
203 | return gid; | ||
204 | return NULL; | ||
205 | } | ||
206 | |||
207 | static void *seq_next(struct seq_file *file, void *data, loff_t *gid) | ||
208 | { | ||
209 | (*gid)++; | ||
210 | if (*gid < GRU_MAX_GRUS) | ||
211 | return gid; | ||
212 | return NULL; | ||
213 | } | ||
214 | |||
215 | static const struct seq_operations cch_seq_ops = { | ||
216 | .start = seq_start, | ||
217 | .next = seq_next, | ||
218 | .stop = seq_stop, | ||
219 | .show = cch_seq_show | ||
220 | }; | ||
221 | |||
222 | static const struct seq_operations gru_seq_ops = { | ||
223 | .start = seq_start, | ||
224 | .next = seq_next, | ||
225 | .stop = seq_stop, | ||
226 | .show = gru_seq_show | ||
227 | }; | ||
228 | |||
229 | static int statistics_open(struct inode *inode, struct file *file) | ||
230 | { | ||
231 | return single_open(file, statistics_show, NULL); | ||
232 | } | ||
233 | |||
234 | static int options_open(struct inode *inode, struct file *file) | ||
235 | { | ||
236 | return single_open(file, options_show, NULL); | ||
237 | } | ||
238 | |||
239 | static int cch_open(struct inode *inode, struct file *file) | ||
240 | { | ||
241 | return seq_open(file, &cch_seq_ops); | ||
242 | } | ||
243 | |||
244 | static int gru_open(struct inode *inode, struct file *file) | ||
245 | { | ||
246 | return seq_open(file, &gru_seq_ops); | ||
247 | } | ||
248 | |||
249 | /* *INDENT-OFF* */ | ||
250 | static const struct file_operations statistics_fops = { | ||
251 | .open = statistics_open, | ||
252 | .read = seq_read, | ||
253 | .write = statistics_write, | ||
254 | .llseek = seq_lseek, | ||
255 | .release = single_release, | ||
256 | }; | ||
257 | |||
258 | static const struct file_operations options_fops = { | ||
259 | .open = options_open, | ||
260 | .read = seq_read, | ||
261 | .write = options_write, | ||
262 | .llseek = seq_lseek, | ||
263 | .release = single_release, | ||
264 | }; | ||
265 | |||
266 | static const struct file_operations cch_fops = { | ||
267 | .open = cch_open, | ||
268 | .read = seq_read, | ||
269 | .llseek = seq_lseek, | ||
270 | .release = seq_release, | ||
271 | }; | ||
272 | static const struct file_operations gru_fops = { | ||
273 | .open = gru_open, | ||
274 | .read = seq_read, | ||
275 | .llseek = seq_lseek, | ||
276 | .release = seq_release, | ||
277 | }; | ||
278 | |||
279 | static struct proc_entry { | ||
280 | char *name; | ||
281 | int mode; | ||
282 | const struct file_operations *fops; | ||
283 | struct proc_dir_entry *entry; | ||
284 | } proc_files[] = { | ||
285 | {"statistics", 0644, &statistics_fops}, | ||
286 | {"debug_options", 0644, &options_fops}, | ||
287 | {"cch_status", 0444, &cch_fops}, | ||
288 | {"gru_status", 0444, &gru_fops}, | ||
289 | {NULL} | ||
290 | }; | ||
291 | /* *INDENT-ON* */ | ||
292 | |||
293 | static struct proc_dir_entry *proc_gru __read_mostly; | ||
294 | |||
295 | static int create_proc_file(struct proc_entry *p) | ||
296 | { | ||
297 | p->entry = create_proc_entry(p->name, p->mode, proc_gru); | ||
298 | if (!p->entry) | ||
299 | return -1; | ||
300 | p->entry->proc_fops = p->fops; | ||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | static void delete_proc_files(void) | ||
305 | { | ||
306 | struct proc_entry *p; | ||
307 | |||
308 | if (proc_gru) { | ||
309 | for (p = proc_files; p->name; p++) | ||
310 | if (p->entry) | ||
311 | remove_proc_entry(p->name, proc_gru); | ||
312 | remove_proc_entry("sgi_uv/gru", NULL); | ||
313 | } | ||
314 | } | ||
315 | |||
316 | int gru_proc_init(void) | ||
317 | { | ||
318 | struct proc_entry *p; | ||
319 | |||
320 | proc_mkdir("sgi_uv", NULL); | ||
321 | proc_gru = proc_mkdir("sgi_uv/gru", NULL); | ||
322 | |||
323 | for (p = proc_files; p->name; p++) | ||
324 | if (create_proc_file(p)) | ||
325 | goto err; | ||
326 | return 0; | ||
327 | |||
328 | err: | ||
329 | delete_proc_files(); | ||
330 | return -1; | ||
331 | } | ||
332 | |||
333 | void gru_proc_exit(void) | ||
334 | { | ||
335 | delete_proc_files(); | ||
336 | } | ||
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h new file mode 100644 index 000000000000..4251018f70ff --- /dev/null +++ b/drivers/misc/sgi-gru/grutables.h | |||
@@ -0,0 +1,609 @@ | |||
1 | /* | ||
2 | * SN Platform GRU Driver | ||
3 | * | ||
4 | * GRU DRIVER TABLES, MACROS, externs, etc | ||
5 | * | ||
6 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef __GRUTABLES_H__ | ||
24 | #define __GRUTABLES_H__ | ||
25 | |||
26 | /* | ||
27 | * GRU Chiplet: | ||
28 | * The GRU is a user addressable memory accelerator. It provides | ||
29 | * several forms of load, store, memset, bcopy instructions. In addition, it | ||
30 | * contains special instructions for AMOs, sending messages to message | ||
31 | * queues, etc. | ||
32 | * | ||
33 | * The GRU is an integral part of the node controller. It connects | ||
34 | * directly to the cpu socket. In its current implementation, there are 2 | ||
35 | * GRU chiplets in the node controller on each blade (~node). | ||
36 | * | ||
37 | * The entire GRU memory space is fully coherent and cacheable by the cpus. | ||
38 | * | ||
39 | * Each GRU chiplet has a physical memory map that looks like the following: | ||
40 | * | ||
41 | * +-----------------+ | ||
42 | * |/////////////////| | ||
43 | * |/////////////////| | ||
44 | * |/////////////////| | ||
45 | * |/////////////////| | ||
46 | * |/////////////////| | ||
47 | * |/////////////////| | ||
48 | * |/////////////////| | ||
49 | * |/////////////////| | ||
50 | * +-----------------+ | ||
51 | * | system control | | ||
52 | * +-----------------+ _______ +-------------+ | ||
53 | * |/////////////////| / | | | ||
54 | * |/////////////////| / | | | ||
55 | * |/////////////////| / | instructions| | ||
56 | * |/////////////////| / | | | ||
57 | * |/////////////////| / | | | ||
58 | * |/////////////////| / |-------------| | ||
59 | * |/////////////////| / | | | ||
60 | * +-----------------+ | | | ||
61 | * | context 15 | | data | | ||
62 | * +-----------------+ | | | ||
63 | * | ...... | \ | | | ||
64 | * +-----------------+ \____________ +-------------+ | ||
65 | * | context 1 | | ||
66 | * +-----------------+ | ||
67 | * | context 0 | | ||
68 | * +-----------------+ | ||
69 | * | ||
70 | * Each of the "contexts" is a chunk of memory that can be mmaped into user | ||
71 | * space. The context consists of 2 parts: | ||
72 | * | ||
73 | * - an instruction space that can be directly accessed by the user | ||
74 | * to issue GRU instructions and to check instruction status. | ||
75 | * | ||
76 | * - a data area that acts as normal RAM. | ||
77 | * | ||
78 | * User instructions contain virtual addresses of data to be accessed by the | ||
79 | * GRU. The GRU contains a TLB that is used to convert these user virtual | ||
80 | * addresses to physical addresses. | ||
81 | * | ||
82 | * The "system control" area of the GRU chiplet is used by the kernel driver | ||
83 | * to manage user contexts and to perform functions such as TLB dropin and | ||
84 | * purging. | ||
85 | * | ||
86 | * One context may be reserved for the kernel and used for cross-partition | ||
87 | * communication. The GRU will also be used to asynchronously zero out | ||
88 | * large blocks of memory (not currently implemented). | ||
89 | * | ||
90 | * | ||
91 | * Tables: | ||
92 | * | ||
93 | * VDATA-VMA Data - Holds a few parameters. Head of linked list of | ||
94 | * GTS tables for threads using the GSEG | ||
95 | * GTS - Gru Thread State - contains info for managing a GSEG context. A | ||
96 | * GTS is allocated for each thread accessing a | ||
97 | * GSEG. | ||
98 | * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is | ||
99 | * not loaded into a GRU | ||
100 | * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs | ||
101 | * where a GSEG has been loaded. Similar to | ||
102 | * an mm_struct but for GRU. | ||
103 | * | ||
104 | * GS - GRU State - Used to manage the state of a GRU chiplet | ||
105 | * BS - Blade State - Used to manage state of all GRU chiplets | ||
106 | * on a blade | ||
107 | * | ||
108 | * | ||
109 | * Normal task tables for a task using the GRU. | ||
110 | * - 2 threads in process | ||
111 | * - 2 GSEGs open in process | ||
112 | * - GSEG1 is being used by both threads | ||
113 | * - GSEG2 is used only by thread 2 | ||
114 | * | ||
115 | * task -->| | ||
116 | * task ---+---> mm ->------ (notifier) -------+-> gms | ||
117 | * | | | ||
118 | * |--> vma -> vdata ---> gts--->| GSEG1 (thread1) | ||
119 | * | | | | ||
120 | * | +-> gts--->| GSEG1 (thread2) | ||
121 | * | | | ||
122 | * |--> vma -> vdata ---> gts--->| GSEG2 (thread2) | ||
123 | * . | ||
124 | * . | ||
125 | * | ||
126 | * GSEGs are marked DONTCOPY on fork | ||
127 | * | ||
128 | * At open | ||
129 | * file.private_data -> NULL | ||
130 | * | ||
131 | * At mmap, | ||
132 | * vma -> vdata | ||
133 | * | ||
134 | * After gseg reference | ||
135 | * vma -> vdata ->gts | ||
136 | * | ||
137 | * After fork | ||
138 | * parent | ||
139 | * vma -> vdata -> gts | ||
140 | * child | ||
141 | * (vma is not copied) | ||
142 | * | ||
143 | */ | ||
144 | |||
145 | #include <linux/rmap.h> | ||
146 | #include <linux/interrupt.h> | ||
147 | #include <linux/mutex.h> | ||
148 | #include <linux/wait.h> | ||
149 | #include <linux/mmu_notifier.h> | ||
150 | #include "gru.h" | ||
151 | #include "gruhandles.h" | ||
152 | |||
153 | extern struct gru_stats_s gru_stats; | ||
154 | extern struct gru_blade_state *gru_base[]; | ||
155 | extern unsigned long gru_start_paddr, gru_end_paddr; | ||
156 | |||
157 | #define GRU_MAX_BLADES MAX_NUMNODES | ||
158 | #define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) | ||
159 | |||
160 | #define GRU_DRIVER_ID_STR "SGI GRU Device Driver" | ||
161 | #define GRU_DRIVER_VERSION_STR "0.80" | ||
162 | |||
163 | /* | ||
164 | * GRU statistics. | ||
165 | */ | ||
166 | struct gru_stats_s { | ||
167 | atomic_long_t vdata_alloc; | ||
168 | atomic_long_t vdata_free; | ||
169 | atomic_long_t gts_alloc; | ||
170 | atomic_long_t gts_free; | ||
171 | atomic_long_t vdata_double_alloc; | ||
172 | atomic_long_t gts_double_allocate; | ||
173 | atomic_long_t assign_context; | ||
174 | atomic_long_t assign_context_failed; | ||
175 | atomic_long_t free_context; | ||
176 | atomic_long_t load_context; | ||
177 | atomic_long_t unload_context; | ||
178 | atomic_long_t steal_context; | ||
179 | atomic_long_t steal_context_failed; | ||
180 | atomic_long_t nopfn; | ||
181 | atomic_long_t break_cow; | ||
182 | atomic_long_t asid_new; | ||
183 | atomic_long_t asid_next; | ||
184 | atomic_long_t asid_wrap; | ||
185 | atomic_long_t asid_reuse; | ||
186 | atomic_long_t intr; | ||
187 | atomic_long_t call_os; | ||
188 | atomic_long_t call_os_check_for_bug; | ||
189 | atomic_long_t call_os_wait_queue; | ||
190 | atomic_long_t user_flush_tlb; | ||
191 | atomic_long_t user_unload_context; | ||
192 | atomic_long_t user_exception; | ||
193 | atomic_long_t set_task_slice; | ||
194 | atomic_long_t migrate_check; | ||
195 | atomic_long_t migrated_retarget; | ||
196 | atomic_long_t migrated_unload; | ||
197 | atomic_long_t migrated_unload_delay; | ||
198 | atomic_long_t migrated_nopfn_retarget; | ||
199 | atomic_long_t migrated_nopfn_unload; | ||
200 | atomic_long_t tlb_dropin; | ||
201 | atomic_long_t tlb_dropin_fail_no_asid; | ||
202 | atomic_long_t tlb_dropin_fail_upm; | ||
203 | atomic_long_t tlb_dropin_fail_invalid; | ||
204 | atomic_long_t tlb_dropin_fail_range_active; | ||
205 | atomic_long_t tlb_dropin_fail_idle; | ||
206 | atomic_long_t tlb_dropin_fail_fmm; | ||
207 | atomic_long_t mmu_invalidate_range; | ||
208 | atomic_long_t mmu_invalidate_page; | ||
209 | atomic_long_t mmu_clear_flush_young; | ||
210 | atomic_long_t flush_tlb; | ||
211 | atomic_long_t flush_tlb_gru; | ||
212 | atomic_long_t flush_tlb_gru_tgh; | ||
213 | atomic_long_t flush_tlb_gru_zero_asid; | ||
214 | |||
215 | atomic_long_t copy_gpa; | ||
216 | |||
217 | atomic_long_t mesq_receive; | ||
218 | atomic_long_t mesq_receive_none; | ||
219 | atomic_long_t mesq_send; | ||
220 | atomic_long_t mesq_send_failed; | ||
221 | atomic_long_t mesq_noop; | ||
222 | atomic_long_t mesq_send_unexpected_error; | ||
223 | atomic_long_t mesq_send_lb_overflow; | ||
224 | atomic_long_t mesq_send_qlimit_reached; | ||
225 | atomic_long_t mesq_send_amo_nacked; | ||
226 | atomic_long_t mesq_send_put_nacked; | ||
227 | atomic_long_t mesq_qf_not_full; | ||
228 | atomic_long_t mesq_qf_locked; | ||
229 | atomic_long_t mesq_qf_noop_not_full; | ||
230 | atomic_long_t mesq_qf_switch_head_failed; | ||
231 | atomic_long_t mesq_qf_unexpected_error; | ||
232 | atomic_long_t mesq_noop_unexpected_error; | ||
233 | atomic_long_t mesq_noop_lb_overflow; | ||
234 | atomic_long_t mesq_noop_qlimit_reached; | ||
235 | atomic_long_t mesq_noop_amo_nacked; | ||
236 | atomic_long_t mesq_noop_put_nacked; | ||
237 | |||
238 | }; | ||
239 | |||
240 | #define OPT_DPRINT 1 | ||
241 | #define OPT_STATS 2 | ||
242 | #define GRU_QUICKLOOK 4 | ||
243 | |||
244 | |||
245 | #define IRQ_GRU 110 /* Starting IRQ number for interrupts */ | ||
246 | |||
247 | /* Delay in jiffies between attempts to assign a GRU context */ | ||
248 | #define GRU_ASSIGN_DELAY ((HZ * 20) / 1000) | ||
249 | |||
250 | /* | ||
251 | * If a process has its context stolen, min delay in jiffies before trying to | ||
252 | * steal a context from another process. | ||
253 | */ | ||
254 | #define GRU_STEAL_DELAY ((HZ * 200) / 1000) | ||
255 | |||
256 | #define STAT(id) do { \ | ||
257 | if (gru_options & OPT_STATS) \ | ||
258 | atomic_long_inc(&gru_stats.id); \ | ||
259 | } while (0) | ||
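/*
 * Usage sketch (illustration only): STAT() takes a field name from
 * struct gru_stats_s above and is a no-op unless OPT_STATS is set in
 * gru_options:
 *
 *	STAT(flush_tlb);	increments gru_stats.flush_tlb
 */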
260 | |||
261 | #ifdef CONFIG_SGI_GRU_DEBUG | ||
262 | #define gru_dbg(dev, fmt, x...) \ | ||
263 | do { \ | ||
264 | if (gru_options & OPT_DPRINT) \ | ||
265 | dev_dbg(dev, "%s: " fmt, __func__, x); \ | ||
266 | } while (0) | ||
267 | #else | ||
268 | #define gru_dbg(x...) | ||
269 | #endif | ||
270 | |||
271 | /*----------------------------------------------------------------------------- | ||
272 | * ASID management | ||
273 | */ | ||
274 | #define MAX_ASID 0xfffff0 | ||
275 | #define MIN_ASID 8 | ||
276 | #define ASID_INC 8 /* number of regions */ | ||
277 | |||
278 | /* Generate a GRU asid value from a GRU base asid & a virtual address. */ | ||
279 | #if defined CONFIG_IA64 | ||
280 | #define VADDR_HI_BIT 64 | ||
281 | #define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) | ||
282 | #elif defined __x86_64 | ||
283 | #define VADDR_HI_BIT 48 | ||
284 | #define GRUREGION(addr) (0) /* ZZZ could do better */ | ||
285 | #else | ||
286 | #error "Unsupported architecture" | ||
287 | #endif | ||
288 | #define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) | ||
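/*
 * Worked example (IA64 case, illustration only): GRUREGION() extracts
 * address bits 62:61, so for a base asid of 8,
 *
 *	GRUASID(8, 0x2000000000000000UL) == 8 + 1 == 9
 *
 * Base asids are allocated ASID_INC apart, so the per-region offset
 * added here cannot collide with another task's base asid.
 */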
289 | |||
290 | /*------------------------------------------------------------------------------ | ||
291 | * File & VMS Tables | ||
292 | */ | ||
293 | |||
294 | struct gru_state; | ||
295 | |||
296 | /* | ||
297 | * This structure is pointed to from the mm_struct via the notifier pointer. | ||
298 | * There is one of these per address space. | ||
299 | */ | ||
300 | struct gru_mm_tracker { | ||
301 | unsigned int mt_asid_gen; /* ASID wrap count */ | ||
302 | int mt_asid; /* current base ASID for gru */ | ||
303 | unsigned short mt_ctxbitmap; /* bitmap of contexts using | ||
304 | asid */ | ||
305 | }; | ||
306 | |||
307 | struct gru_mm_struct { | ||
308 | struct mmu_notifier ms_notifier; | ||
309 | atomic_t ms_refcnt; | ||
310 | spinlock_t ms_asid_lock; /* protects ASID assignment */ | ||
311 | atomic_t ms_range_active;/* num range_invals active */ | ||
312 | char ms_released; | ||
313 | wait_queue_head_t ms_wait_queue; | ||
314 | DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); | ||
315 | struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; | ||
316 | }; | ||
317 | |||
318 | /* | ||
319 | * One of these structures is allocated when a GSEG is mmaped. The | ||
320 | * structure is pointed to by the vma->vm_private_data field in the vma struct. | ||
321 | */ | ||
322 | struct gru_vma_data { | ||
323 | spinlock_t vd_lock; /* Serialize access to vma */ | ||
324 | struct list_head vd_head; /* head of linked list of gts */ | ||
325 | long vd_user_options;/* misc user option flags */ | ||
326 | int vd_cbr_au_count; | ||
327 | int vd_dsr_au_count; | ||
328 | }; | ||
329 | |||
330 | /* | ||
331 | * One of these is allocated for each thread accessing a mmaped GRU. A linked | ||
332 | * list of these structures is hung off the struct gru_vma_data in the vma. | ||
333 | */ | ||
334 | struct gru_thread_state { | ||
335 | struct list_head ts_next; /* list - head at vma-private */ | ||
336 | struct mutex ts_ctxlock; /* load/unload CTX lock */ | ||
337 | struct mm_struct *ts_mm; /* mm currently mapped to | ||
338 | context */ | ||
339 | struct vm_area_struct *ts_vma; /* vma of GRU context */ | ||
340 | struct gru_state *ts_gru; /* GRU where the context is | ||
341 | loaded */ | ||
342 | struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ | ||
343 | unsigned long ts_cbr_map; /* map of allocated CBRs */ | ||
344 | unsigned long ts_dsr_map; /* map of allocated DATA | ||
345 | resources */ | ||
346 | unsigned long ts_steal_jiffies;/* jiffies when context last | ||
347 | stolen */ | ||
348 | long ts_user_options;/* misc user option flags */ | ||
349 | pid_t ts_tgid_owner; /* task that is using the | ||
350 | context - for migration */ | ||
351 | int ts_tsid; /* thread that owns the | ||
352 | structure */ | ||
353 | int ts_tlb_int_select;/* target cpu if interrupts | ||
354 | enabled */ | ||
355 | int ts_ctxnum; /* context number where the | ||
356 | context is loaded */ | ||
357 | atomic_t ts_refcnt; /* reference count GTS */ | ||
358 | unsigned char ts_dsr_au_count;/* Number of DSR resources | ||
359 | required for context */ | ||
360 | unsigned char ts_cbr_au_count;/* Number of CBR resources | ||
361 | required for context */ | ||
362 | char ts_force_unload;/* force context to be unloaded | ||
363 | after migration */ | ||
364 | char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each | ||
365 | allocated CB */ | ||
366 | unsigned long ts_gdata[0]; /* save area for GRU data (CB, | ||
367 | DS, CBE) */ | ||
368 | }; | ||
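/*
 * Minimal lookup sketch (illustration only, NOT the driver's code; the
 * real lookup is gru_find_thread_state(), declared later in this file):
 * walk vd_head under vd_lock to find the GTS for a given TSID.
 */
static inline struct gru_thread_state *
example_find_gts(struct gru_vma_data *vdata, int tsid)
{
	struct gru_thread_state *gts, *found = NULL;

	spin_lock(&vdata->vd_lock);
	list_for_each_entry(gts, &vdata->vd_head, ts_next)
		if (gts->ts_tsid == tsid) {
			found = gts;
			break;
		}
	spin_unlock(&vdata->vd_lock);
	return found;
}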
369 | |||
370 | /* | ||
371 | * Threaded programs actually allocate an array of GSEGs when a context is | ||
372 | * created. Each thread uses a separate GSEG. TSID is the index into the GSEG | ||
373 | * array. | ||
374 | */ | ||
375 | #define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE) | ||
376 | #define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \ | ||
377 | (gts)->ts_tsid * GRU_GSEG_PAGESIZE) | ||
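/*
 * Example (follows directly from the definitions): the two macros are
 * inverses, i.e. for any gts,
 *
 *	TSID(UGRUADDR(gts), (gts)->ts_vma) == (gts)->ts_tsid
 */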
378 | |||
379 | #define NULLCTX (-1) /* if context not loaded into GRU */ | ||
380 | |||
381 | /*----------------------------------------------------------------------------- | ||
382 | * GRU State Tables | ||
383 | */ | ||
384 | |||
385 | /* | ||
386 | * One of these exists for each GRU chiplet. | ||
387 | */ | ||
388 | struct gru_state { | ||
389 | struct gru_blade_state *gs_blade; /* GRU state for entire | ||
390 | blade */ | ||
391 | unsigned long gs_gru_base_paddr; /* Physical address of | ||
392 | gru segments (64) */ | ||
393 | void *gs_gru_base_vaddr; /* Virtual address of | ||
394 | gru segments (64) */ | ||
395 | unsigned char gs_gid; /* unique GRU number */ | ||
396 | unsigned char gs_tgh_local_shift; /* used to pick TGH for | ||
397 | local flush */ | ||
398 | unsigned char gs_tgh_first_remote; /* starting TGH# for | ||
399 | remote flush */ | ||
400 | unsigned short gs_blade_id; /* blade of GRU */ | ||
401 | spinlock_t gs_asid_lock; /* lock used for | ||
402 | assigning asids */ | ||
403 | spinlock_t gs_lock; /* lock used for | ||
404 | assigning contexts */ | ||
405 | |||
406 | /* -- the following are protected by the gs_asid_lock spinlock ---- */ | ||
407 | unsigned int gs_asid; /* Next available ASID */ | ||
408 | unsigned int gs_asid_limit; /* Limit of available | ||
409 | ASIDs */ | ||
410 | unsigned int gs_asid_gen; /* asid generation. | ||
411 | Inc on wrap */ | ||
412 | |||
413 | /* --- the following fields are protected by the gs_lock spinlock --- */ | ||
414 | unsigned long gs_context_map; /* bitmap to manage | ||
415 | contexts in use */ | ||
416 | unsigned long gs_cbr_map; /* bitmap to manage CB | ||
417 | resources */ | ||
418 | unsigned long gs_dsr_map; /* bitmap used to manage | ||
419 | DATA resources */ | ||
420 | unsigned int gs_reserved_cbrs; /* Number of kernel- | ||
421 | reserved cbrs */ | ||
422 | unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel- | ||
423 | reserved dsrs */ | ||
424 | unsigned short gs_active_contexts; /* number of contexts | ||
425 | in use */ | ||
426 | struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using | ||
427 | the context */ | ||
428 | }; | ||
429 | |||
430 | /* | ||
431 | * This structure contains the GRU state for all the GRUs on a blade. | ||
432 | */ | ||
433 | struct gru_blade_state { | ||
434 | void *kernel_cb; /* First kernel | ||
435 | reserved cb */ | ||
436 | void *kernel_dsr; /* First kernel | ||
437 | reserved DSR */ | ||
438 | /* ---- the following are protected by the bs_lock spinlock ---- */ | ||
439 | spinlock_t bs_lock; /* lock used for | ||
440 | stealing contexts */ | ||
441 | int bs_lru_ctxnum; /* STEAL - last context | ||
442 | stolen */ | ||
443 | struct gru_state *bs_lru_gru; /* STEAL - last gru | ||
444 | stolen */ | ||
445 | |||
446 | struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE]; | ||
447 | }; | ||
448 | |||
449 | /*----------------------------------------------------------------------------- | ||
450 | * Address Primitives | ||
451 | */ | ||
452 | #define get_tfm_for_cpu(g, c) \ | ||
453 | ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c))) | ||
454 | #define get_tfh_by_index(g, i) \ | ||
455 | ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i))) | ||
456 | #define get_tgh_by_index(g, i) \ | ||
457 | ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i))) | ||
458 | #define get_cbe_by_index(g, i) \ | ||
459 | ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\ | ||
460 | (i))) | ||
461 | |||
462 | /*----------------------------------------------------------------------------- | ||
463 | * Useful Macros | ||
464 | */ | ||
465 | |||
466 | /* Given a blade# & chiplet#, get a pointer to the GRU */ | ||
467 | #define get_gru(b, c) (&gru_base[b]->bs_grus[c]) | ||
468 | |||
469 | /* Number of bytes to save/restore when unloading/loading GRU contexts */ | ||
470 | #define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES) | ||
471 | #define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2) | ||
472 | |||
473 | /* Convert a user CB number to the actual CBRNUM */ | ||
474 | #define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \ | ||
475 | * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE) | ||
476 | |||
477 | /* Convert a gid to a pointer to the GRU */ | ||
478 | #define GID_TO_GRU(gid) \ | ||
479 | (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \ | ||
480 | (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \ | ||
481 | bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \ | ||
482 | NULL) | ||
483 | |||
484 | /* Scan all active GRUs in a GRU bitmap */ | ||
485 | #define for_each_gru_in_bitmap(gid, map) \ | ||
486 | for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\ | ||
487 | (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid))) | ||
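/*
 * Usage sketch (illustration only; gru_flush_tlb_range() in
 * grutlbpurge.c is a real caller):
 *
 *	int gid;
 *	for_each_gru_in_bitmap(gid, gms->ms_asidmap)
 *		gru_dbg(grudev, "gid %d has an asid\n", gid);
 */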
488 | |||
489 | /* Scan all active GRUs on a specific blade */ | ||
490 | #define for_each_gru_on_blade(gru, nid, i) \ | ||
491 | for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \ | ||
492 | (i) < GRU_CHIPLETS_PER_BLADE; \ | ||
493 | (i)++, (gru)++) | ||
494 | |||
495 | /* Scan all active GTSs on a gru. Note: must hold gs_lock to use this macro. */ | ||
496 | #define for_each_gts_on_gru(gts, gru, ctxnum) \ | ||
497 | for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \ | ||
498 | if (((gts) = (gru)->gs_gts[ctxnum])) | ||
499 | |||
500 | /* Scan each CBR whose bit is set in a TFM (or copy of) */ | ||
501 | #define for_each_cbr_in_tfm(i, map) \ | ||
502 | for ((i) = find_first_bit(map, GRU_NUM_CBE); \ | ||
503 | (i) < GRU_NUM_CBE; \ | ||
504 | (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i)) | ||
505 | |||
506 | /* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ | ||
507 | #define for_each_cbr_in_allocation_map(i, map, k) \ | ||
508 | for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \ | ||
509 | (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \ | ||
510 | for ((i) = (k)*GRU_CBR_AU_SIZE; \ | ||
511 | (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) | ||
512 | |||
513 | /* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */ | ||
514 | #define for_each_dsr_in_allocation_map(i, map, k) \ | ||
515 | for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\ | ||
516 | (k) < GRU_DSR_AU; \ | ||
517 | (k) = find_next_bit((const unsigned long *)map, \ | ||
518 | GRU_DSR_AU, (k) + 1)) \ | ||
519 | for ((i) = (k) * GRU_DSR_AU_CL; \ | ||
520 | (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++) | ||
521 | |||
522 | #define gseg_physical_address(gru, ctxnum) \ | ||
523 | ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE) | ||
524 | #define gseg_virtual_address(gru, ctxnum) \ | ||
525 | ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE) | ||
526 | |||
527 | /*----------------------------------------------------------------------------- | ||
528 | * Lock / Unlock GRU handles | ||
529 | * Use the "delresp" bit in the handle as a "lock" bit. | ||
530 | */ | ||
531 | |||
532 | /* Lock hierarchy checking enabled only in emulator */ | ||
533 | |||
534 | static inline void __lock_handle(void *h) | ||
535 | { | ||
536 | while (test_and_set_bit(1, h)) | ||
537 | cpu_relax(); | ||
538 | } | ||
539 | |||
540 | static inline void __unlock_handle(void *h) | ||
541 | { | ||
542 | clear_bit(1, h); | ||
543 | } | ||
544 | |||
545 | static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) | ||
546 | { | ||
547 | __lock_handle(cch); | ||
548 | } | ||
549 | |||
550 | static inline void unlock_cch_handle(struct gru_context_configuration_handle | ||
551 | *cch) | ||
552 | { | ||
553 | __unlock_handle(cch); | ||
554 | } | ||
555 | |||
556 | static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh) | ||
557 | { | ||
558 | __lock_handle(tgh); | ||
559 | } | ||
560 | |||
561 | static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) | ||
562 | { | ||
563 | __unlock_handle(tgh); | ||
564 | } | ||
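/*
 * Usage sketch (illustration only): the pairs above bracket direct
 * handle manipulation, e.g.
 *
 *	tgh = get_tgh_by_index(gru, n);
 *	lock_tgh_handle(tgh);
 *	... issue the invalidate via the handle ...
 *	unlock_tgh_handle(tgh);
 *
 * get_lock_tgh_handle()/get_unlock_tgh_handle() in grutlbpurge.c wrap
 * this pattern with TGH selection and preemption control.
 */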
565 | |||
566 | /*----------------------------------------------------------------------------- | ||
567 | * Function prototypes & externs | ||
568 | */ | ||
569 | struct gru_unload_context_req; | ||
570 | |||
571 | extern struct vm_operations_struct gru_vm_ops; | ||
572 | extern struct device *grudev; | ||
573 | |||
574 | extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, | ||
575 | int tsid); | ||
576 | extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct | ||
577 | *vma, int tsid); | ||
578 | extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct | ||
579 | *vma, int tsid); | ||
580 | extern void gru_unload_context(struct gru_thread_state *gts, int savestate); | ||
581 | extern void gts_drop(struct gru_thread_state *gts); | ||
582 | extern void gru_tgh_flush_init(struct gru_state *gru); | ||
583 | extern int gru_kservices_init(struct gru_state *gru); | ||
584 | extern irqreturn_t gru_intr(int irq, void *dev_id); | ||
585 | extern int gru_handle_user_call_os(unsigned long address); | ||
586 | extern int gru_user_flush_tlb(unsigned long arg); | ||
587 | extern int gru_user_unload_context(unsigned long arg); | ||
588 | extern int gru_get_exception_detail(unsigned long arg); | ||
589 | extern int gru_set_task_slice(long address); | ||
590 | extern int gru_cpu_fault_map_id(void); | ||
591 | extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); | ||
592 | extern void gru_flush_all_tlb(struct gru_state *gru); | ||
593 | extern int gru_proc_init(void); | ||
594 | extern void gru_proc_exit(void); | ||
595 | |||
596 | extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, | ||
597 | int cbr_au_count, char *cbmap); | ||
598 | extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, | ||
599 | int dsr_au_count, char *dsmap); | ||
600 | extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf); | ||
601 | extern struct gru_mm_struct *gru_register_mmu_notifier(void); | ||
602 | extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); | ||
603 | |||
604 | extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, | ||
605 | unsigned long len); | ||
606 | |||
607 | extern unsigned long gru_options; | ||
608 | |||
609 | #endif /* __GRUTABLES_H__ */ | ||
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c new file mode 100644 index 000000000000..c84496a77691 --- /dev/null +++ b/drivers/misc/sgi-gru/grutlbpurge.c | |||
@@ -0,0 +1,371 @@ | |||
1 | /* | ||
2 | * SN Platform GRU Driver | ||
3 | * | ||
4 | * MMUOPS callbacks + TLB flushing | ||
5 | * | ||
6 | * This file handles mmu notifier callbacks from the core kernel. The callbacks | ||
7 | * are used to update the TLB in the GRU as a result of changes in the | ||
8 | * state of a process address space. This file also handles TLB invalidates | ||
9 | * from the GRU driver. | ||
10 | * | ||
11 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or modify | ||
14 | * it under the terms of the GNU General Public License as published by | ||
15 | * the Free Software Foundation; either version 2 of the License, or | ||
16 | * (at your option) any later version. | ||
17 | * | ||
18 | * This program is distributed in the hope that it will be useful, | ||
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
21 | * GNU General Public License for more details. | ||
22 | * | ||
23 | * You should have received a copy of the GNU General Public License | ||
24 | * along with this program; if not, write to the Free Software | ||
25 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
26 | */ | ||
27 | |||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/list.h> | ||
30 | #include <linux/spinlock.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/slab.h> | ||
33 | #include <linux/device.h> | ||
34 | #include <linux/hugetlb.h> | ||
35 | #include <linux/delay.h> | ||
36 | #include <linux/timex.h> | ||
37 | #include <linux/srcu.h> | ||
38 | #include <asm/processor.h> | ||
39 | #include "gru.h" | ||
40 | #include "grutables.h" | ||
41 | #include <asm/uv/uv_hub.h> | ||
42 | |||
43 | #define gru_random() get_cycles() | ||
44 | |||
45 | /* ---------------------------------- TLB Invalidation functions -------- | ||
46 | * get_tgh_handle | ||
47 | * | ||
48 | * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the | ||
49 | * local blade, use a fixed TGH that is a function of the blade-local cpu | ||
50 | * number. Normally, this TGH is private to the cpu & no contention occurs for | ||
51 | * the TGH. For offblade GRUs, select a random TGH in the range above the | ||
52 | * private TGHs. A spinlock is required to access this TGH & the lock must be | ||
53 | * released when the invalidate completes. This sucks, but it is the best we | ||
54 | * can do. | ||
55 | * | ||
56 | * Note that the spinlock is IN the TGH handle so locking does not involve | ||
57 | * additional cache lines. | ||
58 | * | ||
59 | */ | ||
60 | static inline int get_off_blade_tgh(struct gru_state *gru) | ||
61 | { | ||
62 | int n; | ||
63 | |||
64 | n = GRU_NUM_TGH - gru->gs_tgh_first_remote; | ||
65 | n = gru_random() % n; | ||
66 | n += gru->gs_tgh_first_remote; | ||
67 | return n; | ||
68 | } | ||
69 | |||
70 | static inline int get_on_blade_tgh(struct gru_state *gru) | ||
71 | { | ||
72 | return uv_blade_processor_id() >> gru->gs_tgh_local_shift; | ||
73 | } | ||
74 | |||
75 | static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state | ||
76 | *gru) | ||
77 | { | ||
78 | struct gru_tlb_global_handle *tgh; | ||
79 | int n; | ||
80 | |||
81 | preempt_disable(); | ||
82 | if (uv_numa_blade_id() == gru->gs_blade_id) | ||
83 | n = get_on_blade_tgh(gru); | ||
84 | else | ||
85 | n = get_off_blade_tgh(gru); | ||
86 | tgh = get_tgh_by_index(gru, n); | ||
87 | lock_tgh_handle(tgh); | ||
88 | |||
89 | return tgh; | ||
90 | } | ||
91 | |||
92 | static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh) | ||
93 | { | ||
94 | unlock_tgh_handle(tgh); | ||
95 | preempt_enable(); | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * gru_flush_tlb_range | ||
100 | * | ||
101 | * General purpose TLB invalidation function. This function scans every GRU in | ||
102 | * the ENTIRE system (partition) looking for GRUs where the specified MM has | ||
103 | * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR | ||
104 | * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned | ||
105 | * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the | ||
106 | * cost of (possibly) a large number of future TLB misses. | ||
107 | * | ||
108 | * The current algorithm is optimized based on the following (somewhat true) | ||
109 | * assumptions: | ||
110 | * - GRU contexts are not loaded into a GRU unless a reference is made to | ||
111 | * the data segment or control block (this is true, not an assumption). | ||
112 | * If a DS/CB is referenced, the user will also issue instructions that | ||
113 | * cause TLB misses. It is not necessary to optimize for the case where | ||
114 | * contexts are loaded but no instructions cause TLB misses. (I know | ||
115 | * this will happen but I'm not optimizing for it). | ||
116 | * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally | ||
117 | * a few usec but in unusual cases, it could be longer. Avoid if | ||
118 | * possible. | ||
119 | * - intrablade process migration between cpus is not frequent but | ||
120 | * happens often enough to matter. | ||
121 | * - a GRU context is not typically migrated to a different GRU on the | ||
122 | * blade because of intrablade migration | ||
123 | * - interblade migration is rare. Processes migrate their GRU context to | ||
124 | * the new blade. | ||
125 | * - if interblade migration occurs, migration back to the original blade | ||
126 | * is very rare (i.e., no optimization for this case) | ||
127 | * - most GRU instructions operate on a subset of the user REGIONS. Code | ||
128 | * & shared library regions are not likely targets of GRU instructions. | ||
129 | * | ||
130 | * To help improve the efficiency of TLB invalidation, the GMS data | ||
131 | * structure is maintained for EACH address space (MM struct). The GMS is | ||
132 | * also the structure that contains the pointer to the mmu callout | ||
133 | * functions. This structure is linked to the mm_struct for the address space | ||
134 | * using the mmu "register" function. The mmu interfaces are used to | ||
135 | * provide the callbacks for TLB invalidation. The GMS contains: | ||
136 | * | ||
137 | * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is | ||
138 | * loaded into the GRU. | ||
139 | * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in | ||
140 | * the above array | ||
141 | * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active | ||
142 | * in the GRU for the address space. This bitmap must be passed to the | ||
143 | * GRU to do an invalidate. | ||
144 | * | ||
145 | * The current algorithm for invalidating TLBs is: | ||
146 | * - scan the asidmap for GRUs where the context has been loaded, ie, | ||
147 | * asid is non-zero. | ||
148 | * - for each gru found: | ||
149 | * - if the ctxtmap is non-zero, there are active contexts in the | ||
150 | * GRU. TLB invalidate instructions must be issued to the GRU. | ||
151 | * - if the ctxtmap is zero, no context is active. Set the ASID to | ||
152 | * zero to force a full TLB invalidation. This is fast but will | ||
153 | * cause a lot of TLB misses if the context is reloaded onto the | ||
154 | * GRU | ||
155 | * | ||
156 | */ | ||
157 | |||
158 | void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, | ||
159 | unsigned long len) | ||
160 | { | ||
161 | struct gru_state *gru; | ||
162 | struct gru_mm_tracker *asids; | ||
163 | struct gru_tlb_global_handle *tgh; | ||
164 | unsigned long num; | ||
165 | int grupagesize, pagesize, pageshift, gid, asid; | ||
166 | |||
167 | /* ZZZ TODO - handle huge pages */ | ||
168 | pageshift = PAGE_SHIFT; | ||
169 | pagesize = (1UL << pageshift); | ||
170 | grupagesize = GRU_PAGESIZE(pageshift); | ||
171 | num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL); | ||
172 | |||
173 | STAT(flush_tlb); | ||
174 | gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms, | ||
175 | start, len, gms->ms_asidmap[0]); | ||
176 | |||
177 | spin_lock(&gms->ms_asid_lock); | ||
178 | for_each_gru_in_bitmap(gid, gms->ms_asidmap) { | ||
179 | STAT(flush_tlb_gru); | ||
180 | gru = GID_TO_GRU(gid); | ||
181 | asids = gms->ms_asids + gid; | ||
182 | asid = asids->mt_asid; | ||
183 | if (asids->mt_ctxbitmap && asid) { | ||
184 | STAT(flush_tlb_gru_tgh); | ||
185 | asid = GRUASID(asid, start); | ||
186 | gru_dbg(grudev, | ||
187 | " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n", | ||
188 | gid, asid, num, asids->mt_ctxbitmap); | ||
189 | tgh = get_lock_tgh_handle(gru); | ||
190 | tgh_invalidate(tgh, start, 0, asid, grupagesize, 0, | ||
191 | num - 1, asids->mt_ctxbitmap); | ||
192 | get_unlock_tgh_handle(tgh); | ||
193 | } else { | ||
194 | STAT(flush_tlb_gru_zero_asid); | ||
195 | asids->mt_asid = 0; | ||
196 | __clear_bit(gru->gs_gid, gms->ms_asidmap); | ||
197 | gru_dbg(grudev, | ||
198 | " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n", | ||
199 | gid, asid, asids->mt_ctxbitmap, | ||
200 | gms->ms_asidmap[0]); | ||
201 | } | ||
202 | } | ||
203 | spin_unlock(&gms->ms_asid_lock); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Flush the entire TLB on a chiplet. | ||
208 | */ | ||
209 | void gru_flush_all_tlb(struct gru_state *gru) | ||
210 | { | ||
211 | struct gru_tlb_global_handle *tgh; | ||
212 | |||
213 | gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid); | ||
214 | tgh = get_lock_tgh_handle(gru); | ||
215 | tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0); | ||
216 | get_unlock_tgh_handle(tgh); | ||
217 | /* preemption was already reenabled by get_unlock_tgh_handle() */ | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * MMUOPS notifier callout functions | ||
222 | */ | ||
223 | static void gru_invalidate_range_start(struct mmu_notifier *mn, | ||
224 | struct mm_struct *mm, | ||
225 | unsigned long start, unsigned long end) | ||
226 | { | ||
227 | struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, | ||
228 | ms_notifier); | ||
229 | |||
230 | STAT(mmu_invalidate_range); | ||
231 | atomic_inc(&gms->ms_range_active); | ||
232 | gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms, | ||
233 | start, end, atomic_read(&gms->ms_range_active)); | ||
234 | gru_flush_tlb_range(gms, start, end - start); | ||
235 | } | ||
236 | |||
237 | static void gru_invalidate_range_end(struct mmu_notifier *mn, | ||
238 | struct mm_struct *mm, unsigned long start, | ||
239 | unsigned long end) | ||
240 | { | ||
241 | struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, | ||
242 | ms_notifier); | ||
243 | |||
244 | /* ..._and_test() provides needed barrier */ | ||
245 | (void)atomic_dec_and_test(&gms->ms_range_active); | ||
246 | |||
247 | wake_up_all(&gms->ms_wait_queue); | ||
248 | gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end); | ||
249 | } | ||
250 | |||
251 | static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, | ||
252 | unsigned long address) | ||
253 | { | ||
254 | struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, | ||
255 | ms_notifier); | ||
256 | |||
257 | STAT(mmu_invalidate_page); | ||
258 | gru_flush_tlb_range(gms, address, PAGE_SIZE); | ||
259 | gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address); | ||
260 | } | ||
261 | |||
262 | static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) | ||
263 | { | ||
264 | struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, | ||
265 | ms_notifier); | ||
266 | |||
267 | gms->ms_released = 1; | ||
268 | gru_dbg(grudev, "gms %p\n", gms); | ||
269 | } | ||
270 | |||
271 | |||
272 | static const struct mmu_notifier_ops gru_mmuops = { | ||
273 | .invalidate_page = gru_invalidate_page, | ||
274 | .invalidate_range_start = gru_invalidate_range_start, | ||
275 | .invalidate_range_end = gru_invalidate_range_end, | ||
276 | .release = gru_release, | ||
277 | }; | ||
278 | |||
279 | /* Move this to the basic mmu_notifier file. But for now... */ | ||
280 | static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, | ||
281 | const struct mmu_notifier_ops *ops) | ||
282 | { | ||
283 | struct mmu_notifier *mn, *gru_mn = NULL; | ||
284 | struct hlist_node *n; | ||
285 | |||
286 | if (mm->mmu_notifier_mm) { | ||
287 | rcu_read_lock(); | ||
288 | hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, | ||
289 | hlist) | ||
290 | if (mn->ops == ops) { | ||
291 | gru_mn = mn; | ||
292 | break; | ||
293 | } | ||
294 | rcu_read_unlock(); | ||
295 | } | ||
296 | return gru_mn; | ||
297 | } | ||
298 | |||
299 | struct gru_mm_struct *gru_register_mmu_notifier(void) | ||
300 | { | ||
301 | struct gru_mm_struct *gms; | ||
302 | struct mmu_notifier *mn; | ||
303 | |||
304 | mn = mmu_find_ops(current->mm, &gru_mmuops); | ||
305 | if (mn) { | ||
306 | gms = container_of(mn, struct gru_mm_struct, ms_notifier); | ||
307 | atomic_inc(&gms->ms_refcnt); | ||
308 | } else { | ||
309 | gms = kzalloc(sizeof(*gms), GFP_KERNEL); | ||
310 | if (gms) { | ||
311 | spin_lock_init(&gms->ms_asid_lock); | ||
312 | gms->ms_notifier.ops = &gru_mmuops; | ||
313 | atomic_set(&gms->ms_refcnt, 1); | ||
314 | init_waitqueue_head(&gms->ms_wait_queue); | ||
315 | __mmu_notifier_register(&gms->ms_notifier, current->mm); | ||
316 | } | ||
317 | } | ||
318 | gru_dbg(grudev, "gms %p, refcnt %d\n", gms, | ||
319 | atomic_read(&gms->ms_refcnt)); | ||
320 | return gms; | ||
321 | } | ||
322 | |||
323 | void gru_drop_mmu_notifier(struct gru_mm_struct *gms) | ||
324 | { | ||
325 | gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms, | ||
326 | atomic_read(&gms->ms_refcnt), gms->ms_released); | ||
327 | if (atomic_dec_return(&gms->ms_refcnt) == 0) { | ||
328 | if (!gms->ms_released) | ||
329 | mmu_notifier_unregister(&gms->ms_notifier, current->mm); | ||
330 | kfree(gms); | ||
331 | } | ||
332 | } | ||
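/*
 * Usage sketch (illustration only; the real callers presumably sit in
 * the fault and context-allocation paths):
 *
 *	struct gru_mm_struct *gms;
 *
 *	gms = gru_register_mmu_notifier();
 *	if (gms) {
 *		... track asids / receive TLB shootdowns via gms ...
 *		gru_drop_mmu_notifier(gms);
 *	}
 *
 * A second registration by the same mm finds the existing notifier and
 * only bumps ms_refcnt.
 */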
333 | |||
334 | /* | ||
335 | * Setup TGH parameters. There are: | ||
336 | * - 24 TGH handles per GRU chiplet | ||
337 | * - a portion (MAX_LOCAL_TGH) of the handles are reserved for | ||
338 | * use by blade-local cpus | ||
339 | * - the rest are used by off-blade cpus. This usage is | ||
340 | * less frequent than blade-local usage. | ||
341 | * | ||
342 | * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade | ||
343 | * has 16 or fewer cpus, each cpu has a unique handle that it can | ||
344 | * use. | ||
345 | */ | ||
346 | #define MAX_LOCAL_TGH 16 | ||
347 | |||
348 | void gru_tgh_flush_init(struct gru_state *gru) | ||
349 | { | ||
350 | int cpus, shift = 0, n; | ||
351 | |||
352 | cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id); | ||
353 | |||
354 | /* n = cpus rounded up to next power of 2 */ | ||
355 | if (cpus) { | ||
356 | n = 1 << fls(cpus - 1); | ||
357 | |||
358 | /* | ||
359 | * shift count for converting local cpu# to TGH index | ||
360 | * 0 if cpus <= MAX_LOCAL_TGH, | ||
361 | * 1 if cpus <= 2*MAX_LOCAL_TGH, | ||
362 | * etc | ||
363 | */ | ||
364 | shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1)); | ||
365 | } | ||
366 | gru->gs_tgh_local_shift = shift; | ||
367 | |||
368 | /* first starting TGH index to use for remote purges */ | ||
369 | gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift; | ||
370 | |||
371 | } | ||
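/*
 * Worked example (follows from the code above): a blade with 32
 * possible cpus gives n = 32, so gs_tgh_local_shift = 1 (two cpus
 * share each local TGH) and gs_tgh_first_remote = 16, leaving TGHs
 * 16..23 of the 24 per chiplet for off-blade flushes.
 */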