Diffstat (limited to 'drivers/misc/sgi-gru')
-rw-r--r--   drivers/misc/sgi-gru/Makefile               3
-rw-r--r--   drivers/misc/sgi-gru/gru.h                 67
-rw-r--r--   drivers/misc/sgi-gru/gru_instructions.h   669
-rw-r--r--   drivers/misc/sgi-gru/grufault.c           633
-rw-r--r--   drivers/misc/sgi-gru/grufile.c            485
-rw-r--r--   drivers/misc/sgi-gru/gruhandles.h         663
-rw-r--r--   drivers/misc/sgi-gru/grukservices.c       679
-rw-r--r--   drivers/misc/sgi-gru/grukservices.h       134
-rw-r--r--   drivers/misc/sgi-gru/grulib.h              97
-rw-r--r--   drivers/misc/sgi-gru/grumain.c            802
-rw-r--r--   drivers/misc/sgi-gru/gruprocfs.c          336
-rw-r--r--   drivers/misc/sgi-gru/grutables.h          609
-rw-r--r--   drivers/misc/sgi-gru/grutlbpurge.c        371
13 files changed, 5548 insertions, 0 deletions
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
new file mode 100644
index 000000000000..d03597a521b0
--- /dev/null
+++ b/drivers/misc/sgi-gru/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_SGI_GRU) := gru.o
2gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o
3
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
new file mode 100644
index 000000000000..40df7cb3f0a5
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __GRU_H__
20#define __GRU_H__
21
22/*
23 * GRU architectural definitions
24 */
25#define GRU_CACHE_LINE_BYTES 64
26#define GRU_HANDLE_STRIDE 256
27#define GRU_CB_BASE 0
28#define GRU_DS_BASE 0x20000
29
30/*
31 * Size used to map GRU GSeg
32 */
33#if defined CONFIG_IA64
34#define GRU_GSEG_PAGESIZE (256 * 1024UL)
35#elif defined CONFIG_X86_64
36#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */
37#else
38#error "Unsupported architecture"
39#endif
40
41/*
42 * Structure for obtaining GRU resource information
43 */
44struct gru_chiplet_info {
45 int node;
46 int chiplet;
47 int blade;
48 int total_dsr_bytes;
49 int total_cbr;
50 int total_user_dsr_bytes;
51 int total_user_cbr;
52 int free_user_dsr_bytes;
53 int free_user_cbr;
54};
55
56/* Flags for GRU options on the gru_create_context() call */
57/* Select one of the following 4 options to specify how TLB misses are handled */
58#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */
59#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */
60#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to
61 handle fault */
62#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */
63#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */
64
65
66
67#endif /* __GRU_H__ */
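
The four mode values share the low two bits of the options word, which is what GRU_OPT_MISS_MASK selects; gru_create_new_context() in grufile.c later in this patch falls back to GRU_OPT_MISS_FMM_INTR when no mode is given. A minimal sketch of how a caller picks a mode and how the driver recovers it (the variable names are illustrative only):

unsigned long options = GRU_OPT_MISS_FMM_INTR;	/* deliver TLB faults as interrupts */
int miss_mode = options & GRU_OPT_MISS_MASK;	/* driver side: == GRU_OPT_MISS_FMM_INTR */
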
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
new file mode 100644
index 000000000000..0dc36225c7c6
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -0,0 +1,669 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __GRU_INSTRUCTIONS_H__
20#define __GRU_INSTRUCTIONS_H__
21
22#define gru_flush_cache_hook(p)
23#define gru_emulator_wait_hook(p, w)
24
25/*
26 * Architecture dependent functions
27 */
28
29#if defined CONFIG_IA64
30#include <linux/compiler.h>
31#include <asm/intrinsics.h>
32#define __flush_cache(p) ia64_fc(p)
33/* Use volatile on IA64 to ensure ordering via st4.rel */
34#define gru_ordered_store_int(p,v) \
35 do { \
36 barrier(); \
37 *((volatile int *)(p)) = v; /* force st.rel */ \
38 } while (0)
39#elif defined CONFIG_X86_64
40#define __flush_cache(p) clflush(p)
41#define gru_ordered_store_int(p,v) \
42 do { \
43 barrier(); \
44 *(int *)p = v; \
45 } while (0)
46#else
47#error "Unsupported architecture"
48#endif
49
50/*
51 * Control block status and exception codes
52 */
53#define CBS_IDLE 0
54#define CBS_EXCEPTION 1
55#define CBS_ACTIVE 2
56#define CBS_CALL_OS 3
57
58/* CB substatus bitmasks */
59#define CBSS_MSG_QUEUE_MASK 7
60#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8
61
62/* CB substatus message queue values (low 3 bits of substatus) */
63#define CBSS_NO_ERROR 0
64#define CBSS_LB_OVERFLOWED 1
65#define CBSS_QLIMIT_REACHED 2
66#define CBSS_PAGE_OVERFLOW 3
67#define CBSS_AMO_NACKED 4
68#define CBSS_PUT_NACKED 5
69
70/*
71 * Structure used to fetch exception detail for CBs that terminate with
72 * CBS_EXCEPTION
73 */
74struct control_block_extended_exc_detail {
75 unsigned long cb;
76 int opc;
77 int ecause;
78 int exopc;
79 long exceptdet0;
80 int exceptdet1;
81};
82
83/*
84 * Instruction formats
85 */
86
87/*
88 * Generic instruction format.
89 * This definition has precise bit field definitions.
90 */
91struct gru_instruction_bits {
92 /* DW 0 - low */
93 unsigned int icmd: 1;
94 unsigned char ima: 3; /* CB_DelRep, unmapped mode */
95 unsigned char reserved0: 4;
96 unsigned int xtype: 3;
97 unsigned int iaa0: 2;
98 unsigned int iaa1: 2;
99 unsigned char reserved1: 1;
100 unsigned char opc: 8; /* opcode */
101 unsigned char exopc: 8; /* extended opcode */
102 /* DW 0 - high */
103 unsigned int idef2: 22; /* TRi0 */
104 unsigned char reserved2: 2;
105 unsigned char istatus: 2;
106 unsigned char isubstatus:4;
107 unsigned char reserved3: 2;
108 /* DW 1 */
109 unsigned long idef4; /* 42 bits: TRi1, BufSize */
110 /* DW 2-6 */
111 unsigned long idef1; /* BAddr0 */
112 unsigned long idef5; /* Nelem */
113 unsigned long idef6; /* Stride, Operand1 */
114 unsigned long idef3; /* BAddr1, Value, Operand2 */
115 unsigned long reserved4;
116 /* DW 7 */
117 unsigned long avalue; /* AValue */
118};
119
120/*
121 * Generic instruction with friendlier names. This format is used
122 * for inline instructions.
123 */
124struct gru_instruction {
125 /* DW 0 */
126 unsigned int op32; /* icmd,xtype,iaa0,ima,opc */
127 unsigned int tri0;
128 unsigned long tri1_bufsize; /* DW 1 */
129 unsigned long baddr0; /* DW 2 */
130 unsigned long nelem; /* DW 3 */
131 unsigned long op1_stride; /* DW 4 */
132 unsigned long op2_value_baddr1; /* DW 5 */
133 unsigned long reserved0; /* DW 6 */
134 unsigned long avalue; /* DW 7 */
135};
136
137/* Some shifts and masks for the low 32 bits of a GRU command */
138#define GRU_CB_ICMD_SHFT 0
139#define GRU_CB_ICMD_MASK 0x1
140#define GRU_CB_XTYPE_SHFT 8
141#define GRU_CB_XTYPE_MASK 0x7
142#define GRU_CB_IAA0_SHFT 11
143#define GRU_CB_IAA0_MASK 0x3
144#define GRU_CB_IAA1_SHFT 13
145#define GRU_CB_IAA1_MASK 0x3
146#define GRU_CB_IMA_SHFT 1
147#define GRU_CB_IMA_MASK 0x3
148#define GRU_CB_OPC_SHFT 16
149#define GRU_CB_OPC_MASK 0xff
150#define GRU_CB_EXOPC_SHFT 24
151#define GRU_CB_EXOPC_MASK 0xff
152
153/* GRU instruction opcodes (opc field) */
154#define OP_NOP 0x00
155#define OP_BCOPY 0x01
156#define OP_VLOAD 0x02
157#define OP_IVLOAD 0x03
158#define OP_VSTORE 0x04
159#define OP_IVSTORE 0x05
160#define OP_VSET 0x06
161#define OP_IVSET 0x07
162#define OP_MESQ 0x08
163#define OP_GAMXR 0x09
164#define OP_GAMIR 0x0a
165#define OP_GAMIRR 0x0b
166#define OP_GAMER 0x0c
167#define OP_GAMERR 0x0d
168#define OP_BSTORE 0x0e
169#define OP_VFLUSH 0x0f
170
171
172/* Extended opcodes values (exopc field) */
173
174/* GAMIR - AMOs with implicit operands */
175#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */
176#define EOP_IR_CLR 0x02 /* Fetch and clear */
177#define EOP_IR_INC 0x05 /* Fetch and increment */
178#define EOP_IR_DEC 0x07 /* Fetch and decrement */
179#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */
180#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */
181
182/* GAMIRR - Registered AMOs with implicit operands */
183#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */
184#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */
185#define EOP_IRR_INC 0x05 /* Registered fetch and increment */
186#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */
187#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/
188
189/* GAMER - AMOs with explicit operands */
190#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */
191#define EOP_ER_OR 0x01 /* Logical OR with memory */
192#define EOP_ER_AND 0x02 /* Logical AND with memory */
193#define EOP_ER_XOR 0x03 /* Logical XOR with memory */
194#define EOP_ER_ADD 0x04 /* Add value to memory */
195#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
196#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */
197
198/* GAMERR - Registered AMOs with explicit operands */
199#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */
200#define EOP_ERR_OR 0x01 /* Logical OR with memory */
201#define EOP_ERR_AND 0x02 /* Logical AND with memory */
202#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */
203#define EOP_ERR_ADD 0x04 /* Add value to memory */
204#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
205#define EOP_ERR_EPOLL 0x09 /* Poll for equality */
206#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */
207
208/* GAMXR - SGI Arithmetic unit */
209#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */
210
211
212/* Transfer types (xtype field) */
213#define XTYPE_B 0x0 /* byte */
214#define XTYPE_S 0x1 /* short (2-byte) */
215#define XTYPE_W 0x2 /* word (4-byte) */
216#define XTYPE_DW 0x3 /* doubleword (8-byte) */
217#define XTYPE_CL 0x6 /* cacheline (64-byte) */
218
219
220/* Instruction access attributes (iaa0, iaa1 fields) */
221#define IAA_RAM 0x0 /* normal cached RAM access */
222#define IAA_NCRAM 0x2 /* noncoherent RAM access */
223#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */
224#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */
225
226
227/* Instruction mode attributes (ima field) */
228#define IMA_MAPPED 0x0 /* Virtual mode */
229#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */
230#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */
231#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */
232
233/* CBE ecause bits */
234#define CBE_CAUSE_RI (1 << 0)
235#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1)
236#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2)
237#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3)
238#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4)
239#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5)
240#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6)
241#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7)
242#define CBE_CAUSE_TLBHW_ERROR (1 << 8)
243#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9)
244#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10)
245#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11)
246#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12)
247#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13)
248#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14)
249#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15)
250#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16)
251#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17)
252
253/*
254 * Exceptions are retried for the following cases. If any OTHER bits are set
255 * in ecause, the exception is not retryable.
256 */
257#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \
258 CBE_CAUSE_RA_REQUEST_TIMEOUT | \
259 CBE_CAUSE_TLBHW_ERROR | \
260 CBE_CAUSE_HA_REQUEST_TIMEOUT)
261
262/* Message queue head structure */
263union gru_mesqhead {
264 unsigned long val;
265 struct {
266 unsigned int head;
267 unsigned int limit;
268 };
269};
270
271
272/* Generate the low word of a GRU instruction */
273static inline unsigned int
274__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
275 unsigned char iaa0, unsigned char iaa1,
276 unsigned char ima)
277{
278 return (1 << GRU_CB_ICMD_SHFT) |
279 (iaa0 << GRU_CB_IAA0_SHFT) |
280 (iaa1 << GRU_CB_IAA1_SHFT) |
281 (ima << GRU_CB_IMA_SHFT) |
282 (xtype << GRU_CB_XTYPE_SHFT) |
283 (opcode << GRU_CB_OPC_SHFT) |
284 (exopc << GRU_CB_EXOPC_SHFT);
285}
286
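/*
 * Illustration only (not part of the driver): __opword() packs the fields,
 * and the GRU_CB_* shifts/masks above recover them, e.g. when decoding an
 * op32 value seen in a dump.  The helper names below are made up.
 */
static inline unsigned char __op_opcode(unsigned int op32)
{
	return (op32 >> GRU_CB_OPC_SHFT) & GRU_CB_OPC_MASK;
}

static inline unsigned char __op_exopcode(unsigned int op32)
{
	return (op32 >> GRU_CB_EXOPC_SHFT) & GRU_CB_EXOPC_MASK;
}

/* e.g. __op_opcode(__opword(OP_VLOAD, 0, XTYPE_DW, IAA_RAM, 0, 0)) == OP_VLOAD */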
287/*
288 * Architecture specific intrinsics
289 */
290static inline void gru_flush_cache(void *p)
291{
292 __flush_cache(p);
293}
294
295/*
296 * Store the lower 32 bits of the command including the "start" bit. Then
297 * start the instruction executing.
298 */
299static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
300{
301 gru_ordered_store_int(ins, op32);
302}
303
304
305/* Convert "hints" to IMA */
306#define CB_IMA(h) ((h) | IMA_UNMAPPED)
307
308/* Convert data segment cache line index into TRI0 / TRI1 value */
309#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES)
310
311/* Inline functions for GRU instructions.
312 * Note:
313 * - nelem and stride are in elements
314 * - tri0/tri1 is the byte offset from the beginning of the data segment.
315 */
316static inline void gru_vload(void *cb, unsigned long mem_addr,
317 unsigned int tri0, unsigned char xtype, unsigned long nelem,
318 unsigned long stride, unsigned long hints)
319{
320 struct gru_instruction *ins = (struct gru_instruction *)cb;
321
322 ins->baddr0 = (long)mem_addr;
323 ins->nelem = nelem;
324 ins->tri0 = tri0;
325 ins->op1_stride = stride;
326 gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
327 CB_IMA(hints)));
328}
329
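/*
 * Hedged usage sketch (not part of the driver): the calling pattern these
 * wrappers expect is "fill a CB, wait, read the DSR".  'gpa' must already
 * be a GRU physical address (e.g. from uv_soc_phys_ram_to_gpa()) because
 * CB_IMA() forces unmapped mode; gru_wait() and gru_get_cb_pointer() used
 * here are defined further down in this header.
 */
static inline int example_vload_one_line(void *gseg, unsigned long gpa)
{
	void *cb = gru_get_cb_pointer(gseg, 0);

	gru_vload(cb, gpa, GRU_DINDEX(0), XTYPE_CL, 1, 1, 0);
	return gru_wait(cb);	/* 0 (CBS_IDLE): data is now in DSR line 0 */
}
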
330static inline void gru_vstore(void *cb, unsigned long mem_addr,
331 unsigned int tri0, unsigned char xtype, unsigned long nelem,
332 unsigned long stride, unsigned long hints)
333{
334 struct gru_instruction *ins = (void *)cb;
335
336 ins->baddr0 = (long)mem_addr;
337 ins->nelem = nelem;
338 ins->tri0 = tri0;
339 ins->op1_stride = stride;
340 gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
341 CB_IMA(hints)));
342}
343
344static inline void gru_ivload(void *cb, unsigned long mem_addr,
345 unsigned int tri0, unsigned int tri1, unsigned char xtype,
346 unsigned long nelem, unsigned long hints)
347{
348 struct gru_instruction *ins = (void *)cb;
349
350 ins->baddr0 = (long)mem_addr;
351 ins->nelem = nelem;
352 ins->tri0 = tri0;
353 ins->tri1_bufsize = tri1;
354 gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
355 CB_IMA(hints)));
356}
357
358static inline void gru_ivstore(void *cb, unsigned long mem_addr,
359 unsigned int tri0, unsigned int tri1,
360 unsigned char xtype, unsigned long nelem, unsigned long hints)
361{
362 struct gru_instruction *ins = (void *)cb;
363
364 ins->baddr0 = (long)mem_addr;
365 ins->nelem = nelem;
366 ins->tri0 = tri0;
367 ins->tri1_bufsize = tri1;
368 gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
369 CB_IMA(hints)));
370}
371
372static inline void gru_vset(void *cb, unsigned long mem_addr,
373 unsigned long value, unsigned char xtype, unsigned long nelem,
374 unsigned long stride, unsigned long hints)
375{
376 struct gru_instruction *ins = (void *)cb;
377
378 ins->baddr0 = (long)mem_addr;
379 ins->op2_value_baddr1 = value;
380 ins->nelem = nelem;
381 ins->op1_stride = stride;
382 gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0,
383 CB_IMA(hints)));
384}
385
386static inline void gru_ivset(void *cb, unsigned long mem_addr,
387 unsigned int tri1, unsigned long value, unsigned char xtype,
388 unsigned long nelem, unsigned long hints)
389{
390 struct gru_instruction *ins = (void *)cb;
391
392 ins->baddr0 = (long)mem_addr;
393 ins->op2_value_baddr1 = value;
394 ins->nelem = nelem;
395 ins->tri1_bufsize = tri1;
396 gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0,
397 CB_IMA(hints)));
398}
399
400static inline void gru_vflush(void *cb, unsigned long mem_addr,
401 unsigned long nelem, unsigned char xtype, unsigned long stride,
402 unsigned long hints)
403{
404 struct gru_instruction *ins = (void *)cb;
405
406 ins->baddr0 = (long)mem_addr;
407 ins->op1_stride = stride;
408 ins->nelem = nelem;
409 gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
410 CB_IMA(hints)));
411}
412
413static inline void gru_nop(void *cb, int hints)
414{
415 struct gru_instruction *ins = (void *)cb;
416
417 gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints)));
418}
419
420
421static inline void gru_bcopy(void *cb, const unsigned long src,
422 unsigned long dest,
423 unsigned int tri0, unsigned int xtype, unsigned long nelem,
424 unsigned int bufsize, unsigned long hints)
425{
426 struct gru_instruction *ins = (void *)cb;
427
428 ins->baddr0 = (long)src;
429 ins->op2_value_baddr1 = (long)dest;
430 ins->nelem = nelem;
431 ins->tri0 = tri0;
432 ins->tri1_bufsize = bufsize;
433 gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM,
434 IAA_RAM, CB_IMA(hints)));
435}
436
437static inline void gru_bstore(void *cb, const unsigned long src,
438 unsigned long dest, unsigned int tri0, unsigned int xtype,
439 unsigned long nelem, unsigned long hints)
440{
441 struct gru_instruction *ins = (void *)cb;
442
443 ins->baddr0 = (long)src;
444 ins->op2_value_baddr1 = (long)dest;
445 ins->nelem = nelem;
446 ins->tri0 = tri0;
447 gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
448 CB_IMA(hints)));
449}
450
451static inline void gru_gamir(void *cb, int exopc, unsigned long src,
452 unsigned int xtype, unsigned long hints)
453{
454 struct gru_instruction *ins = (void *)cb;
455
456 ins->baddr0 = (long)src;
457 gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
458 CB_IMA(hints)));
459}
460
461static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
462 unsigned int xtype, unsigned long hints)
463{
464 struct gru_instruction *ins = (void *)cb;
465
466 ins->baddr0 = (long)src;
467 gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
468 CB_IMA(hints)));
469}
470
471static inline void gru_gamer(void *cb, int exopc, unsigned long src,
472 unsigned int xtype,
473 unsigned long operand1, unsigned long operand2,
474 unsigned long hints)
475{
476 struct gru_instruction *ins = (void *)cb;
477
478 ins->baddr0 = (long)src;
479 ins->op1_stride = operand1;
480 ins->op2_value_baddr1 = operand2;
481 gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
482 CB_IMA(hints)));
483}
484
485static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
486 unsigned int xtype, unsigned long operand1,
487 unsigned long operand2, unsigned long hints)
488{
489 struct gru_instruction *ins = (void *)cb;
490
491 ins->baddr0 = (long)src;
492 ins->op1_stride = operand1;
493 ins->op2_value_baddr1 = operand2;
494 gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
495 CB_IMA(hints)));
496}
497
498static inline void gru_gamxr(void *cb, unsigned long src,
499 unsigned int tri0, unsigned long hints)
500{
501 struct gru_instruction *ins = (void *)cb;
502
503 ins->baddr0 = (long)src;
504 ins->nelem = 4;
505 gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
506 IAA_RAM, 0, CB_IMA(hints)));
507}
508
509static inline void gru_mesq(void *cb, unsigned long queue,
510 unsigned long tri0, unsigned long nelem,
511 unsigned long hints)
512{
513 struct gru_instruction *ins = (void *)cb;
514
515 ins->baddr0 = (long)queue;
516 ins->nelem = nelem;
517 ins->tri0 = tri0;
518 gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
519 CB_IMA(hints)));
520}
521
522static inline unsigned long gru_get_amo_value(void *cb)
523{
524 struct gru_instruction *ins = (void *)cb;
525
526 return ins->avalue;
527}
528
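/*
 * Hedged sketch (not part of the driver): an explicit-operand atomic.
 * EOP_ER_CSWAP compares memory with operand2 and writes operand1 on a
 * match; the assumption that the pre-operation contents come back in
 * AValue is mine - this header only provides the accessor.  gru_wait_abort()
 * is defined further down.
 */
static inline unsigned long example_cswap(void *cb, unsigned long gpa,
					  unsigned long oldval,
					  unsigned long newval)
{
	gru_gamer(cb, EOP_ER_CSWAP, gpa, XTYPE_DW, newval, oldval, 0);
	gru_wait_abort(cb);		/* real code would check gru_wait() */
	return gru_get_amo_value(cb);	/* == oldval iff the swap took effect */
}
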
529static inline int gru_get_amo_value_head(void *cb)
530{
531 struct gru_instruction *ins = (void *)cb;
532
533 return ins->avalue & 0xffffffff;
534}
535
536static inline int gru_get_amo_value_limit(void *cb)
537{
538 struct gru_instruction *ins = (void *)cb;
539
540 return ins->avalue >> 32;
541}
542
543static inline union gru_mesqhead gru_mesq_head(int head, int limit)
544{
545 union gru_mesqhead mqh;
546
547 mqh.head = head;
548 mqh.limit = limit;
549 return mqh;
550}
551
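/*
 * Hedged sketch (not part of the driver): the send side of a message queue.
 * A message staged at DSR byte offset 'tri' is pushed onto the queue at GRU
 * address 'qaddr'; on failure the low substatus bits carry one of the CBSS_*
 * codes defined near the top of this file.  gru_wait() and the substatus
 * accessor are defined below; the real users live in grukservices.c
 * elsewhere in this patch.
 */
static inline int example_mesq_send(void *cb, unsigned long qaddr, int tri)
{
	gru_mesq(cb, qaddr, tri, 1, 0);
	if (gru_wait(cb) == 0)
		return CBSS_NO_ERROR;
	/* CBSS_QLIMIT_REACHED: queue full, retry after the receiver drains it;
	 * CBSS_PUT_NACKED / CBSS_AMO_NACKED: the remote node rejected the put. */
	return gru_get_cb_message_queue_substatus(cb);
}
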
552/*
553 * Get struct control_block_extended_exc_detail for CB.
554 */
555extern int gru_get_cb_exception_detail(void *cb,
556 struct control_block_extended_exc_detail *excdet);
557
558#define GRU_EXC_STR_SIZE 256
559
560extern int gru_check_status_proc(void *cb);
561extern int gru_wait_proc(void *cb);
562extern void gru_wait_abort_proc(void *cb);
563
564/*
565 * Control block definition for checking status
566 */
567struct gru_control_block_status {
568 unsigned int icmd :1;
569 unsigned int unused1 :31;
570 unsigned int unused2 :24;
571 unsigned int istatus :2;
572 unsigned int isubstatus :4;
573 unsigned int inused3 :2;
574};
575
576/* Get CB status */
577static inline int gru_get_cb_status(void *cb)
578{
579 struct gru_control_block_status *cbs = (void *)cb;
580
581 return cbs->istatus;
582}
583
584/* Get CB message queue substatus */
585static inline int gru_get_cb_message_queue_substatus(void *cb)
586{
587 struct gru_control_block_status *cbs = (void *)cb;
588
589 return cbs->isubstatus & CBSS_MSG_QUEUE_MASK;
590}
591
592/* Get CB substatus */
593static inline int gru_get_cb_substatus(void *cb)
594{
595 struct gru_control_block_status *cbs = (void *)cb;
596
597 return cbs->isubstatus;
598}
599
600/* Check the status of a CB. If the CB is in UPM mode, call the
601 * OS to handle the UPM status.
602 * Returns the CB status field value (0 for normal completion)
603 */
604static inline int gru_check_status(void *cb)
605{
606 struct gru_control_block_status *cbs = (void *)cb;
607 int ret = cbs->istatus;
608
609 if (ret == CBS_CALL_OS)
610 ret = gru_check_status_proc(cb);
611 return ret;
612}
613
614/* Wait for CB to complete.
615 * Returns the CB status field value (0 for normal completion)
616 */
617static inline int gru_wait(void *cb)
618{
619 struct gru_control_block_status *cbs = (void *)cb;
620	int ret = cbs->istatus;
621
622 if (ret != CBS_IDLE)
623 ret = gru_wait_proc(cb);
624 return ret;
625}
626
627/* Wait for CB to complete. Aborts program if error. (Note: error does NOT
628 * mean TLB miss - only fatal errors such as memory parity error or user
629 * bugs will cause termination.)
630 */
631static inline void gru_wait_abort(void *cb)
632{
633 struct gru_control_block_status *cbs = (void *)cb;
634
635 if (cbs->istatus != CBS_IDLE)
636 gru_wait_abort_proc(cb);
637}
638
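/*
 * Hedged sketch (not part of the driver): when gru_wait() returns
 * CBS_EXCEPTION, the ecause bits decide whether the operation is worth
 * retrying, per the EXCEPTION_RETRY_BITS comment above.
 */
static inline int example_cb_exception_is_retryable(void *cb)
{
	struct control_block_extended_exc_detail excdet;

	if (gru_get_cb_exception_detail(cb, &excdet))
		return 0;		/* no detail available - treat as fatal */
	return (excdet.ecause & ~EXCEPTION_RETRY_BITS) == 0;
}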
639
640/*
641 * Get a pointer to a control block
642 * gseg - GSeg address returned from gru_get_thread_gru_segment()
643 * index - index of desired CB
644 */
645static inline void *gru_get_cb_pointer(void *gseg,
646 int index)
647{
648 return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE;
649}
650
651/*
652 * Get a pointer to a cacheline in the data segment portion of a GSeg
653 * gseg - GSeg address returned from gru_get_thread_gru_segment()
654 * index - index of desired cache line
655 */
656static inline void *gru_get_data_pointer(void *gseg, int index)
657{
658 return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES;
659}
660
661/*
662 * Convert a vaddr into the tri index within the GSEG
663 * vaddr - virtual address within the gseg
664 */
665static inline int gru_get_tri(void *vaddr)
666{
667 return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE;
668}
669#endif /* __GRU_INSTRUCTIONS_H__ */
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
new file mode 100644
index 000000000000..3d33015bbf31
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -0,0 +1,633 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * FAULT HANDLER FOR GRU DETECTED TLB MISSES
5 *
6 * This file contains code that handles TLB misses within the GRU.
7 * These misses are reported either via interrupts or user polling of
8 * the user CB.
9 *
10 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/errno.h>
29#include <linux/spinlock.h>
30#include <linux/mm.h>
31#include <linux/hugetlb.h>
32#include <linux/device.h>
33#include <linux/io.h>
34#include <linux/uaccess.h>
35#include <asm/pgtable.h>
36#include "gru.h"
37#include "grutables.h"
38#include "grulib.h"
39#include "gru_instructions.h"
40#include <asm/uv/uv_hub.h>
41
42/*
43 * Test if a physical address is a valid GRU GSEG address
44 */
45static inline int is_gru_paddr(unsigned long paddr)
46{
47 return paddr >= gru_start_paddr && paddr < gru_end_paddr;
48}
49
50/*
51 * Find the vma of a GRU segment. Caller must hold mmap_sem.
52 */
53struct vm_area_struct *gru_find_vma(unsigned long vaddr)
54{
55 struct vm_area_struct *vma;
56
57 vma = find_vma(current->mm, vaddr);
58 if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops)
59 return vma;
60 return NULL;
61}
62
63/*
64 * Find and lock the gts that contains the specified user vaddr.
65 *
66 * Returns:
67 * - *gts with the mmap_sem locked for read and the GTS locked.
68 * - NULL if vaddr invalid OR is not a valid GSEG vaddr.
69 */
70
71static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
72{
73 struct mm_struct *mm = current->mm;
74 struct vm_area_struct *vma;
75 struct gru_thread_state *gts = NULL;
76
77 down_read(&mm->mmap_sem);
78 vma = gru_find_vma(vaddr);
79 if (vma)
80 gts = gru_find_thread_state(vma, TSID(vaddr, vma));
81 if (gts)
82 mutex_lock(&gts->ts_ctxlock);
83 else
84 up_read(&mm->mmap_sem);
85 return gts;
86}
87
88static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
89{
90 struct mm_struct *mm = current->mm;
91 struct vm_area_struct *vma;
92 struct gru_thread_state *gts = NULL;
93
94 down_write(&mm->mmap_sem);
95 vma = gru_find_vma(vaddr);
96 if (vma)
97 gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
98 if (gts) {
99 mutex_lock(&gts->ts_ctxlock);
100 downgrade_write(&mm->mmap_sem);
101 } else {
102 up_write(&mm->mmap_sem);
103 }
104
105 return gts;
106}
107
108/*
109 * Unlock a GTS that was previously locked with gru_find_lock_gts().
110 */
111static void gru_unlock_gts(struct gru_thread_state *gts)
112{
113 mutex_unlock(&gts->ts_ctxlock);
114 up_read(&current->mm->mmap_sem);
115}
116
117/*
118 * Set a CB.istatus to active using a user virtual address. This must be done
119 * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
120 * If the line is evicted, the status may be lost. The in-cache update
121 * is necessary to prevent the user from seeing a stale cb.istatus that will
122 * change as soon as the TFH restart is complete. Races may cause an
123 * occasional failure to clear the cb.istatus, but that is ok.
124 *
125 * If the cb address is not valid (should not happen, but...), nothing
126 * bad will happen. The get_user()/put_user() will fail but there
127 * are no bad side-effects.
128 */
129static void gru_cb_set_istatus_active(unsigned long __user *cb)
130{
131 union {
132 struct gru_instruction_bits bits;
133 unsigned long dw;
134 } u;
135
136 if (cb) {
137 get_user(u.dw, cb);
138 u.bits.istatus = CBS_ACTIVE;
139 put_user(u.dw, cb);
140 }
141}
142
143/*
144 * Convert an interrupt IRQ to a pointer to the GRU GTS that caused the
145 * interrupt. Interrupts are always sent to a cpu on the blade that contains the
146 * GRU (except for headless blades which are not currently supported). A blade
147 * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ
148 * number uniquely identifies the GRU chiplet on the local blade that caused the
149 * interrupt. Always called in interrupt context.
150 */
151static inline struct gru_state *irq_to_gru(int irq)
152{
153 return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU];
154}
155
156/*
157 * Read & clear a TFM
158 *
159 * The GRU has an array of fault maps. A map is private to a cpu.
160 * Only one cpu will be accessing a cpu's fault map.
161 *
162 * This function scans the cpu-private fault map & clears all bits that
163 * are set. The function returns a bitmap that indicates the bits that
164 * were cleared. Note that since the maps may be updated asynchronously by
165 * the GRU, atomic operations must be used to clear bits.
166 */
167static void get_clear_fault_map(struct gru_state *gru,
168 struct gru_tlb_fault_map *map)
169{
170 unsigned long i, k;
171 struct gru_tlb_fault_map *tfm;
172
173 tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
174 prefetchw(tfm); /* Helps on hardware, required for emulator */
175 for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
176 k = tfm->fault_bits[i];
177 if (k)
178 k = xchg(&tfm->fault_bits[i], 0UL);
179 map->fault_bits[i] = k;
180 }
181
182 /*
183 * Not functionally required but helps performance. (Required
184 * on emulator)
185 */
186 gru_flush_cache(tfm);
187}
188
189/*
190 * Atomic (interrupt context) & non-atomic (user context) functions to
191 * convert a vaddr into a physical address. The size of the page
192 * is returned in pageshift.
193 * returns:
194 * 0 - successful
195 * < 0 - error code
196 * 1 - (atomic only) try again in non-atomic context
197 */
198static int non_atomic_pte_lookup(struct vm_area_struct *vma,
199 unsigned long vaddr, int write,
200 unsigned long *paddr, int *pageshift)
201{
202 struct page *page;
203
204 /* ZZZ Need to handle HUGE pages */
205 if (is_vm_hugetlb_page(vma))
206 return -EFAULT;
207 *pageshift = PAGE_SHIFT;
208 if (get_user_pages
209 (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
210 return -EFAULT;
211 *paddr = page_to_phys(page);
212 put_page(page);
213 return 0;
214}
215
216/*
217 *
218 * atomic_pte_lookup
219 *
220 * Convert a user virtual address to a physical address
221 * Only supports Intel large pages (2MB only) on x86_64.
222 * ZZZ - hugepage support is incomplete
223 */
224static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
225 int write, unsigned long *paddr, int *pageshift)
226{
227 pgd_t *pgdp;
228 pmd_t *pmdp;
229 pud_t *pudp;
230 pte_t pte;
231
232 WARN_ON(irqs_disabled()); /* ZZZ debug */
233
234 local_irq_disable();
235 pgdp = pgd_offset(vma->vm_mm, vaddr);
236 if (unlikely(pgd_none(*pgdp)))
237 goto err;
238
239 pudp = pud_offset(pgdp, vaddr);
240 if (unlikely(pud_none(*pudp)))
241 goto err;
242
243 pmdp = pmd_offset(pudp, vaddr);
244 if (unlikely(pmd_none(*pmdp)))
245 goto err;
246#ifdef CONFIG_X86_64
247 if (unlikely(pmd_large(*pmdp)))
248 pte = *(pte_t *) pmdp;
249 else
250#endif
251 pte = *pte_offset_kernel(pmdp, vaddr);
252
253 local_irq_enable();
254
255 if (unlikely(!pte_present(pte) ||
256 (write && (!pte_write(pte) || !pte_dirty(pte)))))
257 return 1;
258
259 *paddr = pte_pfn(pte) << PAGE_SHIFT;
260 *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
261 return 0;
262
263err:
264 local_irq_enable();
265 return 1;
266}
267
268/*
269 * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
270 * Input:
271 * cb Address of user CBR. Null if not running in user context
272 * Return:
273 * 0 = dropin, exception, or switch to UPM successful
274 * 1 = range invalidate active
275 * < 0 = error code
276 *
277 */
278static int gru_try_dropin(struct gru_thread_state *gts,
279 struct gru_tlb_fault_handle *tfh,
280 unsigned long __user *cb)
281{
282 struct mm_struct *mm = gts->ts_mm;
283 struct vm_area_struct *vma;
284 int pageshift, asid, write, ret;
285 unsigned long paddr, gpa, vaddr;
286
287 /*
288 * NOTE: The GRU contains magic hardware that eliminates races between
289 * TLB invalidates and TLB dropins. If an invalidate occurs
290 * in the window between reading the TFH and the subsequent TLB dropin,
291 * the dropin is ignored. This eliminates the need for additional locks.
292 */
293
294 /*
295 * Error if the TFH state is IDLE, or if it is in FMM mode and the user is
296 * issuing a UPM call. Might be a hardware race OR a stupid user. Ignore FMM
297 * because FMM is a transient state.
298 */
299 if (tfh->state == TFHSTATE_IDLE)
300 goto failidle;
301 if (tfh->state == TFHSTATE_MISS_FMM && cb)
302 goto failfmm;
303
304 write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
305 vaddr = tfh->missvaddr;
306 asid = tfh->missasid;
307 if (asid == 0)
308 goto failnoasid;
309
310 rmb(); /* TFH must be cache resident before reading ms_range_active */
311
312 /*
313 * TFH is cache resident - at least briefly. Fail the dropin
314 * if a range invalidate is active.
315 */
316 if (atomic_read(&gts->ts_gms->ms_range_active))
317 goto failactive;
318
319 vma = find_vma(mm, vaddr);
320 if (!vma)
321 goto failinval;
322
323 /*
324 * Atomic lookup is faster & usually works even if called in non-atomic
325 * context.
326 */
327 ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
328 if (ret) {
329 if (!cb)
330 goto failupm;
331 if (non_atomic_pte_lookup(vma, vaddr, write, &paddr,
332 &pageshift))
333 goto failinval;
334 }
335 if (is_gru_paddr(paddr))
336 goto failinval;
337
338 paddr = paddr & ~((1UL << pageshift) - 1);
339 gpa = uv_soc_phys_ram_to_gpa(paddr);
340 gru_cb_set_istatus_active(cb);
341 tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
342 GRU_PAGESIZE(pageshift));
343 STAT(tlb_dropin);
344 gru_dbg(grudev,
345 "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n",
346 ret ? "non-atomic" : "atomic", tfh, vaddr, asid,
347 pageshift, gpa);
348 return 0;
349
350failnoasid:
351 /* No asid (delayed unload). */
352 STAT(tlb_dropin_fail_no_asid);
353 gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
354 if (!cb)
355 tfh_user_polling_mode(tfh);
356 else
357 gru_flush_cache(tfh);
358 return -EAGAIN;
359
360failupm:
361 /* Atomic failure switch CBR to UPM */
362 tfh_user_polling_mode(tfh);
363 STAT(tlb_dropin_fail_upm);
364 gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
365 return 1;
366
367failfmm:
368 /* FMM state on UPM call */
369 STAT(tlb_dropin_fail_fmm);
370 gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
371 return 0;
372
373failidle:
374 /* TFH was idle - no miss pending */
375 gru_flush_cache(tfh);
376 if (cb)
377 gru_flush_cache(cb);
378 STAT(tlb_dropin_fail_idle);
379 gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
380 return 0;
381
382failinval:
383 /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
384 tfh_exception(tfh);
385 STAT(tlb_dropin_fail_invalid);
386 gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
387 return -EFAULT;
388
389failactive:
390 /* Range invalidate active. Switch to UPM iff atomic */
391 if (!cb)
392 tfh_user_polling_mode(tfh);
393 else
394 gru_flush_cache(tfh);
395 STAT(tlb_dropin_fail_range_active);
396 gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
397 tfh, vaddr);
398 return 1;
399}
400
401/*
402 * Process an external interrupt from the GRU. This interrupt is
403 * caused by a TLB miss.
404 * Note that this is the interrupt handler that is registered with the
405 * linux interrupt subsystem.
406 */
407irqreturn_t gru_intr(int irq, void *dev_id)
408{
409 struct gru_state *gru;
410 struct gru_tlb_fault_map map;
411 struct gru_thread_state *gts;
412 struct gru_tlb_fault_handle *tfh = NULL;
413 int cbrnum, ctxnum;
414
415 STAT(intr);
416
417 gru = irq_to_gru(irq);
418 if (!gru) {
419 dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n",
420 raw_smp_processor_id(), irq);
421 return IRQ_NONE;
422 }
423 get_clear_fault_map(gru, &map);
424 gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid,
425 map.fault_bits[0]);
426
427 for_each_cbr_in_tfm(cbrnum, map.fault_bits) {
428 tfh = get_tfh_by_index(gru, cbrnum);
429 prefetchw(tfh); /* Helps on hdw, required for emulator */
430
431 /*
432 * When hardware sets a bit in the faultmap, it implicitly
433 * locks the GRU context so that it cannot be unloaded.
434 * The gts cannot change until a TFH start/writestart command
435 * is issued.
436 */
437 ctxnum = tfh->ctxnum;
438 gts = gru->gs_gts[ctxnum];
439
440 /*
441 * This is running in interrupt context. Trylock the mmap_sem.
442 * If it fails, retry the fault in user context.
443 */
444 if (down_read_trylock(&gts->ts_mm->mmap_sem)) {
445 gru_try_dropin(gts, tfh, NULL);
446 up_read(&gts->ts_mm->mmap_sem);
447 } else {
448 tfh_user_polling_mode(tfh);
449 }
450 }
451 return IRQ_HANDLED;
452}
453
454
455static int gru_user_dropin(struct gru_thread_state *gts,
456 struct gru_tlb_fault_handle *tfh,
457 unsigned long __user *cb)
458{
459 struct gru_mm_struct *gms = gts->ts_gms;
460 int ret;
461
462 while (1) {
463 wait_event(gms->ms_wait_queue,
464 atomic_read(&gms->ms_range_active) == 0);
465 prefetchw(tfh); /* Helps on hdw, required for emulator */
466 ret = gru_try_dropin(gts, tfh, cb);
467 if (ret <= 0)
468 return ret;
469 STAT(call_os_wait_queue);
470 }
471}
472
473/*
474 * This interface is called as a result of a user detecting a "call OS" bit
475 * in a user CB. This normally means that a TLB fault has occurred.
476 * cb - user virtual address of the CB
477 */
478int gru_handle_user_call_os(unsigned long cb)
479{
480 struct gru_tlb_fault_handle *tfh;
481 struct gru_thread_state *gts;
482 unsigned long __user *cbp;
483 int ucbnum, cbrnum, ret = -EINVAL;
484
485 STAT(call_os);
486 gru_dbg(grudev, "address 0x%lx\n", cb);
487
488 /* sanity check the cb pointer */
489 ucbnum = get_cb_number((void *)cb);
490 if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
491 return -EINVAL;
492	cbp = (unsigned long __user *)cb;
493
494 gts = gru_find_lock_gts(cb);
495 if (!gts)
496 return -EINVAL;
497
498 if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
499 ret = -EINVAL;
500 goto exit;
501 }
502
503 /*
504 * If force_unload is set, the UPM TLB fault is phony. The task
505 * has migrated to another node and the GSEG must be moved. Just
506 * unload the context. The task will page fault and assign a new
507 * context.
508 */
509 ret = -EAGAIN;
510 cbrnum = thread_cbr_number(gts, ucbnum);
511 if (gts->ts_force_unload) {
512 gru_unload_context(gts, 1);
513 } else if (gts->ts_gru) {
514 tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
515 ret = gru_user_dropin(gts, tfh, cbp);
516 }
517exit:
518 gru_unlock_gts(gts);
519 return ret;
520}
521
522/*
523 * Fetch the exception detail information for a CB that terminated with
524 * an exception.
525 */
526int gru_get_exception_detail(unsigned long arg)
527{
528 struct control_block_extended_exc_detail excdet;
529 struct gru_control_block_extended *cbe;
530 struct gru_thread_state *gts;
531 int ucbnum, cbrnum, ret;
532
533 STAT(user_exception);
534 if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
535 return -EFAULT;
536
537 gru_dbg(grudev, "address 0x%lx\n", excdet.cb);
538 gts = gru_find_lock_gts(excdet.cb);
539 if (!gts)
540 return -EINVAL;
541
542 if (gts->ts_gru) {
543 ucbnum = get_cb_number((void *)excdet.cb);
544 cbrnum = thread_cbr_number(gts, ucbnum);
545 cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
546 excdet.opc = cbe->opccpy;
547 excdet.exopc = cbe->exopccpy;
548 excdet.ecause = cbe->ecause;
549 excdet.exceptdet0 = cbe->idef1upd;
550 excdet.exceptdet1 = cbe->idef3upd;
551 ret = 0;
552 } else {
553 ret = -EAGAIN;
554 }
555 gru_unlock_gts(gts);
556
557 gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
558 excdet.ecause);
559 if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
560 ret = -EFAULT;
561 return ret;
562}
563
564/*
565 * User request to unload a context. Content is saved for possible reload.
566 */
567int gru_user_unload_context(unsigned long arg)
568{
569 struct gru_thread_state *gts;
570 struct gru_unload_context_req req;
571
572 STAT(user_unload_context);
573 if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
574 return -EFAULT;
575
576 gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
577
578 gts = gru_find_lock_gts(req.gseg);
579 if (!gts)
580 return -EINVAL;
581
582 if (gts->ts_gru)
583 gru_unload_context(gts, 1);
584 gru_unlock_gts(gts);
585
586 return 0;
587}
588
589/*
590 * User request to flush a range of virtual addresses from the GRU TLB
591 * (Mainly for testing).
592 */
593int gru_user_flush_tlb(unsigned long arg)
594{
595 struct gru_thread_state *gts;
596 struct gru_flush_tlb_req req;
597
598 STAT(user_flush_tlb);
599 if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
600 return -EFAULT;
601
602 gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
603 req.vaddr, req.len);
604
605 gts = gru_find_lock_gts(req.gseg);
606 if (!gts)
607 return -EINVAL;
608
609 gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.vaddr + req.len);
610 gru_unlock_gts(gts);
611
612 return 0;
613}
614
615/*
616 * Register the current task as the user of the GSEG slice.
617 * Needed for TLB fault interrupt targeting.
618 */
619int gru_set_task_slice(long address)
620{
621 struct gru_thread_state *gts;
622
623 STAT(set_task_slice);
624 gru_dbg(grudev, "address 0x%lx\n", address);
625 gts = gru_alloc_locked_gts(address);
626 if (!gts)
627 return -EINVAL;
628
629 gts->ts_tgid_owner = current->tgid;
630 gru_unlock_gts(gts);
631
632 return 0;
633}
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
new file mode 100644
index 000000000000..23c91f5f6b61
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -0,0 +1,485 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * FILE OPERATIONS & DRIVER INITIALIZATION
5 *
6 * This file supports the user system calls for open, close, mmap, etc.
7 * It also includes the driver initialization code.
8 *
9 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26#include <linux/module.h>
27#include <linux/kernel.h>
28#include <linux/errno.h>
29#include <linux/slab.h>
30#include <linux/mm.h>
31#include <linux/io.h>
32#include <linux/smp_lock.h>
33#include <linux/spinlock.h>
34#include <linux/device.h>
35#include <linux/miscdevice.h>
36#include <linux/interrupt.h>
37#include <linux/proc_fs.h>
38#include <linux/uaccess.h>
39#include "gru.h"
40#include "grulib.h"
41#include "grutables.h"
42
43#if defined CONFIG_X86_64
44#include <asm/genapic.h>
45#include <asm/irq.h>
46#define IS_UV() is_uv_system()
47#elif defined CONFIG_IA64
48#include <asm/system.h>
49#include <asm/sn/simulator.h>
50/* temp support for running on hardware simulator */
51#define IS_UV() (IS_MEDUSA() || ia64_platform_is("uv"))
52#else
53#define IS_UV() 0
54#endif
55
56#include <asm/uv/uv_hub.h>
57#include <asm/uv/uv_mmrs.h>
58
59struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
60unsigned long gru_start_paddr, gru_end_paddr __read_mostly;
61struct gru_stats_s gru_stats;
62
63/* Guaranteed user available resources on each node */
64static int max_user_cbrs, max_user_dsr_bytes;
65
66static struct file_operations gru_fops;
67static struct miscdevice gru_miscdev;
68
69
70/*
71 * gru_vma_close
72 *
73 * Called when unmapping a device mapping. Frees all gru resources
74 * and tables belonging to the vma.
75 */
76static void gru_vma_close(struct vm_area_struct *vma)
77{
78 struct gru_vma_data *vdata;
79 struct gru_thread_state *gts;
80 struct list_head *entry, *next;
81
82 if (!vma->vm_private_data)
83 return;
84
85 vdata = vma->vm_private_data;
86 vma->vm_private_data = NULL;
87 gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
88 vdata);
89 list_for_each_safe(entry, next, &vdata->vd_head) {
90 gts =
91 list_entry(entry, struct gru_thread_state, ts_next);
92 list_del(&gts->ts_next);
93 mutex_lock(&gts->ts_ctxlock);
94 if (gts->ts_gru)
95 gru_unload_context(gts, 0);
96 mutex_unlock(&gts->ts_ctxlock);
97 gts_drop(gts);
98 }
99 kfree(vdata);
100 STAT(vdata_free);
101}
102
103/*
104 * gru_file_mmap
105 *
106 * Called when mmaping the device. Initializes the vma with a fault handler
107 * and private data structure necessary to allocate, track, and free the
108 * underlying pages.
109 */
110static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
111{
112 if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
113 return -EPERM;
114
115 if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
116 vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
117 return -EINVAL;
118
119 vma->vm_flags |=
120 (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
121 VM_RESERVED);
122 vma->vm_page_prot = PAGE_SHARED;
123 vma->vm_ops = &gru_vm_ops;
124
125 vma->vm_private_data = gru_alloc_vma_data(vma, 0);
126 if (!vma->vm_private_data)
127 return -ENOMEM;
128
129 gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
130 file, vma->vm_start, vma, vma->vm_private_data);
131 return 0;
132}
133
134/*
135 * Create a new GRU context
136 */
137static int gru_create_new_context(unsigned long arg)
138{
139 struct gru_create_context_req req;
140 struct vm_area_struct *vma;
141 struct gru_vma_data *vdata;
142 int ret = -EINVAL;
143
144
145 if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
146 return -EFAULT;
147
148 if (req.data_segment_bytes == 0 ||
149 req.data_segment_bytes > max_user_dsr_bytes)
150 return -EINVAL;
151 if (!req.control_blocks || !req.maximum_thread_count ||
152 req.control_blocks > max_user_cbrs)
153 return -EINVAL;
154
155 if (!(req.options & GRU_OPT_MISS_MASK))
156 req.options |= GRU_OPT_MISS_FMM_INTR;
157
158 down_write(&current->mm->mmap_sem);
159 vma = gru_find_vma(req.gseg);
160 if (vma) {
161 vdata = vma->vm_private_data;
162 vdata->vd_user_options = req.options;
163 vdata->vd_dsr_au_count =
164 GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
165 vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
166 ret = 0;
167 }
168 up_write(&current->mm->mmap_sem);
169
170 return ret;
171}
172
173/*
174 * Get GRU configuration info (temp - for emulator testing)
175 */
176static long gru_get_config_info(unsigned long arg)
177{
178 struct gru_config_info info;
179 int nodesperblade;
180
181 if (num_online_nodes() > 1 &&
182 (uv_node_to_blade_id(1) == uv_node_to_blade_id(0)))
183 nodesperblade = 2;
184 else
185 nodesperblade = 1;
186 info.cpus = num_online_cpus();
187 info.nodes = num_online_nodes();
188 info.blades = info.nodes / nodesperblade;
189 info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;
190
191 if (copy_to_user((void __user *)arg, &info, sizeof(info)))
192 return -EFAULT;
193 return 0;
194}
195
196/*
197 * Get GRU chiplet status
198 */
199static long gru_get_chiplet_status(unsigned long arg)
200{
201 struct gru_state *gru;
202 struct gru_chiplet_info info;
203
204 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
205 return -EFAULT;
206
207 if (info.node == -1)
208 info.node = numa_node_id();
209 if (info.node >= num_possible_nodes() ||
210 info.chiplet >= GRU_CHIPLETS_PER_HUB ||
211 info.node < 0 || info.chiplet < 0)
212 return -EINVAL;
213
214 info.blade = uv_node_to_blade_id(info.node);
215 gru = get_gru(info.blade, info.chiplet);
216
217 info.total_dsr_bytes = GRU_NUM_DSR_BYTES;
218 info.total_cbr = GRU_NUM_CB;
219 info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES -
220 gru->gs_reserved_dsr_bytes;
221 info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs;
222 info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) *
223 GRU_DSR_AU_BYTES;
224 info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
225
226 if (copy_to_user((void __user *)arg, &info, sizeof(info)))
227 return -EFAULT;
228 return 0;
229}
230
231/*
232 * gru_file_unlocked_ioctl
233 *
234 * Called to update file attributes via IOCTL calls.
235 */
236static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
237 unsigned long arg)
238{
239 int err = -EBADRQC;
240
241 gru_dbg(grudev, "file %p\n", file);
242
243 switch (req) {
244 case GRU_CREATE_CONTEXT:
245 err = gru_create_new_context(arg);
246 break;
247 case GRU_SET_TASK_SLICE:
248 err = gru_set_task_slice(arg);
249 break;
250 case GRU_USER_GET_EXCEPTION_DETAIL:
251 err = gru_get_exception_detail(arg);
252 break;
253 case GRU_USER_UNLOAD_CONTEXT:
254 err = gru_user_unload_context(arg);
255 break;
256 case GRU_GET_CHIPLET_STATUS:
257 err = gru_get_chiplet_status(arg);
258 break;
259 case GRU_USER_FLUSH_TLB:
260 err = gru_user_flush_tlb(arg);
261 break;
262 case GRU_USER_CALL_OS:
263 err = gru_handle_user_call_os(arg);
264 break;
265 case GRU_GET_CONFIG_INFO:
266 err = gru_get_config_info(arg);
267 break;
268 }
269 return err;
270}
271
272/*
273 * Called at init time to build tables for all GRUs that are present in the
274 * system.
275 */
276static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
277 void *vaddr, int nid, int bid, int grunum)
278{
279 spin_lock_init(&gru->gs_lock);
280 spin_lock_init(&gru->gs_asid_lock);
281 gru->gs_gru_base_paddr = paddr;
282 gru->gs_gru_base_vaddr = vaddr;
283 gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum;
284 gru->gs_blade = gru_base[bid];
285 gru->gs_blade_id = bid;
286 gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
287 gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
288 gru_tgh_flush_init(gru);
289 gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n",
290 bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
291 gru->gs_gru_base_paddr);
292 gru_kservices_init(gru);
293}
294
295static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
296{
297 int pnode, nid, bid, chip;
298 int cbrs, dsrbytes, n;
299 int order = get_order(sizeof(struct gru_blade_state));
300 struct page *page;
301 struct gru_state *gru;
302 unsigned long paddr;
303 void *vaddr;
304
305 max_user_cbrs = GRU_NUM_CB;
306 max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
307 for_each_online_node(nid) {
308 bid = uv_node_to_blade_id(nid);
309 pnode = uv_node_to_pnode(nid);
310 if (gru_base[bid])
311 continue;
312 page = alloc_pages_node(nid, GFP_KERNEL, order);
313 if (!page)
314 goto fail;
315 gru_base[bid] = page_address(page);
316 memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
317 gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
318 spin_lock_init(&gru_base[bid]->bs_lock);
319
320 dsrbytes = 0;
321 cbrs = 0;
322 for (gru = gru_base[bid]->bs_grus, chip = 0;
323 chip < GRU_CHIPLETS_PER_BLADE;
324 chip++, gru++) {
325 paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
326 vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
327 gru_init_chiplet(gru, paddr, vaddr, bid, nid, chip);
328 n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
329 cbrs = max(cbrs, n);
330 n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
331 dsrbytes = max(dsrbytes, n);
332 }
333 max_user_cbrs = min(max_user_cbrs, cbrs);
334 max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes);
335 }
336
337 return 0;
338
339fail:
340 for (nid--; nid >= 0; nid--)
341 free_pages((unsigned long)gru_base[nid], order);
342 return -ENOMEM;
343}
344
345#ifdef CONFIG_IA64
346
347static int get_base_irq(void)
348{
349 return IRQ_GRU;
350}
351
352#elif defined CONFIG_X86_64
353
354static void noop(unsigned int irq)
355{
356}
357
358static struct irq_chip gru_chip = {
359 .name = "gru",
360 .mask = noop,
361 .unmask = noop,
362 .ack = noop,
363};
364
365static int get_base_irq(void)
366{
367 set_irq_chip(IRQ_GRU, &gru_chip);
368 set_irq_chip(IRQ_GRU + 1, &gru_chip);
369 return IRQ_GRU;
370}
371#endif
372
373/*
374 * gru_init
375 *
376 * Called at boot or module load time to initialize the GRUs.
377 */
378static int __init gru_init(void)
379{
380 int ret, irq, chip;
381 char id[10];
382 void *gru_start_vaddr;
383
384 if (!IS_UV())
385 return 0;
386
387#if defined CONFIG_IA64
388 gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
389#else
390 gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
391 0x7fffffffffffUL;
392
393#endif
394 gru_start_vaddr = __va(gru_start_paddr);
395 gru_end_paddr = gru_start_paddr + MAX_NUMNODES * GRU_SIZE;
396 printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
397 gru_start_paddr, gru_end_paddr);
398 irq = get_base_irq();
399 for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
400 ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
401 if (ret) {
402 printk(KERN_ERR "%s: request_irq failed\n",
403 GRU_DRIVER_ID_STR);
404 goto exit1;
405 }
406 }
407
408 ret = misc_register(&gru_miscdev);
409 if (ret) {
410 printk(KERN_ERR "%s: misc_register failed\n",
411 GRU_DRIVER_ID_STR);
412 goto exit1;
413 }
414
415 ret = gru_proc_init();
416 if (ret) {
417 printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
418 goto exit2;
419 }
420
421 ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
422 if (ret) {
423 printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
424 goto exit3;
425 }
426
427 printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
428 GRU_DRIVER_VERSION_STR);
429 return 0;
430
431exit3:
432 gru_proc_exit();
433exit2:
434 misc_deregister(&gru_miscdev);
435exit1:
436 for (--chip; chip >= 0; chip--)
437 free_irq(irq + chip, NULL);
438 return ret;
439
440}
441
442static void __exit gru_exit(void)
443{
444 int i, bid;
445 int order = get_order(sizeof(struct gru_state) *
446 GRU_CHIPLETS_PER_BLADE);
447
448 for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
449 free_irq(IRQ_GRU + i, NULL);
450
451 for (bid = 0; bid < GRU_MAX_BLADES; bid++)
452 free_pages((unsigned long)gru_base[bid], order);
453
454 misc_deregister(&gru_miscdev);
455 gru_proc_exit();
456}
457
458static struct file_operations gru_fops = {
459 .owner = THIS_MODULE,
460 .unlocked_ioctl = gru_file_unlocked_ioctl,
461 .mmap = gru_file_mmap,
462};
463
464static struct miscdevice gru_miscdev = {
465 .minor = MISC_DYNAMIC_MINOR,
466 .name = "gru",
467 .fops = &gru_fops,
468};
469
470struct vm_operations_struct gru_vm_ops = {
471 .close = gru_vma_close,
472 .fault = gru_fault,
473};
474
475module_init(gru_init);
476module_exit(gru_exit);
477
478module_param(gru_options, ulong, 0644);
479MODULE_PARM_DESC(gru_options, "Various debug options");
480
481MODULE_AUTHOR("Silicon Graphics, Inc.");
482MODULE_LICENSE("GPL");
483MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
484MODULE_VERSION(GRU_DRIVER_VERSION_STR);
485
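How user space is expected to drive the interfaces above, as a hedged sketch: the ioctl request codes and the layout of struct gru_create_context_req come from grulib.h, which is part of this patch but not reproduced here, so treat those details as assumptions (gru.h and grulib.h are assumed to be included alongside the user-space headers). mmap() must also return a GRU_GSEG_PAGESIZE-aligned address, which a real library guarantees by over-allocating; that is elided below.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
/* plus gru.h and grulib.h from this patch */

static int example_create_gru_context(void)
{
	struct gru_create_context_req req = { 0 };
	void *gseg;
	int fd;

	fd = open("/dev/gru", O_RDWR);
	if (fd < 0)
		return -1;

	/* gru_file_mmap() requires a shared, writable, GSeg-aligned mapping */
	gseg = mmap(NULL, GRU_GSEG_PAGESIZE, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);
	if (gseg == MAP_FAILED)
		return -1;

	req.gseg = (unsigned long)gseg;
	req.data_segment_bytes = 8 * GRU_CACHE_LINE_BYTES;
	req.control_blocks = 4;
	req.maximum_thread_count = 1;
	req.options = GRU_OPT_MISS_FMM_INTR;
	return ioctl(fd, GRU_CREATE_CONTEXT, &req);
}
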
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
new file mode 100644
index 000000000000..d16031d62673
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -0,0 +1,663 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * GRU HANDLE DEFINITION
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifndef __GRUHANDLES_H__
24#define __GRUHANDLES_H__
25#include "gru_instructions.h"
26
27/*
28 * Manifest constants for GRU Memory Map
29 */
30#define GRU_GSEG0_BASE 0
31#define GRU_MCS_BASE (64 * 1024 * 1024)
32#define GRU_SIZE (128UL * 1024 * 1024)
33
34/* Handle & resource counts */
35#define GRU_NUM_CB 128
36#define GRU_NUM_DSR_BYTES (32 * 1024)
37#define GRU_NUM_TFM 16
38#define GRU_NUM_TGH 24
39#define GRU_NUM_CBE 128
40#define GRU_NUM_TFH 128
41#define GRU_NUM_CCH 16
42#define GRU_NUM_GSH 1
43
44/* Maximum resource counts that can be reserved by user programs */
45#define GRU_NUM_USER_CBR GRU_NUM_CBE
46#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES
47
48/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles
49 * are the same */
50#define GRU_HANDLE_BYTES 64
51#define GRU_HANDLE_STRIDE 256
52
53/* Base addresses of handles */
54#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000)
55#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000)
56#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000)
57#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000)
58#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000)
59#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000)
60
61/* User gseg constants */
62#define GRU_GSEG_STRIDE (4 * 1024 * 1024)
63#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1))
64
65/* Data segment constants */
66#define GRU_DSR_AU_BYTES 1024
67#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES)
68#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES)
69#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES)
70
71/* Control block constants */
72#define GRU_CBR_AU_SIZE 2
73#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE)
74
75/* Convert resource counts to the number of AU */
76#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
77#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)
78
79/* UV limits */
80#define GRU_CHIPLETS_PER_HUB 2
81#define GRU_HUBS_PER_BLADE 1
82#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB)
83
84/* User GRU Gseg offsets */
85#define GRU_CB_BASE 0
86#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE)
87#define GRU_DS_BASE 0x20000
88#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES)
89
90/* Convert a GRU physical address to the chiplet offset */
91#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1))
92
93/* Convert an arbitrary handle address to the beginning of the GRU segment */
94#ifndef __PLUGIN__
95#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
96#else
97extern void *gmu_grubase(void *h);
98#define GRUBASE(h) gmu_grubase(h)
99#endif
100
101/* General addressing macros. */
102static inline void *get_gseg_base_address(void *base, int ctxnum)
103{
104 return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum);
105}
106
107static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line)
108{
109 return (void *)(get_gseg_base_address(base, ctxnum) +
110 GRU_CB_BASE + GRU_HANDLE_STRIDE * line);
111}
112
113static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line)
114{
115 return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE +
116 GRU_CACHE_LINE_BYTES * line);
117}
118
119static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum)
120{
121 return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE +
122 ctxnum * GRU_HANDLE_STRIDE);
123}
124
125static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum)
126{
127 return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE +
128 ctxnum * GRU_HANDLE_STRIDE);
129}
130
131static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum)
132{
133 return (struct gru_control_block_extended *)(base + GRU_CBE_BASE +
134 ctxnum * GRU_HANDLE_STRIDE);
135}
136
137static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum)
138{
139 return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE +
140 ctxnum * GRU_HANDLE_STRIDE);
141}
142
143static inline struct gru_context_configuration_handle *get_cch(void *base,
144 int ctxnum)
145{
146 return (struct gru_context_configuration_handle *)(base +
147 GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE);
148}
149
150static inline unsigned long get_cb_number(void *cb)
151{
152 return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) /
153 GRU_HANDLE_STRIDE;
154}
155
156/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)) */
157static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode,
158 int chiplet)
159{
160 return paddr + GRU_SIZE * (2 * pnode + chiplet);
161}
162
163static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
164{
165 return vaddr + GRU_SIZE * (2 * pnode + chiplet);
166}
167
168
169
170/*
171 * Global TLB Fault Map
172 * Bitmap of outstanding TLB misses needing interrupt/polling service.
173 *
174 */
175struct gru_tlb_fault_map {
176 unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
177 unsigned long fill0[2];
178 unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
179 unsigned long fill1[2];
180};
181
182/*
183 * TGH - TLB Global Handle
184 * Used for TLB flushing.
185 *
186 */
187struct gru_tlb_global_handle {
188 unsigned int cmd:1; /* DW 0 */
189 unsigned int delresp:1;
190 unsigned int opc:1;
191 unsigned int fill1:5;
192
193 unsigned int fill2:8;
194
195 unsigned int status:2;
196 unsigned long fill3:2;
197 unsigned int state:3;
198 unsigned long fill4:1;
199
200 unsigned int cause:3;
201 unsigned long fill5:37;
202
203 unsigned long vaddr:64; /* DW 1 */
204
205 unsigned int asid:24; /* DW 2 */
206 unsigned int fill6:8;
207
208 unsigned int pagesize:5;
209 unsigned int fill7:11;
210
211 unsigned int global:1;
212 unsigned int fill8:15;
213
214 unsigned long vaddrmask:39; /* DW 3 */
215 unsigned int fill9:9;
216 unsigned int n:10;
217 unsigned int fill10:6;
218
219 unsigned int ctxbitmap:16; /* DW4 */
220 unsigned long fill11[3];
221};
222
223enum gru_tgh_cmd {
224 TGHCMD_START
225};
226
227enum gru_tgh_opc {
228 TGHOP_TLBNOP,
229 TGHOP_TLBINV
230};
231
232enum gru_tgh_status {
233 TGHSTATUS_IDLE,
234 TGHSTATUS_EXCEPTION,
235 TGHSTATUS_ACTIVE
236};
237
238enum gru_tgh_state {
239 TGHSTATE_IDLE,
240 TGHSTATE_PE_INVAL,
241 TGHSTATE_INTERRUPT_INVAL,
242 TGHSTATE_WAITDONE,
243 TGHSTATE_RESTART_CTX,
244};
245
246/*
247 * TFH - TLB Fault Handle
248 * Used for TLB dropins into the GRU TLB.
249 *
250 */
251struct gru_tlb_fault_handle {
252 unsigned int cmd:1; /* DW 0 - low 32*/
253 unsigned int delresp:1;
254 unsigned int fill0:2;
255 unsigned int opc:3;
256 unsigned int fill1:9;
257
258 unsigned int status:2;
259 unsigned int fill2:1;
260 unsigned int color:1;
261 unsigned int state:3;
262 unsigned int fill3:1;
263
264 unsigned int cause:7; /* DW 0 - high 32 */
265 unsigned int fill4:1;
266
267 unsigned int indexway:12;
268 unsigned int fill5:4;
269
270 unsigned int ctxnum:4;
271 unsigned int fill6:12;
272
273 unsigned long missvaddr:64; /* DW 1 */
274
275 unsigned int missasid:24; /* DW 2 */
276 unsigned int fill7:8;
277 unsigned int fillasid:24;
278 unsigned int dirty:1;
279 unsigned int gaa:2;
280 unsigned long fill8:5;
281
282 unsigned long pfn:41; /* DW 3 */
283 unsigned int fill9:7;
284 unsigned int pagesize:5;
285 unsigned int fill10:11;
286
287 unsigned long fillvaddr:64; /* DW 4 */
288
289 unsigned long fill11[3];
290};
291
292enum gru_tfh_opc {
293 TFHOP_NOOP,
294 TFHOP_RESTART,
295 TFHOP_WRITE_ONLY,
296 TFHOP_WRITE_RESTART,
297 TFHOP_EXCEPTION,
298 TFHOP_USER_POLLING_MODE = 7,
299};
300
301enum tfh_status {
302 TFHSTATUS_IDLE,
303 TFHSTATUS_EXCEPTION,
304 TFHSTATUS_ACTIVE,
305};
306
307enum tfh_state {
308 TFHSTATE_INACTIVE,
309 TFHSTATE_IDLE,
310 TFHSTATE_MISS_UPM,
311 TFHSTATE_MISS_FMM,
312 TFHSTATE_HW_ERR,
313 TFHSTATE_WRITE_TLB,
314 TFHSTATE_RESTART_CBR,
315};
316
317/* TFH cause bits */
318enum tfh_cause {
319 TFHCAUSE_NONE,
320 TFHCAUSE_TLB_MISS,
321 TFHCAUSE_TLB_MOD,
322 TFHCAUSE_HW_ERROR_RR,
323 TFHCAUSE_HW_ERROR_MAIN_ARRAY,
324 TFHCAUSE_HW_ERROR_VALID,
325 TFHCAUSE_HW_ERROR_PAGESIZE,
326 TFHCAUSE_INSTRUCTION_EXCEPTION,
327 TFHCAUSE_UNCORRECTIBLE_ERROR,
328};
329
330/* GAA values */
331#define GAA_RAM 0x0
332#define GAA_NCRAM 0x2
333#define GAA_MMIO 0x1
334#define GAA_REGISTER 0x3
335
336/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */
337#define GRU_PADDR_SHIFT 12
338
339/*
340 * Context Configuration handle
341 * Used to allocate resources to a GSEG context.
342 *
343 */
344struct gru_context_configuration_handle {
345 unsigned int cmd:1; /* DW0 */
346 unsigned int delresp:1;
347 unsigned int opc:3;
348 unsigned int unmap_enable:1;
349 unsigned int req_slice_set_enable:1;
350 unsigned int req_slice:2;
351 unsigned int cb_int_enable:1;
352 unsigned int tlb_int_enable:1;
353 unsigned int tfm_fault_bit_enable:1;
354 unsigned int tlb_int_select:4;
355
356 unsigned int status:2;
357 unsigned int state:2;
358 unsigned int reserved2:4;
359
360 unsigned int cause:4;
361 unsigned int tfm_done_bit_enable:1;
362 unsigned int unused:3;
363
364 unsigned int dsr_allocation_map;
365
366 unsigned long cbr_allocation_map; /* DW1 */
367
368 unsigned int asid[8]; /* DW 2 - 5 */
369 unsigned short sizeavail[8]; /* DW 6 - 7 */
370} __attribute__ ((packed));
371
372enum gru_cch_opc {
373 CCHOP_START = 1,
374 CCHOP_ALLOCATE,
375 CCHOP_INTERRUPT,
376 CCHOP_DEALLOCATE,
377 CCHOP_INTERRUPT_SYNC,
378};
379
380enum gru_cch_status {
381 CCHSTATUS_IDLE,
382 CCHSTATUS_EXCEPTION,
383 CCHSTATUS_ACTIVE,
384};
385
386enum gru_cch_state {
387 CCHSTATE_INACTIVE,
388 CCHSTATE_MAPPED,
389 CCHSTATE_ACTIVE,
390 CCHSTATE_INTERRUPTED,
391};
392
393/* CCH Exception cause */
394enum gru_cch_cause {
395 CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1,
396 CCHCAUSE_ILLEGAL_OPCODE = 2,
397 CCHCAUSE_INVALID_START_REQUEST = 3,
398 CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4,
399 CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5,
400 CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6,
401 CCHCAUSE_CCH_BUSY = 7,
402 CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8,
403 CCHCAUSE_BAD_TFM_CONFIG = 9,
404 CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10,
405 CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11,
406 CCHCAUSE_CBR_DEALLOCATION_ERROR = 12,
407};
408/*
409 * CBE - Control Block Extended
410 * Maintains internal GRU state for active CBs.
411 *
412 */
413struct gru_control_block_extended {
414 unsigned int reserved0:1; /* DW 0 - low */
415 unsigned int imacpy:3;
416 unsigned int reserved1:4;
417 unsigned int xtypecpy:3;
418 unsigned int iaa0cpy:2;
419 unsigned int iaa1cpy:2;
420 unsigned int reserved2:1;
421 unsigned int opccpy:8;
422 unsigned int exopccpy:8;
423
424 unsigned int idef2cpy:22; /* DW 0 - high */
425 unsigned int reserved3:10;
426
427 unsigned int idef4cpy:22; /* DW 1 */
428 unsigned int reserved4:10;
429 unsigned int idef4upd:22;
430 unsigned int reserved5:10;
431
432 unsigned long idef1upd:64; /* DW 2 */
433
434 unsigned long idef5cpy:64; /* DW 3 */
435
436 unsigned long idef6cpy:64; /* DW 4 */
437
438 unsigned long idef3upd:64; /* DW 5 */
439
440 unsigned long idef5upd:64; /* DW 6 */
441
442 unsigned int idef2upd:22; /* DW 7 */
443 unsigned int reserved6:10;
444
445 unsigned int ecause:20;
446 unsigned int cbrstate:4;
447 unsigned int cbrexecstatus:8;
448};
449
450enum gru_cbr_state {
451 CBRSTATE_INACTIVE,
452 CBRSTATE_IDLE,
453 CBRSTATE_PE_CHECK,
454 CBRSTATE_QUEUED,
455 CBRSTATE_WAIT_RESPONSE,
456 CBRSTATE_INTERRUPTED,
457 CBRSTATE_INTERRUPTED_MISS_FMM,
458 CBRSTATE_BUSY_INTERRUPT_MISS_FMM,
459 CBRSTATE_INTERRUPTED_MISS_UPM,
460 CBRSTATE_BUSY_INTERRUPTED_MISS_UPM,
461 CBRSTATE_REQUEST_ISSUE,
462 CBRSTATE_BUSY_INTERRUPT,
463};
464
465/* CBE cbrexecstatus bits */
466#define CBR_EXS_ABORT_OCC_BIT 0
467#define CBR_EXS_INT_OCC_BIT 1
468#define CBR_EXS_PENDING_BIT 2
469#define CBR_EXS_QUEUED_BIT 3
470#define CBR_EXS_TLBHW_BIT 4
471#define CBR_EXS_EXCEPTION_BIT 5
472
473#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
474#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
475#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
476#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
477#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT)
478#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
479
480/* CBE ecause bits - defined in gru_instructions.h */
481
482/*
483 * Convert a processor pagesize into the strange encoded pagesize used by the
484 * GRU. Processor pagesize is encoded as log2 of bytes per page (i.e., PAGE_SHIFT).
485 * pagesize log pagesize grupagesize
486 * 4k 12 0
487 * 16k 14 1
488 * 64k 16 2
489 * 256k 18 3
490 * 1m 20 4
491 * 2m 21 5
492 * 4m 22 6
493 * 16m 24 7
494 * 64m 26 8
495 * ...
496 */
497#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2: (sh)) >> 1) - 6)
498#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh))
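A quick worked illustration of the encoding above (editorial note only; it uses nothing beyond the two macros just defined and the table in the comment):

/*
 * GRU_PAGESIZE(12) = (12 >> 1) - 6        = 0  ->  GRU_SIZEAVAIL(12) = 0x01  (4k)
 * GRU_PAGESIZE(16) = (16 >> 1) - 6        = 2  ->  GRU_SIZEAVAIL(16) = 0x04  (64k)
 * GRU_PAGESIZE(21) = ((21 + 2) >> 1) - 6  = 5  ->  GRU_SIZEAVAIL(21) = 0x20  (2m)
 */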
499
500/* minimum TLB purge count to ensure a full purge */
501#define GRUMAXINVAL 1024UL
502
503
504/* Extract the status field from a kernel handle */
505#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3)
506
507static inline void start_instruction(void *h)
508{
509 unsigned long *w0 = h;
510
511 wmb(); /* setting CMD bit must be last */
512 *w0 = *w0 | 1;
513 gru_flush_cache(h);
514}
515
516static inline int wait_instruction_complete(void *h)
517{
518 int status;
519
520 do {
521 cpu_relax();
522 barrier();
523 status = GET_MSEG_HANDLE_STATUS(h);
524 } while (status == CCHSTATUS_ACTIVE);
525 return status;
526}
527
528#if defined CONFIG_IA64
529static inline void cch_allocate_set_asids(
530 struct gru_context_configuration_handle *cch, int asidval)
531{
532 int i;
533
534 for (i = 0; i <= RGN_HPAGE; i++) { /* assume HPAGE is last region */
535 cch->asid[i] = (asidval++);
536#if 0
537 /* ZZZ hugepages not supported yet */
538 if (i == RGN_HPAGE)
539 cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift);
540 else
541#endif
542 cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT);
543 }
544}
545#elif defined CONFIG_X86_64
546static inline void cch_allocate_set_asids(
547 struct gru_context_configuration_handle *cch, int asidval)
548{
549 int i;
550
551 for (i = 0; i < 8; i++) {
552 cch->asid[i] = asidval++;
553 cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT) |
554 GRU_SIZEAVAIL(21);
555 }
556}
557#endif
558
559static inline int cch_allocate(struct gru_context_configuration_handle *cch,
560 int asidval, unsigned long cbrmap,
561 unsigned long dsrmap)
562{
563 cch_allocate_set_asids(cch, asidval);
564 cch->dsr_allocation_map = dsrmap;
565 cch->cbr_allocation_map = cbrmap;
566 cch->opc = CCHOP_ALLOCATE;
567 start_instruction(cch);
568 return wait_instruction_complete(cch);
569}
570
571static inline int cch_start(struct gru_context_configuration_handle *cch)
572{
573 cch->opc = CCHOP_START;
574 start_instruction(cch);
575 return wait_instruction_complete(cch);
576}
577
578static inline int cch_interrupt(struct gru_context_configuration_handle *cch)
579{
580 cch->opc = CCHOP_INTERRUPT;
581 start_instruction(cch);
582 return wait_instruction_complete(cch);
583}
584
585static inline int cch_deallocate(struct gru_context_configuration_handle *cch)
586{
587 cch->opc = CCHOP_DEALLOCATE;
588 start_instruction(cch);
589 return wait_instruction_complete(cch);
590}
591
592static inline int cch_interrupt_sync(struct gru_context_configuration_handle
593 *cch)
594{
595 cch->opc = CCHOP_INTERRUPT_SYNC;
596 start_instruction(cch);
597 return wait_instruction_complete(cch);
598}
599
600static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh,
601 unsigned long vaddr, unsigned long vaddrmask,
602 int asid, int pagesize, int global, int n,
603 unsigned short ctxbitmap)
604{
605 tgh->vaddr = vaddr;
606 tgh->asid = asid;
607 tgh->pagesize = pagesize;
608 tgh->n = n;
609 tgh->global = global;
610 tgh->vaddrmask = vaddrmask;
611 tgh->ctxbitmap = ctxbitmap;
612 tgh->opc = TGHOP_TLBINV;
613 start_instruction(tgh);
614 return wait_instruction_complete(tgh);
615}
616
617static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh,
618 unsigned long pfn, unsigned long vaddr,
619 int asid, int dirty, int pagesize)
620{
621 tfh->fillasid = asid;
622 tfh->fillvaddr = vaddr;
623 tfh->pfn = pfn;
624 tfh->dirty = dirty;
625 tfh->pagesize = pagesize;
626 tfh->opc = TFHOP_WRITE_ONLY;
627 start_instruction(tfh);
628}
629
630static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
631 unsigned long paddr, int gaa,
632 unsigned long vaddr, int asid, int dirty,
633 int pagesize)
634{
635 tfh->fillasid = asid;
636 tfh->fillvaddr = vaddr;
637 tfh->pfn = paddr >> GRU_PADDR_SHIFT;
638 tfh->gaa = gaa;
639 tfh->dirty = dirty;
640 tfh->pagesize = pagesize;
641 tfh->opc = TFHOP_WRITE_RESTART;
642 start_instruction(tfh);
643}
644
645static inline void tfh_restart(struct gru_tlb_fault_handle *tfh)
646{
647 tfh->opc = TFHOP_RESTART;
648 start_instruction(tfh);
649}
650
651static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
652{
653 tfh->opc = TFHOP_USER_POLLING_MODE;
654 start_instruction(tfh);
655}
656
657static inline void tfh_exception(struct gru_tlb_fault_handle *tfh)
658{
659 tfh->opc = TFHOP_EXCEPTION;
660 start_instruction(tfh);
661}
662
663#endif /* __GRUHANDLES_H__ */
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
new file mode 100644
index 000000000000..dfd49af0fe18
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -0,0 +1,679 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * KERNEL SERVICES THAT USE THE GRU
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#include <linux/kernel.h>
24#include <linux/errno.h>
25#include <linux/slab.h>
26#include <linux/mm.h>
27#include <linux/smp_lock.h>
28#include <linux/spinlock.h>
29#include <linux/device.h>
30#include <linux/miscdevice.h>
31#include <linux/proc_fs.h>
32#include <linux/interrupt.h>
33#include <linux/uaccess.h>
34#include "gru.h"
35#include "grulib.h"
36#include "grutables.h"
37#include "grukservices.h"
38#include "gru_instructions.h"
39#include <asm/uv/uv_hub.h>
40
41/*
42 * Kernel GRU Usage
43 *
44 * The following is an interim algorithm for management of kernel GRU
45 * resources. This will likely be replaced when we better understand the
46 * kernel/user requirements.
47 *
48 * At boot time, the kernel permanently reserves a fixed number of
49 * CBRs/DSRs for each cpu to use. The resources are all taken from
50 * the GRU chiplet 1 on the blade. This leaves the full set of resources
51 * of chiplet 0 available to be allocated to a single user.
52 */
53
54/* Blade percpu resources PERMANENTLY reserved for kernel use */
55#define GRU_NUM_KERNEL_CBR 1
56#define GRU_NUM_KERNEL_DSR_BYTES 256
57#define KERNEL_CTXNUM 15
58
59/* GRU instruction attributes for all instructions */
60#define IMA IMA_CB_DELAY
61
62/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
63#define __gru_cacheline_aligned__ \
64 __attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
65
66#define MAGIC 0x1234567887654321UL
67
68/* Default retry count for GRU errors on kernel instructions */
69#define EXCEPTION_RETRY_LIMIT 3
70
71/* Status of message queue sections */
72#define MQS_EMPTY 0
73#define MQS_FULL 1
74#define MQS_NOOP 2
75
76/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
77/* optimized for x86_64 */
78struct message_queue {
79 union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */
80 int qlines; /* DW 1 */
81 long hstatus[2];
82 void *next __gru_cacheline_aligned__;/* CL 1 */
83 void *limit;
84 void *start;
85 void *start2;
86 char data ____cacheline_aligned; /* CL 2 */
87};
88
89/* First word in every message - used by mesq interface */
90struct message_header {
91 char present;
92 char present2;
93 char lines;
94 char fill;
95};
96
97#define QLINES(mq) ((mq) + offsetof(struct message_queue, qlines))
98#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
99
100static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
101{
102 struct gru_blade_state *bs;
103 int lcpu;
104
105 BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
106 preempt_disable();
107 bs = gru_base[uv_numa_blade_id()];
108 lcpu = uv_blade_processor_id();
109 *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
110 *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
111 return 0;
112}
113
114static void gru_free_cpu_resources(void *cb, void *dsr)
115{
116 preempt_enable();
117}
118
119int gru_get_cb_exception_detail(void *cb,
120 struct control_block_extended_exc_detail *excdet)
121{
122 struct gru_control_block_extended *cbe;
123
124 cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
125 excdet->opc = cbe->opccpy;
126 excdet->exopc = cbe->exopccpy;
127 excdet->ecause = cbe->ecause;
128 excdet->exceptdet0 = cbe->idef1upd;
129 excdet->exceptdet1 = cbe->idef3upd;
130 return 0;
131}
132
133char *gru_get_cb_exception_detail_str(int ret, void *cb,
134 char *buf, int size)
135{
136 struct gru_control_block_status *gen = (void *)cb;
137 struct control_block_extended_exc_detail excdet;
138
139 if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
140 gru_get_cb_exception_detail(cb, &excdet);
141 snprintf(buf, size,
142 "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x, "
143 "excdet0 0x%lx, excdet1 0x%x",
144 gen, excdet.opc, excdet.exopc, excdet.ecause,
145 excdet.exceptdet0, excdet.exceptdet1);
146 } else {
147 snprintf(buf, size, "No exception");
148 }
149 return buf;
150}
151
152static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
153{
154 while (gen->istatus >= CBS_ACTIVE) {
155 cpu_relax();
156 barrier();
157 }
158 return gen->istatus;
159}
160
161static int gru_retry_exception(void *cb)
162{
163 struct gru_control_block_status *gen = (void *)cb;
164 struct control_block_extended_exc_detail excdet;
165 int retry = EXCEPTION_RETRY_LIMIT;
166
167 while (1) {
168 if (gru_get_cb_message_queue_substatus(cb))
169 break;
170 if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
171 return CBS_IDLE;
172
173 gru_get_cb_exception_detail(cb, &excdet);
174 if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
175 break;
176 if (retry-- == 0)
177 break;
178 gen->icmd = 1;
179 gru_flush_cache(gen);
180 }
181 return CBS_EXCEPTION;
182}
183
184int gru_check_status_proc(void *cb)
185{
186 struct gru_control_block_status *gen = (void *)cb;
187 int ret;
188
189 ret = gen->istatus;
190 if (ret != CBS_EXCEPTION)
191 return ret;
192 return gru_retry_exception(cb);
193
194}
195
196int gru_wait_proc(void *cb)
197{
198 struct gru_control_block_status *gen = (void *)cb;
199 int ret;
200
201 ret = gru_wait_idle_or_exception(gen);
202 if (ret == CBS_EXCEPTION)
203 ret = gru_retry_exception(cb);
204
205 return ret;
206}
207
208void gru_abort(int ret, void *cb, char *str)
209{
210 char buf[GRU_EXC_STR_SIZE];
211
212 panic("GRU FATAL ERROR: %s - %s\n", str,
213 gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
214}
215
216void gru_wait_abort_proc(void *cb)
217{
218 int ret;
219
220 ret = gru_wait_proc(cb);
221 if (ret)
222 gru_abort(ret, cb, "gru_wait_abort");
223}
224
225
226/*------------------------------ MESSAGE QUEUES -----------------------------*/
227
228/* Internal status. These are NOT returned to the user. */
229#define MQIE_AGAIN -1 /* try again */
230
231
232/*
233 * Save/restore the "present" flag that is in the second line of 2-line
234 * messages
235 */
236static inline int get_present2(void *p)
237{
238 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
239 return mhdr->present;
240}
241
242static inline void restore_present2(void *p, int val)
243{
244 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
245 mhdr->present = val;
246}
247
248/*
249 * Create a message queue.
250 * bytes - message queue size in bytes. Includes the 2-cacheline header.
251 */
252int gru_create_message_queue(void *p, unsigned int bytes)
253{
254 struct message_queue *mq = p;
255 unsigned int qlines;
256
257 qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
258 memset(mq, 0, bytes);
259 mq->start = &mq->data;
260 mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
261 mq->next = &mq->data;
262 mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
263 mq->qlines = qlines;
264 mq->hstatus[0] = 0;
265 mq->hstatus[1] = 1;
266 mq->head = gru_mesq_head(2, qlines / 2 + 1);
267 return 0;
268}
269EXPORT_SYMBOL_GPL(gru_create_message_queue);
270
271/*
272 * Send a NOOP message to a message queue
273 * Returns:
274 * 0 - if queue is full after the send. This is the normal case
275 * but various races can change this.
276 * -1 - if mesq sent successfully but queue not full
277 * >0 - unexpected error. MQE_xxx returned
278 */
279static int send_noop_message(void *cb,
280 unsigned long mq, void *mesg)
281{
282 const struct message_header noop_header = {
283 .present = MQS_NOOP, .lines = 1};
284 unsigned long m;
285 int substatus, ret;
286 struct message_header save_mhdr, *mhdr = mesg;
287
288 STAT(mesq_noop);
289 save_mhdr = *mhdr;
290 *mhdr = noop_header;
291 gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA);
292 ret = gru_wait(cb);
293
294 if (ret) {
295 substatus = gru_get_cb_message_queue_substatus(cb);
296 switch (substatus) {
297 case CBSS_NO_ERROR:
298 STAT(mesq_noop_unexpected_error);
299 ret = MQE_UNEXPECTED_CB_ERR;
300 break;
301 case CBSS_LB_OVERFLOWED:
302 STAT(mesq_noop_lb_overflow);
303 ret = MQE_CONGESTION;
304 break;
305 case CBSS_QLIMIT_REACHED:
306 STAT(mesq_noop_qlimit_reached);
307 ret = 0;
308 break;
309 case CBSS_AMO_NACKED:
310 STAT(mesq_noop_amo_nacked);
311 ret = MQE_CONGESTION;
312 break;
313 case CBSS_PUT_NACKED:
314 STAT(mesq_noop_put_nacked);
315 m = mq + (gru_get_amo_value_head(cb) << 6);
316 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
317 IMA);
318 if (gru_wait(cb) == CBS_IDLE)
319 ret = MQIE_AGAIN;
320 else
321 ret = MQE_UNEXPECTED_CB_ERR;
322 break;
323 case CBSS_PAGE_OVERFLOW:
324 default:
325 BUG();
326 }
327 }
328 *mhdr = save_mhdr;
329 return ret;
330}
331
332/*
333 * Handle a gru_mesq full.
334 */
335static int send_message_queue_full(void *cb,
336 unsigned long mq, void *mesg, int lines)
337{
338 union gru_mesqhead mqh;
339 unsigned int limit, head;
340 unsigned long avalue;
341 int half, qlines, save;
342
343 /* Determine if switching to first/second half of q */
344 avalue = gru_get_amo_value(cb);
345 head = gru_get_amo_value_head(cb);
346 limit = gru_get_amo_value_limit(cb);
347
348 /*
349 * Fetch "qlines" from the queue header. Since the queue may be
350 * in memory that can't be accessed using socket addresses, use
351 * the GRU to access the data. Use DSR space from the message.
352 */
353 save = *(int *)mesg;
354 gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA);
355 if (gru_wait(cb) != CBS_IDLE)
356 goto cberr;
357 qlines = *(int *)mesg;
358 *(int *)mesg = save;
359 half = (limit != qlines);
360
361 if (half)
362 mqh = gru_mesq_head(qlines / 2 + 1, qlines);
363 else
364 mqh = gru_mesq_head(2, qlines / 2 + 1);
365
366 /* Try to get lock for switching head pointer */
367 gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA);
368 if (gru_wait(cb) != CBS_IDLE)
369 goto cberr;
370 if (!gru_get_amo_value(cb)) {
371 STAT(mesq_qf_locked);
372 return MQE_QUEUE_FULL;
373 }
374
375 /* Got the lock. Send optional NOP if queue not full. */
376 if (head != limit) {
377 if (send_noop_message(cb, mq, mesg)) {
378 gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half),
379 XTYPE_DW, IMA);
380 if (gru_wait(cb) != CBS_IDLE)
381 goto cberr;
382 STAT(mesq_qf_noop_not_full);
383 return MQIE_AGAIN;
384 }
385 avalue++;
386 }
387
388 /* Then flip queuehead to other half of queue. */
389 gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA);
390 if (gru_wait(cb) != CBS_IDLE)
391 goto cberr;
392
393 /* If swapping the queue head was not successful, clear the hstatus lock */
394 if (gru_get_amo_value(cb) != avalue) {
395 STAT(mesq_qf_switch_head_failed);
396 gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA);
397 if (gru_wait(cb) != CBS_IDLE)
398 goto cberr;
399 }
400 return MQIE_AGAIN;
401cberr:
402 STAT(mesq_qf_unexpected_error);
403 return MQE_UNEXPECTED_CB_ERR;
404}
405
406
407/*
408 * Handle a gru_mesq failure. Some of these failures are software recoverable
409 * or retryable.
410 */
411static int send_message_failure(void *cb,
412 unsigned long mq,
413 void *mesg,
414 int lines)
415{
416 int substatus, ret = 0;
417 unsigned long m;
418
419 substatus = gru_get_cb_message_queue_substatus(cb);
420 switch (substatus) {
421 case CBSS_NO_ERROR:
422 STAT(mesq_send_unexpected_error);
423 ret = MQE_UNEXPECTED_CB_ERR;
424 break;
425 case CBSS_LB_OVERFLOWED:
426 STAT(mesq_send_lb_overflow);
427 ret = MQE_CONGESTION;
428 break;
429 case CBSS_QLIMIT_REACHED:
430 STAT(mesq_send_qlimit_reached);
431 ret = send_message_queue_full(cb, mq, mesg, lines);
432 break;
433 case CBSS_AMO_NACKED:
434 STAT(mesq_send_amo_nacked);
435 ret = MQE_CONGESTION;
436 break;
437 case CBSS_PUT_NACKED:
438 STAT(mesq_send_put_nacked);
439 m = mq + (gru_get_amo_value_head(cb) << 6);
440 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
441 if (gru_wait(cb) == CBS_IDLE)
442 ret = MQE_OK;
443 else
444 ret = MQE_UNEXPECTED_CB_ERR;
445 break;
446 default:
447 BUG();
448 }
449 return ret;
450}
451
452/*
453 * Send a message to a message queue
454 * cb GRU control block to use to send message
455 * mq message queue
456 * mesg message. Must be a vaddr within a GSEG
457 * bytes message size (<= 2 CL)
458 */
459int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes)
460{
461 struct message_header *mhdr;
462 void *cb;
463 void *dsr;
464 int istatus, clines, ret;
465
466 STAT(mesq_send);
467 BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
468
469 clines = (bytes + GRU_CACHE_LINE_BYTES - 1) / GRU_CACHE_LINE_BYTES;
470 if (gru_get_cpu_resources(bytes, &cb, &dsr))
471 return MQE_BUG_NO_RESOURCES;
472 memcpy(dsr, mesg, bytes);
473 mhdr = dsr;
474 mhdr->present = MQS_FULL;
475 mhdr->lines = clines;
476 if (clines == 2) {
477 mhdr->present2 = get_present2(mhdr);
478 restore_present2(mhdr, MQS_FULL);
479 }
480
481 do {
482 ret = MQE_OK;
483 gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA);
484 istatus = gru_wait(cb);
485 if (istatus != CBS_IDLE)
486 ret = send_message_failure(cb, mq, dsr, clines);
487 } while (ret == MQIE_AGAIN);
488 gru_free_cpu_resources(cb, dsr);
489
490 if (ret)
491 STAT(mesq_send_failed);
492 return ret;
493}
494EXPORT_SYMBOL_GPL(gru_send_message_gpa);
495
496/*
497 * Advance the receive pointer for the queue to the next message.
498 */
499void gru_free_message(void *rmq, void *mesg)
500{
501 struct message_queue *mq = rmq;
502 struct message_header *mhdr = mq->next;
503 void *next, *pnext;
504 int half = -1;
505 int lines = mhdr->lines;
506
507 if (lines == 2)
508 restore_present2(mhdr, MQS_EMPTY);
509 mhdr->present = MQS_EMPTY;
510
511 pnext = mq->next;
512 next = pnext + GRU_CACHE_LINE_BYTES * lines;
513 if (next == mq->limit) {
514 next = mq->start;
515 half = 1;
516 } else if (pnext < mq->start2 && next >= mq->start2) {
517 half = 0;
518 }
519
520 if (half >= 0)
521 mq->hstatus[half] = 1;
522 mq->next = next;
523}
524EXPORT_SYMBOL_GPL(gru_free_message);
525
526/*
527 * Get next message from message queue. Return NULL if no message
528 * present. User must call gru_free_message() to move to the next message.
529 * rmq message queue
530 */
531void *gru_get_next_message(void *rmq)
532{
533 struct message_queue *mq = rmq;
534 struct message_header *mhdr = mq->next;
535 int present = mhdr->present;
536
537 /* skip NOOP messages */
538 STAT(mesq_receive);
539 while (present == MQS_NOOP) {
540 gru_free_message(rmq, mhdr);
541 mhdr = mq->next;
542 present = mhdr->present;
543 }
544
545 /* Wait for both halves of 2 line messages */
546 if (present == MQS_FULL && mhdr->lines == 2 &&
547 get_present2(mhdr) == MQS_EMPTY)
548 present = MQS_EMPTY;
549
550 if (!present) {
551 STAT(mesq_receive_none);
552 return NULL;
553 }
554
555 if (mhdr->lines == 2)
556 restore_present2(mhdr, mhdr->present2);
557
558 return mhdr;
559}
560EXPORT_SYMBOL_GPL(gru_get_next_message);
561
562/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
563
564/*
565 * Copy a block of data using the GRU resources
566 */
567int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
568 unsigned int bytes)
569{
570 void *cb;
571 void *dsr;
572 int ret;
573
574 STAT(copy_gpa);
575 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
576 return MQE_BUG_NO_RESOURCES;
577 gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
578 XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA);
579 ret = gru_wait(cb);
580 gru_free_cpu_resources(cb, dsr);
581 return ret;
582}
583EXPORT_SYMBOL_GPL(gru_copy_gpa);
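A hedged usage sketch of gru_copy_gpa() (editorial illustration only, not part of the driver; example_gru_copy() is a made-up name, and the sketch assumes the caller owns two physically contiguous kernel buffers and relies on uv_gpa() and PAGE_SIZE, which this file already pulls in):

/* Illustrative sketch: copy one page between two kernel buffers via the GRU. */
static int __maybe_unused example_gru_copy(void *dst, void *src)
{
	/* gru_copy_gpa() takes UV global physical addresses, not kernel vaddrs */
	if (gru_copy_gpa(uv_gpa(dst), uv_gpa(src), PAGE_SIZE))
		return -EIO;
	return 0;
}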
584
585/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
586/* Temp - will delete after we gain confidence in the GRU */
587static __cacheline_aligned unsigned long word0;
588static __cacheline_aligned unsigned long word1;
589
590static int quicktest(struct gru_state *gru)
591{
592 void *cb;
593 void *ds;
594 unsigned long *p;
595
596 cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
597 ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
598 p = ds;
599 word0 = MAGIC;
600
601 gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
602 if (gru_wait(cb) != CBS_IDLE)
603 BUG();
604
605 if (*(unsigned long *)ds != MAGIC)
606 BUG();
607 gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
608 if (gru_wait(cb) != CBS_IDLE)
609 BUG();
610
611 if (word0 != word1 || word0 != MAGIC) {
612 printk
613 ("GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n",
614 gru->gs_gid, word1, MAGIC);
615 BUG(); /* ZZZ should not be fatal */
616 }
617
618 return 0;
619}
620
621
622int gru_kservices_init(struct gru_state *gru)
623{
624 struct gru_blade_state *bs;
625 struct gru_context_configuration_handle *cch;
626 unsigned long cbr_map, dsr_map;
627 int err, num, cpus_possible;
628
629 /*
630 * Currently, resources are reserved ONLY on the second chiplet
631 * on each blade. This leaves ALL resources on chiplet 0 available
632 * for user code.
633 */
634 bs = gru->gs_blade;
635 if (gru != &bs->bs_grus[1])
636 return 0;
637
638 cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
639
640 num = GRU_NUM_KERNEL_CBR * cpus_possible;
641 cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
642 gru->gs_reserved_cbrs += num;
643
644 num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
645 dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
646 gru->gs_reserved_dsr_bytes += num;
647
648 gru->gs_active_contexts++;
649 __set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
650 cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
651
652 bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
653 KERNEL_CTXNUM, 0);
654 bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
655 KERNEL_CTXNUM, 0);
656
657 lock_cch_handle(cch);
658 cch->tfm_fault_bit_enable = 0;
659 cch->tlb_int_enable = 0;
660 cch->tfm_done_bit_enable = 0;
661 cch->unmap_enable = 1;
662 err = cch_allocate(cch, 0, cbr_map, dsr_map);
663 if (err) {
664 gru_dbg(grudev,
665 "Unable to allocate kernel CCH: gru %d, err %d\n",
666 gru->gs_gid, err);
667 BUG();
668 }
669 if (cch_start(cch)) {
670 gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n",
671 gru->gs_gid, err);
672 BUG();
673 }
674 unlock_cch_handle(cch);
675
676 if (gru_options & GRU_QUICKLOOK)
677 quicktest(gru);
678 return 0;
679}
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
new file mode 100644
index 000000000000..eb17e0a3ac61
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -0,0 +1,134 @@
1
2/*
3 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19#ifndef __GRU_KSERVICES_H_
20#define __GRU_KSERVICES_H_
21
22
23/*
24 * Message queues using the GRU to send/receive messages.
25 *
26 * These functions allow the user to create a message queue for
27 * sending/receiving 1 or 2 cacheline messages using the GRU.
28 *
29 * Processes SENDING messages will use a kernel CBR/DSR to send
30 * the message. This is transparent to the caller.
31 *
32 * The receiver does not use any GRU resources.
33 *
34 * The functions support:
35 * - single receiver
36 * - multiple senders
37 * - cross partition message
38 *
39 * Missing features ZZZ:
40 * - user options for dealing with timeouts, queue full, etc.
41 * - gru_create_message_queue() needs interrupt vector info
42 */
43
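A minimal usage sketch of this interface (editorial illustration, not part of the driver; error handling is simplified, the kmalloc()-based alignment/contiguity handling is only an assumption, and gru_mesq_example() is a made-up name):

/*
 * Illustrative sketch: one sender, one receiver, single-cacheline message.
 * Assumes <linux/slab.h>, <asm/uv/uv_hub.h> and this header are included.
 */
static int __maybe_unused gru_mesq_example(void)
{
	unsigned int qbytes = 16 * 64;		/* 16 cachelines incl. the 2-line header */
	unsigned long msg[8] = { 0, 42 };	/* 64-byte payload; first 32 bits are reserved */
	void *q, *m;
	int ret;

	q = kmalloc(qbytes, GFP_KERNEL);	/* power-of-2 size to keep it cacheline aligned */
	if (!q)
		return -ENOMEM;
	gru_create_message_queue(q, qbytes);

	/* the sender addresses the queue by its UV global physical address */
	ret = gru_send_message_gpa(uv_gpa(q), msg, sizeof(msg));

	/* the receiver polls the queue directly; it needs no GRU resources */
	if (ret == MQE_OK && (m = gru_get_next_message(q)) != NULL)
		gru_free_message(q, m);

	kfree(q);
	return ret == MQE_OK ? 0 : -EBUSY;
}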
44/*
45 * Initialize a user allocated chunk of memory to be used as
46 * a message queue. The caller must ensure that the queue is
47 * in contiguous physical memory and is cacheline aligned.
48 *
49 * Message queue size is the total number of bytes allocated
50 * to the queue including a 2 cacheline header that is used
51 * to manage the queue.
52 *
53 * Input:
54 * p pointer to user allocated memory.
55 * bytes size of message queue in bytes
56 *
57 * Errors:
58 * 0 OK
59 * >0 error
60 */
61extern int gru_create_message_queue(void *p, unsigned int bytes);
62
63/*
64 * Send a message to a message queue.
65 *
66 * Note: The message queue transport mechanism uses the first 32
67 * bits of the message. Users should avoid using these bits.
68 *
69 *
70 * Input:
71 * mq_gpa message queue - must be a UV global physical address
72 * mesg pointer to message. Must be 64-bit aligned
73 * bytes size of message in bytes
74 *
75 * Output:
76 * 0 message sent
77 * >0 Send failure - see error codes below
78 *
79 */
80extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg,
81 unsigned int bytes);
82
83/* Status values for gru_send_message() */
84#define MQE_OK 0 /* message sent successfully */
85#define MQE_CONGESTION 1 /* temporary congestion, try again */
86#define MQE_QUEUE_FULL 2 /* queue is full */
87#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */
88#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */
89#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */
90
91/*
92 * Advance the receive pointer for the message queue to the next message.
93 * Note: current API requires messages to be gotten & freed in order. Future
94 * API extensions may allow for out-of-order freeing.
95 *
96 * Input
97 * mq message queue
98 * mesq message being freed
99 */
100extern void gru_free_message(void *mq, void *mesq);
101
102/*
103 * Get next message from message queue. Returns pointer to
104 * message OR NULL if no message present.
105 * User must call gru_free_message() after message is processed
106 * in order to move the queue pointers to next message.
107 *
108 * Input
109 * mq message queue
110 *
111 * Output:
112 * p pointer to message
113 * NULL no message available
114 */
115extern void *gru_get_next_message(void *mq);
116
117
118/*
119 * Copy data using the GRU. Source or destination can be located in a remote
120 * partition.
121 *
122 * Input:
123 * dest_gpa destination global physical address
124 * src_gpa source global physical address
125 * bytes number of bytes to copy
126 *
127 * Output:
128 * 0 OK
129 * >0 error
130 */
131extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
132 unsigned int bytes);
133
134#endif /* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
new file mode 100644
index 000000000000..e56e196a6998
--- /dev/null
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -0,0 +1,97 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __GRULIB_H__
20#define __GRULIB_H__
21
22#define GRU_BASENAME "gru"
23#define GRU_FULLNAME "/dev/gru"
24#define GRU_IOCTL_NUM 'G'
25
26/*
27 * Maximum number of GRU segments that a user can have open
28 * ZZZ temp - set high for testing. Revisit.
29 */
30#define GRU_MAX_OPEN_CONTEXTS 32
31
32/* Set Number of Request Blocks */
33#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *)
34
35/* Register task as using the slice */
36#define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *)
37
38/* Fetch exception detail */
39#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *)
40
41/* For user call_os handling - normally a TLB fault */
42#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *)
43
44/* For user unload context */
45#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *)
46
47/* For fetching GRU chiplet status */
48#define GRU_GET_CHIPLET_STATUS _IOWR(GRU_IOCTL_NUM, 10, void *)
49
50/* For user TLB flushing (primarily for tests) */
51#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *)
52
53/* Get some config options (primarily for tests & emulator) */
54#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *)
55
56#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th))
57#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th))
58
59/*
60 * Structure used to pass context creation parameters to the driver
61 */
62struct gru_create_context_req {
63 unsigned long gseg;
64 unsigned int data_segment_bytes;
65 unsigned int control_blocks;
66 unsigned int maximum_thread_count;
67 unsigned int options;
68};
69
70/*
71 * Structure used to pass unload context parameters to the driver
72 */
73struct gru_unload_context_req {
74 unsigned long gseg;
75};
76
77/*
78 * Structure used to pass TLB flush parameters to the driver
79 */
80struct gru_flush_tlb_req {
81 unsigned long gseg;
82 unsigned long vaddr;
83 size_t len;
84};
85
86/*
87 * GRU configuration info (temp - for testing)
88 */
89struct gru_config_info {
90 int cpus;
91 int blades;
92 int nodes;
93 int chiplets;
94 int fill[16];
95};
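A hedged user-space sketch of how a test program might read this structure (editorial illustration only; it assumes the driver interprets the GRU_GET_CONFIG_INFO ioctl argument as a pointer to struct gru_config_info and that this header is usable from user space):

/* Illustrative user-space sketch: fetch the GRU config info. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "grulib.h"

int main(void)
{
	struct gru_config_info info;
	int fd = open(GRU_FULLNAME, O_RDWR);

	if (fd < 0 || ioctl(fd, GRU_GET_CONFIG_INFO, &info) < 0) {
		perror("gru config");
		return 1;
	}
	printf("cpus %d, blades %d, nodes %d, chiplets %d\n",
	       info.cpus, info.blades, info.nodes, info.chiplets);
	close(fd);
	return 0;
}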
96
97#endif /* __GRULIB_H__ */
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 000000000000..0eeb8dddd2f5
--- /dev/null
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,802 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
9 *
10 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
11 */
12
13#include <linux/kernel.h>
14#include <linux/slab.h>
15#include <linux/mm.h>
16#include <linux/spinlock.h>
17#include <linux/sched.h>
18#include <linux/device.h>
19#include <linux/list.h>
20#include <asm/uv/uv_hub.h>
21#include "gru.h"
22#include "grutables.h"
23#include "gruhandles.h"
24
25unsigned long gru_options __read_mostly;
26
27static struct device_driver gru_driver = {
28 .name = "gru"
29};
30
31static struct device gru_device = {
32 .bus_id = {0},
33 .driver = &gru_driver,
34};
35
36struct device *grudev = &gru_device;
37
38/*
39 * Select a gru fault map to be used by the current cpu. Note that
40 * multiple cpus may be using the same map.
41 * ZZZ should "shift" be used?? Depends on HT cpu numbering
42 * ZZZ should be inline but did not work on emulator
43 */
44int gru_cpu_fault_map_id(void)
45{
46 return uv_blade_processor_id() % GRU_NUM_TFM;
47}
48
49/*--------- ASID Management -------------------------------------------
50 *
51 * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
52 * Once MAX is reached, flush the TLB & start over. However,
53 * some asids may still be in use. There won't be many (percentage wise) still
54 * in use. Search active contexts & determine the value of the first
55 * asid in use ("x"s below). Set "limit" to this value.
56 * This defines a block of assignable asids.
57 *
58 * When "limit" is reached, search forward from limit+1 and determine the
59 * next block of assignable asids.
60 *
61 * Repeat until MAX_ASID is reached, then start over again.
62 *
63 * Each time MAX_ASID is reached, increment the asid generation. Since
64 * the search for in-use asids only checks contexts with GRUs currently
65 * assigned, asids in some contexts will be missed. Prior to loading
66 * a context, the asid generation of the GTS asid is rechecked. If it
67 * doesn't match the current generation, a new asid will be assigned.
68 *
69 * 0---------------x------------x---------------------x----|
70 * ^-next ^-limit ^-MAX_ASID
71 *
72 * All asid manipulation & context loading/unloading is protected by the
73 * gs_lock.
74 */
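A short worked example of the scheme described above (editorial note; the asid values are made up and ASID_INC is taken as 1 for readability):

/*
 * Example: suppose the loaded contexts currently hold asids 5 and 9.
 * Starting from MIN_ASID, the first scan sets limit = 5, so asids below 5
 * are handed out without further searching.  When the next candidate reaches
 * 5, the scan skips past it and finds limit = 9, opening the block 6..8, and
 * so on.  When MAX_ASID is reached, the generation counter is bumped, the
 * whole GRU TLB is flushed, and assignment restarts at MIN_ASID.
 */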
75
76/* Hit the asid limit. Start over */
77static int gru_wrap_asid(struct gru_state *gru)
78{
79 gru_dbg(grudev, "gru %p\n", gru);
80 STAT(asid_wrap);
81 gru->gs_asid_gen++;
82 gru_flush_all_tlb(gru);
83 return MIN_ASID;
84}
85
86/* Find the next chunk of unused asids */
87static int gru_reset_asid_limit(struct gru_state *gru, int asid)
88{
89 int i, gid, inuse_asid, limit;
90
91 gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
92 STAT(asid_next);
93 limit = MAX_ASID;
94 if (asid >= limit)
95 asid = gru_wrap_asid(gru);
96 gid = gru->gs_gid;
97again:
98 for (i = 0; i < GRU_NUM_CCH; i++) {
99 if (!gru->gs_gts[i])
100 continue;
101 inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
102 gru_dbg(grudev, "gru %p, inuse_asid 0x%x, cxtnum %d, gts %p\n",
103 gru, inuse_asid, i, gru->gs_gts[i]);
104 if (inuse_asid == asid) {
105 asid += ASID_INC;
106 if (asid >= limit) {
107 /*
108 * empty range: reset the range limit and
109 * start over
110 */
111 limit = MAX_ASID;
112 if (asid >= MAX_ASID)
113 asid = gru_wrap_asid(gru);
114 goto again;
115 }
116 }
117
118 if ((inuse_asid > asid) && (inuse_asid < limit))
119 limit = inuse_asid;
120 }
121 gru->gs_asid_limit = limit;
122 gru->gs_asid = asid;
123 gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid,
124 limit);
125 return asid;
126}
127
128/* Assign a new ASID to a thread context. */
129static int gru_assign_asid(struct gru_state *gru)
130{
131 int asid;
132
133 spin_lock(&gru->gs_asid_lock);
134 gru->gs_asid += ASID_INC;
135 asid = gru->gs_asid;
136 if (asid >= gru->gs_asid_limit)
137 asid = gru_reset_asid_limit(gru, asid);
138 spin_unlock(&gru->gs_asid_lock);
139
140 gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
141 return asid;
142}
143
144/*
145 * Clear n bits in a word. Return a word indicating the bits that were cleared.
146 * Optionally, build an array of chars that contain the bit numbers allocated.
147 */
148static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
149 char *idx)
150{
151 unsigned long bits = 0;
152 int i;
153
154 do {
155 i = find_first_bit(p, mmax);
156 if (i == mmax)
157 BUG();
158 __clear_bit(i, p);
159 __set_bit(i, &bits);
160 if (idx)
161 *idx++ = i;
162 } while (--n);
163 return bits;
164}
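A small worked example of the helper above (editorial note; the values are made up):

/*
 * Example: with *p == 0xf0 and n == 2, the loop clears bits 4 and 5, so the
 * function returns 0x30, leaves *p == 0xc0, and (if idx != NULL) stores the
 * bit numbers 4 and 5 in idx[0] and idx[1].
 */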
165
166unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
167 char *cbmap)
168{
169 return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
170 cbmap);
171}
172
173unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
174 char *dsmap)
175{
176 return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
177 dsmap);
178}
179
180static void reserve_gru_resources(struct gru_state *gru,
181 struct gru_thread_state *gts)
182{
183 gru->gs_active_contexts++;
184 gts->ts_cbr_map =
185 gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
186 gts->ts_cbr_idx);
187 gts->ts_dsr_map =
188 gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
189}
190
191static void free_gru_resources(struct gru_state *gru,
192 struct gru_thread_state *gts)
193{
194 gru->gs_active_contexts--;
195 gru->gs_cbr_map |= gts->ts_cbr_map;
196 gru->gs_dsr_map |= gts->ts_dsr_map;
197}
198
199/*
200 * Check if a GRU has sufficient free resources to satisfy an allocation
201 * request. Note: GRU locks may or may not be held when this is called. If
202 * not held, recheck after acquiring the appropriate locks.
203 *
204 * Returns 1 if sufficient resources, 0 if not
205 */
206static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
207 int dsr_au_count, int max_active_contexts)
208{
209 return hweight64(gru->gs_cbr_map) >= cbr_au_count
210 && hweight64(gru->gs_dsr_map) >= dsr_au_count
211 && gru->gs_active_contexts < max_active_contexts;
212}
213
214/*
215 * TLB management requires tracking all GRU chiplets that have loaded a GSEG
216 * context.
217 */
218static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms,
219 int ctxnum)
220{
221 struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
222 unsigned short ctxbitmap = (1 << ctxnum);
223 int asid;
224
225 spin_lock(&gms->ms_asid_lock);
226 asid = asids->mt_asid;
227
228 if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) {
229 asid = gru_assign_asid(gru);
230 asids->mt_asid = asid;
231 asids->mt_asid_gen = gru->gs_asid_gen;
232 STAT(asid_new);
233 } else {
234 STAT(asid_reuse);
235 }
236
237 BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
238 asids->mt_ctxbitmap |= ctxbitmap;
239 if (!test_bit(gru->gs_gid, gms->ms_asidmap))
240 __set_bit(gru->gs_gid, gms->ms_asidmap);
241 spin_unlock(&gms->ms_asid_lock);
242
243 gru_dbg(grudev,
244 "gru %x, gms %p, ctxnum 0x%d, asid 0x%x, asidmap 0x%lx\n",
245 gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]);
246 return asid;
247}
248
249static void gru_unload_mm_tracker(struct gru_state *gru,
250 struct gru_mm_struct *gms, int ctxnum)
251{
252 struct gru_mm_tracker *asids;
253 unsigned short ctxbitmap;
254
255 asids = &gms->ms_asids[gru->gs_gid];
256 ctxbitmap = (1 << ctxnum);
257 spin_lock(&gms->ms_asid_lock);
258 BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
259 asids->mt_ctxbitmap ^= ctxbitmap;
260 gru_dbg(grudev, "gru %x, gms %p, ctxnum 0x%d, asidmap 0x%lx\n",
261 gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]);
262 spin_unlock(&gms->ms_asid_lock);
263}
264
265/*
266 * Decrement the reference count on a GTS structure. Free the structure
267 * if the reference count goes to zero.
268 */
269void gts_drop(struct gru_thread_state *gts)
270{
271 if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
272 gru_drop_mmu_notifier(gts->ts_gms);
273 kfree(gts);
274 STAT(gts_free);
275 }
276}
277
278/*
279 * Locate the GTS structure for the current thread.
280 */
281static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
282 *vdata, int tsid)
283{
284 struct gru_thread_state *gts;
285
286 list_for_each_entry(gts, &vdata->vd_head, ts_next)
287 if (gts->ts_tsid == tsid)
288 return gts;
289 return NULL;
290}
291
292/*
293 * Allocate a thread state structure.
294 */
295static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
296 struct gru_vma_data *vdata,
297 int tsid)
298{
299 struct gru_thread_state *gts;
300 int bytes;
301
302 bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
303 CBR_BYTES(vdata->vd_cbr_au_count);
304 bytes += sizeof(struct gru_thread_state);
305 gts = kzalloc(bytes, GFP_KERNEL);
306 if (!gts)
307 return NULL;
308
309 STAT(gts_alloc);
310 atomic_set(&gts->ts_refcnt, 1);
311 mutex_init(&gts->ts_ctxlock);
312 gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
313 gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
314 gts->ts_user_options = vdata->vd_user_options;
315 gts->ts_tsid = tsid;
316 gts->ts_user_options = vdata->vd_user_options;
317 gts->ts_ctxnum = NULLCTX;
318 gts->ts_mm = current->mm;
319 gts->ts_vma = vma;
320 gts->ts_tlb_int_select = -1;
321 gts->ts_gms = gru_register_mmu_notifier();
322 if (!gts->ts_gms)
323 goto err;
324
325 gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
326 return gts;
327
328err:
329 gts_drop(gts);
330 return NULL;
331}
332
333/*
334 * Allocate a vma private data structure.
335 */
336struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
337{
338 struct gru_vma_data *vdata = NULL;
339
340 vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
341 if (!vdata)
342 return NULL;
343
344 INIT_LIST_HEAD(&vdata->vd_head);
345 spin_lock_init(&vdata->vd_lock);
346 gru_dbg(grudev, "alloc vdata %p\n", vdata);
347 return vdata;
348}
349
350/*
351 * Find the thread state structure for the current thread.
352 */
353struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
354 int tsid)
355{
356 struct gru_vma_data *vdata = vma->vm_private_data;
357 struct gru_thread_state *gts;
358
359 spin_lock(&vdata->vd_lock);
360 gts = gru_find_current_gts_nolock(vdata, tsid);
361 spin_unlock(&vdata->vd_lock);
362 gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
363 return gts;
364}
365
366/*
367 * Allocate a new thread state for a GSEG. Note that another thread may
368 * race us and create the gts first.
369 */
370struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
371 int tsid)
372{
373 struct gru_vma_data *vdata = vma->vm_private_data;
374 struct gru_thread_state *gts, *ngts;
375
376 gts = gru_alloc_gts(vma, vdata, tsid);
377 if (!gts)
378 return NULL;
379
380 spin_lock(&vdata->vd_lock);
381 ngts = gru_find_current_gts_nolock(vdata, tsid);
382 if (ngts) {
383 gts_drop(gts);
384 gts = ngts;
385 STAT(gts_double_allocate);
386 } else {
387 list_add(&gts->ts_next, &vdata->vd_head);
388 }
389 spin_unlock(&vdata->vd_lock);
390 gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
391 return gts;
392}
393
394/*
395 * Free the GRU context assigned to the thread state.
396 */
397static void gru_free_gru_context(struct gru_thread_state *gts)
398{
399 struct gru_state *gru;
400
401 gru = gts->ts_gru;
402 gru_dbg(grudev, "gts %p, gru %p\n", gts, gru);
403
404 spin_lock(&gru->gs_lock);
405 gru->gs_gts[gts->ts_ctxnum] = NULL;
406 free_gru_resources(gru, gts);
407 BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
408 __clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
409 gts->ts_ctxnum = NULLCTX;
410 gts->ts_gru = NULL;
411 spin_unlock(&gru->gs_lock);
412
413 gts_drop(gts);
414 STAT(free_context);
415}
416
417/*
418 * Prefetching cachelines helps hardware performance.
419 * (Strictly a performance enhancement. Not functionally required).
420 */
421static void prefetch_data(void *p, int num, int stride)
422{
423 while (num-- > 0) {
424 prefetchw(p);
425 p += stride;
426 }
427}
428
429static inline long gru_copy_handle(void *d, void *s)
430{
431 memcpy(d, s, GRU_HANDLE_BYTES);
432 return GRU_HANDLE_BYTES;
433}
434
435/* rewrite in assembly & use lots of prefetch */
436static void gru_load_context_data(void *save, void *grubase, int ctxnum,
437 unsigned long cbrmap, unsigned long dsrmap)
438{
439 void *gseg, *cb, *cbe;
440 unsigned long length;
441 int i, scr;
442
443 gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
444 length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
445 prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
446 GRU_CACHE_LINE_BYTES);
447
448 cb = gseg + GRU_CB_BASE;
449 cbe = grubase + GRU_CBE_BASE;
450 for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
451 prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
452 prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
453 GRU_CACHE_LINE_BYTES);
454 cb += GRU_HANDLE_STRIDE;
455 }
456
457 cb = gseg + GRU_CB_BASE;
458 for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
459 save += gru_copy_handle(cb, save);
460 save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
461 cb += GRU_HANDLE_STRIDE;
462 }
463
464 memcpy(gseg + GRU_DS_BASE, save, length);
465}
466
467static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
468 unsigned long cbrmap, unsigned long dsrmap)
469{
470 void *gseg, *cb, *cbe;
471 unsigned long length;
472 int i, scr;
473
474 gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
475
476 cb = gseg + GRU_CB_BASE;
477 cbe = grubase + GRU_CBE_BASE;
478 for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
479 save += gru_copy_handle(save, cb);
480 save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
481 cb += GRU_HANDLE_STRIDE;
482 }
483 length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
484 memcpy(save, gseg + GRU_DS_BASE, length);
485}
486
487void gru_unload_context(struct gru_thread_state *gts, int savestate)
488{
489 struct gru_state *gru = gts->ts_gru;
490 struct gru_context_configuration_handle *cch;
491 int ctxnum = gts->ts_ctxnum;
492
493 zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
494 cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
495
496 lock_cch_handle(cch);
497 if (cch_interrupt_sync(cch))
498 BUG();
499 gru_dbg(grudev, "gts %p\n", gts);
500
501 gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
502 if (savestate)
503 gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
504 ctxnum, gts->ts_cbr_map,
505 gts->ts_dsr_map);
506
507 if (cch_deallocate(cch))
508 BUG();
509 gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */
510 unlock_cch_handle(cch);
511
512 gru_free_gru_context(gts);
513 STAT(unload_context);
514}
515
516/*
517 * Load a GRU context by copying it from the thread data structure in memory
518 * to the GRU.
519 */
520static void gru_load_context(struct gru_thread_state *gts)
521{
522 struct gru_state *gru = gts->ts_gru;
523 struct gru_context_configuration_handle *cch;
524 int err, asid, ctxnum = gts->ts_ctxnum;
525
526 gru_dbg(grudev, "gts %p\n", gts);
527 cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
528
529 lock_cch_handle(cch);
530 asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
531 cch->tfm_fault_bit_enable =
532 (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
533 || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
534 cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
535 if (cch->tlb_int_enable) {
536 gts->ts_tlb_int_select = gru_cpu_fault_map_id();
537 cch->tlb_int_select = gts->ts_tlb_int_select;
538 }
539 cch->tfm_done_bit_enable = 0;
540 err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map);
541 if (err) {
542 gru_dbg(grudev,
543 "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
544 err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
545 BUG();
546 }
547
548 gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
549 gts->ts_cbr_map, gts->ts_dsr_map);
550
551 if (cch_start(cch))
552 BUG();
553 unlock_cch_handle(cch);
554
555 STAT(load_context);
556}
557
558/*
559 * Update fields in an active CCH:
560 * - retarget interrupts on local blade
561 * - force a delayed context unload by clearing the CCH asids. This
562 * forces TLB misses for new GRU instructions. The context is unloaded
563 * when the next TLB miss occurs.
564 */
565static int gru_update_cch(struct gru_thread_state *gts, int int_select)
566{
567 struct gru_context_configuration_handle *cch;
568 struct gru_state *gru = gts->ts_gru;
569 int i, ctxnum = gts->ts_ctxnum, ret = 0;
570
571 cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
572
573 lock_cch_handle(cch);
574 if (cch->state == CCHSTATE_ACTIVE) {
575 if (gru->gs_gts[gts->ts_ctxnum] != gts)
576 goto exit;
577 if (cch_interrupt(cch))
578 BUG();
579 if (int_select >= 0) {
580 gts->ts_tlb_int_select = int_select;
581 cch->tlb_int_select = int_select;
582 } else {
583 for (i = 0; i < 8; i++)
584 cch->asid[i] = 0;
585 cch->tfm_fault_bit_enable = 0;
586 cch->tlb_int_enable = 0;
587 gts->ts_force_unload = 1;
588 }
589 if (cch_start(cch))
590 BUG();
591 ret = 1;
592 }
593exit:
594 unlock_cch_handle(cch);
595 return ret;
596}
597
598/*
599 * Update CCH tlb interrupt select. Required when all of the following are true:
600 * - task's GRU context is loaded into a GRU
601 * - task is using interrupt notification for TLB faults
602 * - task has migrated to a different cpu on the same blade where
603 * it was previously running.
604 */
605static int gru_retarget_intr(struct gru_thread_state *gts)
606{
607 if (gts->ts_tlb_int_select < 0
608 || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
609 return 0;
610
611 gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
612 gru_cpu_fault_map_id());
613 return gru_update_cch(gts, gru_cpu_fault_map_id());
614}
615
616
617/*
618 * Insufficient GRU resources available on the local blade. Steal a context from
619 * a process. This is a hack until a _real_ resource scheduler is written....
620 */
621#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
622#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
623 ((g)+1) : &(b)->bs_grus[0])
624
625static void gru_steal_context(struct gru_thread_state *gts)
626{
627 struct gru_blade_state *blade;
628 struct gru_state *gru, *gru0;
629 struct gru_thread_state *ngts = NULL;
630 int ctxnum, ctxnum0, flag = 0, cbr, dsr;
631
632 cbr = gts->ts_cbr_au_count;
633 dsr = gts->ts_dsr_au_count;
634
635 preempt_disable();
636 blade = gru_base[uv_numa_blade_id()];
637 spin_lock(&blade->bs_lock);
638
639 ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
640 gru = blade->bs_lru_gru;
641 if (ctxnum == 0)
642 gru = next_gru(blade, gru);
643 ctxnum0 = ctxnum;
644 gru0 = gru;
645 while (1) {
646 if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
647 break;
648 spin_lock(&gru->gs_lock);
649 for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
650 if (flag && gru == gru0 && ctxnum == ctxnum0)
651 break;
652 ngts = gru->gs_gts[ctxnum];
653 /*
654 * We are grabbing locks out of order, so trylock is
655 * needed. GTSs are usually not locked, so the odds of
656 * success are high. If trylock fails, try to steal a
657 * different GSEG.
658 */
659 if (ngts && mutex_trylock(&ngts->ts_ctxlock))
660 break;
661 ngts = NULL;
662 flag = 1;
663 }
664 spin_unlock(&gru->gs_lock);
665 if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
666 break;
667 ctxnum = 0;
668 gru = next_gru(blade, gru);
669 }
670 blade->bs_lru_gru = gru;
671 blade->bs_lru_ctxnum = ctxnum;
672 spin_unlock(&blade->bs_lock);
673 preempt_enable();
674
675 if (ngts) {
676 STAT(steal_context);
677 ngts->ts_steal_jiffies = jiffies;
678 gru_unload_context(ngts, 1);
679 mutex_unlock(&ngts->ts_ctxlock);
680 } else {
681 STAT(steal_context_failed);
682 }
683 gru_dbg(grudev,
684 "stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;"
685 " avail cb %ld, ds %ld\n",
686 gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
687 hweight64(gru->gs_dsr_map));
688}
689
690/*
691 * Scan the GRUs on the local blade & assign a GRU context.
692 */
693static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
694{
695 struct gru_state *gru, *grux;
696 int i, max_active_contexts;
697
698 preempt_disable();
699
700again:
701 gru = NULL;
702 max_active_contexts = GRU_NUM_CCH;
703 for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
704 if (check_gru_resources(grux, gts->ts_cbr_au_count,
705 gts->ts_dsr_au_count,
706 max_active_contexts)) {
707 gru = grux;
708 max_active_contexts = grux->gs_active_contexts;
709 if (max_active_contexts == 0)
710 break;
711 }
712 }
713
714 if (gru) {
715 spin_lock(&gru->gs_lock);
716 if (!check_gru_resources(gru, gts->ts_cbr_au_count,
717 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
718 spin_unlock(&gru->gs_lock);
719 goto again;
720 }
721 reserve_gru_resources(gru, gts);
722 gts->ts_gru = gru;
723 gts->ts_ctxnum =
724 find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
725 BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
726 atomic_inc(&gts->ts_refcnt);
727 gru->gs_gts[gts->ts_ctxnum] = gts;
728 __set_bit(gts->ts_ctxnum, &gru->gs_context_map);
729 spin_unlock(&gru->gs_lock);
730
731 STAT(assign_context);
732 gru_dbg(grudev,
733 "gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n",
734 gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
735 gts->ts_gru->gs_gid, gts->ts_ctxnum,
736 gts->ts_cbr_au_count, gts->ts_dsr_au_count);
737 } else {
738 gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
739 STAT(assign_context_failed);
740 }
741
742 preempt_enable();
743 return gru;
744}
745
746/*
747 * gru_fault
748 *
749 * Map the user's GRU segment
750 *
751 * Note: gru segments are always mmaped on GRU_GSEG_PAGESIZE boundaries.
752 */
753int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
754{
755 struct gru_thread_state *gts;
756 unsigned long paddr, vaddr;
757
758 vaddr = (unsigned long)vmf->virtual_address;
759 gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
760 vma, vaddr, GSEG_BASE(vaddr));
761 STAT(nopfn);
762
763 /* The following check ensures vaddr is a valid address in the VMA */
764 gts = gru_find_thread_state(vma, TSID(vaddr, vma));
765 if (!gts)
766 return VM_FAULT_SIGBUS;
767
768again:
769 preempt_disable();
770 mutex_lock(&gts->ts_ctxlock);
771 if (gts->ts_gru) {
772 if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
773 STAT(migrated_nopfn_unload);
774 gru_unload_context(gts, 1);
775 } else {
776 if (gru_retarget_intr(gts))
777 STAT(migrated_nopfn_retarget);
778 }
779 }
780
781 if (!gts->ts_gru) {
782 if (!gru_assign_gru_context(gts)) {
783 mutex_unlock(&gts->ts_ctxlock);
784 preempt_enable();
785 schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
786 if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
787 gru_steal_context(gts);
788 goto again;
789 }
790 gru_load_context(gts);
791 paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
792 remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
793 paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
794 vma->vm_page_prot);
795 }
796
797 mutex_unlock(&gts->ts_ctxlock);
798 preempt_enable();
799
800 return VM_FAULT_NOPAGE;
801}
802
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
new file mode 100644
index 000000000000..533923f83f1a
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -0,0 +1,336 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * PROC INTERFACES
5 *
6 * This file supports the /proc interfaces for the GRU driver
7 *
8 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/proc_fs.h>
26#include <linux/device.h>
27#include <linux/seq_file.h>
28#include <linux/uaccess.h>
29#include "gru.h"
30#include "grulib.h"
31#include "grutables.h"
32
33#define printstat(s, f) printstat_val(s, &gru_stats.f, #f)
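/*
 * Editorial note: the macro stringizes the field name, so for example
 * printstat(s, load_context) expands to
 * printstat_val(s, &gru_stats.load_context, "load_context").
 */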
34
35static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
36{
37 unsigned long val = atomic_long_read(v);
38
39 if (val)
40 seq_printf(s, "%16lu %s\n", val, id);
41}
42
43static int statistics_show(struct seq_file *s, void *p)
44{
45 printstat(s, vdata_alloc);
46 printstat(s, vdata_free);
47 printstat(s, gts_alloc);
48 printstat(s, gts_free);
49 printstat(s, vdata_double_alloc);
50 printstat(s, gts_double_allocate);
51 printstat(s, assign_context);
52 printstat(s, assign_context_failed);
53 printstat(s, free_context);
54 printstat(s, load_context);
55 printstat(s, unload_context);
56 printstat(s, steal_context);
57 printstat(s, steal_context_failed);
58 printstat(s, nopfn);
59 printstat(s, break_cow);
60 printstat(s, asid_new);
61 printstat(s, asid_next);
62 printstat(s, asid_wrap);
63 printstat(s, asid_reuse);
64 printstat(s, intr);
65 printstat(s, call_os);
66 printstat(s, call_os_check_for_bug);
67 printstat(s, call_os_wait_queue);
68 printstat(s, user_flush_tlb);
69 printstat(s, user_unload_context);
70 printstat(s, user_exception);
71 printstat(s, set_task_slice);
72 printstat(s, migrate_check);
73 printstat(s, migrated_retarget);
74 printstat(s, migrated_unload);
75 printstat(s, migrated_unload_delay);
76 printstat(s, migrated_nopfn_retarget);
77 printstat(s, migrated_nopfn_unload);
78 printstat(s, tlb_dropin);
79 printstat(s, tlb_dropin_fail_no_asid);
80 printstat(s, tlb_dropin_fail_upm);
81 printstat(s, tlb_dropin_fail_invalid);
82 printstat(s, tlb_dropin_fail_range_active);
83 printstat(s, tlb_dropin_fail_idle);
84 printstat(s, tlb_dropin_fail_fmm);
85 printstat(s, mmu_invalidate_range);
86 printstat(s, mmu_invalidate_page);
87 printstat(s, mmu_clear_flush_young);
88 printstat(s, flush_tlb);
89 printstat(s, flush_tlb_gru);
90 printstat(s, flush_tlb_gru_tgh);
91 printstat(s, flush_tlb_gru_zero_asid);
92 printstat(s, copy_gpa);
93 printstat(s, mesq_receive);
94 printstat(s, mesq_receive_none);
95 printstat(s, mesq_send);
96 printstat(s, mesq_send_failed);
97 printstat(s, mesq_noop);
98 printstat(s, mesq_send_unexpected_error);
99 printstat(s, mesq_send_lb_overflow);
100 printstat(s, mesq_send_qlimit_reached);
101 printstat(s, mesq_send_amo_nacked);
102 printstat(s, mesq_send_put_nacked);
103 printstat(s, mesq_qf_not_full);
104 printstat(s, mesq_qf_locked);
105 printstat(s, mesq_qf_noop_not_full);
106 printstat(s, mesq_qf_switch_head_failed);
107 printstat(s, mesq_qf_unexpected_error);
108 printstat(s, mesq_noop_unexpected_error);
109 printstat(s, mesq_noop_lb_overflow);
110 printstat(s, mesq_noop_qlimit_reached);
111 printstat(s, mesq_noop_amo_nacked);
112 printstat(s, mesq_noop_put_nacked);
113 return 0;
114}
115
116static ssize_t statistics_write(struct file *file, const char __user *userbuf,
117 size_t count, loff_t *data)
118{
119 memset(&gru_stats, 0, sizeof(gru_stats));
120 return count;
121}
122
123static int options_show(struct seq_file *s, void *p)
124{
125 seq_printf(s, "0x%lx\n", gru_options);
126 return 0;
127}
128
129static ssize_t options_write(struct file *file, const char __user *userbuf,
130 size_t count, loff_t *data)
131{
132 unsigned long val;
133 char buf[80];
134	count = min_t(size_t, count, sizeof(buf) - 1);
135	if (copy_from_user(buf, userbuf, count))
136		return -EFAULT;
137	buf[count] = '\0';
138	if (!strict_strtoul(buf, 10, &val))
139		gru_options = val;
140
141 return count;
142}
143
144static int cch_seq_show(struct seq_file *file, void *data)
145{
146 long gid = *(long *)data;
147 int i;
148 struct gru_state *gru = GID_TO_GRU(gid);
149 struct gru_thread_state *ts;
150 const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
151
152 if (gid == 0)
153 seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid",
154 "ctx#", "pid", "cbrs", "dsbytes", "mode");
155 if (gru)
156 for (i = 0; i < GRU_NUM_CCH; i++) {
157 ts = gru->gs_gts[i];
158 if (!ts)
159 continue;
160 seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n",
161 gru->gs_gid, gru->gs_blade_id, i,
162 ts->ts_tgid_owner,
163 ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
164				   ts->ts_dsr_au_count * GRU_DSR_AU_BYTES,
165 mode[ts->ts_user_options &
166 GRU_OPT_MISS_MASK]);
167 }
168
169 return 0;
170}
171
172static int gru_seq_show(struct seq_file *file, void *data)
173{
174 long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
175 struct gru_state *gru = GID_TO_GRU(gid);
176
177 if (gid == 0) {
178 seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
179 "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
180 seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
181 "busy", "busy", "free", "free", "free");
182 }
183 if (gru) {
184 ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
185 cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
186 dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
187 seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
188 gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
189 GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
190 ctxfree, cbrfree, dsrfree);
191 }
192
193 return 0;
194}
195
196static void seq_stop(struct seq_file *file, void *data)
197{
198}
199
200static void *seq_start(struct seq_file *file, loff_t *gid)
201{
202 if (*gid < GRU_MAX_GRUS)
203 return gid;
204 return NULL;
205}
206
207static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
208{
209 (*gid)++;
210 if (*gid < GRU_MAX_GRUS)
211 return gid;
212 return NULL;
213}
214
215static const struct seq_operations cch_seq_ops = {
216 .start = seq_start,
217 .next = seq_next,
218 .stop = seq_stop,
219 .show = cch_seq_show
220};
221
222static const struct seq_operations gru_seq_ops = {
223 .start = seq_start,
224 .next = seq_next,
225 .stop = seq_stop,
226 .show = gru_seq_show
227};
228
229static int statistics_open(struct inode *inode, struct file *file)
230{
231 return single_open(file, statistics_show, NULL);
232}
233
234static int options_open(struct inode *inode, struct file *file)
235{
236 return single_open(file, options_show, NULL);
237}
238
239static int cch_open(struct inode *inode, struct file *file)
240{
241 return seq_open(file, &cch_seq_ops);
242}
243
244static int gru_open(struct inode *inode, struct file *file)
245{
246 return seq_open(file, &gru_seq_ops);
247}
248
249/* *INDENT-OFF* */
250static const struct file_operations statistics_fops = {
251 .open = statistics_open,
252 .read = seq_read,
253 .write = statistics_write,
254 .llseek = seq_lseek,
255 .release = single_release,
256};
257
258static const struct file_operations options_fops = {
259 .open = options_open,
260 .read = seq_read,
261 .write = options_write,
262 .llseek = seq_lseek,
263 .release = single_release,
264};
265
266static const struct file_operations cch_fops = {
267 .open = cch_open,
268 .read = seq_read,
269 .llseek = seq_lseek,
270 .release = seq_release,
271};
272static const struct file_operations gru_fops = {
273 .open = gru_open,
274 .read = seq_read,
275 .llseek = seq_lseek,
276 .release = seq_release,
277};
278
279static struct proc_entry {
280 char *name;
281 int mode;
282 const struct file_operations *fops;
283 struct proc_dir_entry *entry;
284} proc_files[] = {
285 {"statistics", 0644, &statistics_fops},
286 {"debug_options", 0644, &options_fops},
287 {"cch_status", 0444, &cch_fops},
288 {"gru_status", 0444, &gru_fops},
289 {NULL}
290};
291/* *INDENT-ON* */
292
293static struct proc_dir_entry *proc_gru __read_mostly;
294
295static int create_proc_file(struct proc_entry *p)
296{
297 p->entry = create_proc_entry(p->name, p->mode, proc_gru);
298 if (!p->entry)
299 return -1;
300 p->entry->proc_fops = p->fops;
301 return 0;
302}
303
304static void delete_proc_files(void)
305{
306 struct proc_entry *p;
307
308 if (proc_gru) {
309 for (p = proc_files; p->name; p++)
310 if (p->entry)
311 remove_proc_entry(p->name, proc_gru);
312		remove_proc_entry("sgi_uv/gru", NULL);
313 }
314}
315
316int gru_proc_init(void)
317{
318 struct proc_entry *p;
319
320 proc_mkdir("sgi_uv", NULL);
321 proc_gru = proc_mkdir("sgi_uv/gru", NULL);
322
323 for (p = proc_files; p->name; p++)
324 if (create_proc_file(p))
325 goto err;
326 return 0;
327
328err:
329 delete_proc_files();
330 return -1;
331}
332
333void gru_proc_exit(void)
334{
335 delete_proc_files();
336}
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
new file mode 100644
index 000000000000..4251018f70ff
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -0,0 +1,609 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * GRU DRIVER TABLES, MACROS, externs, etc
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifndef __GRUTABLES_H__
24#define __GRUTABLES_H__
25
26/*
27 * GRU Chiplet:
28 * 	The GRU is a user addressable memory accelerator. It provides
29 * several forms of load, store, memset, bcopy instructions. In addition, it
30 * contains special instructions for AMOs, sending messages to message
31 * queues, etc.
32 *
33 * The GRU is an integral part of the node controller. It connects
34 * directly to the cpu socket. In its current implementation, there are 2
35 * GRU chiplets in the node controller on each blade (~node).
36 *
37 * The entire GRU memory space is fully coherent and cacheable by the cpus.
38 *
39 * Each GRU chiplet has a physical memory map that looks like the following:
40 *
41 * +-----------------+
42 * |/////////////////|
43 * |/////////////////|
44 * |/////////////////|
45 * |/////////////////|
46 * |/////////////////|
47 * |/////////////////|
48 * |/////////////////|
49 * |/////////////////|
50 * +-----------------+
51 * | system control |
52 * +-----------------+ _______ +-------------+
53 * |/////////////////| / | |
54 * |/////////////////| / | |
55 * |/////////////////| / | instructions|
56 * |/////////////////| / | |
57 * |/////////////////| / | |
58 * |/////////////////| / |-------------|
59 * |/////////////////| / | |
60 * +-----------------+ | |
61 * | context 15 | | data |
62 * +-----------------+ | |
63 * | ...... | \ | |
64 * +-----------------+ \____________ +-------------+
65 * | context 1 |
66 * +-----------------+
67 * | context 0 |
68 * +-----------------+
69 *
70 * Each of the "contexts" is a chunk of memory that can be mmaped into user
71 * space. The context consists of 2 parts:
72 *
73 * - an instruction space that can be directly accessed by the user
74 * to issue GRU instructions and to check instruction status.
75 *
76 * - a data area that acts as normal RAM.
77 *
78 * User instructions contain virtual addresses of data to be accessed by the
79 * GRU. The GRU contains a TLB that is used to convert these user virtual
80 * addresses to physical addresses.
81 *
82 * The "system control" area of the GRU chiplet is used by the kernel driver
83 * to manage user contexts and to perform functions such as TLB dropin and
84 * purging.
85 *
86 * One context may be reserved for the kernel and used for cross-partition
87 * communication. The GRU will also be used to asynchronously zero out
88 * large blocks of memory (not currently implemented).
89 *
90 *
91 * Tables:
92 *
93 * VDATA-VMA Data - Holds a few parameters. Head of linked list of
94 * GTS tables for threads using the GSEG
95 * GTS - Gru Thread State - contains info for managing a GSEG context. A
96 * GTS is allocated for each thread accessing a
97 * GSEG.
98 * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is
99 * not loaded into a GRU
100 * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
101 * where a GSEG has been loaded. Similar to
102 * an mm_struct but for GRU.
103 *
104 * GS - GRU State - Used to manage the state of a GRU chiplet
105 * BS - Blade State - Used to manage state of all GRU chiplets
106 * on a blade
107 *
108 *
109 * Normal task tables for a task using the GRU.
110 * - 2 threads in process
111 * - 2 GSEGs open in process
112 * - GSEG1 is being used by both threads
113 * - GSEG2 is used only by thread 2
114 *
115 * task -->|
116 * task ---+---> mm ->------ (notifier) -------+-> gms
117 * | |
118 * |--> vma -> vdata ---> gts--->| GSEG1 (thread1)
119 * | | |
120 * | +-> gts--->| GSEG1 (thread2)
121 * | |
122 * |--> vma -> vdata ---> gts--->| GSEG2 (thread2)
123 * .
124 * .
125 *
126 * GSEGs are marked DONTCOPY on fork
127 *
128 * At open
129 * file.private_data -> NULL
130 *
131 * At mmap,
132 * vma -> vdata
133 *
134 * After gseg reference
135 * vma -> vdata ->gts
136 *
137 * After fork
138 * parent
139 * vma -> vdata -> gts
140 * child
141 * (vma is not copied)
142 *
143 */
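/*
 * Editorial sketch (not driver code): the table linkage described above as
 * it is walked at fault time, using the structures and helpers declared
 * later in this header:
 *
 *	struct gru_vma_data *vdata = vma->vm_private_data;
 *	struct gru_thread_state *gts =
 *			gru_find_thread_state(vma, TSID(vaddr, vma));
 *	struct gru_mm_struct *gms = gts->ts_gms;	(TLB shootdown tracking)
 *	struct gru_state *gru = gts->ts_gru;		(NULL unless loaded)
 */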
144
145#include <linux/rmap.h>
146#include <linux/interrupt.h>
147#include <linux/mutex.h>
148#include <linux/wait.h>
149#include <linux/mmu_notifier.h>
150#include "gru.h"
151#include "gruhandles.h"
152
153extern struct gru_stats_s gru_stats;
154extern struct gru_blade_state *gru_base[];
155extern unsigned long gru_start_paddr, gru_end_paddr;
156
157#define GRU_MAX_BLADES MAX_NUMNODES
158#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
159
160#define GRU_DRIVER_ID_STR "SGI GRU Device Driver"
161#define GRU_DRIVER_VERSION_STR "0.80"
162
163/*
164 * GRU statistics.
165 */
166struct gru_stats_s {
167 atomic_long_t vdata_alloc;
168 atomic_long_t vdata_free;
169 atomic_long_t gts_alloc;
170 atomic_long_t gts_free;
171 atomic_long_t vdata_double_alloc;
172 atomic_long_t gts_double_allocate;
173 atomic_long_t assign_context;
174 atomic_long_t assign_context_failed;
175 atomic_long_t free_context;
176 atomic_long_t load_context;
177 atomic_long_t unload_context;
178 atomic_long_t steal_context;
179 atomic_long_t steal_context_failed;
180 atomic_long_t nopfn;
181 atomic_long_t break_cow;
182 atomic_long_t asid_new;
183 atomic_long_t asid_next;
184 atomic_long_t asid_wrap;
185 atomic_long_t asid_reuse;
186 atomic_long_t intr;
187 atomic_long_t call_os;
188 atomic_long_t call_os_check_for_bug;
189 atomic_long_t call_os_wait_queue;
190 atomic_long_t user_flush_tlb;
191 atomic_long_t user_unload_context;
192 atomic_long_t user_exception;
193 atomic_long_t set_task_slice;
194 atomic_long_t migrate_check;
195 atomic_long_t migrated_retarget;
196 atomic_long_t migrated_unload;
197 atomic_long_t migrated_unload_delay;
198 atomic_long_t migrated_nopfn_retarget;
199 atomic_long_t migrated_nopfn_unload;
200 atomic_long_t tlb_dropin;
201 atomic_long_t tlb_dropin_fail_no_asid;
202 atomic_long_t tlb_dropin_fail_upm;
203 atomic_long_t tlb_dropin_fail_invalid;
204 atomic_long_t tlb_dropin_fail_range_active;
205 atomic_long_t tlb_dropin_fail_idle;
206 atomic_long_t tlb_dropin_fail_fmm;
207 atomic_long_t mmu_invalidate_range;
208 atomic_long_t mmu_invalidate_page;
209 atomic_long_t mmu_clear_flush_young;
210 atomic_long_t flush_tlb;
211 atomic_long_t flush_tlb_gru;
212 atomic_long_t flush_tlb_gru_tgh;
213 atomic_long_t flush_tlb_gru_zero_asid;
214
215 atomic_long_t copy_gpa;
216
217 atomic_long_t mesq_receive;
218 atomic_long_t mesq_receive_none;
219 atomic_long_t mesq_send;
220 atomic_long_t mesq_send_failed;
221 atomic_long_t mesq_noop;
222 atomic_long_t mesq_send_unexpected_error;
223 atomic_long_t mesq_send_lb_overflow;
224 atomic_long_t mesq_send_qlimit_reached;
225 atomic_long_t mesq_send_amo_nacked;
226 atomic_long_t mesq_send_put_nacked;
227 atomic_long_t mesq_qf_not_full;
228 atomic_long_t mesq_qf_locked;
229 atomic_long_t mesq_qf_noop_not_full;
230 atomic_long_t mesq_qf_switch_head_failed;
231 atomic_long_t mesq_qf_unexpected_error;
232 atomic_long_t mesq_noop_unexpected_error;
233 atomic_long_t mesq_noop_lb_overflow;
234 atomic_long_t mesq_noop_qlimit_reached;
235 atomic_long_t mesq_noop_amo_nacked;
236 atomic_long_t mesq_noop_put_nacked;
237
238};
239
240#define OPT_DPRINT 1
241#define OPT_STATS 2
242#define GRU_QUICKLOOK 4
243
244
245#define IRQ_GRU 110 /* Starting IRQ number for interrupts */
246
247/* Delay in jiffies between attempts to assign a GRU context */
248#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000)
249
250/*
251 * If a process has its context stolen, min delay in jiffies before trying to
252 * steal a context from another process.
253 */
254#define GRU_STEAL_DELAY ((HZ * 200) / 1000)
255
256#define STAT(id) do { \
257 if (gru_options & OPT_STATS) \
258 atomic_long_inc(&gru_stats.id); \
259 } while (0)
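/*
 * Example (editorial): STAT(load_context) increments gru_stats.load_context,
 * but only when OPT_STATS has been set in gru_options via the /proc
 * debug_options file.
 */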
260
261#ifdef CONFIG_SGI_GRU_DEBUG
262#define gru_dbg(dev, fmt, x...) \
263 do { \
264 if (gru_options & OPT_DPRINT) \
265 dev_dbg(dev, "%s: " fmt, __func__, x); \
266 } while (0)
267#else
268#define gru_dbg(x...)
269#endif
270
271/*-----------------------------------------------------------------------------
272 * ASID management
273 */
274#define MAX_ASID 0xfffff0
275#define MIN_ASID 8
276#define ASID_INC 8 /* number of regions */
277
278/* Generate a GRU asid value from a GRU base asid & a virtual address. */
279#if defined CONFIG_IA64
280#define VADDR_HI_BIT 64
281#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3)
282#elif defined __x86_64
283#define VADDR_HI_BIT 48
284#define GRUREGION(addr) (0) /* ZZZ could do better */
285#else
286#error "Unsupported architecture"
287#endif
288#define GRUASID(asid, addr) ((asid) + GRUREGION(addr))
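/*
 * Worked example (editorial): on IA64, GRUREGION(addr) evaluates to
 * ((addr >> 61) & 3), i.e. one of 4 regions. For a base asid of 8 and an
 * address in region 2, GRUASID(8, addr) == 10; ASID_INC spaces base ASIDs
 * far enough apart that the region offsets never collide.
 */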
289
290/*------------------------------------------------------------------------------
291 * File & VMS Tables
292 */
293
294struct gru_state;
295
296/*
297 * This structure is pointed to from the mm_struct via the mmu notifier pointer.
298 * There is one of these per address space.
299 */
300struct gru_mm_tracker {
301 unsigned int mt_asid_gen; /* ASID wrap count */
302 int mt_asid; /* current base ASID for gru */
303 unsigned short mt_ctxbitmap; /* bitmap of contexts using
304 asid */
305};
306
307struct gru_mm_struct {
308 struct mmu_notifier ms_notifier;
309 atomic_t ms_refcnt;
310 spinlock_t ms_asid_lock; /* protects ASID assignment */
311 atomic_t ms_range_active;/* num range_invals active */
312 char ms_released;
313 wait_queue_head_t ms_wait_queue;
314 DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
315 struct gru_mm_tracker ms_asids[GRU_MAX_GRUS];
316};
317
318/*
319 * One of these structures is allocated when a GSEG is mmaped. The
320 * structure is pointed to by the vma->vm_private_data field in the vma struct.
321 */
322struct gru_vma_data {
323 spinlock_t vd_lock; /* Serialize access to vma */
324 struct list_head vd_head; /* head of linked list of gts */
325 long vd_user_options;/* misc user option flags */
326 int vd_cbr_au_count;
327 int vd_dsr_au_count;
328};
329
330/*
331 * One of these is allocated for each thread accessing a mmaped GRU. A linked
332 * list of these structures is hung off the struct gru_vma_data in the vma.
333 */
334struct gru_thread_state {
335 struct list_head ts_next; /* list - head at vma-private */
336 struct mutex ts_ctxlock; /* load/unload CTX lock */
337 struct mm_struct *ts_mm; /* mm currently mapped to
338 context */
339 struct vm_area_struct *ts_vma; /* vma of GRU context */
340 struct gru_state *ts_gru; /* GRU where the context is
341 loaded */
342 struct gru_mm_struct *ts_gms; /* asid & ioproc struct */
343 unsigned long ts_cbr_map; /* map of allocated CBRs */
344 unsigned long ts_dsr_map; /* map of allocated DATA
345 resources */
346 unsigned long ts_steal_jiffies;/* jiffies when context last
347 stolen */
348 long ts_user_options;/* misc user option flags */
349 pid_t ts_tgid_owner; /* task that is using the
350 context - for migration */
351 int ts_tsid; /* thread that owns the
352 structure */
353 int ts_tlb_int_select;/* target cpu if interrupts
354 enabled */
355 int ts_ctxnum; /* context number where the
356 context is loaded */
357 atomic_t ts_refcnt; /* reference count GTS */
358	unsigned char		ts_dsr_au_count;/* Number of DSR resources
359						   required for context */
360	unsigned char		ts_cbr_au_count;/* Number of CBR resources
361						   required for context */
362 char ts_force_unload;/* force context to be unloaded
363 after migration */
364 char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
365 allocated CB */
366 unsigned long ts_gdata[0]; /* save area for GRU data (CB,
367 DS, CBE) */
368};
369
370/*
371 * Threaded programs actually allocate an array of GSEGs when a context is
372 * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
373 * array.
374 */
375#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
376#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \
377 (gts)->ts_tsid * GRU_GSEG_PAGESIZE)
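/*
 * Editorial sketch (hypothetical helper, not used by the driver): TSID() and
 * UGRUADDR() are inverses, e.g. the TSID can be recovered from a gts:
 */
static inline int example_gts_tsid(struct gru_thread_state *gts)
{
	/* (vm_start + ts_tsid * GRU_GSEG_PAGESIZE - vm_start) / pagesize */
	return TSID(UGRUADDR(gts), gts->ts_vma);
}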
378
379#define NULLCTX (-1) /* if context not loaded into GRU */
380
381/*-----------------------------------------------------------------------------
382 * GRU State Tables
383 */
384
385/*
386 * One of these exists for each GRU chiplet.
387 */
388struct gru_state {
389 struct gru_blade_state *gs_blade; /* GRU state for entire
390 blade */
391 unsigned long gs_gru_base_paddr; /* Physical address of
392 gru segments (64) */
393 void *gs_gru_base_vaddr; /* Virtual address of
394 gru segments (64) */
395 unsigned char gs_gid; /* unique GRU number */
396 unsigned char gs_tgh_local_shift; /* used to pick TGH for
397 local flush */
398 unsigned char gs_tgh_first_remote; /* starting TGH# for
399 remote flush */
400 unsigned short gs_blade_id; /* blade of GRU */
401 spinlock_t gs_asid_lock; /* lock used for
402 assigning asids */
403 spinlock_t gs_lock; /* lock used for
404 assigning contexts */
405
406 /* -- the following are protected by the gs_asid_lock spinlock ---- */
407	unsigned int		gs_asid;		/* Next available ASID */
408 unsigned int gs_asid_limit; /* Limit of available
409 ASIDs */
410 unsigned int gs_asid_gen; /* asid generation.
411 Inc on wrap */
412
413 /* --- the following fields are protected by the gs_lock spinlock --- */
414 unsigned long gs_context_map; /* bitmap to manage
415 contexts in use */
416 unsigned long gs_cbr_map; /* bitmap to manage CB
417 resources */
418 unsigned long gs_dsr_map; /* bitmap used to manage
419 DATA resources */
420 unsigned int gs_reserved_cbrs; /* Number of kernel-
421 reserved cbrs */
422 unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel-
423 reserved dsrs */
424 unsigned short gs_active_contexts; /* number of contexts
425 in use */
426 struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using
427 the context */
428};
429
430/*
431 * This structure contains the GRU state for all the GRUs on a blade.
432 */
433struct gru_blade_state {
434 void *kernel_cb; /* First kernel
435 reserved cb */
436 void *kernel_dsr; /* First kernel
437 reserved DSR */
438 /* ---- the following are protected by the bs_lock spinlock ---- */
439 spinlock_t bs_lock; /* lock used for
440 stealing contexts */
441 int bs_lru_ctxnum; /* STEAL - last context
442 stolen */
443 struct gru_state *bs_lru_gru; /* STEAL - last gru
444 stolen */
445
446 struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE];
447};
448
449/*-----------------------------------------------------------------------------
450 * Address Primitives
451 */
452#define get_tfm_for_cpu(g, c) \
453 ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
454#define get_tfh_by_index(g, i) \
455 ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
456#define get_tgh_by_index(g, i) \
457 ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
458#define get_cbe_by_index(g, i) \
459 ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
460 (i)))
461
462/*-----------------------------------------------------------------------------
463 * Useful Macros
464 */
465
466/* Given a blade# & chiplet#, get a pointer to the GRU */
467#define get_gru(b, c) (&gru_base[b]->bs_grus[c])
468
469/* Number of bytes to save/restore when unloading/loading GRU contexts */
470#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES)
471#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
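/*
 * Editorial note: the "* 2" in CBR_BYTES() is there because the context save
 * area holds both the CB and its matching CBE for every CBR in the allocation
 * unit (see gru_load_context_data()/gru_unload_context_data() in grumain.c,
 * which copy GRU_HANDLE_BYTES for each).
 */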
472
473/* Convert a user CB number to the actual CBRNUM */
474#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
475 * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
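/*
 * Worked example (editorial, assuming GRU_CBR_AU_SIZE is 2): with
 * ts_cbr_idx[] = {3, 7, ...}, user CB number 2 falls in allocation unit 1,
 * so thread_cbr_number(gts, 2) == 7 * 2 + 0 == 14.
 */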
476
477/* Convert a gid to a pointer to the GRU */
478#define GID_TO_GRU(gid) \
479 (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \
480 (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \
481 bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \
482 NULL)
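/*
 * Worked example (editorial): with 2 chiplets per blade (the current
 * implementation), gid 5 resolves to &gru_base[2]->bs_grus[1], or NULL if
 * blade 2 has no GRU state allocated.
 */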
483
484/* Scan all active GRUs in a GRU bitmap */
485#define for_each_gru_in_bitmap(gid, map) \
486 for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\
487 (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid)))
488
489/* Scan all active GRUs on a specific blade */
490#define for_each_gru_on_blade(gru, nid, i) \
491 for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \
492 (i) < GRU_CHIPLETS_PER_BLADE; \
493 (i)++, (gru)++)
494
495/* Scan all active GTSs on a gru. Note: must hold gs_lock to use this macro. */
496#define for_each_gts_on_gru(gts, gru, ctxnum) \
497 for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \
498 if (((gts) = (gru)->gs_gts[ctxnum]))
499
500/* Scan each CBR whose bit is set in a TFM (or copy of) */
501#define for_each_cbr_in_tfm(i, map) \
502 for ((i) = find_first_bit(map, GRU_NUM_CBE); \
503 (i) < GRU_NUM_CBE; \
504 (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i))
505
506/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
507#define for_each_cbr_in_allocation_map(i, map, k) \
508 for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \
509 (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \
510 for ((i) = (k)*GRU_CBR_AU_SIZE; \
511 (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
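/*
 * Worked example (editorial, assuming GRU_CBR_AU_SIZE is 2): for an
 * allocation-unit map of 0x5 (AUs 0 and 2 set), the loop visits CBR numbers
 * 0, 1, 4 and 5.
 */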
512
513/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
514#define for_each_dsr_in_allocation_map(i, map, k) \
515 for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\
516 (k) < GRU_DSR_AU; \
517 (k) = find_next_bit((const unsigned long *)map, \
518 GRU_DSR_AU, (k) + 1)) \
519 for ((i) = (k) * GRU_DSR_AU_CL; \
520 (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
521
522#define gseg_physical_address(gru, ctxnum)				\
523		((gru)->gs_gru_base_paddr + (ctxnum) * GRU_GSEG_STRIDE)
524#define gseg_virtual_address(gru, ctxnum)				\
525		((gru)->gs_gru_base_vaddr + (ctxnum) * GRU_GSEG_STRIDE)
526
527/*-----------------------------------------------------------------------------
528 * Lock / Unlock GRU handles
529 * Use the "delresp" bit in the handle as a "lock" bit.
530 */
531
532/* Lock hierarchy checking enabled only in emulator */
533
534static inline void __lock_handle(void *h)
535{
536 while (test_and_set_bit(1, h))
537 cpu_relax();
538}
539
540static inline void __unlock_handle(void *h)
541{
542 clear_bit(1, h);
543}
544
545static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
546{
547 __lock_handle(cch);
548}
549
550static inline void unlock_cch_handle(struct gru_context_configuration_handle
551 *cch)
552{
553 __unlock_handle(cch);
554}
555
556static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
557{
558 __lock_handle(tgh);
559}
560
561static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
562{
563 __unlock_handle(tgh);
564}
565
566/*-----------------------------------------------------------------------------
567 * Function prototypes & externs
568 */
569struct gru_unload_context_req;
570
571extern struct vm_operations_struct gru_vm_ops;
572extern struct device *grudev;
573
574extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
575 int tsid);
576extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
577 *vma, int tsid);
578extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
579 *vma, int tsid);
580extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
581extern void gts_drop(struct gru_thread_state *gts);
582extern void gru_tgh_flush_init(struct gru_state *gru);
583extern int gru_kservices_init(struct gru_state *gru);
584extern irqreturn_t gru_intr(int irq, void *dev_id);
585extern int gru_handle_user_call_os(unsigned long address);
586extern int gru_user_flush_tlb(unsigned long arg);
587extern int gru_user_unload_context(unsigned long arg);
588extern int gru_get_exception_detail(unsigned long arg);
589extern int gru_set_task_slice(long address);
590extern int gru_cpu_fault_map_id(void);
591extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
592extern void gru_flush_all_tlb(struct gru_state *gru);
593extern int gru_proc_init(void);
594extern void gru_proc_exit(void);
595
596extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
597 int cbr_au_count, char *cbmap);
598extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
599 int dsr_au_count, char *dsmap);
600extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
601extern struct gru_mm_struct *gru_register_mmu_notifier(void);
602extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
603
604extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
605 unsigned long len);
606
607extern unsigned long gru_options;
608
609#endif /* __GRUTABLES_H__ */
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 000000000000..c84496a77691
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,371 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * MMUOPS callbacks + TLB flushing
5 *
6 * This file handles mmu notifier callbacks from the core kernel. The callbacks
7 * are used to update the TLB in the GRU as a result of changes in the
8 * state of a process address space. This file also handles TLB invalidates
9 * from the GRU driver.
10 *
11 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/kernel.h>
29#include <linux/list.h>
30#include <linux/spinlock.h>
31#include <linux/mm.h>
32#include <linux/slab.h>
33#include <linux/device.h>
34#include <linux/hugetlb.h>
35#include <linux/delay.h>
36#include <linux/timex.h>
37#include <linux/srcu.h>
38#include <asm/processor.h>
39#include "gru.h"
40#include "grutables.h"
41#include <asm/uv/uv_hub.h>
42
43#define gru_random() get_cycles()
44
45/* ---------------------------------- TLB Invalidation functions --------
46 * get_tgh_handle
47 *
48 * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
49 * local blade, use a fixed TGH that is a function of the blade-local cpu
50 * number. Normally, this TGH is private to the cpu & no contention occurs for
51 * the TGH. For offblade GRUs, select a random TGH in the range above the
52 * private TGHs. A spinlock is required to access this TGH & the lock must be
53 * released when the invalidate completes. This sucks, but it is the best we
54 * can do.
55 *
56 * Note that the spinlock is IN the TGH handle so locking does not involve
57 * additional cache lines.
58 *
59 */
60static inline int get_off_blade_tgh(struct gru_state *gru)
61{
62 int n;
63
64 n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
65 n = gru_random() % n;
66 n += gru->gs_tgh_first_remote;
67 return n;
68}
69
70static inline int get_on_blade_tgh(struct gru_state *gru)
71{
72 return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
73}
74
75static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
76 *gru)
77{
78 struct gru_tlb_global_handle *tgh;
79 int n;
80
81 preempt_disable();
82 if (uv_numa_blade_id() == gru->gs_blade_id)
83 n = get_on_blade_tgh(gru);
84 else
85 n = get_off_blade_tgh(gru);
86 tgh = get_tgh_by_index(gru, n);
87 lock_tgh_handle(tgh);
88
89 return tgh;
90}
91
92static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
93{
94 unlock_tgh_handle(tgh);
95 preempt_enable();
96}
97
98/*
99 * gru_flush_tlb_range
100 *
101 * General purpose TLB invalidation function. This function scans every GRU in
102 * the ENTIRE system (partition) looking for GRUs where the specified MM has
103 * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
104 * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
105 * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
106 * cost of (possibly) a large number of future TLBmisses.
107 *
108 * The current algorithm is optimized based on the following (somewhat true)
109 * assumptions:
110 * - GRU contexts are not loaded into a GRU unless a reference is made to
111 * the data segment or control block (this is true, not an assumption).
112 * If a DS/CB is referenced, the user will also issue instructions that
113 * cause TLBmisses. It is not necessary to optimize for the case where
114 * contexts are loaded but no instructions cause TLB misses. (I know
115 * this will happen but I'm not optimizing for it).
116 * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
117 * a few usec but in unusual cases, it could be longer. Avoid if
118 * possible.
119 *	- intrablade process migration between cpus is not frequent but
120 *	  does occur.
121 * - a GRU context is not typically migrated to a different GRU on the
122 * blade because of intrablade migration
123 * - interblade migration is rare. Processes migrate their GRU context to
124 * the new blade.
125 * - if interblade migration occurs, migration back to the original blade
126 *	  is very very rare (i.e., no optimization for this case)
127 *	- most GRU instructions operate on a subset of the user REGIONS. Code
128 * & shared library regions are not likely targets of GRU instructions.
129 *
130 * To help improve the efficiency of TLB invalidation, the GMS data
131 * structure is maintained for EACH address space (MM struct). The GMS is
132 * also the structure that contains the pointer to the mmu callout
133 * functions. This structure is linked to the mm_struct for the address space
134 * using the mmu "register" function. The mmu interfaces are used to
135 * provide the callbacks for TLB invalidation. The GMS contains:
136 *
137 * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
138 * loaded into the GRU.
139 * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
140 * the above array
141 * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active
142 * in the GRU for the address space. This bitmap must be passed to the
143 * GRU to do an invalidate.
144 *
145 * The current algorithm for invalidating TLBs is:
146 * - scan the asidmap for GRUs where the context has been loaded, ie,
147 * asid is non-zero.
148 * - for each gru found:
149 * - if the ctxtmap is non-zero, there are active contexts in the
150 * GRU. TLB invalidate instructions must be issued to the GRU.
151 * - if the ctxtmap is zero, no context is active. Set the ASID to
152 * zero to force a full TLB invalidation. This is fast but will
153 * cause a lot of TLB misses if the context is reloaded onto the
154 * GRU
155 *
156 */
157
158void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
159 unsigned long len)
160{
161 struct gru_state *gru;
162 struct gru_mm_tracker *asids;
163 struct gru_tlb_global_handle *tgh;
164 unsigned long num;
165 int grupagesize, pagesize, pageshift, gid, asid;
166
167 /* ZZZ TODO - handle huge pages */
168 pageshift = PAGE_SHIFT;
169 pagesize = (1UL << pageshift);
170 grupagesize = GRU_PAGESIZE(pageshift);
171 num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
172
173 STAT(flush_tlb);
174 gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
175 start, len, gms->ms_asidmap[0]);
176
177 spin_lock(&gms->ms_asid_lock);
178 for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
179 STAT(flush_tlb_gru);
180 gru = GID_TO_GRU(gid);
181 asids = gms->ms_asids + gid;
182 asid = asids->mt_asid;
183 if (asids->mt_ctxbitmap && asid) {
184 STAT(flush_tlb_gru_tgh);
185 asid = GRUASID(asid, start);
186 gru_dbg(grudev,
187 " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
188 gid, asid, num, asids->mt_ctxbitmap);
189 tgh = get_lock_tgh_handle(gru);
190 tgh_invalidate(tgh, start, 0, asid, grupagesize, 0,
191 num - 1, asids->mt_ctxbitmap);
192 get_unlock_tgh_handle(tgh);
193 } else {
194 STAT(flush_tlb_gru_zero_asid);
195 asids->mt_asid = 0;
196 __clear_bit(gru->gs_gid, gms->ms_asidmap);
197 gru_dbg(grudev,
198 " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
199 gid, asid, asids->mt_ctxbitmap,
200 gms->ms_asidmap[0]);
201 }
202 }
203 spin_unlock(&gms->ms_asid_lock);
204}
205
206/*
207 * Flush the entire TLB on a chiplet.
208 */
209void gru_flush_all_tlb(struct gru_state *gru)
210{
211 struct gru_tlb_global_handle *tgh;
212
213 gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
214 tgh = get_lock_tgh_handle(gru);
215 tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0);
216	/* get_unlock_tgh_handle() reenables preemption */
217	get_unlock_tgh_handle(tgh);
218}
219
220/*
221 * MMUOPS notifier callout functions
222 */
223static void gru_invalidate_range_start(struct mmu_notifier *mn,
224 struct mm_struct *mm,
225 unsigned long start, unsigned long end)
226{
227 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
228 ms_notifier);
229
230 STAT(mmu_invalidate_range);
231 atomic_inc(&gms->ms_range_active);
232 gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
233 start, end, atomic_read(&gms->ms_range_active));
234 gru_flush_tlb_range(gms, start, end - start);
235}
236
237static void gru_invalidate_range_end(struct mmu_notifier *mn,
238 struct mm_struct *mm, unsigned long start,
239 unsigned long end)
240{
241 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
242 ms_notifier);
243
244 /* ..._and_test() provides needed barrier */
245 (void)atomic_dec_and_test(&gms->ms_range_active);
246
247 wake_up_all(&gms->ms_wait_queue);
248 gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
249}
250
251static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
252 unsigned long address)
253{
254 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
255 ms_notifier);
256
257 STAT(mmu_invalidate_page);
258 gru_flush_tlb_range(gms, address, PAGE_SIZE);
259 gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
260}
261
262static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
263{
264 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
265 ms_notifier);
266
267 gms->ms_released = 1;
268 gru_dbg(grudev, "gms %p\n", gms);
269}
270
271
272static const struct mmu_notifier_ops gru_mmuops = {
273 .invalidate_page = gru_invalidate_page,
274 .invalidate_range_start = gru_invalidate_range_start,
275 .invalidate_range_end = gru_invalidate_range_end,
276 .release = gru_release,
277};
278
279/* Move this to the basic mmu_notifier file. But for now... */
280static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
281 const struct mmu_notifier_ops *ops)
282{
283 struct mmu_notifier *mn, *gru_mn = NULL;
284 struct hlist_node *n;
285
286 if (mm->mmu_notifier_mm) {
287 rcu_read_lock();
288 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
289 hlist)
290 if (mn->ops == ops) {
291 gru_mn = mn;
292 break;
293 }
294 rcu_read_unlock();
295 }
296 return gru_mn;
297}
298
299struct gru_mm_struct *gru_register_mmu_notifier(void)
300{
301 struct gru_mm_struct *gms;
302 struct mmu_notifier *mn;
303
304 mn = mmu_find_ops(current->mm, &gru_mmuops);
305 if (mn) {
306 gms = container_of(mn, struct gru_mm_struct, ms_notifier);
307 atomic_inc(&gms->ms_refcnt);
308 } else {
309 gms = kzalloc(sizeof(*gms), GFP_KERNEL);
310 if (gms) {
311 spin_lock_init(&gms->ms_asid_lock);
312 gms->ms_notifier.ops = &gru_mmuops;
313 atomic_set(&gms->ms_refcnt, 1);
314 init_waitqueue_head(&gms->ms_wait_queue);
315 __mmu_notifier_register(&gms->ms_notifier, current->mm);
316 }
317 }
318 gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
319 atomic_read(&gms->ms_refcnt));
320 return gms;
321}
322
323void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
324{
325 gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
326 atomic_read(&gms->ms_refcnt), gms->ms_released);
327 if (atomic_dec_return(&gms->ms_refcnt) == 0) {
328 if (!gms->ms_released)
329 mmu_notifier_unregister(&gms->ms_notifier, current->mm);
330 kfree(gms);
331 }
332}
333
334/*
335 * Setup TGH parameters. There are:
336 * - 24 TGH handles per GRU chiplet
337 * 	- a portion (MAX_LOCAL_TGH) of the handles is reserved for
338 * use by blade-local cpus
339 * - the rest are used by off-blade cpus. This usage is
340 * less frequent than blade-local usage.
341 *
342 * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
343 * has 16 or fewer cpus, each cpu has a unique handle that it can
344 * use.
345 */
346#define MAX_LOCAL_TGH 16
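/*
 * Worked example (editorial): a blade with 32 possible cpus gives n = 32,
 * so shift = fls(31) - fls(15) = 5 - 4 = 1: pairs of cpus share one of the
 * 16 local TGHs, and gs_tgh_first_remote becomes (32 + 1) >> 1 = 16,
 * leaving TGHs 16..23 of the 24 per chiplet for off-blade flushes.
 */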
347
348void gru_tgh_flush_init(struct gru_state *gru)
349{
350 int cpus, shift = 0, n;
351
352 cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
353
354 /* n = cpus rounded up to next power of 2 */
355 if (cpus) {
356 n = 1 << fls(cpus - 1);
357
358 /*
359 * shift count for converting local cpu# to TGH index
360 * 0 if cpus <= MAX_LOCAL_TGH,
361 * 1 if cpus <= 2*MAX_LOCAL_TGH,
362 * etc
363 */
364 shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
365 }
366 gru->gs_tgh_local_shift = shift;
367
368 /* first starting TGH index to use for remote purges */
369 gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
370
371}