author    Ingo Molnar <mingo@elte.hu>  2008-10-28 11:26:12 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-10-28 11:26:12 -0400
commit    7a9787e1eba95a166265e6a260cf30af04ef0a99 (patch)
tree      e730a4565e0318140d2fbd2f0415d18a339d7336 /drivers/misc/sgi-gru
parent    41b9eb264c8407655db57b60b4457fe1b2ec9977 (diff)
parent    0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)
Merge commit 'v2.6.28-rc2' into x86/pci-ioapic-boot-irq-quirks
Diffstat (limited to 'drivers/misc/sgi-gru')
-rw-r--r--  drivers/misc/sgi-gru/Makefile            |    3
-rw-r--r--  drivers/misc/sgi-gru/gru.h               |   67
-rw-r--r--  drivers/misc/sgi-gru/gru_instructions.h  |  671
-rw-r--r--  drivers/misc/sgi-gru/grufault.c          |  632
-rw-r--r--  drivers/misc/sgi-gru/grufile.c           |  494
-rw-r--r--  drivers/misc/sgi-gru/gruhandles.h        |  658
-rw-r--r--  drivers/misc/sgi-gru/grukservices.c      |  680
-rw-r--r--  drivers/misc/sgi-gru/grukservices.h      |  134
-rw-r--r--  drivers/misc/sgi-gru/grulib.h            |   97
-rw-r--r--  drivers/misc/sgi-gru/grumain.c           |  809
-rw-r--r--  drivers/misc/sgi-gru/gruprocfs.c         |  336
-rw-r--r--  drivers/misc/sgi-gru/grutables.h         |  609
-rw-r--r--  drivers/misc/sgi-gru/grutlbpurge.c       |  371
13 files changed, 5561 insertions(+), 0 deletions(-)
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
new file mode 100644
index 000000000000..d03597a521b0
--- /dev/null
+++ b/drivers/misc/sgi-gru/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_SGI_GRU) := gru.o
2gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o
3
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
new file mode 100644
index 000000000000..f93f03a9e6e9
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __GRU_H__
20#define __GRU_H__
21
22/*
23 * GRU architectural definitions
24 */
25#define GRU_CACHE_LINE_BYTES 64
26#define GRU_HANDLE_STRIDE 256
27#define GRU_CB_BASE 0
28#define GRU_DS_BASE 0x20000
29
30/*
31 * Size used to map GRU GSeg
32 */
33#if defined(CONFIG_IA64)
34#define GRU_GSEG_PAGESIZE (256 * 1024UL)
35#elif defined(CONFIG_X86_64)
36#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */
37#else
38#error "Unsupported architecture"
39#endif
40
41/*
42 * Structure for obtaining GRU resource information
43 */
44struct gru_chiplet_info {
45 int node;
46 int chiplet;
47 int blade;
48 int total_dsr_bytes;
49 int total_cbr;
50 int total_user_dsr_bytes;
51 int total_user_cbr;
52 int free_user_dsr_bytes;
53 int free_user_cbr;
54};
55
56/* Flags for GRU options on the gru_create_context() call */
57/* Select one of the following 4 options to specify how TLB misses are handled */
58#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */
59#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */
60#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to
61 handle fault */
62#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */
63#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */
64
65
66
67#endif /* __GRU_H__ */
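
The GRU_OPT_MISS_* values above are mutually exclusive modes selected within
GRU_OPT_MISS_MASK, not independent flag bits. A minimal sketch of switching
modes in an option word (illustration only; the helper name is invented, and
the option word is ultimately carried in the context-create request defined
in grulib.h):

static inline unsigned long gru_select_tlbmiss_mode(unsigned long options,
						    unsigned long mode)
{
	/* Clear the currently selected TLB-miss mode, then set the new one. */
	return (options & ~GRU_OPT_MISS_MASK) | (mode & GRU_OPT_MISS_MASK);
}

/* Example: options = gru_select_tlbmiss_mode(options, GRU_OPT_MISS_FMM_INTR); */
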
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
new file mode 100644
index 000000000000..48762e7b98be
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -0,0 +1,671 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __GRU_INSTRUCTIONS_H__
20#define __GRU_INSTRUCTIONS_H__
21
22#define gru_flush_cache_hook(p)
23#define gru_emulator_wait_hook(p, w)
24
25/*
26 * Architecture dependent functions
27 */
28
29#if defined(CONFIG_IA64)
30#include <linux/compiler.h>
31#include <asm/intrinsics.h>
32#define __flush_cache(p) ia64_fc(p)
33/* Use volatile on IA64 to ensure ordering via st4.rel */
34#define gru_ordered_store_int(p,v) \
35 do { \
36 barrier(); \
37 *((volatile int *)(p)) = v; /* force st.rel */ \
38 } while (0)
39#elif defined(CONFIG_X86_64)
40#define __flush_cache(p) clflush(p)
41#define gru_ordered_store_int(p,v) \
42 do { \
43 barrier(); \
44 *(int *)p = v; \
45 } while (0)
46#else
47#error "Unsupported architecture"
48#endif
49
50/*
51 * Control block status and exception codes
52 */
53#define CBS_IDLE 0
54#define CBS_EXCEPTION 1
55#define CBS_ACTIVE 2
56#define CBS_CALL_OS 3
57
58/* CB substatus bitmasks */
59#define CBSS_MSG_QUEUE_MASK 7
60#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8
61
62/* CB substatus message queue values (low 3 bits of substatus) */
63#define CBSS_NO_ERROR 0
64#define CBSS_LB_OVERFLOWED 1
65#define CBSS_QLIMIT_REACHED 2
66#define CBSS_PAGE_OVERFLOW 3
67#define CBSS_AMO_NACKED 4
68#define CBSS_PUT_NACKED 5
69
70/*
71 * Structure used to fetch exception detail for CBs that terminate with
72 * CBS_EXCEPTION
73 */
74struct control_block_extended_exc_detail {
75 unsigned long cb;
76 int opc;
77 int ecause;
78 int exopc;
79 long exceptdet0;
80 int exceptdet1;
81};
82
83/*
84 * Instruction formats
85 */
86
87/*
88 * Generic instruction format.
89 * This definition has precise bit field definitions.
90 */
91struct gru_instruction_bits {
92 /* DW 0 - low */
93 unsigned int icmd: 1;
94 unsigned char ima: 3; /* CB_DelRep, unmapped mode */
95 unsigned char reserved0: 4;
96 unsigned int xtype: 3;
97 unsigned int iaa0: 2;
98 unsigned int iaa1: 2;
99 unsigned char reserved1: 1;
100 unsigned char opc: 8; /* opcode */
101 unsigned char exopc: 8; /* extended opcode */
102 /* DW 0 - high */
103 unsigned int idef2: 22; /* TRi0 */
104 unsigned char reserved2: 2;
105 unsigned char istatus: 2;
106 unsigned char isubstatus:4;
107 unsigned char reserved3: 2;
108 /* DW 1 */
109 unsigned long idef4; /* 42 bits: TRi1, BufSize */
110 /* DW 2-6 */
111 unsigned long idef1; /* BAddr0 */
112 unsigned long idef5; /* Nelem */
113 unsigned long idef6; /* Stride, Operand1 */
114 unsigned long idef3; /* BAddr1, Value, Operand2 */
115 unsigned long reserved4;
116 /* DW 7 */
117 unsigned long avalue; /* AValue */
118};
119
120/*
121 * Generic instruction with friendlier names. This format is used
122 * for inline instructions.
123 */
124struct gru_instruction {
125 /* DW 0 */
126 unsigned int op32; /* icmd,xtype,iaa0,ima,opc */
127 unsigned int tri0;
128 unsigned long tri1_bufsize; /* DW 1 */
129 unsigned long baddr0; /* DW 2 */
130 unsigned long nelem; /* DW 3 */
131 unsigned long op1_stride; /* DW 4 */
132 unsigned long op2_value_baddr1; /* DW 5 */
133 unsigned long reserved0; /* DW 6 */
134 unsigned long avalue; /* DW 7 */
135};
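
/*
 * Illustrative layout check (a sketch, not part of this commit): on an LP64
 * target the friendly view above is two 32-bit words followed by seven
 * 64-bit words, i.e. exactly one 64-byte cache line. GRU_CACHE_LINE_BYTES
 * comes from gru.h and BUILD_BUG_ON from <linux/kernel.h>.
 */
static inline void gru_instruction_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct gru_instruction) != GRU_CACHE_LINE_BYTES);
}
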
136
137/* Some shifts and masks for the low 32 bits of a GRU command */
138#define GRU_CB_ICMD_SHFT 0
139#define GRU_CB_ICMD_MASK 0x1
140#define GRU_CB_XTYPE_SHFT 8
141#define GRU_CB_XTYPE_MASK 0x7
142#define GRU_CB_IAA0_SHFT 11
143#define GRU_CB_IAA0_MASK 0x3
144#define GRU_CB_IAA1_SHFT 13
145#define GRU_CB_IAA1_MASK 0x3
146#define GRU_CB_IMA_SHFT 1
147#define GRU_CB_IMA_MASK 0x3
148#define GRU_CB_OPC_SHFT 16
149#define GRU_CB_OPC_MASK 0xff
150#define GRU_CB_EXOPC_SHFT 24
151#define GRU_CB_EXOPC_MASK 0xff
152
153/* GRU instruction opcodes (opc field) */
154#define OP_NOP 0x00
155#define OP_BCOPY 0x01
156#define OP_VLOAD 0x02
157#define OP_IVLOAD 0x03
158#define OP_VSTORE 0x04
159#define OP_IVSTORE 0x05
160#define OP_VSET 0x06
161#define OP_IVSET 0x07
162#define OP_MESQ 0x08
163#define OP_GAMXR 0x09
164#define OP_GAMIR 0x0a
165#define OP_GAMIRR 0x0b
166#define OP_GAMER 0x0c
167#define OP_GAMERR 0x0d
168#define OP_BSTORE 0x0e
169#define OP_VFLUSH 0x0f
170
171
172/* Extended opcodes values (exopc field) */
173
174/* GAMIR - AMOs with implicit operands */
175#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */
176#define EOP_IR_CLR 0x02 /* Fetch and clear */
177#define EOP_IR_INC 0x05 /* Fetch and increment */
178#define EOP_IR_DEC 0x07 /* Fetch and decrement */
179#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */
180#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */
181
182/* GAMIRR - Registered AMOs with implicit operands */
183#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */
184#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */
185#define EOP_IRR_INC 0x05 /* Registered fetch and increment */
186#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */
187#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/
188
189/* GAMER - AMOs with explicit operands */
190#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */
191#define EOP_ER_OR 0x01 /* Logical OR with memory */
192#define EOP_ER_AND 0x02 /* Logical AND with memory */
193#define EOP_ER_XOR 0x03 /* Logical XOR with memory */
194#define EOP_ER_ADD 0x04 /* Add value to memory */
195#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
196#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */
197
198/* GAMERR - Registered AMOs with explicit operands */
199#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */
200#define EOP_ERR_OR 0x01 /* Logical OR with memory */
201#define EOP_ERR_AND 0x02 /* Logical AND with memory */
202#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */
203#define EOP_ERR_ADD 0x04 /* Add value to memory */
204#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
205#define EOP_ERR_EPOLL 0x09 /* Poll for equality */
206#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */
207
208/* GAMXR - SGI Arithmetic unit */
209#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */
210
211
212/* Transfer types (xtype field) */
213#define XTYPE_B 0x0 /* byte */
214#define XTYPE_S 0x1 /* short (2-byte) */
215#define XTYPE_W 0x2 /* word (4-byte) */
216#define XTYPE_DW 0x3 /* doubleword (8-byte) */
217#define XTYPE_CL 0x6 /* cacheline (64-byte) */
218
219
220/* Instruction access attributes (iaa0, iaa1 fields) */
221#define IAA_RAM 0x0 /* normal cached RAM access */
222#define IAA_NCRAM 0x2 /* noncoherent RAM access */
223#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */
224#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */
225
226
227/* Instruction mode attributes (ima field) */
228#define IMA_MAPPED 0x0 /* Virtual mode */
229#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */
230#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */
231#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */
232
233/* CBE ecause bits */
234#define CBE_CAUSE_RI (1 << 0)
235#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1)
236#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2)
237#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3)
238#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4)
239#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5)
240#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6)
241#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7)
242#define CBE_CAUSE_TLBHW_ERROR (1 << 8)
243#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9)
244#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10)
245#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11)
246#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12)
247#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13)
248#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14)
249#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15)
250#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16)
251#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17)
252
253/*
254 * Exceptions are retried for the following cases. If any OTHER bits are set
255 * in ecause, the exception is not retryable.
256 */
257#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \
258 CBE_CAUSE_RA_REQUEST_TIMEOUT | \
259 CBE_CAUSE_TLBHW_ERROR | \
260 CBE_CAUSE_HA_REQUEST_TIMEOUT)
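
/*
 * Sketch of how the retry mask above is meant to be applied (not part of
 * this commit): an ecause value is retryable only when every bit set in it
 * is one of the retryable causes.
 */
static inline int gru_ecause_retryable(unsigned int ecause)
{
	return ecause && !(ecause & ~EXCEPTION_RETRY_BITS);
}
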
261
262/* Message queue head structure */
263union gru_mesqhead {
264 unsigned long val;
265 struct {
266 unsigned int head;
267 unsigned int limit;
268 };
269};
270
271
272/* Generate the low word of a GRU instruction */
273static inline unsigned int
274__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
275 unsigned char iaa0, unsigned char iaa1,
276 unsigned char ima)
277{
278 return (1 << GRU_CB_ICMD_SHFT) |
279 (iaa0 << GRU_CB_IAA0_SHFT) |
280 (iaa1 << GRU_CB_IAA1_SHFT) |
281 (ima << GRU_CB_IMA_SHFT) |
282 (xtype << GRU_CB_XTYPE_SHFT) |
283 (opcode << GRU_CB_OPC_SHFT) |
284 (exopc << GRU_CB_EXOPC_SHFT);
285}
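
/*
 * Worked example (illustration only): for a vector load of doublewords from
 * cached RAM in unmapped mode,
 *	__opword(OP_VLOAD, 0, XTYPE_DW, IAA_RAM, 0, IMA_UNMAPPED)
 * evaluates to 0x1 | (0x2 << 1) | (0x3 << 8) | (0x02 << 16) == 0x00020305.
 */
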
286
287/*
288 * Architecture specific intrinsics
289 */
290static inline void gru_flush_cache(void *p)
291{
292 __flush_cache(p);
293}
294
295/*
296 * Store the lower 32 bits of the command including the "start" bit. Then
297 * start the instruction executing.
298 */
299static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
300{
301 gru_ordered_store_int(ins, op32);
302 gru_flush_cache(ins);
303}
304
305
306/* Convert "hints" to IMA */
307#define CB_IMA(h) ((h) | IMA_UNMAPPED)
308
309/* Convert data segment cache line index into TRI0 / TRI1 value */
310#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES)
311
312/* Inline functions for GRU instructions.
313 * Note:
314 * - nelem and stride are in elements
315 * - tri0/tri1 is in bytes for the beginning of the data segment.
316 */
317static inline void gru_vload(void *cb, unsigned long mem_addr,
318 unsigned int tri0, unsigned char xtype, unsigned long nelem,
319 unsigned long stride, unsigned long hints)
320{
321 struct gru_instruction *ins = (struct gru_instruction *)cb;
322
323 ins->baddr0 = (long)mem_addr;
324 ins->nelem = nelem;
325 ins->tri0 = tri0;
326 ins->op1_stride = stride;
327 gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
328 CB_IMA(hints)));
329}
330
331static inline void gru_vstore(void *cb, unsigned long mem_addr,
332 unsigned int tri0, unsigned char xtype, unsigned long nelem,
333 unsigned long stride, unsigned long hints)
334{
335 struct gru_instruction *ins = (void *)cb;
336
337 ins->baddr0 = (long)mem_addr;
338 ins->nelem = nelem;
339 ins->tri0 = tri0;
340 ins->op1_stride = stride;
341 gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
342 CB_IMA(hints)));
343}
344
345static inline void gru_ivload(void *cb, unsigned long mem_addr,
346 unsigned int tri0, unsigned int tri1, unsigned char xtype,
347 unsigned long nelem, unsigned long hints)
348{
349 struct gru_instruction *ins = (void *)cb;
350
351 ins->baddr0 = (long)mem_addr;
352 ins->nelem = nelem;
353 ins->tri0 = tri0;
354 ins->tri1_bufsize = tri1;
355 gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
356 CB_IMA(hints)));
357}
358
359static inline void gru_ivstore(void *cb, unsigned long mem_addr,
360 unsigned int tri0, unsigned int tri1,
361 unsigned char xtype, unsigned long nelem, unsigned long hints)
362{
363 struct gru_instruction *ins = (void *)cb;
364
365 ins->baddr0 = (long)mem_addr;
366 ins->nelem = nelem;
367 ins->tri0 = tri0;
368 ins->tri1_bufsize = tri1;
369 gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
370 CB_IMA(hints)));
371}
372
373static inline void gru_vset(void *cb, unsigned long mem_addr,
374 unsigned long value, unsigned char xtype, unsigned long nelem,
375 unsigned long stride, unsigned long hints)
376{
377 struct gru_instruction *ins = (void *)cb;
378
379 ins->baddr0 = (long)mem_addr;
380 ins->op2_value_baddr1 = value;
381 ins->nelem = nelem;
382 ins->op1_stride = stride;
383 gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0,
384 CB_IMA(hints)));
385}
386
387static inline void gru_ivset(void *cb, unsigned long mem_addr,
388 unsigned int tri1, unsigned long value, unsigned char xtype,
389 unsigned long nelem, unsigned long hints)
390{
391 struct gru_instruction *ins = (void *)cb;
392
393 ins->baddr0 = (long)mem_addr;
394 ins->op2_value_baddr1 = value;
395 ins->nelem = nelem;
396 ins->tri1_bufsize = tri1;
397 gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0,
398 CB_IMA(hints)));
399}
400
401static inline void gru_vflush(void *cb, unsigned long mem_addr,
402 unsigned long nelem, unsigned char xtype, unsigned long stride,
403 unsigned long hints)
404{
405 struct gru_instruction *ins = (void *)cb;
406
407 ins->baddr0 = (long)mem_addr;
408 ins->op1_stride = stride;
409 ins->nelem = nelem;
410 gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
411 CB_IMA(hints)));
412}
413
414static inline void gru_nop(void *cb, int hints)
415{
416 struct gru_instruction *ins = (void *)cb;
417
418 gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints)));
419}
420
421
422static inline void gru_bcopy(void *cb, const unsigned long src,
423 unsigned long dest,
424 unsigned int tri0, unsigned int xtype, unsigned long nelem,
425 unsigned int bufsize, unsigned long hints)
426{
427 struct gru_instruction *ins = (void *)cb;
428
429 ins->baddr0 = (long)src;
430 ins->op2_value_baddr1 = (long)dest;
431 ins->nelem = nelem;
432 ins->tri0 = tri0;
433 ins->tri1_bufsize = bufsize;
434 gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM,
435 IAA_RAM, CB_IMA(hints)));
436}
437
438static inline void gru_bstore(void *cb, const unsigned long src,
439 unsigned long dest, unsigned int tri0, unsigned int xtype,
440 unsigned long nelem, unsigned long hints)
441{
442 struct gru_instruction *ins = (void *)cb;
443
444 ins->baddr0 = (long)src;
445 ins->op2_value_baddr1 = (long)dest;
446 ins->nelem = nelem;
447 ins->tri0 = tri0;
448 gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
449 CB_IMA(hints)));
450}
451
452static inline void gru_gamir(void *cb, int exopc, unsigned long src,
453 unsigned int xtype, unsigned long hints)
454{
455 struct gru_instruction *ins = (void *)cb;
456
457 ins->baddr0 = (long)src;
458 gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
459 CB_IMA(hints)));
460}
461
462static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
463 unsigned int xtype, unsigned long hints)
464{
465 struct gru_instruction *ins = (void *)cb;
466
467 ins->baddr0 = (long)src;
468 gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
469 CB_IMA(hints)));
470}
471
472static inline void gru_gamer(void *cb, int exopc, unsigned long src,
473 unsigned int xtype,
474 unsigned long operand1, unsigned long operand2,
475 unsigned long hints)
476{
477 struct gru_instruction *ins = (void *)cb;
478
479 ins->baddr0 = (long)src;
480 ins->op1_stride = operand1;
481 ins->op2_value_baddr1 = operand2;
482 gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
483 CB_IMA(hints)));
484}
485
486static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
487 unsigned int xtype, unsigned long operand1,
488 unsigned long operand2, unsigned long hints)
489{
490 struct gru_instruction *ins = (void *)cb;
491
492 ins->baddr0 = (long)src;
493 ins->op1_stride = operand1;
494 ins->op2_value_baddr1 = operand2;
495 gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
496 CB_IMA(hints)));
497}
498
499static inline void gru_gamxr(void *cb, unsigned long src,
500 unsigned int tri0, unsigned long hints)
501{
502 struct gru_instruction *ins = (void *)cb;
503
504 ins->baddr0 = (long)src;
505 ins->nelem = 4;
506 gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
507 IAA_RAM, 0, CB_IMA(hints)));
508}
509
510static inline void gru_mesq(void *cb, unsigned long queue,
511 unsigned long tri0, unsigned long nelem,
512 unsigned long hints)
513{
514 struct gru_instruction *ins = (void *)cb;
515
516 ins->baddr0 = (long)queue;
517 ins->nelem = nelem;
518 ins->tri0 = tri0;
519 gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
520 CB_IMA(hints)));
521}
522
523static inline unsigned long gru_get_amo_value(void *cb)
524{
525 struct gru_instruction *ins = (void *)cb;
526
527 return ins->avalue;
528}
529
530static inline int gru_get_amo_value_head(void *cb)
531{
532 struct gru_instruction *ins = (void *)cb;
533
534 return ins->avalue & 0xffffffff;
535}
536
537static inline int gru_get_amo_value_limit(void *cb)
538{
539 struct gru_instruction *ins = (void *)cb;
540
541 return ins->avalue >> 32;
542}
543
544static inline union gru_mesqhead gru_mesq_head(int head, int limit)
545{
546 union gru_mesqhead mqh;
547
548 mqh.head = head;
549 mqh.limit = limit;
550 return mqh;
551}
552
553/*
554 * Get struct control_block_extended_exc_detail for CB.
555 */
556extern int gru_get_cb_exception_detail(void *cb,
557 struct control_block_extended_exc_detail *excdet);
558
559#define GRU_EXC_STR_SIZE 256
560
561extern int gru_check_status_proc(void *cb);
562extern int gru_wait_proc(void *cb);
563extern void gru_wait_abort_proc(void *cb);
564
565/*
566 * Control block definition for checking status
567 */
568struct gru_control_block_status {
569 unsigned int icmd :1;
570 unsigned int unused1 :31;
571 unsigned int unused2 :24;
572 unsigned int istatus :2;
573 unsigned int isubstatus :4;
574 unsigned int inused3 :2;
575};
576
577/* Get CB status */
578static inline int gru_get_cb_status(void *cb)
579{
580 struct gru_control_block_status *cbs = (void *)cb;
581
582 return cbs->istatus;
583}
584
585/* Get CB message queue substatus */
586static inline int gru_get_cb_message_queue_substatus(void *cb)
587{
588 struct gru_control_block_status *cbs = (void *)cb;
589
590 return cbs->isubstatus & CBSS_MSG_QUEUE_MASK;
591}
592
593/* Get CB substatus */
594static inline int gru_get_cb_substatus(void *cb)
595{
596 struct gru_control_block_status *cbs = (void *)cb;
597
598 return cbs->isubstatus;
599}
600
601/* Check the status of a CB. If the CB is in UPM mode, call the
602 * OS to handle the UPM status.
603 * Returns the CB status field value (0 for normal completion)
604 */
605static inline int gru_check_status(void *cb)
606{
607 struct gru_control_block_status *cbs = (void *)cb;
608 int ret;
609
610 ret = cbs->istatus;
611 if (ret == CBS_CALL_OS)
612 ret = gru_check_status_proc(cb);
613 return ret;
614}
615
616/* Wait for CB to complete.
617 * Returns the CB status field value (0 for normal completion)
618 */
619static inline int gru_wait(void *cb)
620{
621 struct gru_control_block_status *cbs = (void *)cb;
622 int ret = cbs->istatus;
623
624 if (ret != CBS_IDLE)
625 ret = gru_wait_proc(cb);
626 return ret;
627}
628
629/* Wait for CB to complete. Aborts program if error. (Note: error does NOT
630 * mean a TLB miss - only fatal errors such as memory parity errors or user
631 * bugs will cause termination.)
632 */
633static inline void gru_wait_abort(void *cb)
634{
635 struct gru_control_block_status *cbs = (void *)cb;
636
637 if (cbs->istatus != CBS_IDLE)
638 gru_wait_abort_proc(cb);
639}
640
641
642/*
643 * Get a pointer to a control block
644 * gseg - GSeg address returned from gru_get_thread_gru_segment()
645 * index - index of desired CB
646 */
647static inline void *gru_get_cb_pointer(void *gseg,
648 int index)
649{
650 return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE;
651}
652
653/*
654 * Get a pointer to a cacheline in the data segment portion of a GSeg
655 * gseg - GSeg address returned from gru_get_thread_gru_segment()
656 * index - index of desired cache line
657 */
658static inline void *gru_get_data_pointer(void *gseg, int index)
659{
660 return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES;
661}
662
663/*
664 * Convert a vaddr into the tri index within the GSEG
665 * vaddr - virtual address of within gseg
666 */
667static inline int gru_get_tri(void *vaddr)
668{
669 return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE;
670}
671#endif /* __GRU_INSTRUCTIONS_H__ */
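
Taken together, the helpers above give the usual pattern for driving a control
block: locate a CB and a data-segment line within the GSEG, start an
instruction, then wait for completion. A minimal sketch, assuming the caller
already holds a GSEG address suitable for these helpers (the in-kernel users
set this up in grukservices.c elsewhere in this commit):

/* Copy one cache line from src to dst, staging it in DS cache line 0. */
static int gru_copy_one_line_example(void *gseg, unsigned long src,
				     unsigned long dst)
{
	void *cb = gru_get_cb_pointer(gseg, 0);
	unsigned int tri0 = GRU_DINDEX(0);	/* byte offset of DS line 0 */

	gru_vload(cb, src, tri0, XTYPE_CL, 1, 1, 0);
	if (gru_wait(cb) != CBS_IDLE)
		return -1;
	gru_vstore(cb, dst, tri0, XTYPE_CL, 1, 1, 0);
	return gru_wait(cb) == CBS_IDLE ? 0 : -1;
}
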
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
new file mode 100644
index 000000000000..8c389d606c30
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -0,0 +1,632 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * FAULT HANDLER FOR GRU DETECTED TLB MISSES
5 *
6 * This file contains code that handles TLB misses within the GRU.
7 * These misses are reported either via interrupts or user polling of
8 * the user CB.
9 *
10 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/errno.h>
29#include <linux/spinlock.h>
30#include <linux/mm.h>
31#include <linux/hugetlb.h>
32#include <linux/device.h>
33#include <linux/io.h>
34#include <linux/uaccess.h>
35#include <asm/pgtable.h>
36#include "gru.h"
37#include "grutables.h"
38#include "grulib.h"
39#include "gru_instructions.h"
40#include <asm/uv/uv_hub.h>
41
42/*
43 * Test if a physical address is a valid GRU GSEG address
44 */
45static inline int is_gru_paddr(unsigned long paddr)
46{
47 return paddr >= gru_start_paddr && paddr < gru_end_paddr;
48}
49
50/*
51 * Find the vma of a GRU segment. Caller must hold mmap_sem.
52 */
53struct vm_area_struct *gru_find_vma(unsigned long vaddr)
54{
55 struct vm_area_struct *vma;
56
57 vma = find_vma(current->mm, vaddr);
58 if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops)
59 return vma;
60 return NULL;
61}
62
63/*
64 * Find and lock the gts that contains the specified user vaddr.
65 *
66 * Returns:
67 * - *gts with the mmap_sem locked for read and the GTS locked.
68 * - NULL if vaddr invalid OR is not a valid GSEG vaddr.
69 */
70
71static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
72{
73 struct mm_struct *mm = current->mm;
74 struct vm_area_struct *vma;
75 struct gru_thread_state *gts = NULL;
76
77 down_read(&mm->mmap_sem);
78 vma = gru_find_vma(vaddr);
79 if (vma)
80 gts = gru_find_thread_state(vma, TSID(vaddr, vma));
81 if (gts)
82 mutex_lock(&gts->ts_ctxlock);
83 else
84 up_read(&mm->mmap_sem);
85 return gts;
86}
87
88static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
89{
90 struct mm_struct *mm = current->mm;
91 struct vm_area_struct *vma;
92 struct gru_thread_state *gts = NULL;
93
94 down_write(&mm->mmap_sem);
95 vma = gru_find_vma(vaddr);
96 if (vma)
97 gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
98 if (gts) {
99 mutex_lock(&gts->ts_ctxlock);
100 downgrade_write(&mm->mmap_sem);
101 } else {
102 up_write(&mm->mmap_sem);
103 }
104
105 return gts;
106}
107
108/*
109 * Unlock a GTS that was previously locked with gru_find_lock_gts().
110 */
111static void gru_unlock_gts(struct gru_thread_state *gts)
112{
113 mutex_unlock(&gts->ts_ctxlock);
114 up_read(&current->mm->mmap_sem);
115}
116
117/*
118 * Set a CB.istatus to active using a user virtual address. This must be done
119 * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
120 * If the line is evicted, the status may be lost. The in-cache update
121 * is necessary to prevent the user from seeing a stale cb.istatus that will
122 * change as soon as the TFH restart is complete. Races may cause an
123 * occasional failure to clear the cb.istatus, but that is ok.
124 *
125 * If the cb address is not valid (should not happen, but...), nothing
126 * bad will happen. The get_user()/put_user() will fail but there
127 * are no bad side-effects.
128 */
129static void gru_cb_set_istatus_active(unsigned long __user *cb)
130{
131 union {
132 struct gru_instruction_bits bits;
133 unsigned long dw;
134 } u;
135
136 if (cb) {
137 get_user(u.dw, cb);
138 u.bits.istatus = CBS_ACTIVE;
139 put_user(u.dw, cb);
140 }
141}
142
143/*
144 * Convert an interrupt IRQ to a pointer to the gru_state of the chiplet that
145 * caused the interrupt. Interrupts are always sent to a cpu on the blade that
146 * contains the GRU (headless blades are not currently supported). A blade
147 * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ
148 * number uniquely identifies the GRU chiplet on the local blade that caused the
149 * interrupt. Always called in interrupt context.
150 */
151static inline struct gru_state *irq_to_gru(int irq)
152{
153 return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU];
154}
155
156/*
157 * Read & clear a TFM
158 *
159 * The GRU has an array of fault maps. A map is private to a cpu.
160 * Only one cpu will be accessing a cpu's fault map.
161 *
162 * This function scans the cpu-private fault map & clears all bits that
163 * are set. The function returns a bitmap that indicates the bits that
164 * were cleared. Note that since the maps may be updated asynchronously by
165 * the GRU, atomic operations must be used to clear bits.
166 */
167static void get_clear_fault_map(struct gru_state *gru,
168 struct gru_tlb_fault_map *map)
169{
170 unsigned long i, k;
171 struct gru_tlb_fault_map *tfm;
172
173 tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
174 prefetchw(tfm); /* Helps on hardware, required for emulator */
175 for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
176 k = tfm->fault_bits[i];
177 if (k)
178 k = xchg(&tfm->fault_bits[i], 0UL);
179 map->fault_bits[i] = k;
180 }
181
182 /*
183 * Not functionally required but helps performance. (Required
184 * on emulator)
185 */
186 gru_flush_cache(tfm);
187}
188
189/*
190 * Atomic (interrupt context) & non-atomic (user context) functions to
191 * convert a vaddr into a physical address. The size of the page
192 * is returned in pageshift.
193 * returns:
194 * 0 - successful
195 * < 0 - error code
196 * 1 - (atomic only) try again in non-atomic context
197 */
198static int non_atomic_pte_lookup(struct vm_area_struct *vma,
199 unsigned long vaddr, int write,
200 unsigned long *paddr, int *pageshift)
201{
202 struct page *page;
203
204 /* ZZZ Need to handle HUGE pages */
205 if (is_vm_hugetlb_page(vma))
206 return -EFAULT;
207 *pageshift = PAGE_SHIFT;
208 if (get_user_pages
209 (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
210 return -EFAULT;
211 *paddr = page_to_phys(page);
212 put_page(page);
213 return 0;
214}
215
216/*
217 * atomic_pte_lookup
218 *
219 * Convert a user virtual address to a physical address
220 * Only supports Intel large pages (2MB only) on x86_64.
221 * ZZZ - hugepage support is incomplete
222 *
223 * NOTE: mmap_sem is already held on entry to this function. This
224 * guarantees existence of the page tables.
225 */
226static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
227 int write, unsigned long *paddr, int *pageshift)
228{
229 pgd_t *pgdp;
230 pmd_t *pmdp;
231 pud_t *pudp;
232 pte_t pte;
233
234 pgdp = pgd_offset(vma->vm_mm, vaddr);
235 if (unlikely(pgd_none(*pgdp)))
236 goto err;
237
238 pudp = pud_offset(pgdp, vaddr);
239 if (unlikely(pud_none(*pudp)))
240 goto err;
241
242 pmdp = pmd_offset(pudp, vaddr);
243 if (unlikely(pmd_none(*pmdp)))
244 goto err;
245#ifdef CONFIG_X86_64
246 if (unlikely(pmd_large(*pmdp)))
247 pte = *(pte_t *) pmdp;
248 else
249#endif
250 pte = *pte_offset_kernel(pmdp, vaddr);
251
252 if (unlikely(!pte_present(pte) ||
253 (write && (!pte_write(pte) || !pte_dirty(pte)))))
254 return 1;
255
256 *paddr = pte_pfn(pte) << PAGE_SHIFT;
257 *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
258 return 0;
259
260err:
261 local_irq_enable();
262 return 1;
263}
264
265/*
266 * Drop a TLB entry into the GRU. The fault is described by info in a TFH.
267 * Input:
268 * cb Address of user CBR. Null if not running in user context
269 * Return:
270 * 0 = dropin, exception, or switch to UPM successful
271 * 1 = range invalidate active
272 * < 0 = error code
273 *
274 */
275static int gru_try_dropin(struct gru_thread_state *gts,
276 struct gru_tlb_fault_handle *tfh,
277 unsigned long __user *cb)
278{
279 struct mm_struct *mm = gts->ts_mm;
280 struct vm_area_struct *vma;
281 int pageshift, asid, write, ret;
282 unsigned long paddr, gpa, vaddr;
283
284 /*
285 * NOTE: The GRU contains magic hardware that eliminates races between
286 * TLB invalidates and TLB dropins. If an invalidate occurs
287 * in the window between reading the TFH and the subsequent TLB dropin,
288 * the dropin is ignored. This eliminates the need for additional locks.
289 */
290
291 /*
292 * Error if TFH state is IDLE or FMM mode & the user is issuing a UPM call.
293 * Might be a hardware race OR a stupid user. Ignore FMM because FMM
294 * is a transient state.
295 */
296 if (tfh->state == TFHSTATE_IDLE)
297 goto failidle;
298 if (tfh->state == TFHSTATE_MISS_FMM && cb)
299 goto failfmm;
300
301 write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
302 vaddr = tfh->missvaddr;
303 asid = tfh->missasid;
304 if (asid == 0)
305 goto failnoasid;
306
307 rmb(); /* TFH must be cache resident before reading ms_range_active */
308
309 /*
310 * TFH is cache resident - at least briefly. Fail the dropin
311 * if a range invalidate is active.
312 */
313 if (atomic_read(&gts->ts_gms->ms_range_active))
314 goto failactive;
315
316 vma = find_vma(mm, vaddr);
317 if (!vma)
318 goto failinval;
319
320 /*
321 * Atomic lookup is faster & usually works even if called in non-atomic
322 * context.
323 */
324 rmb(); /* Must check ms_range_active before loading PTEs */
325 ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
326 if (ret) {
327 if (!cb)
328 goto failupm;
329 if (non_atomic_pte_lookup(vma, vaddr, write, &paddr,
330 &pageshift))
331 goto failinval;
332 }
333 if (is_gru_paddr(paddr))
334 goto failinval;
335
336 paddr = paddr & ~((1UL << pageshift) - 1);
337 gpa = uv_soc_phys_ram_to_gpa(paddr);
338 gru_cb_set_istatus_active(cb);
339 tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
340 GRU_PAGESIZE(pageshift));
341 STAT(tlb_dropin);
342 gru_dbg(grudev,
343 "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n",
344 ret ? "non-atomic" : "atomic", tfh, vaddr, asid,
345 pageshift, gpa);
346 return 0;
347
348failnoasid:
349 /* No asid (delayed unload). */
350 STAT(tlb_dropin_fail_no_asid);
351 gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
352 if (!cb)
353 tfh_user_polling_mode(tfh);
354 else
355 gru_flush_cache(tfh);
356 return -EAGAIN;
357
358failupm:
359 /* Atomic failure switch CBR to UPM */
360 tfh_user_polling_mode(tfh);
361 STAT(tlb_dropin_fail_upm);
362 gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
363 return 1;
364
365failfmm:
366 /* FMM state on UPM call */
367 STAT(tlb_dropin_fail_fmm);
368 gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
369 return 0;
370
371failidle:
372 /* TFH was idle - no miss pending */
373 gru_flush_cache(tfh);
374 if (cb)
375 gru_flush_cache(cb);
376 STAT(tlb_dropin_fail_idle);
377 gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
378 return 0;
379
380failinval:
381 /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
382 tfh_exception(tfh);
383 STAT(tlb_dropin_fail_invalid);
384 gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
385 return -EFAULT;
386
387failactive:
388 /* Range invalidate active. Switch to UPM iff atomic */
389 if (!cb)
390 tfh_user_polling_mode(tfh);
391 else
392 gru_flush_cache(tfh);
393 STAT(tlb_dropin_fail_range_active);
394 gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
395 tfh, vaddr);
396 return 1;
397}
398
399/*
400 * Process an external interrupt from the GRU. This interrupt is
401 * caused by a TLB miss.
402 * Note that this is the interrupt handler that is registered with the linux
403 * interrupt subsystem.
404 */
405irqreturn_t gru_intr(int irq, void *dev_id)
406{
407 struct gru_state *gru;
408 struct gru_tlb_fault_map map;
409 struct gru_thread_state *gts;
410 struct gru_tlb_fault_handle *tfh = NULL;
411 int cbrnum, ctxnum;
412
413 STAT(intr);
414
415 gru = irq_to_gru(irq);
416 if (!gru) {
417 dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n",
418 raw_smp_processor_id(), irq);
419 return IRQ_NONE;
420 }
421 get_clear_fault_map(gru, &map);
422 gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid,
423 map.fault_bits[0]);
424
425 for_each_cbr_in_tfm(cbrnum, map.fault_bits) {
426 tfh = get_tfh_by_index(gru, cbrnum);
427 prefetchw(tfh); /* Helps on hdw, required for emulator */
428
429 /*
430 * When hardware sets a bit in the faultmap, it implicitly
431 * locks the GRU context so that it cannot be unloaded.
432 * The gts cannot change until a TFH start/writestart command
433 * is issued.
434 */
435 ctxnum = tfh->ctxnum;
436 gts = gru->gs_gts[ctxnum];
437
438 /*
439 * This is running in interrupt context. Trylock the mmap_sem.
440 * If it fails, retry the fault in user context.
441 */
442 if (down_read_trylock(&gts->ts_mm->mmap_sem)) {
443 gru_try_dropin(gts, tfh, NULL);
444 up_read(&gts->ts_mm->mmap_sem);
445 } else {
446 tfh_user_polling_mode(tfh);
447 }
448 }
449 return IRQ_HANDLED;
450}
451
452
453static int gru_user_dropin(struct gru_thread_state *gts,
454 struct gru_tlb_fault_handle *tfh,
455 unsigned long __user *cb)
456{
457 struct gru_mm_struct *gms = gts->ts_gms;
458 int ret;
459
460 while (1) {
461 wait_event(gms->ms_wait_queue,
462 atomic_read(&gms->ms_range_active) == 0);
463 prefetchw(tfh); /* Helps on hdw, required for emulator */
464 ret = gru_try_dropin(gts, tfh, cb);
465 if (ret <= 0)
466 return ret;
467 STAT(call_os_wait_queue);
468 }
469}
470
471/*
472 * This interface is called as a result of a user detecting a "call OS" bit
473 * in a user CB. Normally means that a TLB fault has occurred.
474 * cb - user virtual address of the CB
475 */
476int gru_handle_user_call_os(unsigned long cb)
477{
478 struct gru_tlb_fault_handle *tfh;
479 struct gru_thread_state *gts;
480 unsigned long __user *cbp;
481 int ucbnum, cbrnum, ret = -EINVAL;
482
483 STAT(call_os);
484 gru_dbg(grudev, "address 0x%lx\n", cb);
485
486 /* sanity check the cb pointer */
487 ucbnum = get_cb_number((void *)cb);
488 if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
489 return -EINVAL;
490 cbp = (unsigned long *)cb;
491
492 gts = gru_find_lock_gts(cb);
493 if (!gts)
494 return -EINVAL;
495
496 if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
497 ret = -EINVAL;
498 goto exit;
499 }
500
501 /*
502 * If force_unload is set, the UPM TLB fault is phony. The task
503 * has migrated to another node and the GSEG must be moved. Just
504 * unload the context. The task will page fault and assign a new
505 * context.
506 */
507 ret = -EAGAIN;
508 cbrnum = thread_cbr_number(gts, ucbnum);
509 if (gts->ts_force_unload) {
510 gru_unload_context(gts, 1);
511 } else if (gts->ts_gru) {
512 tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
513 ret = gru_user_dropin(gts, tfh, cbp);
514 }
515exit:
516 gru_unlock_gts(gts);
517 return ret;
518}
519
520/*
521 * Fetch the exception detail information for a CB that terminated with
522 * an exception.
523 */
524int gru_get_exception_detail(unsigned long arg)
525{
526 struct control_block_extended_exc_detail excdet;
527 struct gru_control_block_extended *cbe;
528 struct gru_thread_state *gts;
529 int ucbnum, cbrnum, ret;
530
531 STAT(user_exception);
532 if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
533 return -EFAULT;
534
535 gru_dbg(grudev, "address 0x%lx\n", excdet.cb);
536 gts = gru_find_lock_gts(excdet.cb);
537 if (!gts)
538 return -EINVAL;
539
540 if (gts->ts_gru) {
541 ucbnum = get_cb_number((void *)excdet.cb);
542 cbrnum = thread_cbr_number(gts, ucbnum);
543 cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
544 prefetchw(cbe); /* Harmless on hardware, required for emulator */
545 excdet.opc = cbe->opccpy;
546 excdet.exopc = cbe->exopccpy;
547 excdet.ecause = cbe->ecause;
548 excdet.exceptdet0 = cbe->idef1upd;
549 excdet.exceptdet1 = cbe->idef3upd;
550 ret = 0;
551 } else {
552 ret = -EAGAIN;
553 }
554 gru_unlock_gts(gts);
555
556 gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
557 excdet.ecause);
558 if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
559 ret = -EFAULT;
560 return ret;
561}
562
563/*
564 * User request to unload a context. Content is saved for possible reload.
565 */
566int gru_user_unload_context(unsigned long arg)
567{
568 struct gru_thread_state *gts;
569 struct gru_unload_context_req req;
570
571 STAT(user_unload_context);
572 if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
573 return -EFAULT;
574
575 gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
576
577 gts = gru_find_lock_gts(req.gseg);
578 if (!gts)
579 return -EINVAL;
580
581 if (gts->ts_gru)
582 gru_unload_context(gts, 1);
583 gru_unlock_gts(gts);
584
585 return 0;
586}
587
588/*
589 * User request to flush a range of virtual addresses from the GRU TLB
590 * (Mainly for testing).
591 */
592int gru_user_flush_tlb(unsigned long arg)
593{
594 struct gru_thread_state *gts;
595 struct gru_flush_tlb_req req;
596
597 STAT(user_flush_tlb);
598 if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
599 return -EFAULT;
600
601 gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
602 req.vaddr, req.len);
603
604 gts = gru_find_lock_gts(req.gseg);
605 if (!gts)
606 return -EINVAL;
607
608 gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.vaddr + req.len);
609 gru_unlock_gts(gts);
610
611 return 0;
612}
613
614/*
615 * Register the current task as the user of the GSEG slice.
616 * Needed for TLB fault interrupt targeting.
617 */
618int gru_set_task_slice(long address)
619{
620 struct gru_thread_state *gts;
621
622 STAT(set_task_slice);
623 gru_dbg(grudev, "address 0x%lx\n", address);
624 gts = gru_alloc_locked_gts(address);
625 if (!gts)
626 return -EINVAL;
627
628 gts->ts_tgid_owner = current->tgid;
629 gru_unlock_gts(gts);
630
631 return 0;
632}
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
new file mode 100644
index 000000000000..5c027b6b4e5a
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -0,0 +1,494 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * FILE OPERATIONS & DRIVER INITIALIZATION
5 *
6 * This file supports the user system calls for file open, close, mmap, etc.
7 * It also includes the driver initialization code.
8 *
9 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26#include <linux/module.h>
27#include <linux/kernel.h>
28#include <linux/errno.h>
29#include <linux/slab.h>
30#include <linux/mm.h>
31#include <linux/io.h>
32#include <linux/smp_lock.h>
33#include <linux/spinlock.h>
34#include <linux/device.h>
35#include <linux/miscdevice.h>
36#include <linux/interrupt.h>
37#include <linux/proc_fs.h>
38#include <linux/uaccess.h>
39#include "gru.h"
40#include "grulib.h"
41#include "grutables.h"
42
43#if defined CONFIG_X86_64
44#include <asm/genapic.h>
45#include <asm/irq.h>
46#define IS_UV() is_uv_system()
47#elif defined CONFIG_IA64
48#include <asm/system.h>
49#include <asm/sn/simulator.h>
50/* temp support for running on hardware simulator */
51#define IS_UV() IS_MEDUSA() || ia64_platform_is("uv")
52#else
53#define IS_UV() 0
54#endif
55
56#include <asm/uv/uv_hub.h>
57#include <asm/uv/uv_mmrs.h>
58
59struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
60unsigned long gru_start_paddr, gru_end_paddr __read_mostly;
61struct gru_stats_s gru_stats;
62
63/* Guaranteed user available resources on each node */
64static int max_user_cbrs, max_user_dsr_bytes;
65
66static struct file_operations gru_fops;
67static struct miscdevice gru_miscdev;
68
69
70/*
71 * gru_vma_close
72 *
73 * Called when unmapping a device mapping. Frees all gru resources
74 * and tables belonging to the vma.
75 */
76static void gru_vma_close(struct vm_area_struct *vma)
77{
78 struct gru_vma_data *vdata;
79 struct gru_thread_state *gts;
80 struct list_head *entry, *next;
81
82 if (!vma->vm_private_data)
83 return;
84
85 vdata = vma->vm_private_data;
86 vma->vm_private_data = NULL;
87 gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
88 vdata);
89 list_for_each_safe(entry, next, &vdata->vd_head) {
90 gts =
91 list_entry(entry, struct gru_thread_state, ts_next);
92 list_del(&gts->ts_next);
93 mutex_lock(&gts->ts_ctxlock);
94 if (gts->ts_gru)
95 gru_unload_context(gts, 0);
96 mutex_unlock(&gts->ts_ctxlock);
97 gts_drop(gts);
98 }
99 kfree(vdata);
100 STAT(vdata_free);
101}
102
103/*
104 * gru_file_mmap
105 *
106 * Called when mmapping the device. Initializes the vma with a fault handler
107 * and private data structure necessary to allocate, track, and free the
108 * underlying pages.
109 */
110static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
111{
112 if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
113 return -EPERM;
114
115 if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
116 vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
117 return -EINVAL;
118
119 vma->vm_flags |=
120 (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
121 VM_RESERVED);
122 vma->vm_page_prot = PAGE_SHARED;
123 vma->vm_ops = &gru_vm_ops;
124
125 vma->vm_private_data = gru_alloc_vma_data(vma, 0);
126 if (!vma->vm_private_data)
127 return -ENOMEM;
128
129 gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
130 file, vma->vm_start, vma, vma->vm_private_data);
131 return 0;
132}
133
134/*
135 * Create a new GRU context
136 */
137static int gru_create_new_context(unsigned long arg)
138{
139 struct gru_create_context_req req;
140 struct vm_area_struct *vma;
141 struct gru_vma_data *vdata;
142 int ret = -EINVAL;
143
144
145 if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
146 return -EFAULT;
147
148 if (req.data_segment_bytes == 0 ||
149 req.data_segment_bytes > max_user_dsr_bytes)
150 return -EINVAL;
151 if (!req.control_blocks || !req.maximum_thread_count ||
152 req.control_blocks > max_user_cbrs)
153 return -EINVAL;
154
155 if (!(req.options & GRU_OPT_MISS_MASK))
156 req.options |= GRU_OPT_MISS_FMM_INTR;
157
158 down_write(&current->mm->mmap_sem);
159 vma = gru_find_vma(req.gseg);
160 if (vma) {
161 vdata = vma->vm_private_data;
162 vdata->vd_user_options = req.options;
163 vdata->vd_dsr_au_count =
164 GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
165 vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
166 ret = 0;
167 }
168 up_write(&current->mm->mmap_sem);
169
170 return ret;
171}
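
/*
 * Rough user-side sketch of the expected call sequence (illustration only,
 * not part of this commit). The request layout and ioctl numbers live in
 * grulib.h; the field names below are inferred from the checks above, and
 * "/dev/gru" is assumed from the miscdevice name. Note that gru_file_mmap()
 * above requires the mapping to be GRU_GSEG_PAGESIZE aligned.
 *
 *	int fd = open("/dev/gru", O_RDWR);
 *	void *gseg = mmap(NULL, GRU_GSEG_PAGESIZE, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	struct gru_create_context_req req = {
 *		.gseg			= (unsigned long)gseg,
 *		.data_segment_bytes	= 8 * 1024,
 *		.control_blocks		= 16,
 *		.maximum_thread_count	= 1,
 *		.options		= GRU_OPT_MISS_FMM_INTR,
 *	};
 *	ioctl(fd, GRU_CREATE_CONTEXT, (unsigned long)&req);
 */
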
172
173/*
174 * Get GRU configuration info (temp - for emulator testing)
175 */
176static long gru_get_config_info(unsigned long arg)
177{
178 struct gru_config_info info;
179 int nodesperblade;
180
181 if (num_online_nodes() > 1 &&
182 (uv_node_to_blade_id(1) == uv_node_to_blade_id(0)))
183 nodesperblade = 2;
184 else
185 nodesperblade = 1;
186 info.cpus = num_online_cpus();
187 info.nodes = num_online_nodes();
188 info.blades = info.nodes / nodesperblade;
189 info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;
190
191 if (copy_to_user((void __user *)arg, &info, sizeof(info)))
192 return -EFAULT;
193 return 0;
194}
195
196/*
197 * Get GRU chiplet status
198 */
199static long gru_get_chiplet_status(unsigned long arg)
200{
201 struct gru_state *gru;
202 struct gru_chiplet_info info;
203
204 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
205 return -EFAULT;
206
207 if (info.node == -1)
208 info.node = numa_node_id();
209 if (info.node >= num_possible_nodes() ||
210 info.chiplet >= GRU_CHIPLETS_PER_HUB ||
211 info.node < 0 || info.chiplet < 0)
212 return -EINVAL;
213
214 info.blade = uv_node_to_blade_id(info.node);
215 gru = get_gru(info.blade, info.chiplet);
216
217 info.total_dsr_bytes = GRU_NUM_DSR_BYTES;
218 info.total_cbr = GRU_NUM_CB;
219 info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES -
220 gru->gs_reserved_dsr_bytes;
221 info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs;
222 info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) *
223 GRU_DSR_AU_BYTES;
224 info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
225
226 if (copy_to_user((void __user *)arg, &info, sizeof(info)))
227 return -EFAULT;
228 return 0;
229}
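
/*
 * User-side sketch (illustration only, not part of this commit): query the
 * local chiplet using struct gru_chiplet_info from gru.h. Passing node == -1
 * asks the driver to substitute the caller's NUMA node, as handled above.
 * GRU_GET_CHIPLET_STATUS is defined in grulib.h; "/dev/gru" is assumed from
 * the miscdevice name.
 *
 *	struct gru_chiplet_info info = { .node = -1, .chiplet = 0 };
 *	int fd = open("/dev/gru", O_RDWR);
 *
 *	if (ioctl(fd, GRU_GET_CHIPLET_STATUS, (unsigned long)&info) == 0)
 *		printf("user CBRs free %d of %d\n",
 *		       info.free_user_cbr, info.total_user_cbr);
 */
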
230
231/*
232 * gru_file_unlocked_ioctl
233 *
234 * Called to update file attributes via IOCTL calls.
235 */
236static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
237 unsigned long arg)
238{
239 int err = -EBADRQC;
240
241 gru_dbg(grudev, "file %p\n", file);
242
243 switch (req) {
244 case GRU_CREATE_CONTEXT:
245 err = gru_create_new_context(arg);
246 break;
247 case GRU_SET_TASK_SLICE:
248 err = gru_set_task_slice(arg);
249 break;
250 case GRU_USER_GET_EXCEPTION_DETAIL:
251 err = gru_get_exception_detail(arg);
252 break;
253 case GRU_USER_UNLOAD_CONTEXT:
254 err = gru_user_unload_context(arg);
255 break;
256 case GRU_GET_CHIPLET_STATUS:
257 err = gru_get_chiplet_status(arg);
258 break;
259 case GRU_USER_FLUSH_TLB:
260 err = gru_user_flush_tlb(arg);
261 break;
262 case GRU_USER_CALL_OS:
263 err = gru_handle_user_call_os(arg);
264 break;
265 case GRU_GET_CONFIG_INFO:
266 err = gru_get_config_info(arg);
267 break;
268 }
269 return err;
270}
271
272/*
273 * Called at init time to build tables for all GRUs that are present in the
274 * system.
275 */
276static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
277 void *vaddr, int nid, int bid, int grunum)
278{
279 spin_lock_init(&gru->gs_lock);
280 spin_lock_init(&gru->gs_asid_lock);
281 gru->gs_gru_base_paddr = paddr;
282 gru->gs_gru_base_vaddr = vaddr;
283 gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum;
284 gru->gs_blade = gru_base[bid];
285 gru->gs_blade_id = bid;
286 gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
287 gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
288 gru_tgh_flush_init(gru);
289 gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n",
290 bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
291 gru->gs_gru_base_paddr);
292 gru_kservices_init(gru);
293}
294
295static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
296{
297 int pnode, nid, bid, chip;
298 int cbrs, dsrbytes, n;
299 int order = get_order(sizeof(struct gru_blade_state));
300 struct page *page;
301 struct gru_state *gru;
302 unsigned long paddr;
303 void *vaddr;
304
305 max_user_cbrs = GRU_NUM_CB;
306 max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
307 for_each_online_node(nid) {
308 bid = uv_node_to_blade_id(nid);
309 pnode = uv_node_to_pnode(nid);
310 if (gru_base[bid])
311 continue;
312 page = alloc_pages_node(nid, GFP_KERNEL, order);
313 if (!page)
314 goto fail;
315 gru_base[bid] = page_address(page);
316 memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
317 gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
318 spin_lock_init(&gru_base[bid]->bs_lock);
319
320 dsrbytes = 0;
321 cbrs = 0;
322 for (gru = gru_base[bid]->bs_grus, chip = 0;
323 chip < GRU_CHIPLETS_PER_BLADE;
324 chip++, gru++) {
325 paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
326 vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
327 gru_init_chiplet(gru, paddr, vaddr, bid, nid, chip);
328 n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
329 cbrs = max(cbrs, n);
330 n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
331 dsrbytes = max(dsrbytes, n);
332 }
333 max_user_cbrs = min(max_user_cbrs, cbrs);
334 max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes);
335 }
336
337 return 0;
338
339fail:
340 for (nid--; nid >= 0; nid--)
341 free_pages((unsigned long)gru_base[nid], order);
342 return -ENOMEM;
343}
344
345#ifdef CONFIG_IA64
346
347static int get_base_irq(void)
348{
349 return IRQ_GRU;
350}
351
352#elif defined CONFIG_X86_64
353
354static void noop(unsigned int irq)
355{
356}
357
358static struct irq_chip gru_chip = {
359 .name = "gru",
360 .mask = noop,
361 .unmask = noop,
362 .ack = noop,
363};
364
365static int get_base_irq(void)
366{
367 set_irq_chip(IRQ_GRU, &gru_chip);
368 set_irq_chip(IRQ_GRU + 1, &gru_chip);
369 return IRQ_GRU;
370}
371#endif
372
373/*
374 * gru_init
375 *
376 * Called at boot or module load time to initialize the GRUs.
377 */
378static int __init gru_init(void)
379{
380 int ret, irq, chip;
381 char id[10];
382 void *gru_start_vaddr;
383
384 if (!IS_UV())
385 return 0;
386
387#if defined CONFIG_IA64
388 gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
389#else
390 gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
391 0x7fffffffffffUL;
392
393#endif
394 gru_start_vaddr = __va(gru_start_paddr);
395 gru_end_paddr = gru_start_paddr + MAX_NUMNODES * GRU_SIZE;
396 printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
397 gru_start_paddr, gru_end_paddr);
398 irq = get_base_irq();
399 for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
400 ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
401 /* TODO: fix irq handling on x86. For now ignore failures because
402 * interrupts are not required & not yet fully supported */
403 if (ret) {
404 printk("!!!WARNING: GRU ignoring request failure!!!\n");
405 ret = 0;
406 }
407 if (ret) {
408 printk(KERN_ERR "%s: request_irq failed\n",
409 GRU_DRIVER_ID_STR);
410 goto exit1;
411 }
412 }
413
414 ret = misc_register(&gru_miscdev);
415 if (ret) {
416 printk(KERN_ERR "%s: misc_register failed\n",
417 GRU_DRIVER_ID_STR);
418 goto exit1;
419 }
420
421 ret = gru_proc_init();
422 if (ret) {
423 printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
424 goto exit2;
425 }
426
427 ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
428 if (ret) {
429 printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
430 goto exit3;
431 }
432
433 printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
434 GRU_DRIVER_VERSION_STR);
435 return 0;
436
437exit3:
438 gru_proc_exit();
439exit2:
440 misc_deregister(&gru_miscdev);
441exit1:
442 for (--chip; chip >= 0; chip--)
443 free_irq(irq + chip, NULL);
444 return ret;
445
446}
447
448static void __exit gru_exit(void)
449{
450 int i, bid;
451 int order = get_order(sizeof(struct gru_state) *
452 GRU_CHIPLETS_PER_BLADE);
453
454 if (!IS_UV())
455 return;
456
457 for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
458 free_irq(IRQ_GRU + i, NULL);
459
460 for (bid = 0; bid < GRU_MAX_BLADES; bid++)
461 free_pages((unsigned long)gru_base[bid], order);
462
463 misc_deregister(&gru_miscdev);
464 gru_proc_exit();
465}
466
467static struct file_operations gru_fops = {
468 .owner = THIS_MODULE,
469 .unlocked_ioctl = gru_file_unlocked_ioctl,
470 .mmap = gru_file_mmap,
471};
472
473static struct miscdevice gru_miscdev = {
474 .minor = MISC_DYNAMIC_MINOR,
475 .name = "gru",
476 .fops = &gru_fops,
477};
478
479struct vm_operations_struct gru_vm_ops = {
480 .close = gru_vma_close,
481 .fault = gru_fault,
482};
483
484module_init(gru_init);
485module_exit(gru_exit);
486
487module_param(gru_options, ulong, 0644);
488MODULE_PARM_DESC(gru_options, "Various debug options");
489
490MODULE_AUTHOR("Silicon Graphics, Inc.");
491MODULE_LICENSE("GPL");
492MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
493MODULE_VERSION(GRU_DRIVER_VERSION_STR);
494
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
new file mode 100644
index 000000000000..b63018d60fe1
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -0,0 +1,658 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * GRU HANDLE DEFINITION
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifndef __GRUHANDLES_H__
24#define __GRUHANDLES_H__
25#include "gru_instructions.h"
26
27/*
28 * Manifest constants for GRU Memory Map
29 */
30#define GRU_GSEG0_BASE 0
31#define GRU_MCS_BASE (64 * 1024 * 1024)
32#define GRU_SIZE (128UL * 1024 * 1024)
33
34/* Handle & resource counts */
35#define GRU_NUM_CB 128
36#define GRU_NUM_DSR_BYTES (32 * 1024)
37#define GRU_NUM_TFM 16
38#define GRU_NUM_TGH 24
39#define GRU_NUM_CBE 128
40#define GRU_NUM_TFH 128
41#define GRU_NUM_CCH 16
42#define GRU_NUM_GSH 1
43
44/* Maximum resource counts that can be reserved by user programs */
45#define GRU_NUM_USER_CBR GRU_NUM_CBE
46#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES
47
48/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles
49 * are the same */
50#define GRU_HANDLE_BYTES 64
51#define GRU_HANDLE_STRIDE 256
52
53/* Base addresses of handles */
54#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000)
55#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000)
56#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000)
57#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000)
58#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000)
59#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000)
60
61/* User gseg constants */
62#define GRU_GSEG_STRIDE (4 * 1024 * 1024)
63#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1))
64
65/* Data segment constants */
66#define GRU_DSR_AU_BYTES 1024
67#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES)
68#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES)
69#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES)
70
71/* Control block constants */
72#define GRU_CBR_AU_SIZE 2
73#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE)
74
75/* Convert resource counts to the number of AU */
76#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
77#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)
78
79/* UV limits */
80#define GRU_CHIPLETS_PER_HUB 2
81#define GRU_HUBS_PER_BLADE 1
82#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB)
83
84/* User GRU Gseg offsets */
85#define GRU_CB_BASE 0
86#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE)
87#define GRU_DS_BASE 0x20000
88#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES)
89
90/* Convert a GRU physical address to the chiplet offset */
91#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1))
92
93/* Convert an arbitrary handle address to the beginning of the GRU segment */
94#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
95
96/* General addressing macros. */
97static inline void *get_gseg_base_address(void *base, int ctxnum)
98{
99 return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum);
100}
101
102static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line)
103{
104 return (void *)(get_gseg_base_address(base, ctxnum) +
105 GRU_CB_BASE + GRU_HANDLE_STRIDE * line);
106}
107
108static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line)
109{
110 return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE +
111 GRU_CACHE_LINE_BYTES * line);
112}
113
114static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum)
115{
116 return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE +
117 ctxnum * GRU_HANDLE_STRIDE);
118}
119
120static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum)
121{
122 return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE +
123 ctxnum * GRU_HANDLE_STRIDE);
124}
125
126static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum)
127{
128 return (struct gru_control_block_extended *)(base + GRU_CBE_BASE +
129 ctxnum * GRU_HANDLE_STRIDE);
130}
131
132static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum)
133{
134 return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE +
135 ctxnum * GRU_HANDLE_STRIDE);
136}
137
138static inline struct gru_context_configuration_handle *get_cch(void *base,
139 int ctxnum)
140{
141 return (struct gru_context_configuration_handle *)(base +
142 GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE);
143}
144
145static inline unsigned long get_cb_number(void *cb)
146{
147 return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) /
148 GRU_HANDLE_STRIDE;
149}
150
151/* Byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)) */
152static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode,
153 int chiplet)
154{
155 return paddr + GRU_SIZE * (2 * pnode + chiplet);
156}
157
158static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
159{
160 return vaddr + GRU_SIZE * (2 * pnode + chiplet);
161}
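Since GRU_SIZE is 128MB and each pnode carries two chiplets, the offset arithmetic above is easy to check by hand. A minimal illustrative sketch (the helper name and the base address are hypothetical; in practice the base comes from __va() of the GRU overlay address):

static inline void *example_chiplet_lookup(void *gru_base_vaddr)
{
	/* chiplet 1 on pnode 3: (2 * 3 + 1) == 7, i.e. base + 7 * GRU_SIZE */
	return gru_chiplet_vaddr(gru_base_vaddr, 3, 1);
}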
162
163
164
165/*
166 * Global TLB Fault Map
167 * Bitmap of outstanding TLB misses needing interrupt/polling service.
168 *
169 */
170struct gru_tlb_fault_map {
171 unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
172 unsigned long fill0[2];
173 unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
174 unsigned long fill1[2];
175};
176
177/*
178 * TGH - TLB Global Handle
179 * Used for TLB flushing.
180 *
181 */
182struct gru_tlb_global_handle {
183 unsigned int cmd:1; /* DW 0 */
184 unsigned int delresp:1;
185 unsigned int opc:1;
186 unsigned int fill1:5;
187
188 unsigned int fill2:8;
189
190 unsigned int status:2;
191 unsigned long fill3:2;
192 unsigned int state:3;
193 unsigned long fill4:1;
194
195 unsigned int cause:3;
196 unsigned long fill5:37;
197
198 unsigned long vaddr:64; /* DW 1 */
199
200 unsigned int asid:24; /* DW 2 */
201 unsigned int fill6:8;
202
203 unsigned int pagesize:5;
204 unsigned int fill7:11;
205
206 unsigned int global:1;
207 unsigned int fill8:15;
208
209 unsigned long vaddrmask:39; /* DW 3 */
210 unsigned int fill9:9;
211 unsigned int n:10;
212 unsigned int fill10:6;
213
214 unsigned int ctxbitmap:16; /* DW4 */
215 unsigned long fill11[3];
216};
217
218enum gru_tgh_cmd {
219 TGHCMD_START
220};
221
222enum gru_tgh_opc {
223 TGHOP_TLBNOP,
224 TGHOP_TLBINV
225};
226
227enum gru_tgh_status {
228 TGHSTATUS_IDLE,
229 TGHSTATUS_EXCEPTION,
230 TGHSTATUS_ACTIVE
231};
232
233enum gru_tgh_state {
234 TGHSTATE_IDLE,
235 TGHSTATE_PE_INVAL,
236 TGHSTATE_INTERRUPT_INVAL,
237 TGHSTATE_WAITDONE,
238 TGHSTATE_RESTART_CTX,
239};
240
241/*
242 * TFH - TLB Fault Handle
243 * Used for TLB dropins into the GRU TLB.
244 *
245 */
246struct gru_tlb_fault_handle {
247 unsigned int cmd:1; /* DW 0 - low 32*/
248 unsigned int delresp:1;
249 unsigned int fill0:2;
250 unsigned int opc:3;
251 unsigned int fill1:9;
252
253 unsigned int status:2;
254 unsigned int fill2:1;
255 unsigned int color:1;
256 unsigned int state:3;
257 unsigned int fill3:1;
258
259 unsigned int cause:7; /* DW 0 - high 32 */
260 unsigned int fill4:1;
261
262 unsigned int indexway:12;
263 unsigned int fill5:4;
264
265 unsigned int ctxnum:4;
266 unsigned int fill6:12;
267
268 unsigned long missvaddr:64; /* DW 1 */
269
270 unsigned int missasid:24; /* DW 2 */
271 unsigned int fill7:8;
272 unsigned int fillasid:24;
273 unsigned int dirty:1;
274 unsigned int gaa:2;
275 unsigned long fill8:5;
276
277 unsigned long pfn:41; /* DW 3 */
278 unsigned int fill9:7;
279 unsigned int pagesize:5;
280 unsigned int fill10:11;
281
282 unsigned long fillvaddr:64; /* DW 4 */
283
284 unsigned long fill11[3];
285};
286
287enum gru_tfh_opc {
288 TFHOP_NOOP,
289 TFHOP_RESTART,
290 TFHOP_WRITE_ONLY,
291 TFHOP_WRITE_RESTART,
292 TFHOP_EXCEPTION,
293 TFHOP_USER_POLLING_MODE = 7,
294};
295
296enum tfh_status {
297 TFHSTATUS_IDLE,
298 TFHSTATUS_EXCEPTION,
299 TFHSTATUS_ACTIVE,
300};
301
302enum tfh_state {
303 TFHSTATE_INACTIVE,
304 TFHSTATE_IDLE,
305 TFHSTATE_MISS_UPM,
306 TFHSTATE_MISS_FMM,
307 TFHSTATE_HW_ERR,
308 TFHSTATE_WRITE_TLB,
309 TFHSTATE_RESTART_CBR,
310};
311
312/* TFH cause bits */
313enum tfh_cause {
314 TFHCAUSE_NONE,
315 TFHCAUSE_TLB_MISS,
316 TFHCAUSE_TLB_MOD,
317 TFHCAUSE_HW_ERROR_RR,
318 TFHCAUSE_HW_ERROR_MAIN_ARRAY,
319 TFHCAUSE_HW_ERROR_VALID,
320 TFHCAUSE_HW_ERROR_PAGESIZE,
321 TFHCAUSE_INSTRUCTION_EXCEPTION,
322 TFHCAUSE_UNCORRECTIBLE_ERROR,
323};
324
325/* GAA values */
326#define GAA_RAM 0x0
327#define GAA_NCRAM 0x2
328#define GAA_MMIO 0x1
329#define GAA_REGISTER 0x3
330
331/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */
332#define GRU_PADDR_SHIFT 12
333
334/*
335 * Context Configuration handle
336 * Used to allocate resources to a GSEG context.
337 *
338 */
339struct gru_context_configuration_handle {
340 unsigned int cmd:1; /* DW0 */
341 unsigned int delresp:1;
342 unsigned int opc:3;
343 unsigned int unmap_enable:1;
344 unsigned int req_slice_set_enable:1;
345 unsigned int req_slice:2;
346 unsigned int cb_int_enable:1;
347 unsigned int tlb_int_enable:1;
348 unsigned int tfm_fault_bit_enable:1;
349 unsigned int tlb_int_select:4;
350
351 unsigned int status:2;
352 unsigned int state:2;
353 unsigned int reserved2:4;
354
355 unsigned int cause:4;
356 unsigned int tfm_done_bit_enable:1;
357 unsigned int unused:3;
358
359 unsigned int dsr_allocation_map;
360
361 unsigned long cbr_allocation_map; /* DW1 */
362
363 unsigned int asid[8]; /* DW 2 - 5 */
364 unsigned short sizeavail[8]; /* DW 6 - 7 */
365} __attribute__ ((packed));
366
367enum gru_cch_opc {
368 CCHOP_START = 1,
369 CCHOP_ALLOCATE,
370 CCHOP_INTERRUPT,
371 CCHOP_DEALLOCATE,
372 CCHOP_INTERRUPT_SYNC,
373};
374
375enum gru_cch_status {
376 CCHSTATUS_IDLE,
377 CCHSTATUS_EXCEPTION,
378 CCHSTATUS_ACTIVE,
379};
380
381enum gru_cch_state {
382 CCHSTATE_INACTIVE,
383 CCHSTATE_MAPPED,
384 CCHSTATE_ACTIVE,
385 CCHSTATE_INTERRUPTED,
386};
387
388/* CCH Exception cause */
389enum gru_cch_cause {
390 CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1,
391 CCHCAUSE_ILLEGAL_OPCODE = 2,
392 CCHCAUSE_INVALID_START_REQUEST = 3,
393 CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4,
394 CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5,
395 CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6,
396 CCHCAUSE_CCH_BUSY = 7,
397 CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8,
398 CCHCAUSE_BAD_TFM_CONFIG = 9,
399 CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10,
400 CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11,
401 CCHCAUSE_CBR_DEALLOCATION_ERROR = 12,
402};
403/*
404 * CBE - Control Block Extended
405 * Maintains internal GRU state for active CBs.
406 *
407 */
408struct gru_control_block_extended {
409 unsigned int reserved0:1; /* DW 0 - low */
410 unsigned int imacpy:3;
411 unsigned int reserved1:4;
412 unsigned int xtypecpy:3;
413 unsigned int iaa0cpy:2;
414 unsigned int iaa1cpy:2;
415 unsigned int reserved2:1;
416 unsigned int opccpy:8;
417 unsigned int exopccpy:8;
418
419 unsigned int idef2cpy:22; /* DW 0 - high */
420 unsigned int reserved3:10;
421
422 unsigned int idef4cpy:22; /* DW 1 */
423 unsigned int reserved4:10;
424 unsigned int idef4upd:22;
425 unsigned int reserved5:10;
426
427 unsigned long idef1upd:64; /* DW 2 */
428
429 unsigned long idef5cpy:64; /* DW 3 */
430
431 unsigned long idef6cpy:64; /* DW 4 */
432
433 unsigned long idef3upd:64; /* DW 5 */
434
435 unsigned long idef5upd:64; /* DW 6 */
436
437 unsigned int idef2upd:22; /* DW 7 */
438 unsigned int reserved6:10;
439
440 unsigned int ecause:20;
441 unsigned int cbrstate:4;
442 unsigned int cbrexecstatus:8;
443};
444
445enum gru_cbr_state {
446 CBRSTATE_INACTIVE,
447 CBRSTATE_IDLE,
448 CBRSTATE_PE_CHECK,
449 CBRSTATE_QUEUED,
450 CBRSTATE_WAIT_RESPONSE,
451 CBRSTATE_INTERRUPTED,
452 CBRSTATE_INTERRUPTED_MISS_FMM,
453 CBRSTATE_BUSY_INTERRUPT_MISS_FMM,
454 CBRSTATE_INTERRUPTED_MISS_UPM,
455 CBRSTATE_BUSY_INTERRUPTED_MISS_UPM,
456 CBRSTATE_REQUEST_ISSUE,
457 CBRSTATE_BUSY_INTERRUPT,
458};
459
460/* CBE cbrexecstatus bits */
461#define CBR_EXS_ABORT_OCC_BIT 0
462#define CBR_EXS_INT_OCC_BIT 1
463#define CBR_EXS_PENDING_BIT 2
464#define CBR_EXS_QUEUED_BIT 3
465#define CBR_EXS_TLBHW_BIT 4
466#define CBR_EXS_EXCEPTION_BIT 5
467
468#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
469#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
470#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
471#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
472#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT)
473#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
474
475/* CBE ecause bits - defined in gru_instructions.h */
476
477/*
478 * Convert a processor pagesize into the strange encoded pagesize used by the
479 * GRU. Processor pagesize is encoded as the log2 of bytes per page (i.e. PAGE_SHIFT).
480 * pagesize log pagesize grupagesize
481 * 4k 12 0
482 * 16k 14 1
483 * 64k 16 2
484 * 256k 18 3
485 * 1m 20 4
486 * 2m 21 5
487 * 4m 22 6
488 * 16m 24 7
489 * 64m 26 8
490 * ...
491 */
492#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2: (sh)) >> 1) - 6)
493#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh))
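As a quick sanity check on the table above, a hypothetical compile-time helper (not part of the driver; BUILD_BUG_ON from linux/kernel.h is assumed) would spell out a few expected values:

static inline void gru_pagesize_encoding_example(void)
{
	BUILD_BUG_ON(GRU_PAGESIZE(12) != 0);		/* 4k page  -> 0 */
	BUILD_BUG_ON(GRU_PAGESIZE(16) != 2);		/* 64k page -> 2 */
	BUILD_BUG_ON(GRU_PAGESIZE(21) != 5);		/* 2m page  -> 5 */
	BUILD_BUG_ON(GRU_SIZEAVAIL(21) != (1UL << 5));	/* bit 5 set */
}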
494
495/* minimum TLB purge count to ensure a full purge */
496#define GRUMAXINVAL 1024UL
497
498
499/* Extract the status field from a kernel handle */
500#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3)
501
502static inline void start_instruction(void *h)
503{
504 unsigned long *w0 = h;
505
506 wmb(); /* setting CMD bit must be last */
507 *w0 = *w0 | 1;
508 gru_flush_cache(h);
509}
510
511static inline int wait_instruction_complete(void *h)
512{
513 int status;
514
515 do {
516 cpu_relax();
517 barrier();
518 status = GET_MSEG_HANDLE_STATUS(h);
519 } while (status == CCHSTATUS_ACTIVE);
520 return status;
521}
522
523#if defined CONFIG_IA64
524static inline void cch_allocate_set_asids(
525 struct gru_context_configuration_handle *cch, int asidval)
526{
527 int i;
528
529 for (i = 0; i <= RGN_HPAGE; i++) { /* assume HPAGE is last region */
530 cch->asid[i] = (asidval++);
531#if 0
532 /* ZZZ hugepages not supported yet */
533 if (i == RGN_HPAGE)
534 cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift);
535 else
536#endif
537 cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT);
538 }
539}
540#elif defined CONFIG_X86_64
541static inline void cch_allocate_set_asids(
542 struct gru_context_configuration_handle *cch, int asidval)
543{
544 int i;
545
546 for (i = 0; i < 8; i++) {
547 cch->asid[i] = asidval++;
548 cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT) |
549 GRU_SIZEAVAIL(21);
550 }
551}
552#endif
553
554static inline int cch_allocate(struct gru_context_configuration_handle *cch,
555 int asidval, unsigned long cbrmap,
556 unsigned long dsrmap)
557{
558 cch_allocate_set_asids(cch, asidval);
559 cch->dsr_allocation_map = dsrmap;
560 cch->cbr_allocation_map = cbrmap;
561 cch->opc = CCHOP_ALLOCATE;
562 start_instruction(cch);
563 return wait_instruction_complete(cch);
564}
565
566static inline int cch_start(struct gru_context_configuration_handle *cch)
567{
568 cch->opc = CCHOP_START;
569 start_instruction(cch);
570 return wait_instruction_complete(cch);
571}
572
573static inline int cch_interrupt(struct gru_context_configuration_handle *cch)
574{
575 cch->opc = CCHOP_INTERRUPT;
576 start_instruction(cch);
577 return wait_instruction_complete(cch);
578}
579
580static inline int cch_deallocate(struct gru_context_configuration_handle *cch)
581{
582 cch->opc = CCHOP_DEALLOCATE;
583 start_instruction(cch);
584 return wait_instruction_complete(cch);
585}
586
587static inline int cch_interrupt_sync(struct gru_context_configuration_handle
588 *cch)
589{
590 cch->opc = CCHOP_INTERRUPT_SYNC;
591 start_instruction(cch);
592 return wait_instruction_complete(cch);
593}
594
595static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh,
596 unsigned long vaddr, unsigned long vaddrmask,
597 int asid, int pagesize, int global, int n,
598 unsigned short ctxbitmap)
599{
600 tgh->vaddr = vaddr;
601 tgh->asid = asid;
602 tgh->pagesize = pagesize;
603 tgh->n = n;
604 tgh->global = global;
605 tgh->vaddrmask = vaddrmask;
606 tgh->ctxbitmap = ctxbitmap;
607 tgh->opc = TGHOP_TLBINV;
608 start_instruction(tgh);
609 return wait_instruction_complete(tgh);
610}
611
612static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh,
613 unsigned long pfn, unsigned long vaddr,
614 int asid, int dirty, int pagesize)
615{
616 tfh->fillasid = asid;
617 tfh->fillvaddr = vaddr;
618 tfh->pfn = pfn;
619 tfh->dirty = dirty;
620 tfh->pagesize = pagesize;
621 tfh->opc = TFHOP_WRITE_ONLY;
622 start_instruction(tfh);
623}
624
625static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
626 unsigned long paddr, int gaa,
627 unsigned long vaddr, int asid, int dirty,
628 int pagesize)
629{
630 tfh->fillasid = asid;
631 tfh->fillvaddr = vaddr;
632 tfh->pfn = paddr >> GRU_PADDR_SHIFT;
633 tfh->gaa = gaa;
634 tfh->dirty = dirty;
635 tfh->pagesize = pagesize;
636 tfh->opc = TFHOP_WRITE_RESTART;
637 start_instruction(tfh);
638}
639
640static inline void tfh_restart(struct gru_tlb_fault_handle *tfh)
641{
642 tfh->opc = TFHOP_RESTART;
643 start_instruction(tfh);
644}
645
646static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
647{
648 tfh->opc = TFHOP_USER_POLLING_MODE;
649 start_instruction(tfh);
650}
651
652static inline void tfh_exception(struct gru_tlb_fault_handle *tfh)
653{
654 tfh->opc = TFHOP_EXCEPTION;
655 start_instruction(tfh);
656}
657
658#endif /* __GRUHANDLES_H__ */
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
new file mode 100644
index 000000000000..880c55dfb662
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -0,0 +1,680 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * KERNEL SERVICES THAT USE THE GRU
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#include <linux/kernel.h>
24#include <linux/errno.h>
25#include <linux/slab.h>
26#include <linux/mm.h>
27#include <linux/smp_lock.h>
28#include <linux/spinlock.h>
29#include <linux/device.h>
30#include <linux/miscdevice.h>
31#include <linux/proc_fs.h>
32#include <linux/interrupt.h>
33#include <linux/uaccess.h>
34#include "gru.h"
35#include "grulib.h"
36#include "grutables.h"
37#include "grukservices.h"
38#include "gru_instructions.h"
39#include <asm/uv/uv_hub.h>
40
41/*
42 * Kernel GRU Usage
43 *
44 * The following is an interim algorithm for management of kernel GRU
45 * resources. This will likely be replaced when we better understand the
46 * kernel/user requirements.
47 *
48 * At boot time, the kernel permanently reserves a fixed number of
49 * CBRs/DSRs for each cpu to use. The resources are all taken from
50 * the GRU chiplet 1 on the blade. This leaves the full set of resources
51 * of chiplet 0 available to be allocated to a single user.
52 */
53
54/* Blade percpu resources PERMANENTLY reserved for kernel use */
55#define GRU_NUM_KERNEL_CBR 1
56#define GRU_NUM_KERNEL_DSR_BYTES 256
57#define KERNEL_CTXNUM 15
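To put rough numbers on the reservation (the cpu count here is hypothetical): a blade with 16 possible cpus reserves 16 * 1 = 16 CBRs and 16 * 256 = 4096 DSR bytes on chiplet 1, i.e. 8 CBR allocation units and 4 DSR allocation units; this is the same arithmetic gru_kservices_init() performs below.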
58
59/* GRU instruction attributes for all instructions */
60#define IMA IMA_CB_DELAY
61
62/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
63#define __gru_cacheline_aligned__ \
64 __attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
65
66#define MAGIC 0x1234567887654321UL
67
68/* Default retry count for GRU errors on kernel instructions */
69#define EXCEPTION_RETRY_LIMIT 3
70
71/* Status of message queue sections */
72#define MQS_EMPTY 0
73#define MQS_FULL 1
74#define MQS_NOOP 2
75
76/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
77/* optimized for x86_64 */
78struct message_queue {
79 union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */
80 int qlines; /* DW 1 */
81 long hstatus[2];
82 void *next __gru_cacheline_aligned__;/* CL 1 */
83 void *limit;
84 void *start;
85 void *start2;
86 char data ____cacheline_aligned; /* CL 2 */
87};
88
89/* First word in every message - used by mesq interface */
90struct message_header {
91 char present;
92 char present2;
93 char lines;
94 char fill;
95};
96
97#define QLINES(mq) ((mq) + offsetof(struct message_queue, qlines))
98#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
99
100static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
101{
102 struct gru_blade_state *bs;
103 int lcpu;
104
105 BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
106 preempt_disable();
107 bs = gru_base[uv_numa_blade_id()];
108 lcpu = uv_blade_processor_id();
109 *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
110 *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
111 return 0;
112}
113
114static void gru_free_cpu_resources(void *cb, void *dsr)
115{
116 preempt_enable();
117}
118
119int gru_get_cb_exception_detail(void *cb,
120 struct control_block_extended_exc_detail *excdet)
121{
122 struct gru_control_block_extended *cbe;
123
124 cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
125 prefetchw(cbe); /* Harmless on hardware, required for emulator */
126 excdet->opc = cbe->opccpy;
127 excdet->exopc = cbe->exopccpy;
128 excdet->ecause = cbe->ecause;
129 excdet->exceptdet0 = cbe->idef1upd;
130 excdet->exceptdet1 = cbe->idef3upd;
131 return 0;
132}
133
134char *gru_get_cb_exception_detail_str(int ret, void *cb,
135 char *buf, int size)
136{
137 struct gru_control_block_status *gen = (void *)cb;
138 struct control_block_extended_exc_detail excdet;
139
140 if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
141 gru_get_cb_exception_detail(cb, &excdet);
142 snprintf(buf, size,
143 "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
144			" excdet0 0x%lx, excdet1 0x%x",
145 gen, excdet.opc, excdet.exopc, excdet.ecause,
146 excdet.exceptdet0, excdet.exceptdet1);
147 } else {
148 snprintf(buf, size, "No exception");
149 }
150 return buf;
151}
152
153static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
154{
155 while (gen->istatus >= CBS_ACTIVE) {
156 cpu_relax();
157 barrier();
158 }
159 return gen->istatus;
160}
161
162static int gru_retry_exception(void *cb)
163{
164 struct gru_control_block_status *gen = (void *)cb;
165 struct control_block_extended_exc_detail excdet;
166 int retry = EXCEPTION_RETRY_LIMIT;
167
168 while (1) {
169 if (gru_get_cb_message_queue_substatus(cb))
170 break;
171 if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
172 return CBS_IDLE;
173
174 gru_get_cb_exception_detail(cb, &excdet);
175 if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
176 break;
177 if (retry-- == 0)
178 break;
179 gen->icmd = 1;
180 gru_flush_cache(gen);
181 }
182 return CBS_EXCEPTION;
183}
184
185int gru_check_status_proc(void *cb)
186{
187 struct gru_control_block_status *gen = (void *)cb;
188 int ret;
189
190 ret = gen->istatus;
191 if (ret != CBS_EXCEPTION)
192 return ret;
193 return gru_retry_exception(cb);
194
195}
196
197int gru_wait_proc(void *cb)
198{
199 struct gru_control_block_status *gen = (void *)cb;
200 int ret;
201
202 ret = gru_wait_idle_or_exception(gen);
203 if (ret == CBS_EXCEPTION)
204 ret = gru_retry_exception(cb);
205
206 return ret;
207}
208
209void gru_abort(int ret, void *cb, char *str)
210{
211 char buf[GRU_EXC_STR_SIZE];
212
213 panic("GRU FATAL ERROR: %s - %s\n", str,
214 gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
215}
216
217void gru_wait_abort_proc(void *cb)
218{
219 int ret;
220
221 ret = gru_wait_proc(cb);
222 if (ret)
223 gru_abort(ret, cb, "gru_wait_abort");
224}
225
226
227/*------------------------------ MESSAGE QUEUES -----------------------------*/
228
229/* Internal status. These are NOT returned to the user. */
230#define MQIE_AGAIN -1 /* try again */
231
232
233/*
234 * Save/restore the "present" flag that is in the second line of 2-line
235 * messages
236 */
237static inline int get_present2(void *p)
238{
239 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
240 return mhdr->present;
241}
242
243static inline void restore_present2(void *p, int val)
244{
245 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
246 mhdr->present = val;
247}
248
249/*
250 * Create a message queue.
251 * bytes - message queue size in bytes. Includes the 2-cacheline header.
252 */
253int gru_create_message_queue(void *p, unsigned int bytes)
254{
255 struct message_queue *mq = p;
256 unsigned int qlines;
257
258 qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
259 memset(mq, 0, bytes);
260 mq->start = &mq->data;
261 mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
262 mq->next = &mq->data;
263 mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
264 mq->qlines = qlines;
265 mq->hstatus[0] = 0;
266 mq->hstatus[1] = 1;
267 mq->head = gru_mesq_head(2, qlines / 2 + 1);
268 return 0;
269}
270EXPORT_SYMBOL_GPL(gru_create_message_queue);
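To make the resulting geometry concrete, a worked example with a hypothetical 16-cacheline queue:

/*
 * Illustration only: for bytes == 16 * GRU_CACHE_LINE_BYTES, qlines is
 * 16 - 2 = 14, start2 points 6 cachelines into the data area, limit sits
 * 12 cachelines in, and the initial head descriptor is gru_mesq_head(2, 8).
 */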
271
272/*
273 * Send a NOOP message to a message queue
274 * Returns:
275 * 0 - if queue is full after the send. This is the normal case
276 * but various races can change this.
277 * -1 - if mesq sent successfully but queue not full
278 * >0 - unexpected error. MQE_xxx returned
279 */
280static int send_noop_message(void *cb,
281 unsigned long mq, void *mesg)
282{
283 const struct message_header noop_header = {
284 .present = MQS_NOOP, .lines = 1};
285 unsigned long m;
286 int substatus, ret;
287 struct message_header save_mhdr, *mhdr = mesg;
288
289 STAT(mesq_noop);
290 save_mhdr = *mhdr;
291 *mhdr = noop_header;
292 gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA);
293 ret = gru_wait(cb);
294
295 if (ret) {
296 substatus = gru_get_cb_message_queue_substatus(cb);
297 switch (substatus) {
298 case CBSS_NO_ERROR:
299 STAT(mesq_noop_unexpected_error);
300 ret = MQE_UNEXPECTED_CB_ERR;
301 break;
302 case CBSS_LB_OVERFLOWED:
303 STAT(mesq_noop_lb_overflow);
304 ret = MQE_CONGESTION;
305 break;
306 case CBSS_QLIMIT_REACHED:
307 STAT(mesq_noop_qlimit_reached);
308 ret = 0;
309 break;
310 case CBSS_AMO_NACKED:
311 STAT(mesq_noop_amo_nacked);
312 ret = MQE_CONGESTION;
313 break;
314 case CBSS_PUT_NACKED:
315 STAT(mesq_noop_put_nacked);
316 m = mq + (gru_get_amo_value_head(cb) << 6);
317 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
318 IMA);
319 if (gru_wait(cb) == CBS_IDLE)
320 ret = MQIE_AGAIN;
321 else
322 ret = MQE_UNEXPECTED_CB_ERR;
323 break;
324 case CBSS_PAGE_OVERFLOW:
325 default:
326 BUG();
327 }
328 }
329 *mhdr = save_mhdr;
330 return ret;
331}
332
333/*
334 * Handle a gru_mesq full.
335 */
336static int send_message_queue_full(void *cb,
337 unsigned long mq, void *mesg, int lines)
338{
339 union gru_mesqhead mqh;
340 unsigned int limit, head;
341 unsigned long avalue;
342 int half, qlines, save;
343
344 /* Determine if switching to first/second half of q */
345 avalue = gru_get_amo_value(cb);
346 head = gru_get_amo_value_head(cb);
347 limit = gru_get_amo_value_limit(cb);
348
349 /*
350 * Fetch "qlines" from the queue header. Since the queue may be
351 * in memory that can't be accessed using socket addresses, use
352 * the GRU to access the data. Use DSR space from the message.
353 */
354 save = *(int *)mesg;
355 gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA);
356 if (gru_wait(cb) != CBS_IDLE)
357 goto cberr;
358 qlines = *(int *)mesg;
359 *(int *)mesg = save;
360 half = (limit != qlines);
361
362 if (half)
363 mqh = gru_mesq_head(qlines / 2 + 1, qlines);
364 else
365 mqh = gru_mesq_head(2, qlines / 2 + 1);
366
367 /* Try to get lock for switching head pointer */
368 gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA);
369 if (gru_wait(cb) != CBS_IDLE)
370 goto cberr;
371 if (!gru_get_amo_value(cb)) {
372 STAT(mesq_qf_locked);
373 return MQE_QUEUE_FULL;
374 }
375
376	/* Got the lock. Send optional NOP if queue not full. */
377 if (head != limit) {
378 if (send_noop_message(cb, mq, mesg)) {
379 gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half),
380 XTYPE_DW, IMA);
381 if (gru_wait(cb) != CBS_IDLE)
382 goto cberr;
383 STAT(mesq_qf_noop_not_full);
384 return MQIE_AGAIN;
385 }
386 avalue++;
387 }
388
389 /* Then flip queuehead to other half of queue. */
390 gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA);
391 if (gru_wait(cb) != CBS_IDLE)
392 goto cberr;
393
394	/* If not successful in swapping the queue head, clear the hstatus lock */
395 if (gru_get_amo_value(cb) != avalue) {
396 STAT(mesq_qf_switch_head_failed);
397 gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA);
398 if (gru_wait(cb) != CBS_IDLE)
399 goto cberr;
400 }
401 return MQIE_AGAIN;
402cberr:
403 STAT(mesq_qf_unexpected_error);
404 return MQE_UNEXPECTED_CB_ERR;
405}
406
407
408/*
409 * Handle a gru_mesq failure. Some of these failures are software recoverable
410 * or retryable.
411 */
412static int send_message_failure(void *cb,
413 unsigned long mq,
414 void *mesg,
415 int lines)
416{
417 int substatus, ret = 0;
418 unsigned long m;
419
420 substatus = gru_get_cb_message_queue_substatus(cb);
421 switch (substatus) {
422 case CBSS_NO_ERROR:
423 STAT(mesq_send_unexpected_error);
424 ret = MQE_UNEXPECTED_CB_ERR;
425 break;
426 case CBSS_LB_OVERFLOWED:
427 STAT(mesq_send_lb_overflow);
428 ret = MQE_CONGESTION;
429 break;
430 case CBSS_QLIMIT_REACHED:
431 STAT(mesq_send_qlimit_reached);
432 ret = send_message_queue_full(cb, mq, mesg, lines);
433 break;
434 case CBSS_AMO_NACKED:
435 STAT(mesq_send_amo_nacked);
436 ret = MQE_CONGESTION;
437 break;
438 case CBSS_PUT_NACKED:
439 STAT(mesq_send_put_nacked);
440		m = mq + (gru_get_amo_value_head(cb) << 6);
441 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
442 if (gru_wait(cb) == CBS_IDLE)
443 ret = MQE_OK;
444 else
445 ret = MQE_UNEXPECTED_CB_ERR;
446 break;
447 default:
448 BUG();
449 }
450 return ret;
451}
452
453/*
454 * Send a message to a message queue
455 * cb GRU control block to use to send message
456 * mq message queue
457 * mesg	message. Must be vaddr within a GSEG
458 * bytes message size (<= 2 CL)
459 */
460int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes)
461{
462 struct message_header *mhdr;
463 void *cb;
464 void *dsr;
465 int istatus, clines, ret;
466
467 STAT(mesq_send);
468 BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
469
470 clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
471 if (gru_get_cpu_resources(bytes, &cb, &dsr))
472 return MQE_BUG_NO_RESOURCES;
473 memcpy(dsr, mesg, bytes);
474 mhdr = dsr;
475 mhdr->present = MQS_FULL;
476 mhdr->lines = clines;
477 if (clines == 2) {
478 mhdr->present2 = get_present2(mhdr);
479 restore_present2(mhdr, MQS_FULL);
480 }
481
482 do {
483 ret = MQE_OK;
484 gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA);
485 istatus = gru_wait(cb);
486 if (istatus != CBS_IDLE)
487 ret = send_message_failure(cb, mq, dsr, clines);
488 } while (ret == MQIE_AGAIN);
489 gru_free_cpu_resources(cb, dsr);
490
491 if (ret)
492 STAT(mesq_send_failed);
493 return ret;
494}
495EXPORT_SYMBOL_GPL(gru_send_message_gpa);
496
497/*
498 * Advance the receive pointer for the queue to the next message.
499 */
500void gru_free_message(void *rmq, void *mesg)
501{
502 struct message_queue *mq = rmq;
503 struct message_header *mhdr = mq->next;
504 void *next, *pnext;
505 int half = -1;
506 int lines = mhdr->lines;
507
508 if (lines == 2)
509 restore_present2(mhdr, MQS_EMPTY);
510 mhdr->present = MQS_EMPTY;
511
512 pnext = mq->next;
513 next = pnext + GRU_CACHE_LINE_BYTES * lines;
514 if (next == mq->limit) {
515 next = mq->start;
516 half = 1;
517 } else if (pnext < mq->start2 && next >= mq->start2) {
518 half = 0;
519 }
520
521 if (half >= 0)
522 mq->hstatus[half] = 1;
523 mq->next = next;
524}
525EXPORT_SYMBOL_GPL(gru_free_message);
526
527/*
528 * Get next message from message queue. Return NULL if no message
529 * present. User must call gru_free_message() to move to the next message.
530 * rmq message queue
531 */
532void *gru_get_next_message(void *rmq)
533{
534 struct message_queue *mq = rmq;
535 struct message_header *mhdr = mq->next;
536 int present = mhdr->present;
537
538 /* skip NOOP messages */
539 STAT(mesq_receive);
540 while (present == MQS_NOOP) {
541 gru_free_message(rmq, mhdr);
542 mhdr = mq->next;
543 present = mhdr->present;
544 }
545
546 /* Wait for both halves of 2 line messages */
547 if (present == MQS_FULL && mhdr->lines == 2 &&
548 get_present2(mhdr) == MQS_EMPTY)
549 present = MQS_EMPTY;
550
551 if (!present) {
552 STAT(mesq_receive_none);
553 return NULL;
554 }
555
556 if (mhdr->lines == 2)
557 restore_present2(mhdr, mhdr->present2);
558
559 return mhdr;
560}
561EXPORT_SYMBOL_GPL(gru_get_next_message);
562
563/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
564
565/*
566 * Copy a block of data using the GRU resources
567 */
568int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
569 unsigned int bytes)
570{
571 void *cb;
572 void *dsr;
573 int ret;
574
575 STAT(copy_gpa);
576 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
577 return MQE_BUG_NO_RESOURCES;
578 gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
579 XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA);
580 ret = gru_wait(cb);
581 gru_free_cpu_resources(cb, dsr);
582 return ret;
583}
584EXPORT_SYMBOL_GPL(gru_copy_gpa);
585
586/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
587/* Temp - will delete after we gain confidence in the GRU */
588static __cacheline_aligned unsigned long word0;
589static __cacheline_aligned unsigned long word1;
590
591static int quicktest(struct gru_state *gru)
592{
593 void *cb;
594 void *ds;
595 unsigned long *p;
596
597 cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
598 ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
599 p = ds;
600 word0 = MAGIC;
601
602 gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
603 if (gru_wait(cb) != CBS_IDLE)
604 BUG();
605
606 if (*(unsigned long *)ds != MAGIC)
607 BUG();
608 gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
609 if (gru_wait(cb) != CBS_IDLE)
610 BUG();
611
612 if (word0 != word1 || word0 != MAGIC) {
613 printk
614 ("GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n",
615 gru->gs_gid, word1, MAGIC);
616 BUG(); /* ZZZ should not be fatal */
617 }
618
619 return 0;
620}
621
622
623int gru_kservices_init(struct gru_state *gru)
624{
625 struct gru_blade_state *bs;
626 struct gru_context_configuration_handle *cch;
627 unsigned long cbr_map, dsr_map;
628 int err, num, cpus_possible;
629
630 /*
631 * Currently, resources are reserved ONLY on the second chiplet
632 * on each blade. This leaves ALL resources on chiplet 0 available
633 * for user code.
634 */
635 bs = gru->gs_blade;
636 if (gru != &bs->bs_grus[1])
637 return 0;
638
639 cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
640
641 num = GRU_NUM_KERNEL_CBR * cpus_possible;
642 cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
643 gru->gs_reserved_cbrs += num;
644
645 num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
646 dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
647 gru->gs_reserved_dsr_bytes += num;
648
649 gru->gs_active_contexts++;
650 __set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
651 cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
652
653 bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
654 KERNEL_CTXNUM, 0);
655 bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
656 KERNEL_CTXNUM, 0);
657
658 lock_cch_handle(cch);
659 cch->tfm_fault_bit_enable = 0;
660 cch->tlb_int_enable = 0;
661 cch->tfm_done_bit_enable = 0;
662 cch->unmap_enable = 1;
663 err = cch_allocate(cch, 0, cbr_map, dsr_map);
664 if (err) {
665 gru_dbg(grudev,
666 "Unable to allocate kernel CCH: gru %d, err %d\n",
667 gru->gs_gid, err);
668 BUG();
669 }
670 if (cch_start(cch)) {
671 gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n",
672 gru->gs_gid, err);
673 BUG();
674 }
675 unlock_cch_handle(cch);
676
677 if (gru_options & GRU_QUICKLOOK)
678 quicktest(gru);
679 return 0;
680}
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
new file mode 100644
index 000000000000..eb17e0a3ac61
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -0,0 +1,134 @@
1
2/*
3 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19#ifndef __GRU_KSERVICES_H_
20#define __GRU_KSERVICES_H_
21
22
23/*
24 * Message queues using the GRU to send/receive messages.
25 *
26 * These functions allow the user to create a message queue for
27 * sending/receiving 1 or 2 cacheline messages using the GRU.
28 *
29 * Processes SENDING messages will use a kernel CBR/DSR to send
30 * the message. This is transparent to the caller.
31 *
32 * The receiver does not use any GRU resources.
33 *
34 * The functions support:
35 * - single receiver
36 * - multiple senders
37 * - cross partition message
38 *
39 * Missing features ZZZ:
40 * - user options for dealing with timeouts, queue full, etc.
41 * - gru_create_message_queue() needs interrupt vector info
42 */
43
44/*
45 * Initialize a user allocated chunk of memory to be used as
46 * a message queue. The caller must ensure that the queue is
47 * in contiguous physical memory and is cacheline aligned.
48 *
49 * Message queue size is the total number of bytes allocated
50 * to the queue including a 2 cacheline header that is used
51 * to manage the queue.
52 *
53 * Input:
54 * p pointer to user allocated memory.
55 * bytes size of message queue in bytes
56 *
57 * Errors:
58 * 0 OK
59 * >0 error
60 */
61extern int gru_create_message_queue(void *p, unsigned int bytes);
62
63/*
64 * Send a message to a message queue.
65 *
66 * Note: The message queue transport mechanism uses the first 32
67 * bits of the message. Users should avoid using these bits.
68 *
69 *
70 * Input:
71 * xmq message queue - must be a UV global physical address
72 * mesg pointer to message. Must be 64-bit aligned
73 * bytes size of message in bytes
74 *
75 * Output:
76 * 0 message sent
77 * >0 Send failure - see error codes below
78 *
79 */
80extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg,
81 unsigned int bytes);
82
83/* Status values for gru_send_message() */
84#define MQE_OK 0 /* message sent successfully */
85#define MQE_CONGESTION 1 /* temporary congestion, try again */
86#define MQE_QUEUE_FULL 2 /* queue is full */
87#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */
88#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */
89#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */
90
91/*
92 * Advance the receive pointer for the message queue to the next message.
93 * Note: current API requires messages to be gotten & freed in order. Future
94 * API extensions may allow for out-of-order freeing.
95 *
96 * Input
97 * mq message queue
98 * mesq message being freed
99 */
100extern void gru_free_message(void *mq, void *mesq);
101
102/*
103 * Get next message from message queue. Returns pointer to
104 * message OR NULL if no message present.
105 * User must call gru_free_message() after message is processed
106 * in order to move the queue pointers to next message.
107 *
108 * Input
109 * mq message queue
110 *
111 * Output:
112 * p pointer to message
113 * NULL no message available
114 */
115extern void *gru_get_next_message(void *mq);
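A rough sketch of how these calls fit together from a kernel caller follows. It is illustrative only: the queue size and message contents are arbitrary, and kmalloc()/uv_gpa() (from linux/slab.h and asm/uv/uv_hub.h) are assumptions used here to obtain contiguous memory and its global physical address.

/* Hypothetical round trip: create a queue, send one message, drain it. */
static int example_mesq_roundtrip(void)
{
	unsigned int qbytes = 16 * GRU_CACHE_LINE_BYTES;	/* arbitrary size */
	unsigned long mesg[8] = { 0, 0xabcd };	/* word 0 is used by the transport */
	void *q, *m;
	int ret;

	q = kmalloc(qbytes, GFP_KERNEL);	/* physically contiguous, CL aligned */
	if (!q)
		return -ENOMEM;
	gru_create_message_queue(q, qbytes);

	ret = gru_send_message_gpa(uv_gpa(q), mesg, sizeof(mesg));
	if (ret == MQE_OK)
		while ((m = gru_get_next_message(q)) != NULL)
			gru_free_message(q, m);
	kfree(q);
	return ret;
}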
116
117
118/*
119 * Copy data using the GRU. Source or destination can be located in a remote
120 * partition.
121 *
122 * Input:
123 * dest_gpa destination global physical address
124 * src_gpa source global physical address
125 * bytes number of bytes to copy
126 *
127 * Output:
128 * 0 OK
129 * >0 error
130 */
131extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
132 unsigned int bytes);
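For example (buffers and size are hypothetical), a single-page copy through the GRU would look like:

	/* Illustrative only: copy one page between two kernel buffers. */
	ret = gru_copy_gpa(uv_gpa(dst_buf), uv_gpa(src_buf), PAGE_SIZE);
	if (ret)
		printk(KERN_ERR "gru_copy_gpa failed: %d\n", ret);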
133
134#endif /* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
new file mode 100644
index 000000000000..e56e196a6998
--- /dev/null
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -0,0 +1,97 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __GRULIB_H__
20#define __GRULIB_H__
21
22#define GRU_BASENAME "gru"
23#define GRU_FULLNAME "/dev/gru"
24#define GRU_IOCTL_NUM 'G'
25
26/*
27 * Maximum number of GRU segments that a user can have open
28 * ZZZ temp - set high for testing. Revisit.
29 */
30#define GRU_MAX_OPEN_CONTEXTS 32
31
32/* Set Number of Request Blocks */
33#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *)
34
35/* Register task as using the slice */
36#define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *)
37
38/* Fetch exception detail */
39#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *)
40
41/* For user call_os handling - normally a TLB fault */
42#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *)
43
44/* For user unload context */
45#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *)
46
47/* For fetching GRU chiplet status */
48#define GRU_GET_CHIPLET_STATUS _IOWR(GRU_IOCTL_NUM, 10, void *)
49
50/* For user TLB flushing (primarily for tests) */
51#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *)
52
53/* Get some config options (primarily for tests & emulator) */
54#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *)
55
56#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th))
57#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th))
58
59/*
60 * Structure used to pass context creation parameters to the driver
61 */
62struct gru_create_context_req {
63 unsigned long gseg;
64 unsigned int data_segment_bytes;
65 unsigned int control_blocks;
66 unsigned int maximum_thread_count;
67 unsigned int options;
68};
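Purely for illustration, a user-space caller might drive the context-creation ioctl roughly as below; the field values and the gseg address are placeholders, and the surrounding setup (mmap of the GSEG window, option flags from gru.h) is omitted.

	/* Hypothetical user-space sketch; values are placeholders only. */
	int fd = open(GRU_FULLNAME, O_RDWR);
	struct gru_create_context_req req = {
		.gseg = gseg_vaddr,		/* GSEG address (placeholder) */
		.data_segment_bytes = 8192,
		.control_blocks = 16,
		.maximum_thread_count = 1,
		.options = 0,			/* default TLB miss handling */
	};

	if (fd < 0 || ioctl(fd, GRU_CREATE_CONTEXT, &req) < 0)
		return -1;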
69
70/*
71 * Structure used to pass unload context parameters to the driver
72 */
73struct gru_unload_context_req {
74 unsigned long gseg;
75};
76
77/*
78 * Structure used to pass TLB flush parameters to the driver
79 */
80struct gru_flush_tlb_req {
81 unsigned long gseg;
82 unsigned long vaddr;
83 size_t len;
84};
85
86/*
87 * GRU configuration info (temp - for testing)
88 */
89struct gru_config_info {
90 int cpus;
91 int blades;
92 int nodes;
93 int chiplets;
94 int fill[16];
95};
96
97#endif /* __GRULIB_H__ */
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 000000000000..e11e1ac50900
--- /dev/null
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,809 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
9 *
10 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
11 */
12
13#include <linux/kernel.h>
14#include <linux/slab.h>
15#include <linux/mm.h>
16#include <linux/spinlock.h>
17#include <linux/sched.h>
18#include <linux/device.h>
19#include <linux/list.h>
20#include <asm/uv/uv_hub.h>
21#include "gru.h"
22#include "grutables.h"
23#include "gruhandles.h"
24
25unsigned long gru_options __read_mostly;
26
27static struct device_driver gru_driver = {
28 .name = "gru"
29};
30
31static struct device gru_device = {
32 .bus_id = {0},
33 .driver = &gru_driver,
34};
35
36struct device *grudev = &gru_device;
37
38/*
39 * Select a gru fault map to be used by the current cpu. Note that
40 * multiple cpus may be using the same map.
41 * ZZZ should "shift" be used?? Depends on HT cpu numbering
42 * ZZZ should be inline but did not work on emulator
43 */
44int gru_cpu_fault_map_id(void)
45{
46 return uv_blade_processor_id() % GRU_NUM_TFM;
47}
48
49/*--------- ASID Management -------------------------------------------
50 *
51 * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
52 * Once MAX is reached, flush the TLB & start over. However,
53 * some asids may still be in use. There won't be many (percentage wise) still
54 * in use. Search active contexts & determine the value of the first
55 * asid in use ("x"s below). Set "limit" to this value.
56 * This defines a block of assignable asids.
57 *
58 * When "limit" is reached, search forward from limit+1 and determine the
59 * next block of assignable asids.
60 *
61 * Repeat until MAX_ASID is reached, then start over again.
62 *
63 * Each time MAX_ASID is reached, increment the asid generation. Since
64 * the search for in-use asids only checks contexts with GRUs currently
65 * assigned, asids in some contexts will be missed. Prior to loading
66 * a context, the asid generation of the GTS asid is rechecked. If it
67 * doesn't match the current generation, a new asid will be assigned.
68 *
69 * 0---------------x------------x---------------------x----|
70 * ^-next ^-limit ^-MAX_ASID
71 *
72 * All asid manipulation & context loading/unloading is protected by the
73 * gs_lock.
74 */
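A concrete walk-through with made-up numbers may help:

/*
 * Example (illustrative numbers): if the next asid to assign is 0x100 and the
 * scan of loaded contexts finds 0x105 and 0x2400 still in use, limit becomes
 * 0x105 and 0x100..0x104 can be handed out without rescanning.  When 0x105 is
 * reached, the scan runs again: the colliding asid is skipped and the new
 * limit becomes 0x2400, opening the next block.  At MAX_ASID the generation
 * is bumped, the TLB is flushed, and assignment wraps back to MIN_ASID.
 */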
75
76/* Hit the asid limit. Start over */
77static int gru_wrap_asid(struct gru_state *gru)
78{
79 gru_dbg(grudev, "gru %p\n", gru);
80 STAT(asid_wrap);
81 gru->gs_asid_gen++;
82 gru_flush_all_tlb(gru);
83 return MIN_ASID;
84}
85
86/* Find the next chunk of unused asids */
87static int gru_reset_asid_limit(struct gru_state *gru, int asid)
88{
89 int i, gid, inuse_asid, limit;
90
91 gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
92 STAT(asid_next);
93 limit = MAX_ASID;
94 if (asid >= limit)
95 asid = gru_wrap_asid(gru);
96 gid = gru->gs_gid;
97again:
98 for (i = 0; i < GRU_NUM_CCH; i++) {
99 if (!gru->gs_gts[i])
100 continue;
101 inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
102		gru_dbg(grudev, "gru %p, inuse_asid 0x%x, ctxnum %d, gts %p\n",
103 gru, inuse_asid, i, gru->gs_gts[i]);
104 if (inuse_asid == asid) {
105 asid += ASID_INC;
106 if (asid >= limit) {
107 /*
108 * empty range: reset the range limit and
109 * start over
110 */
111 limit = MAX_ASID;
112 if (asid >= MAX_ASID)
113 asid = gru_wrap_asid(gru);
114 goto again;
115 }
116 }
117
118 if ((inuse_asid > asid) && (inuse_asid < limit))
119 limit = inuse_asid;
120 }
121 gru->gs_asid_limit = limit;
122 gru->gs_asid = asid;
123 gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid,
124 limit);
125 return asid;
126}
127
128/* Assign a new ASID to a thread context. */
129static int gru_assign_asid(struct gru_state *gru)
130{
131 int asid;
132
133 spin_lock(&gru->gs_asid_lock);
134 gru->gs_asid += ASID_INC;
135 asid = gru->gs_asid;
136 if (asid >= gru->gs_asid_limit)
137 asid = gru_reset_asid_limit(gru, asid);
138 spin_unlock(&gru->gs_asid_lock);
139
140 gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
141 return asid;
142}
143
144/*
145 * Clear n bits in a word. Return a word indicating the bits that were cleared.
146 * Optionally, build an array of chars that contain the bit numbers allocated.
147 */
148static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
149 char *idx)
150{
151 unsigned long bits = 0;
152 int i;
153
154 do {
155 i = find_first_bit(p, mmax);
156 if (i == mmax)
157 BUG();
158 __clear_bit(i, p);
159 __set_bit(i, &bits);
160 if (idx)
161 *idx++ = i;
162 } while (--n);
163 return bits;
164}
165
166unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
167 char *cbmap)
168{
169 return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
170 cbmap);
171}
172
173unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
174 char *dsmap)
175{
176 return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
177 dsmap);
178}
179
180static void reserve_gru_resources(struct gru_state *gru,
181 struct gru_thread_state *gts)
182{
183 gru->gs_active_contexts++;
184 gts->ts_cbr_map =
185 gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
186 gts->ts_cbr_idx);
187 gts->ts_dsr_map =
188 gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
189}
190
191static void free_gru_resources(struct gru_state *gru,
192 struct gru_thread_state *gts)
193{
194 gru->gs_active_contexts--;
195 gru->gs_cbr_map |= gts->ts_cbr_map;
196 gru->gs_dsr_map |= gts->ts_dsr_map;
197}
198
199/*
200 * Check if a GRU has sufficient free resources to satisfy an allocation
201 * request. Note: GRU locks may or may not be held when this is called. If
202 * not held, recheck after acquiring the appropriate locks.
203 *
204 * Returns 1 if sufficient resources, 0 if not
205 */
206static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
207 int dsr_au_count, int max_active_contexts)
208{
209 return hweight64(gru->gs_cbr_map) >= cbr_au_count
210 && hweight64(gru->gs_dsr_map) >= dsr_au_count
211 && gru->gs_active_contexts < max_active_contexts;
212}
213
214/*
215 * TLB management requires tracking all GRU chiplets that have loaded a GSEG
216 * context.
217 */
218static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms,
219 int ctxnum)
220{
221 struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
222 unsigned short ctxbitmap = (1 << ctxnum);
223 int asid;
224
225 spin_lock(&gms->ms_asid_lock);
226 asid = asids->mt_asid;
227
228 if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) {
229 asid = gru_assign_asid(gru);
230 asids->mt_asid = asid;
231 asids->mt_asid_gen = gru->gs_asid_gen;
232 STAT(asid_new);
233 } else {
234 STAT(asid_reuse);
235 }
236
237 BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
238 asids->mt_ctxbitmap |= ctxbitmap;
239 if (!test_bit(gru->gs_gid, gms->ms_asidmap))
240 __set_bit(gru->gs_gid, gms->ms_asidmap);
241 spin_unlock(&gms->ms_asid_lock);
242
243 gru_dbg(grudev,
244		"gru %x, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
245 gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]);
246 return asid;
247}
248
249static void gru_unload_mm_tracker(struct gru_state *gru,
250 struct gru_mm_struct *gms, int ctxnum)
251{
252 struct gru_mm_tracker *asids;
253 unsigned short ctxbitmap;
254
255 asids = &gms->ms_asids[gru->gs_gid];
256 ctxbitmap = (1 << ctxnum);
257 spin_lock(&gms->ms_asid_lock);
258 BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
259 asids->mt_ctxbitmap ^= ctxbitmap;
260	gru_dbg(grudev, "gru %x, gms %p, ctxnum %d, asidmap 0x%lx\n",
261 gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]);
262 spin_unlock(&gms->ms_asid_lock);
263}
264
265/*
266 * Decrement the reference count on a GTS structure. Free the structure
267 * if the reference count goes to zero.
268 */
269void gts_drop(struct gru_thread_state *gts)
270{
271 if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
272 gru_drop_mmu_notifier(gts->ts_gms);
273 kfree(gts);
274 STAT(gts_free);
275 }
276}
277
278/*
279 * Locate the GTS structure for the current thread.
280 */
281static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
282 *vdata, int tsid)
283{
284 struct gru_thread_state *gts;
285
286 list_for_each_entry(gts, &vdata->vd_head, ts_next)
287 if (gts->ts_tsid == tsid)
288 return gts;
289 return NULL;
290}
291
292/*
293 * Allocate a thread state structure.
294 */
295static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
296 struct gru_vma_data *vdata,
297 int tsid)
298{
299 struct gru_thread_state *gts;
300 int bytes;
301
302 bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
303 CBR_BYTES(vdata->vd_cbr_au_count);
304 bytes += sizeof(struct gru_thread_state);
305 gts = kzalloc(bytes, GFP_KERNEL);
306 if (!gts)
307 return NULL;
308
309 STAT(gts_alloc);
310 atomic_set(&gts->ts_refcnt, 1);
311 mutex_init(&gts->ts_ctxlock);
312 gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
313 gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
314 gts->ts_user_options = vdata->vd_user_options;
315 gts->ts_tsid = tsid;
316 gts->ts_user_options = vdata->vd_user_options;
317 gts->ts_ctxnum = NULLCTX;
318 gts->ts_mm = current->mm;
319 gts->ts_vma = vma;
320 gts->ts_tlb_int_select = -1;
321 gts->ts_gms = gru_register_mmu_notifier();
322 if (!gts->ts_gms)
323 goto err;
324
325 gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
326 return gts;
327
328err:
329 gts_drop(gts);
330 return NULL;
331}
332
333/*
334 * Allocate a vma private data structure.
335 */
336struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
337{
338 struct gru_vma_data *vdata = NULL;
339
340 vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
341 if (!vdata)
342 return NULL;
343
344 INIT_LIST_HEAD(&vdata->vd_head);
345 spin_lock_init(&vdata->vd_lock);
346 gru_dbg(grudev, "alloc vdata %p\n", vdata);
347 return vdata;
348}
349
350/*
351 * Find the thread state structure for the current thread.
352 */
353struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
354 int tsid)
355{
356 struct gru_vma_data *vdata = vma->vm_private_data;
357 struct gru_thread_state *gts;
358
359 spin_lock(&vdata->vd_lock);
360 gts = gru_find_current_gts_nolock(vdata, tsid);
361 spin_unlock(&vdata->vd_lock);
362 gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
363 return gts;
364}
365
366/*
367 * Allocate a new thread state for a GSEG. Note that races may allow
368 * another thread to create a gts first.
369 */
370struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
371 int tsid)
372{
373 struct gru_vma_data *vdata = vma->vm_private_data;
374 struct gru_thread_state *gts, *ngts;
375
376 gts = gru_alloc_gts(vma, vdata, tsid);
377 if (!gts)
378 return NULL;
379
380 spin_lock(&vdata->vd_lock);
381 ngts = gru_find_current_gts_nolock(vdata, tsid);
382 if (ngts) {
383 gts_drop(gts);
384 gts = ngts;
385 STAT(gts_double_allocate);
386 } else {
387 list_add(&gts->ts_next, &vdata->vd_head);
388 }
389 spin_unlock(&vdata->vd_lock);
390 gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
391 return gts;
392}
393
394/*
395 * Free the GRU context assigned to the thread state.
396 */
397static void gru_free_gru_context(struct gru_thread_state *gts)
398{
399 struct gru_state *gru;
400
401 gru = gts->ts_gru;
402 gru_dbg(grudev, "gts %p, gru %p\n", gts, gru);
403
404 spin_lock(&gru->gs_lock);
405 gru->gs_gts[gts->ts_ctxnum] = NULL;
406 free_gru_resources(gru, gts);
407 BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
408 __clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
409 gts->ts_ctxnum = NULLCTX;
410 gts->ts_gru = NULL;
411 spin_unlock(&gru->gs_lock);
412
413 gts_drop(gts);
414 STAT(free_context);
415}
416
417/*
418 * Prefetching cachelines helps hardware performance.
419 * (Strictly a performance enhancement. Not functionally required).
420 */
421static void prefetch_data(void *p, int num, int stride)
422{
423 while (num-- > 0) {
424 prefetchw(p);
425 p += stride;
426 }
427}
428
429static inline long gru_copy_handle(void *d, void *s)
430{
431 memcpy(d, s, GRU_HANDLE_BYTES);
432 return GRU_HANDLE_BYTES;
433}
434
435static void gru_prefetch_context(void *gseg, void *cb, void *cbe, unsigned long cbrmap,
436 unsigned long length)
437{
438 int i, scr;
439
440 prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
441 GRU_CACHE_LINE_BYTES);
442
443 for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
444 prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
445 prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
446 GRU_CACHE_LINE_BYTES);
447 cb += GRU_HANDLE_STRIDE;
448 }
449}
450
451static void gru_load_context_data(void *save, void *grubase, int ctxnum,
452 unsigned long cbrmap, unsigned long dsrmap)
453{
454 void *gseg, *cb, *cbe;
455 unsigned long length;
456 int i, scr;
457
458 gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
459 cb = gseg + GRU_CB_BASE;
460 cbe = grubase + GRU_CBE_BASE;
461 length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
462 gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
463
464 for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
465 save += gru_copy_handle(cb, save);
466 save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
467 cb += GRU_HANDLE_STRIDE;
468 }
469
470 memcpy(gseg + GRU_DS_BASE, save, length);
471}
472
473static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
474 unsigned long cbrmap, unsigned long dsrmap)
475{
476 void *gseg, *cb, *cbe;
477 unsigned long length;
478 int i, scr;
479
480 gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
481 cb = gseg + GRU_CB_BASE;
482 cbe = grubase + GRU_CBE_BASE;
483 length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
484 gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
485
486 for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
487 save += gru_copy_handle(save, cb);
488 save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
489 cb += GRU_HANDLE_STRIDE;
490 }
491 memcpy(save, gseg + GRU_DS_BASE, length);
492}
493
494void gru_unload_context(struct gru_thread_state *gts, int savestate)
495{
496 struct gru_state *gru = gts->ts_gru;
497 struct gru_context_configuration_handle *cch;
498 int ctxnum = gts->ts_ctxnum;
499
500 zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
501 cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
502
503 lock_cch_handle(cch);
504 if (cch_interrupt_sync(cch))
505 BUG();
506 gru_dbg(grudev, "gts %p\n", gts);
507
508 gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
509 if (savestate)
510 gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
511 ctxnum, gts->ts_cbr_map,
512 gts->ts_dsr_map);
513
514 if (cch_deallocate(cch))
515 BUG();
516 gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */
517 unlock_cch_handle(cch);
518
519 gru_free_gru_context(gts);
520 STAT(unload_context);
521}
522
523/*
524 * Load a GRU context by copying it from the thread data structure in memory
525 * to the GRU.
526 */
527static void gru_load_context(struct gru_thread_state *gts)
528{
529 struct gru_state *gru = gts->ts_gru;
530 struct gru_context_configuration_handle *cch;
531 int err, asid, ctxnum = gts->ts_ctxnum;
532
533 gru_dbg(grudev, "gts %p\n", gts);
534 cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
535
536 lock_cch_handle(cch);
537 asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
538 cch->tfm_fault_bit_enable =
539 (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
540 || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
541 cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
542 if (cch->tlb_int_enable) {
543 gts->ts_tlb_int_select = gru_cpu_fault_map_id();
544 cch->tlb_int_select = gts->ts_tlb_int_select;
545 }
546 cch->tfm_done_bit_enable = 0;
547 err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map);
548 if (err) {
549 gru_dbg(grudev,
550 "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
551 err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
552 BUG();
553 }
554
555 gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
556 gts->ts_cbr_map, gts->ts_dsr_map);
557
558 if (cch_start(cch))
559 BUG();
560 unlock_cch_handle(cch);
561
562 STAT(load_context);
563}
564
565/*
566 * Update fields in an active CCH:
567 * - retarget interrupts on local blade
568 * - force a delayed context unload by clearing the CCH asids. This
569 * forces TLB misses for new GRU instructions. The context is unloaded
570 * when the next TLB miss occurs.
571 */
572static int gru_update_cch(struct gru_thread_state *gts, int int_select)
573{
574 struct gru_context_configuration_handle *cch;
575 struct gru_state *gru = gts->ts_gru;
576 int i, ctxnum = gts->ts_ctxnum, ret = 0;
577
578 cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
579
580 lock_cch_handle(cch);
581 if (cch->state == CCHSTATE_ACTIVE) {
582 if (gru->gs_gts[gts->ts_ctxnum] != gts)
583 goto exit;
584 if (cch_interrupt(cch))
585 BUG();
586 if (int_select >= 0) {
587 gts->ts_tlb_int_select = int_select;
588 cch->tlb_int_select = int_select;
589 } else {
590 for (i = 0; i < 8; i++)
591 cch->asid[i] = 0;
592 cch->tfm_fault_bit_enable = 0;
593 cch->tlb_int_enable = 0;
594 gts->ts_force_unload = 1;
595 }
596 if (cch_start(cch))
597 BUG();
598 ret = 1;
599 }
600exit:
601 unlock_cch_handle(cch);
602 return ret;
603}
604
605/*
606 * Update CCH tlb interrupt select. Required when all of the following are true:
607 * - task's GRU context is loaded into a GRU
608 * - task is using interrupt notification for TLB faults
609 * - task has migrated to a different cpu on the same blade where
610 * it was previously running.
611 */
612static int gru_retarget_intr(struct gru_thread_state *gts)
613{
614 if (gts->ts_tlb_int_select < 0
615 || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
616 return 0;
617
618 gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
619 gru_cpu_fault_map_id());
620 return gru_update_cch(gts, gru_cpu_fault_map_id());
621}
622
623
624/*
625 * Insufficient GRU resources available on the local blade. Steal a context from
626 * a process. This is a hack until a _real_ resource scheduler is written....
627 */
628#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
629#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
630 ((g)+1) : &(b)->bs_grus[0])
631
632static void gru_steal_context(struct gru_thread_state *gts)
633{
634 struct gru_blade_state *blade;
635 struct gru_state *gru, *gru0;
636 struct gru_thread_state *ngts = NULL;
637 int ctxnum, ctxnum0, flag = 0, cbr, dsr;
638
639 cbr = gts->ts_cbr_au_count;
640 dsr = gts->ts_dsr_au_count;
641
642 preempt_disable();
643 blade = gru_base[uv_numa_blade_id()];
644 spin_lock(&blade->bs_lock);
645
646 ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
647 gru = blade->bs_lru_gru;
648 if (ctxnum == 0)
649 gru = next_gru(blade, gru);
650 ctxnum0 = ctxnum;
651 gru0 = gru;
652 while (1) {
653 if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
654 break;
655 spin_lock(&gru->gs_lock);
656 for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
657 if (flag && gru == gru0 && ctxnum == ctxnum0)
658 break;
659 ngts = gru->gs_gts[ctxnum];
660 /*
661 * We are grabbing locks out of order, so trylock is
662 * needed. GTSs are usually not locked, so the odds of
663 * success are high. If trylock fails, try to steal a
664 * different GSEG.
665 */
666 if (ngts && mutex_trylock(&ngts->ts_ctxlock))
667 break;
668 ngts = NULL;
669 flag = 1;
670 }
671 spin_unlock(&gru->gs_lock);
672 if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
673 break;
674 ctxnum = 0;
675 gru = next_gru(blade, gru);
676 }
677 blade->bs_lru_gru = gru;
678 blade->bs_lru_ctxnum = ctxnum;
679 spin_unlock(&blade->bs_lock);
680 preempt_enable();
681
682 if (ngts) {
683 STAT(steal_context);
684 ngts->ts_steal_jiffies = jiffies;
685 gru_unload_context(ngts, 1);
686 mutex_unlock(&ngts->ts_ctxlock);
687 } else {
688 STAT(steal_context_failed);
689 }
690 gru_dbg(grudev,
691 "stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;"
692 " avail cb %ld, ds %ld\n",
693 gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
694 hweight64(gru->gs_dsr_map));
695}
696
697/*
698 * Scan the GRUs on the local blade & assign a GRU context.
699 */
700static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
701{
702 struct gru_state *gru, *grux;
703 int i, max_active_contexts;
704
705 preempt_disable();
706
707again:
708 gru = NULL;
709 max_active_contexts = GRU_NUM_CCH;
710 for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
711 if (check_gru_resources(grux, gts->ts_cbr_au_count,
712 gts->ts_dsr_au_count,
713 max_active_contexts)) {
714 gru = grux;
715 max_active_contexts = grux->gs_active_contexts;
716 if (max_active_contexts == 0)
717 break;
718 }
719 }
720
721 if (gru) {
722 spin_lock(&gru->gs_lock);
723 if (!check_gru_resources(gru, gts->ts_cbr_au_count,
724 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
725 spin_unlock(&gru->gs_lock);
726 goto again;
727 }
728 reserve_gru_resources(gru, gts);
729 gts->ts_gru = gru;
730 gts->ts_ctxnum =
731 find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
732 BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
733 atomic_inc(&gts->ts_refcnt);
734 gru->gs_gts[gts->ts_ctxnum] = gts;
735 __set_bit(gts->ts_ctxnum, &gru->gs_context_map);
736 spin_unlock(&gru->gs_lock);
737
738 STAT(assign_context);
739 gru_dbg(grudev,
740 "gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n",
741 gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
742 gts->ts_gru->gs_gid, gts->ts_ctxnum,
743 gts->ts_cbr_au_count, gts->ts_dsr_au_count);
744 } else {
745 gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
746 STAT(assign_context_failed);
747 }
748
749 preempt_enable();
750 return gru;
751}
752
753/*
754 * gru_fault
755 *
756 * Map the user's GRU segment
757 *
758 * Note: GRU segments are always mmaped on GRU_GSEG_PAGESIZE boundaries.
759 */
760int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
761{
762 struct gru_thread_state *gts;
763 unsigned long paddr, vaddr;
764
765 vaddr = (unsigned long)vmf->virtual_address;
766 gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
767 vma, vaddr, GSEG_BASE(vaddr));
768 STAT(nopfn);
769
770 /* The following check ensures vaddr is a valid address in the VMA */
771 gts = gru_find_thread_state(vma, TSID(vaddr, vma));
772 if (!gts)
773 return VM_FAULT_SIGBUS;
774
775again:
776 preempt_disable();
777 mutex_lock(&gts->ts_ctxlock);
778 if (gts->ts_gru) {
779 if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
780 STAT(migrated_nopfn_unload);
781 gru_unload_context(gts, 1);
782 } else {
783 if (gru_retarget_intr(gts))
784 STAT(migrated_nopfn_retarget);
785 }
786 }
787
788 if (!gts->ts_gru) {
789 if (!gru_assign_gru_context(gts)) {
790 mutex_unlock(&gts->ts_ctxlock);
791 preempt_enable();
792 schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
793 if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
794 gru_steal_context(gts);
795 goto again;
796 }
797 gru_load_context(gts);
798 paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
799 remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
800 paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
801 vma->vm_page_prot);
802 }
803
804 mutex_unlock(&gts->ts_ctxlock);
805 preempt_enable();
806
807 return VM_FAULT_NOPAGE;
808}
809
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
new file mode 100644
index 000000000000..533923f83f1a
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -0,0 +1,336 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * PROC INTERFACES
5 *
6 * This file supports the /proc interfaces for the GRU driver
7 *
8 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/proc_fs.h>
26#include <linux/device.h>
27#include <linux/seq_file.h>
28#include <linux/uaccess.h>
29#include "gru.h"
30#include "grulib.h"
31#include "grutables.h"
32
33#define printstat(s, f) printstat_val(s, &gru_stats.f, #f)
34
35static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
36{
37 unsigned long val = atomic_long_read(v);
38
39 if (val)
40 seq_printf(s, "%16lu %s\n", val, id);
41}
42
43static int statistics_show(struct seq_file *s, void *p)
44{
45 printstat(s, vdata_alloc);
46 printstat(s, vdata_free);
47 printstat(s, gts_alloc);
48 printstat(s, gts_free);
49 printstat(s, vdata_double_alloc);
50 printstat(s, gts_double_allocate);
51 printstat(s, assign_context);
52 printstat(s, assign_context_failed);
53 printstat(s, free_context);
54 printstat(s, load_context);
55 printstat(s, unload_context);
56 printstat(s, steal_context);
57 printstat(s, steal_context_failed);
58 printstat(s, nopfn);
59 printstat(s, break_cow);
60 printstat(s, asid_new);
61 printstat(s, asid_next);
62 printstat(s, asid_wrap);
63 printstat(s, asid_reuse);
64 printstat(s, intr);
65 printstat(s, call_os);
66 printstat(s, call_os_check_for_bug);
67 printstat(s, call_os_wait_queue);
68 printstat(s, user_flush_tlb);
69 printstat(s, user_unload_context);
70 printstat(s, user_exception);
71 printstat(s, set_task_slice);
72 printstat(s, migrate_check);
73 printstat(s, migrated_retarget);
74 printstat(s, migrated_unload);
75 printstat(s, migrated_unload_delay);
76 printstat(s, migrated_nopfn_retarget);
77 printstat(s, migrated_nopfn_unload);
78 printstat(s, tlb_dropin);
79 printstat(s, tlb_dropin_fail_no_asid);
80 printstat(s, tlb_dropin_fail_upm);
81 printstat(s, tlb_dropin_fail_invalid);
82 printstat(s, tlb_dropin_fail_range_active);
83 printstat(s, tlb_dropin_fail_idle);
84 printstat(s, tlb_dropin_fail_fmm);
85 printstat(s, mmu_invalidate_range);
86 printstat(s, mmu_invalidate_page);
87 printstat(s, mmu_clear_flush_young);
88 printstat(s, flush_tlb);
89 printstat(s, flush_tlb_gru);
90 printstat(s, flush_tlb_gru_tgh);
91 printstat(s, flush_tlb_gru_zero_asid);
92 printstat(s, copy_gpa);
93 printstat(s, mesq_receive);
94 printstat(s, mesq_receive_none);
95 printstat(s, mesq_send);
96 printstat(s, mesq_send_failed);
97 printstat(s, mesq_noop);
98 printstat(s, mesq_send_unexpected_error);
99 printstat(s, mesq_send_lb_overflow);
100 printstat(s, mesq_send_qlimit_reached);
101 printstat(s, mesq_send_amo_nacked);
102 printstat(s, mesq_send_put_nacked);
103 printstat(s, mesq_qf_not_full);
104 printstat(s, mesq_qf_locked);
105 printstat(s, mesq_qf_noop_not_full);
106 printstat(s, mesq_qf_switch_head_failed);
107 printstat(s, mesq_qf_unexpected_error);
108 printstat(s, mesq_noop_unexpected_error);
109 printstat(s, mesq_noop_lb_overflow);
110 printstat(s, mesq_noop_qlimit_reached);
111 printstat(s, mesq_noop_amo_nacked);
112 printstat(s, mesq_noop_put_nacked);
113 return 0;
114}
115
116static ssize_t statistics_write(struct file *file, const char __user *userbuf,
117 size_t count, loff_t *data)
118{
119 memset(&gru_stats, 0, sizeof(gru_stats));
120 return count;
121}
122
123static int options_show(struct seq_file *s, void *p)
124{
125 seq_printf(s, "0x%lx\n", gru_options);
126 return 0;
127}
128
129static ssize_t options_write(struct file *file, const char __user *userbuf,
130 size_t count, loff_t *data)
131{
132 unsigned long val;
133 char buf[80];
134
135 if (copy_from_user(buf, userbuf, min(count, sizeof(buf) - 1)))
136         return -EFAULT;
137 buf[min(count, sizeof(buf) - 1)] = '\0';
138 if (!strict_strtoul(buf, 10, &val))
139 gru_options = val;
140
141 return count;
142}
143
144static int cch_seq_show(struct seq_file *file, void *data)
145{
146 long gid = *(long *)data;
147 int i;
148 struct gru_state *gru = GID_TO_GRU(gid);
149 struct gru_thread_state *ts;
150 const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
151
152 if (gid == 0)
153 seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid",
154 "ctx#", "pid", "cbrs", "dsbytes", "mode");
155 if (gru)
156 for (i = 0; i < GRU_NUM_CCH; i++) {
157 ts = gru->gs_gts[i];
158 if (!ts)
159 continue;
160 seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n",
161 gru->gs_gid, gru->gs_blade_id, i,
162 ts->ts_tgid_owner,
163 ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
164 ts->ts_dsr_au_count * GRU_DSR_AU_BYTES,
165 mode[ts->ts_user_options &
166 GRU_OPT_MISS_MASK]);
167 }
168
169 return 0;
170}
171
172static int gru_seq_show(struct seq_file *file, void *data)
173{
174 long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
175 struct gru_state *gru = GID_TO_GRU(gid);
176
177 if (gid == 0) {
178 seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
179 "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
180 seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
181 "busy", "busy", "free", "free", "free");
182 }
183 if (gru) {
184 ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
185 cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
186 dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
187 seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
188 gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
189 GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
190 ctxfree, cbrfree, dsrfree);
191 }
192
193 return 0;
194}
195
196static void seq_stop(struct seq_file *file, void *data)
197{
198}
199
200static void *seq_start(struct seq_file *file, loff_t *gid)
201{
202 if (*gid < GRU_MAX_GRUS)
203 return gid;
204 return NULL;
205}
206
207static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
208{
209 (*gid)++;
210 if (*gid < GRU_MAX_GRUS)
211 return gid;
212 return NULL;
213}
214
215static const struct seq_operations cch_seq_ops = {
216 .start = seq_start,
217 .next = seq_next,
218 .stop = seq_stop,
219 .show = cch_seq_show
220};
221
222static const struct seq_operations gru_seq_ops = {
223 .start = seq_start,
224 .next = seq_next,
225 .stop = seq_stop,
226 .show = gru_seq_show
227};
228
229static int statistics_open(struct inode *inode, struct file *file)
230{
231 return single_open(file, statistics_show, NULL);
232}
233
234static int options_open(struct inode *inode, struct file *file)
235{
236 return single_open(file, options_show, NULL);
237}
238
239static int cch_open(struct inode *inode, struct file *file)
240{
241 return seq_open(file, &cch_seq_ops);
242}
243
244static int gru_open(struct inode *inode, struct file *file)
245{
246 return seq_open(file, &gru_seq_ops);
247}
248
249/* *INDENT-OFF* */
250static const struct file_operations statistics_fops = {
251 .open = statistics_open,
252 .read = seq_read,
253 .write = statistics_write,
254 .llseek = seq_lseek,
255 .release = single_release,
256};
257
258static const struct file_operations options_fops = {
259 .open = options_open,
260 .read = seq_read,
261 .write = options_write,
262 .llseek = seq_lseek,
263 .release = single_release,
264};
265
266static const struct file_operations cch_fops = {
267 .open = cch_open,
268 .read = seq_read,
269 .llseek = seq_lseek,
270 .release = seq_release,
271};
272static const struct file_operations gru_fops = {
273 .open = gru_open,
274 .read = seq_read,
275 .llseek = seq_lseek,
276 .release = seq_release,
277};
278
279static struct proc_entry {
280 char *name;
281 int mode;
282 const struct file_operations *fops;
283 struct proc_dir_entry *entry;
284} proc_files[] = {
285 {"statistics", 0644, &statistics_fops},
286 {"debug_options", 0644, &options_fops},
287 {"cch_status", 0444, &cch_fops},
288 {"gru_status", 0444, &gru_fops},
289 {NULL}
290};
291/* *INDENT-ON* */
292
293static struct proc_dir_entry *proc_gru __read_mostly;
294
295static int create_proc_file(struct proc_entry *p)
296{
297 p->entry = create_proc_entry(p->name, p->mode, proc_gru);
298 if (!p->entry)
299 return -1;
300 p->entry->proc_fops = p->fops;
301 return 0;
302}
303
304static void delete_proc_files(void)
305{
306 struct proc_entry *p;
307
308 if (proc_gru) {
309 for (p = proc_files; p->name; p++)
310 if (p->entry)
311 remove_proc_entry(p->name, proc_gru);
312 remove_proc_entry("sgi_uv/gru", NULL);
313 }
314}
315
316int gru_proc_init(void)
317{
318 struct proc_entry *p;
319
320 proc_mkdir("sgi_uv", NULL);
321 proc_gru = proc_mkdir("sgi_uv/gru", NULL);
322
323 for (p = proc_files; p->name; p++)
324 if (create_proc_file(p))
325 goto err;
326 return 0;
327
328err:
329 delete_proc_files();
330 return -1;
331}
332
333void gru_proc_exit(void)
334{
335 delete_proc_files();
336}
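
Taken together, and assuming gru_proc_init() above succeeds, the interfaces in this file appear as /proc/sgi_uv/gru/statistics, debug_options, cch_status and gru_status. Reading statistics dumps every non-zero counter in gru_stats and any write clears them; writing a decimal value to debug_options sets gru_options (1 enables OPT_DPRINT); cch_status and gru_status print per-context and per-chiplet summaries respectively.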
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
new file mode 100644
index 000000000000..a78f70deeb59
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -0,0 +1,609 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * GRU DRIVER TABLES, MACROS, externs, etc
5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifndef __GRUTABLES_H__
24#define __GRUTABLES_H__
25
26/*
27 * GRU Chiplet:
28 * The GRU is a user addressable memory accelerator. It provides
29 * several forms of load, store, memset, bcopy instructions. In addition, it
30 * contains special instructions for AMOs, sending messages to message
31 * queues, etc.
32 *
33 * The GRU is an integral part of the node controller. It connects
34 * directly to the cpu socket. In its current implementation, there are 2
35 * GRU chiplets in the node controller on each blade (~node).
36 *
37 * The entire GRU memory space is fully coherent and cacheable by the cpus.
38 *
39 * Each GRU chiplet has a physical memory map that looks like the following:
40 *
41 * +-----------------+
42 * |/////////////////|
43 * |/////////////////|
44 * |/////////////////|
45 * |/////////////////|
46 * |/////////////////|
47 * |/////////////////|
48 * |/////////////////|
49 * |/////////////////|
50 * +-----------------+
51 * | system control |
52 * +-----------------+ _______ +-------------+
53 * |/////////////////| / | |
54 * |/////////////////| / | |
55 * |/////////////////| / | instructions|
56 * |/////////////////| / | |
57 * |/////////////////| / | |
58 * |/////////////////| / |-------------|
59 * |/////////////////| / | |
60 * +-----------------+ | |
61 * | context 15 | | data |
62 * +-----------------+ | |
63 * | ...... | \ | |
64 * +-----------------+ \____________ +-------------+
65 * | context 1 |
66 * +-----------------+
67 * | context 0 |
68 * +-----------------+
69 *
70 * Each of the "contexts" is a chunk of memory that can be mmaped into user
71 * space. The context consists of 2 parts:
72 *
73 * - an instruction space that can be directly accessed by the user
74 * to issue GRU instructions and to check instruction status.
75 *
76 * - a data area that acts as normal RAM.
77 *
78 * User instructions contain virtual addresses of data to be accessed by the
79 * GRU. The GRU contains a TLB that is used to convert these user virtual
80 * addresses to physical addresses.
81 *
82 * The "system control" area of the GRU chiplet is used by the kernel driver
83 * to manage user contexts and to perform functions such as TLB dropin and
84 * purging.
85 *
86 * One context may be reserved for the kernel and used for cross-partition
87 * communication. The GRU will also be used to asynchronously zero out
88 * large blocks of memory (not currently implemented).
89 *
90 *
91 * Tables:
92 *
93 * VDATA-VMA Data - Holds a few parameters. Head of linked list of
94 * GTS tables for threads using the GSEG
95 * GTS - Gru Thread State - contains info for managing a GSEG context. A
96 * GTS is allocated for each thread accessing a
97 * GSEG.
98 * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is
99 * not loaded into a GRU
100 * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
101 * where a GSEG has been loaded. Similar to
102 * an mm_struct but for GRU.
103 *
104 * GS - GRU State - Used to manage the state of a GRU chiplet
105 * BS - Blade State - Used to manage state of all GRU chiplets
106 * on a blade
107 *
108 *
109 * Normal task tables for a task using the GRU.
110 * - 2 threads in process
111 * - 2 GSEGs open in process
112 * - GSEG1 is being used by both threads
113 * - GSEG2 is used only by thread 2
114 *
115 * task -->|
116 * task ---+---> mm ->------ (notifier) -------+-> gms
117 * | |
118 * |--> vma -> vdata ---> gts--->| GSEG1 (thread1)
119 * | | |
120 * | +-> gts--->| GSEG1 (thread2)
121 * | |
122 * |--> vma -> vdata ---> gts--->| GSEG2 (thread2)
123 * .
124 * .
125 *
126 * GSEGs are marked DONTCOPY on fork
127 *
128 * At open
129 * file.private_data -> NULL
130 *
131 * At mmap,
132 * vma -> vdata
133 *
134 * After gseg reference
135 * vma -> vdata ->gts
136 *
137 * After fork
138 * parent
139 * vma -> vdata -> gts
140 * child
141 * (vma is not copied)
142 *
143 */
144
145#include <linux/rmap.h>
146#include <linux/interrupt.h>
147#include <linux/mutex.h>
148#include <linux/wait.h>
149#include <linux/mmu_notifier.h>
150#include "gru.h"
151#include "gruhandles.h"
152
153extern struct gru_stats_s gru_stats;
154extern struct gru_blade_state *gru_base[];
155extern unsigned long gru_start_paddr, gru_end_paddr;
156
157#define GRU_MAX_BLADES MAX_NUMNODES
158#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
159
160#define GRU_DRIVER_ID_STR "SGI GRU Device Driver"
161#define GRU_DRIVER_VERSION_STR "0.80"
162
163/*
164 * GRU statistics.
165 */
166struct gru_stats_s {
167 atomic_long_t vdata_alloc;
168 atomic_long_t vdata_free;
169 atomic_long_t gts_alloc;
170 atomic_long_t gts_free;
171 atomic_long_t vdata_double_alloc;
172 atomic_long_t gts_double_allocate;
173 atomic_long_t assign_context;
174 atomic_long_t assign_context_failed;
175 atomic_long_t free_context;
176 atomic_long_t load_context;
177 atomic_long_t unload_context;
178 atomic_long_t steal_context;
179 atomic_long_t steal_context_failed;
180 atomic_long_t nopfn;
181 atomic_long_t break_cow;
182 atomic_long_t asid_new;
183 atomic_long_t asid_next;
184 atomic_long_t asid_wrap;
185 atomic_long_t asid_reuse;
186 atomic_long_t intr;
187 atomic_long_t call_os;
188 atomic_long_t call_os_check_for_bug;
189 atomic_long_t call_os_wait_queue;
190 atomic_long_t user_flush_tlb;
191 atomic_long_t user_unload_context;
192 atomic_long_t user_exception;
193 atomic_long_t set_task_slice;
194 atomic_long_t migrate_check;
195 atomic_long_t migrated_retarget;
196 atomic_long_t migrated_unload;
197 atomic_long_t migrated_unload_delay;
198 atomic_long_t migrated_nopfn_retarget;
199 atomic_long_t migrated_nopfn_unload;
200 atomic_long_t tlb_dropin;
201 atomic_long_t tlb_dropin_fail_no_asid;
202 atomic_long_t tlb_dropin_fail_upm;
203 atomic_long_t tlb_dropin_fail_invalid;
204 atomic_long_t tlb_dropin_fail_range_active;
205 atomic_long_t tlb_dropin_fail_idle;
206 atomic_long_t tlb_dropin_fail_fmm;
207 atomic_long_t mmu_invalidate_range;
208 atomic_long_t mmu_invalidate_page;
209 atomic_long_t mmu_clear_flush_young;
210 atomic_long_t flush_tlb;
211 atomic_long_t flush_tlb_gru;
212 atomic_long_t flush_tlb_gru_tgh;
213 atomic_long_t flush_tlb_gru_zero_asid;
214
215 atomic_long_t copy_gpa;
216
217 atomic_long_t mesq_receive;
218 atomic_long_t mesq_receive_none;
219 atomic_long_t mesq_send;
220 atomic_long_t mesq_send_failed;
221 atomic_long_t mesq_noop;
222 atomic_long_t mesq_send_unexpected_error;
223 atomic_long_t mesq_send_lb_overflow;
224 atomic_long_t mesq_send_qlimit_reached;
225 atomic_long_t mesq_send_amo_nacked;
226 atomic_long_t mesq_send_put_nacked;
227 atomic_long_t mesq_qf_not_full;
228 atomic_long_t mesq_qf_locked;
229 atomic_long_t mesq_qf_noop_not_full;
230 atomic_long_t mesq_qf_switch_head_failed;
231 atomic_long_t mesq_qf_unexpected_error;
232 atomic_long_t mesq_noop_unexpected_error;
233 atomic_long_t mesq_noop_lb_overflow;
234 atomic_long_t mesq_noop_qlimit_reached;
235 atomic_long_t mesq_noop_amo_nacked;
236 atomic_long_t mesq_noop_put_nacked;
237
238};
239
240#define OPT_DPRINT 1
241#define OPT_STATS 2
242#define GRU_QUICKLOOK 4
243
244
245#define IRQ_GRU 110 /* Starting IRQ number for interrupts */
246
247/* Delay in jiffies between attempts to assign a GRU context */
248#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000)
249
250/*
251 * If a process has its context stolen, min delay in jiffies before trying to
252 * steal a context from another process.
253 */
254#define GRU_STEAL_DELAY ((HZ * 200) / 1000)
255
256#define STAT(id) do { \
257 if (gru_options & OPT_STATS) \
258 atomic_long_inc(&gru_stats.id); \
259 } while (0)
260
261#ifdef CONFIG_SGI_GRU_DEBUG
262#define gru_dbg(dev, fmt, x...) \
263 do { \
264 if (gru_options & OPT_DPRINT) \
265 dev_dbg(dev, "%s: " fmt, __func__, x); \
266 } while (0)
267#else
268#define gru_dbg(x...)
269#endif
270
271/*-----------------------------------------------------------------------------
272 * ASID management
273 */
274#define MAX_ASID 0xfffff0
275#define MIN_ASID 8
276#define ASID_INC 8 /* number of regions */
277
278/* Generate a GRU asid value from a GRU base asid & a virtual address. */
279#if defined CONFIG_IA64
280#define VADDR_HI_BIT 64
281#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3)
282#elif defined CONFIG_X86_64
283#define VADDR_HI_BIT 48
284#define GRUREGION(addr) (0) /* ZZZ could do better */
285#else
286#error "Unsupported architecture"
287#endif
288#define GRUASID(asid, addr) ((asid) + GRUREGION(addr))
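
As a worked example (illustrative only): with VADDR_HI_BIT of 64 on IA64, GRUREGION(0x4000000000000000) is (0x4000000000000000 >> 61) & 3 = 2, so GRUASID(8, 0x4000000000000000) yields ASID 10. Base ASIDs are handed out in steps of ASID_INC so that each region's offset lands on an otherwise unused value.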
289
290/*------------------------------------------------------------------------------
291 * File & VMS Tables
292 */
293
294struct gru_state;
295
296/*
297 * This structure is pointed to from the mm_struct via the notifier pointer.
298 * There is one of these per address space.
299 */
300struct gru_mm_tracker {
301 unsigned int mt_asid_gen; /* ASID wrap count */
302 int mt_asid; /* current base ASID for gru */
303 unsigned short mt_ctxbitmap; /* bitmap of contexts using
304 asid */
305};
306
307struct gru_mm_struct {
308 struct mmu_notifier ms_notifier;
309 atomic_t ms_refcnt;
310 spinlock_t ms_asid_lock; /* protects ASID assignment */
311 atomic_t ms_range_active;/* num range_invals active */
312 char ms_released;
313 wait_queue_head_t ms_wait_queue;
314 DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
315 struct gru_mm_tracker ms_asids[GRU_MAX_GRUS];
316};
317
318/*
319 * One of these structures is allocated when a GSEG is mmaped. The
320 * structure is pointed to by the vma->vm_private_data field in the vma struct.
321 */
322struct gru_vma_data {
323 spinlock_t vd_lock; /* Serialize access to vma */
324 struct list_head vd_head; /* head of linked list of gts */
325 long vd_user_options;/* misc user option flags */
326 int vd_cbr_au_count;
327 int vd_dsr_au_count;
328};
329
330/*
331 * One of these is allocated for each thread accessing a mmaped GRU. A linked
332 * list of these structure is hung off the struct gru_vma_data in the mm_struct.
333 */
334struct gru_thread_state {
335 struct list_head ts_next; /* list - head at vma-private */
336 struct mutex ts_ctxlock; /* load/unload CTX lock */
337 struct mm_struct *ts_mm; /* mm currently mapped to
338 context */
339 struct vm_area_struct *ts_vma; /* vma of GRU context */
340 struct gru_state *ts_gru; /* GRU where the context is
341 loaded */
342 struct gru_mm_struct *ts_gms; /* asid & ioproc struct */
343 unsigned long ts_cbr_map; /* map of allocated CBRs */
344 unsigned long ts_dsr_map; /* map of allocated DATA
345 resources */
346 unsigned long ts_steal_jiffies;/* jiffies when context last
347 stolen */
348 long ts_user_options;/* misc user option flags */
349 pid_t ts_tgid_owner; /* task that is using the
350 context - for migration */
351 int ts_tsid; /* thread that owns the
352 structure */
353 int ts_tlb_int_select;/* target cpu if interrupts
354 enabled */
355 int ts_ctxnum; /* context number where the
356 context is loaded */
357 atomic_t ts_refcnt; /* reference count GTS */
358 unsigned char ts_dsr_au_count;/* Number of DSR resources
359 required for context */
360 unsigned char ts_cbr_au_count;/* Number of CBR resources
361 required for context */
362 char ts_force_unload;/* force context to be unloaded
363 after migration */
364 char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
365 allocated CB */
366 unsigned long ts_gdata[0]; /* save area for GRU data (CB,
367 DS, CBE) */
368};
369
370/*
371 * Threaded programs actually allocate an array of GSEGs when a context is
372 * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
373 * array.
374 */
375#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
376#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \
377 (gts)->ts_tsid * GRU_GSEG_PAGESIZE)
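
A minimal sketch (not part of the driver) of how these two macros relate: the base user address of the GSEG containing a faulting address can be recovered from the TSID, which is effectively the same arithmetic gru_fault() in grumain.c uses when it masks the faulting address down to a GRU_GSEG_PAGESIZE boundary.

/* Illustrative only: base address of the GSEG containing vaddr */
static inline unsigned long gseg_base_of(struct vm_area_struct *vma,
					 unsigned long vaddr)
{
	return vma->vm_start + TSID(vaddr, vma) * GRU_GSEG_PAGESIZE;
}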
378
379#define NULLCTX (-1) /* if context not loaded into GRU */
380
381/*-----------------------------------------------------------------------------
382 * GRU State Tables
383 */
384
385/*
386 * One of these exists for each GRU chiplet.
387 */
388struct gru_state {
389 struct gru_blade_state *gs_blade; /* GRU state for entire
390 blade */
391 unsigned long gs_gru_base_paddr; /* Physical address of
392 gru segments (64) */
393 void *gs_gru_base_vaddr; /* Virtual address of
394 gru segments (64) */
395 unsigned char gs_gid; /* unique GRU number */
396 unsigned char gs_tgh_local_shift; /* used to pick TGH for
397 local flush */
398 unsigned char gs_tgh_first_remote; /* starting TGH# for
399 remote flush */
400 unsigned short gs_blade_id; /* blade of GRU */
401 spinlock_t gs_asid_lock; /* lock used for
402 assigning asids */
403 spinlock_t gs_lock; /* lock used for
404 assigning contexts */
405
406 /* -- the following are protected by the gs_asid_lock spinlock ---- */
407 unsigned int gs_asid; /* Next available ASID */
408 unsigned int gs_asid_limit; /* Limit of available
409 ASIDs */
410 unsigned int gs_asid_gen; /* asid generation.
411 Inc on wrap */
412
413 /* --- the following fields are protected by the gs_lock spinlock --- */
414 unsigned long gs_context_map; /* bitmap to manage
415 contexts in use */
416 unsigned long gs_cbr_map; /* bitmap to manage CB
417 resources */
418 unsigned long gs_dsr_map; /* bitmap used to manage
419 DATA resources */
420 unsigned int gs_reserved_cbrs; /* Number of kernel-
421 reserved cbrs */
422 unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel-
423 reserved dsrs */
424 unsigned short gs_active_contexts; /* number of contexts
425 in use */
426 struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using
427 the context */
428};
429
430/*
431 * This structure contains the GRU state for all the GRUs on a blade.
432 */
433struct gru_blade_state {
434 void *kernel_cb; /* First kernel
435 reserved cb */
436 void *kernel_dsr; /* First kernel
437 reserved DSR */
438 /* ---- the following are protected by the bs_lock spinlock ---- */
439 spinlock_t bs_lock; /* lock used for
440 stealing contexts */
441 int bs_lru_ctxnum; /* STEAL - last context
442 stolen */
443 struct gru_state *bs_lru_gru; /* STEAL - last gru
444 stolen */
445
446 struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE];
447};
448
449/*-----------------------------------------------------------------------------
450 * Address Primitives
451 */
452#define get_tfm_for_cpu(g, c) \
453 ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
454#define get_tfh_by_index(g, i) \
455 ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
456#define get_tgh_by_index(g, i) \
457 ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
458#define get_cbe_by_index(g, i) \
459 ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
460 (i)))
461
462/*-----------------------------------------------------------------------------
463 * Useful Macros
464 */
465
466/* Given a blade# & chiplet#, get a pointer to the GRU */
467#define get_gru(b, c) (&gru_base[b]->bs_grus[c])
468
469/* Number of bytes to save/restore when unloading/loading GRU contexts */
470#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES)
471#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
472
473/* Convert a user CB number to the actual CBRNUM */
474#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
475 * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
476
477/* Convert a gid to a pointer to the GRU */
478#define GID_TO_GRU(gid) \
479 (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \
480 (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \
481 bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \
482 NULL)
483
484/* Scan all active GRUs in a GRU bitmap */
485#define for_each_gru_in_bitmap(gid, map) \
486 for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\
487 (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid)))
488
489/* Scan all active GRUs on a specific blade */
490#define for_each_gru_on_blade(gru, nid, i) \
491 for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \
492 (i) < GRU_CHIPLETS_PER_BLADE; \
493 (i)++, (gru)++)
494
495/* Scan all active GTSs on a gru. Note: must hold gs_lock to use this macro. */
496#define for_each_gts_on_gru(gts, gru, ctxnum) \
497 for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \
498 if (((gts) = (gru)->gs_gts[ctxnum]))
499
500/* Scan each CBR whose bit is set in a TFM (or copy of) */
501#define for_each_cbr_in_tfm(i, map) \
502 for ((i) = find_first_bit(map, GRU_NUM_CBE); \
503 (i) < GRU_NUM_CBE; \
504 (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i))
505
506/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
507#define for_each_cbr_in_allocation_map(i, map, k) \
508 for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \
509 (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \
510 for ((i) = (k)*GRU_CBR_AU_SIZE; \
511 (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
512
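A minimal usage sketch, not part of the patch, of the CBR allocation-map iterator above (the DSR variant below works the same way): each set allocation-unit bit contributes GRU_CBR_AU_SIZE consecutive CBR numbers.

/* Illustrative only: count the CBRs selected by an allocation-unit map */
static inline int count_cbrs(unsigned long *cbrmap)
{
	int i, k, n = 0;

	for_each_cbr_in_allocation_map(i, cbrmap, k)
		n++;
	return n;
}
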
513/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
514#define for_each_dsr_in_allocation_map(i, map, k) \
515 for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\
516 (k) < GRU_DSR_AU; \
517 (k) = find_next_bit((const unsigned long *)map, \
518 GRU_DSR_AU, (k) + 1)) \
519 for ((i) = (k) * GRU_DSR_AU_CL; \
520 (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
521
522#define gseg_physical_address(gru, ctxnum) \
523 ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE)
524#define gseg_virtual_address(gru, ctxnum) \
525 ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE)
526
527/*-----------------------------------------------------------------------------
528 * Lock / Unlock GRU handles
529 * Use the "delresp" bit in the handle as a "lock" bit.
530 */
531
532/* Lock hierarchy checking enabled only in emulator */
533
534static inline void __lock_handle(void *h)
535{
536 while (test_and_set_bit(1, h))
537 cpu_relax();
538}
539
540static inline void __unlock_handle(void *h)
541{
542 clear_bit(1, h);
543}
544
545static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
546{
547 __lock_handle(cch);
548}
549
550static inline void unlock_cch_handle(struct gru_context_configuration_handle
551 *cch)
552{
553 __unlock_handle(cch);
554}
555
556static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
557{
558 __lock_handle(tgh);
559}
560
561static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
562{
563 __unlock_handle(tgh);
564}
565
566/*-----------------------------------------------------------------------------
567 * Function prototypes & externs
568 */
569struct gru_unload_context_req;
570
571extern struct vm_operations_struct gru_vm_ops;
572extern struct device *grudev;
573
574extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
575 int tsid);
576extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
577 *vma, int tsid);
578extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
579 *vma, int tsid);
580extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
581extern void gts_drop(struct gru_thread_state *gts);
582extern void gru_tgh_flush_init(struct gru_state *gru);
583extern int gru_kservices_init(struct gru_state *gru);
584extern irqreturn_t gru_intr(int irq, void *dev_id);
585extern int gru_handle_user_call_os(unsigned long address);
586extern int gru_user_flush_tlb(unsigned long arg);
587extern int gru_user_unload_context(unsigned long arg);
588extern int gru_get_exception_detail(unsigned long arg);
589extern int gru_set_task_slice(long address);
590extern int gru_cpu_fault_map_id(void);
591extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
592extern void gru_flush_all_tlb(struct gru_state *gru);
593extern int gru_proc_init(void);
594extern void gru_proc_exit(void);
595
596extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
597 int cbr_au_count, char *cbmap);
598extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
599 int dsr_au_count, char *dsmap);
600extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
601extern struct gru_mm_struct *gru_register_mmu_notifier(void);
602extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
603
604extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
605 unsigned long len);
606
607extern unsigned long gru_options;
608
609#endif /* __GRUTABLES_H__ */
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 000000000000..c84496a77691
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,371 @@
1/*
2 * SN Platform GRU Driver
3 *
4 * MMUOPS callbacks + TLB flushing
5 *
6 * This file handles mmu notifier callbacks from the core kernel. The callbacks
7 * are used to update the TLB in the GRU as a result of changes in the
8 * state of a process address space. This file also handles TLB invalidates
9 * from the GRU driver.
10 *
11 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/kernel.h>
29#include <linux/list.h>
30#include <linux/spinlock.h>
31#include <linux/mm.h>
32#include <linux/slab.h>
33#include <linux/device.h>
34#include <linux/hugetlb.h>
35#include <linux/delay.h>
36#include <linux/timex.h>
37#include <linux/srcu.h>
38#include <asm/processor.h>
39#include "gru.h"
40#include "grutables.h"
41#include <asm/uv/uv_hub.h>
42
43#define gru_random() get_cycles()
44
45/* ---------------------------------- TLB Invalidation functions --------
46 * get_tgh_handle
47 *
48 * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
49 * local blade, use a fixed TGH that is a function of the blade-local cpu
50 * number. Normally, this TGH is private to the cpu & no contention occurs for
51 * the TGH. For offblade GRUs, select a random TGH in the range above the
52 * private TGHs. A spinlock is required to access this TGH & the lock must be
53 * released when the invalidate completes. This sucks, but it is the best we
54 * can do.
55 *
56 * Note that the spinlock is IN the TGH handle so locking does not involve
57 * additional cache lines.
58 *
59 */
60static inline int get_off_blade_tgh(struct gru_state *gru)
61{
62 int n;
63
64 n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
65 n = gru_random() % n;
66 n += gru->gs_tgh_first_remote;
67 return n;
68}
69
70static inline int get_on_blade_tgh(struct gru_state *gru)
71{
72 return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
73}
74
75static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
76 *gru)
77{
78 struct gru_tlb_global_handle *tgh;
79 int n;
80
81 preempt_disable();
82 if (uv_numa_blade_id() == gru->gs_blade_id)
83 n = get_on_blade_tgh(gru);
84 else
85 n = get_off_blade_tgh(gru);
86 tgh = get_tgh_by_index(gru, n);
87 lock_tgh_handle(tgh);
88
89 return tgh;
90}
91
92static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
93{
94 unlock_tgh_handle(tgh);
95 preempt_enable();
96}
97
98/*
99 * gru_flush_tlb_range
100 *
101 * General purpose TLB invalidation function. This function scans every GRU in
102 * the ENTIRE system (partition) looking for GRUs where the specified MM has
103 * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
104 * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
105 * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
106 * cost of (possibly) a large number of future TLBmisses.
107 *
108 * The current algorithm is optimized based on the following (somewhat true)
109 * assumptions:
110 * - GRU contexts are not loaded into a GRU unless a reference is made to
111 * the data segment or control block (this is true, not an assumption).
112 * If a DS/CB is referenced, the user will also issue instructions that
113 * cause TLBmisses. It is not necessary to optimize for the case where
114 * contexts are loaded but no instructions cause TLB misses. (I know
115 * this will happen but I'm not optimizing for it).
116 * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
117 * a few usec but in unusual cases, it could be longer. Avoid if
118 * possible.
119 * - intrablade process migration between cpus is not frequent but is
120 * common.
121 * - a GRU context is not typically migrated to a different GRU on the
122 * blade because of intrablade migration
123 * - interblade migration is rare. Processes migrate their GRU context to
124 * the new blade.
125 * - if interblade migration occurs, migration back to the original blade
126 * is very very rare (ie., no optimization for this case)
127 * - most GRU instructions operate on a subset of the user REGIONS. Code
128 * & shared library regions are not likely targets of GRU instructions.
129 *
130 * To help improve the efficiency of TLB invalidation, the GMS data
131 * structure is maintained for EACH address space (MM struct). The GMS is
132 * also the structure that contains the pointer to the mmu callout
133 * functions. This structure is linked to the mm_struct for the address space
134 * using the mmu "register" function. The mmu interfaces are used to
135 * provide the callbacks for TLB invalidation. The GMS contains:
136 *
137 * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
138 * loaded into the GRU.
139 * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
140 * the above array
141 * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active
142 * in the GRU for the address space. This bitmap must be passed to the
143 * GRU to do an invalidate.
144 *
145 * The current algorithm for invalidating TLBs is:
146 * - scan the asidmap for GRUs where the context has been loaded, ie,
147 * asid is non-zero.
148 * - for each gru found:
149 * - if the ctxtmap is non-zero, there are active contexts in the
150 * GRU. TLB invalidate instructions must be issued to the GRU.
151 * - if the ctxtmap is zero, no context is active. Set the ASID to
152 * zero to force a full TLB invalidation. This is fast but will
153 * cause a lot of TLB misses if the context is reloaded onto the
154 * GRU
155 *
156 */
157
158void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
159 unsigned long len)
160{
161 struct gru_state *gru;
162 struct gru_mm_tracker *asids;
163 struct gru_tlb_global_handle *tgh;
164 unsigned long num;
165 int grupagesize, pagesize, pageshift, gid, asid;
166
167 /* ZZZ TODO - handle huge pages */
168 pageshift = PAGE_SHIFT;
169 pagesize = (1UL << pageshift);
170 grupagesize = GRU_PAGESIZE(pageshift);
171 num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
172
173 STAT(flush_tlb);
174 gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
175 start, len, gms->ms_asidmap[0]);
176
177 spin_lock(&gms->ms_asid_lock);
178 for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
179 STAT(flush_tlb_gru);
180 gru = GID_TO_GRU(gid);
181 asids = gms->ms_asids + gid;
182 asid = asids->mt_asid;
183 if (asids->mt_ctxbitmap && asid) {
184 STAT(flush_tlb_gru_tgh);
185 asid = GRUASID(asid, start);
186 gru_dbg(grudev,
187 " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
188 gid, asid, num, asids->mt_ctxbitmap);
189 tgh = get_lock_tgh_handle(gru);
190 tgh_invalidate(tgh, start, 0, asid, grupagesize, 0,
191 num - 1, asids->mt_ctxbitmap);
192 get_unlock_tgh_handle(tgh);
193 } else {
194 STAT(flush_tlb_gru_zero_asid);
195 asids->mt_asid = 0;
196 __clear_bit(gru->gs_gid, gms->ms_asidmap);
197 gru_dbg(grudev,
198 " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
199 gid, asid, asids->mt_ctxbitmap,
200 gms->ms_asidmap[0]);
201 }
202 }
203 spin_unlock(&gms->ms_asid_lock);
204}
205
206/*
207 * Flush the entire TLB on a chiplet.
208 */
209void gru_flush_all_tlb(struct gru_state *gru)
210{
211 struct gru_tlb_global_handle *tgh;
212
213 gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
214 tgh = get_lock_tgh_handle(gru);
215 tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0);
216 get_unlock_tgh_handle(tgh);
218}
219
220/*
221 * MMUOPS notifier callout functions
222 */
223static void gru_invalidate_range_start(struct mmu_notifier *mn,
224 struct mm_struct *mm,
225 unsigned long start, unsigned long end)
226{
227 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
228 ms_notifier);
229
230 STAT(mmu_invalidate_range);
231 atomic_inc(&gms->ms_range_active);
232 gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
233 start, end, atomic_read(&gms->ms_range_active));
234 gru_flush_tlb_range(gms, start, end - start);
235}
236
237static void gru_invalidate_range_end(struct mmu_notifier *mn,
238 struct mm_struct *mm, unsigned long start,
239 unsigned long end)
240{
241 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
242 ms_notifier);
243
244 /* ..._and_test() provides needed barrier */
245 (void)atomic_dec_and_test(&gms->ms_range_active);
246
247 wake_up_all(&gms->ms_wait_queue);
248 gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
249}
250
251static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
252 unsigned long address)
253{
254 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
255 ms_notifier);
256
257 STAT(mmu_invalidate_page);
258 gru_flush_tlb_range(gms, address, PAGE_SIZE);
259 gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
260}
261
262static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
263{
264 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
265 ms_notifier);
266
267 gms->ms_released = 1;
268 gru_dbg(grudev, "gms %p\n", gms);
269}
270
271
272static const struct mmu_notifier_ops gru_mmuops = {
273 .invalidate_page = gru_invalidate_page,
274 .invalidate_range_start = gru_invalidate_range_start,
275 .invalidate_range_end = gru_invalidate_range_end,
276 .release = gru_release,
277};
278
279/* Move this to the basic mmu_notifier file. But for now... */
280static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
281 const struct mmu_notifier_ops *ops)
282{
283 struct mmu_notifier *mn, *gru_mn = NULL;
284 struct hlist_node *n;
285
286 if (mm->mmu_notifier_mm) {
287 rcu_read_lock();
288 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
289 hlist)
290 if (mn->ops == ops) {
291 gru_mn = mn;
292 break;
293 }
294 rcu_read_unlock();
295 }
296 return gru_mn;
297}
298
299struct gru_mm_struct *gru_register_mmu_notifier(void)
300{
301 struct gru_mm_struct *gms;
302 struct mmu_notifier *mn;
303
304 mn = mmu_find_ops(current->mm, &gru_mmuops);
305 if (mn) {
306 gms = container_of(mn, struct gru_mm_struct, ms_notifier);
307 atomic_inc(&gms->ms_refcnt);
308 } else {
309 gms = kzalloc(sizeof(*gms), GFP_KERNEL);
310 if (gms) {
311 spin_lock_init(&gms->ms_asid_lock);
312 gms->ms_notifier.ops = &gru_mmuops;
313 atomic_set(&gms->ms_refcnt, 1);
314 init_waitqueue_head(&gms->ms_wait_queue);
315 __mmu_notifier_register(&gms->ms_notifier, current->mm);
316 }
317 }
318 gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
319 atomic_read(&gms->ms_refcnt));
320 return gms;
321}
322
323void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
324{
325 gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
326 atomic_read(&gms->ms_refcnt), gms->ms_released);
327 if (atomic_dec_return(&gms->ms_refcnt) == 0) {
328 if (!gms->ms_released)
329 mmu_notifier_unregister(&gms->ms_notifier, current->mm);
330 kfree(gms);
331 }
332}
333
334/*
335 * Setup TGH parameters. There are:
336 * - 24 TGH handles per GRU chiplet
337 * - a portion (MAX_LOCAL_TGH) of the handles are reserved for
338 * use by blade-local cpus
339 * - the rest are used by off-blade cpus. This usage is
340 * less frequent than blade-local usage.
341 *
342 * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
343 * has 16 or fewer cpus, each cpu has a unique handle that it can
344 * use.
345 */
346#define MAX_LOCAL_TGH 16
347
348void gru_tgh_flush_init(struct gru_state *gru)
349{
350 int cpus, shift = 0, n;
351
352 cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
353
354 /* n = cpus rounded up to next power of 2 */
355 if (cpus) {
356 n = 1 << fls(cpus - 1);
357
358 /*
359 * shift count for converting local cpu# to TGH index
360 * 0 if cpus <= MAX_LOCAL_TGH,
361 * 1 if cpus <= 2*MAX_LOCAL_TGH,
362 * etc
363 */
364 shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
365 }
366 gru->gs_tgh_local_shift = shift;
367
368 /* first starting TGH index to use for remote purges */
369 gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
370
371}
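
Worked example (illustrative only): on a blade with 32 possible cpus, n = 1 << fls(31) = 32, so gs_tgh_local_shift = fls(31) - fls(15) = 5 - 4 = 1 and get_on_blade_tgh() maps pairs of cpus onto local TGHs 0..15. gs_tgh_first_remote = (32 + 1) >> 1 = 16, so with the 24 handles noted above, off-blade flushes pick a random TGH in the range 16..23.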