Diffstat (limited to 'drivers/misc/sgi-gru')
-rw-r--r--  drivers/misc/sgi-gru/gru.h               |  11
-rw-r--r--  drivers/misc/sgi-gru/gru_instructions.h  | 144
-rw-r--r--  drivers/misc/sgi-gru/grufault.c          | 311
-rw-r--r--  drivers/misc/sgi-gru/grufile.c           | 292
-rw-r--r--  drivers/misc/sgi-gru/gruhandles.c        |  70
-rw-r--r--  drivers/misc/sgi-gru/gruhandles.h        |  37
-rw-r--r--  drivers/misc/sgi-gru/grukdump.c          |  13
-rw-r--r--  drivers/misc/sgi-gru/grukservices.c      | 211
-rw-r--r--  drivers/misc/sgi-gru/grukservices.h      |  14
-rw-r--r--  drivers/misc/sgi-gru/grulib.h            |  21
-rw-r--r--  drivers/misc/sgi-gru/grumain.c           | 228
-rw-r--r--  drivers/misc/sgi-gru/gruprocfs.c         |  42
-rw-r--r--  drivers/misc/sgi-gru/grutables.h         |  90
-rw-r--r--  drivers/misc/sgi-gru/grutlbpurge.c       |  14
14 files changed, 1050 insertions(+), 448 deletions(-)
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
index f93f03a9e6e9..3ad76cd18b4b 100644
--- a/drivers/misc/sgi-gru/gru.h
+++ b/drivers/misc/sgi-gru/gru.h
@@ -53,6 +53,17 @@ struct gru_chiplet_info {
 	int	free_user_cbr;
 };
 
+/*
+ * Statictics kept for each context.
+ */
+struct gru_gseg_statistics {
+	unsigned long	fmm_tlbmiss;
+	unsigned long	upm_tlbmiss;
+	unsigned long	tlbdropin;
+	unsigned long	context_stolen;
+	unsigned long	reserved[10];
+};
+
 /* Flags for GRU options on the gru_create_context() call */
 /* Select one of the follow 4 options to specify how TLB misses are handled */
 #define GRU_OPT_MISS_DEFAULT	0x0000	/* Use default mode */
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
index 3c9c06618e6a..d95587cc794c 100644
--- a/drivers/misc/sgi-gru/gru_instructions.h
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -34,17 +34,17 @@ extern void gru_wait_abort_proc(void *cb);
 #include <asm/intrinsics.h>
 #define __flush_cache(p)	ia64_fc((unsigned long)p)
 /* Use volatile on IA64 to ensure ordering via st4.rel */
-#define gru_ordered_store_int(p, v)		\
+#define gru_ordered_store_ulong(p, v)		\
 		do {				\
 			barrier();		\
-			*((volatile int *)(p)) = v; /* force st.rel */	\
+			*((volatile unsigned long *)(p)) = v; /* force st.rel */	\
 		} while (0)
 #elif defined(CONFIG_X86_64)
 #define __flush_cache(p)	clflush(p)
-#define gru_ordered_store_int(p, v)		\
+#define gru_ordered_store_ulong(p, v)		\
 		do {				\
 			barrier();		\
-			*(int *)p = v;		\
+			*(unsigned long *)p = v;	\
 		} while (0)
 #else
 #error "Unsupported architecture"
@@ -129,8 +129,13 @@ struct gru_instruction_bits {
  */
 struct gru_instruction {
     /* DW 0 */
-    unsigned int	op32;    /* icmd,xtype,iaa0,ima,opc */
-    unsigned int	tri0;
+    union {
+	unsigned long	op64;    /* icmd,xtype,iaa0,ima,opc,tri0 */
+	struct {
+		unsigned int	op32;
+		unsigned int	tri0;
+	};
+    };
     unsigned long	tri1_bufsize;	/* DW 1 */
     unsigned long	baddr0;		/* DW 2 */
     unsigned long	nelem;		/* DW 3 */
@@ -140,7 +145,7 @@ struct gru_instruction {
     unsigned long	avalue;		/* DW 7 */
 };
 
-/* Some shifts and masks for the low 32 bits of a GRU command */
+/* Some shifts and masks for the low 64 bits of a GRU command */
 #define GRU_CB_ICMD_SHFT	0
 #define GRU_CB_ICMD_MASK	0x1
 #define GRU_CB_XTYPE_SHFT	8
@@ -155,6 +160,10 @@ struct gru_instruction {
 #define GRU_CB_OPC_MASK		0xff
 #define GRU_CB_EXOPC_SHFT	24
 #define GRU_CB_EXOPC_MASK	0xff
+#define GRU_IDEF2_SHFT		32
+#define GRU_IDEF2_MASK		0x3ffff
+#define GRU_ISTATUS_SHFT	56
+#define GRU_ISTATUS_MASK	0x3
 
 /* GRU instruction opcodes (opc field) */
 #define OP_NOP		0x00
@@ -256,6 +265,7 @@ struct gru_instruction {
 #define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR	(1 << 16)
 #define CBE_CAUSE_RA_RESPONSE_DATA_ERROR	(1 << 17)
 #define CBE_CAUSE_HA_RESPONSE_DATA_ERROR	(1 << 18)
+#define CBE_CAUSE_FORCED_ERROR			(1 << 19)
 
 /* CBE cbrexecstatus bits */
 #define CBR_EXS_ABORT_OCC_BIT			0
@@ -264,13 +274,15 @@ struct gru_instruction {
 #define CBR_EXS_QUEUED_BIT			3
 #define CBR_EXS_TLB_INVAL_BIT			4
 #define CBR_EXS_EXCEPTION_BIT			5
+#define CBR_EXS_CB_INT_PENDING_BIT		6
 
 #define CBR_EXS_ABORT_OCC			(1 << CBR_EXS_ABORT_OCC_BIT)
 #define CBR_EXS_INT_OCC				(1 << CBR_EXS_INT_OCC_BIT)
 #define CBR_EXS_PENDING				(1 << CBR_EXS_PENDING_BIT)
 #define CBR_EXS_QUEUED				(1 << CBR_EXS_QUEUED_BIT)
-#define CBR_TLB_INVAL				(1 << CBR_EXS_TLB_INVAL_BIT)
+#define CBR_EXS_TLB_INVAL			(1 << CBR_EXS_TLB_INVAL_BIT)
 #define CBR_EXS_EXCEPTION			(1 << CBR_EXS_EXCEPTION_BIT)
+#define CBR_EXS_CB_INT_PENDING			(1 << CBR_EXS_CB_INT_PENDING_BIT)
 
 /*
  * Exceptions are retried for the following cases. If any OTHER bits are set
@@ -296,12 +308,14 @@ union gru_mesqhead {
 
 
 /* Generate the low word of a GRU instruction */
-static inline unsigned int
-__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
+static inline unsigned long
+__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
        unsigned char iaa0, unsigned char iaa1,
-       unsigned char ima)
+       unsigned long idef2, unsigned char ima)
 {
     return (1 << GRU_CB_ICMD_SHFT) |
+	((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) |
+	(idef2<< GRU_IDEF2_SHFT) |
 	(iaa0 << GRU_CB_IAA0_SHFT) |
 	(iaa1 << GRU_CB_IAA1_SHFT) |
 	(ima << GRU_CB_IMA_SHFT) |
@@ -319,12 +333,13 @@ static inline void gru_flush_cache(void *p)
 }
 
 /*
- * Store the lower 32 bits of the command including the "start" bit. Then
+ * Store the lower 64 bits of the command including the "start" bit. Then
  * start the instruction executing.
  */
-static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
+static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64)
 {
-	gru_ordered_store_int(ins, op32);
+	gru_ordered_store_ulong(ins, op64);
+	mb();
 	gru_flush_cache(ins);
 }
 
@@ -340,6 +355,30 @@ static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
  * - nelem and stride are in elements
  * - tri0/tri1 is in bytes for the beginning of the data segment.
  */
+static inline void gru_vload_phys(void *cb, unsigned long gpa,
+		unsigned int tri0, int iaa, unsigned long hints)
+{
+	struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+	ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+	ins->nelem = 1;
+	ins->op1_stride = 1;
+	gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0,
+					(unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vstore_phys(void *cb, unsigned long gpa,
+		unsigned int tri0, int iaa, unsigned long hints)
+{
+	struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+	ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+	ins->nelem = 1;
+	ins->op1_stride = 1;
+	gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0,
+					(unsigned long)tri0, CB_IMA(hints)));
+}
+
 static inline void gru_vload(void *cb, unsigned long mem_addr,
 		unsigned int tri0, unsigned char xtype, unsigned long nelem,
 		unsigned long stride, unsigned long hints)
@@ -348,10 +387,9 @@ static inline void gru_vload(void *cb, unsigned long mem_addr,
 
 	ins->baddr0 = (long)mem_addr;
 	ins->nelem = nelem;
-	ins->tri0 = tri0;
 	ins->op1_stride = stride;
-	gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
+					(unsigned long)tri0, CB_IMA(hints)));
 }
 
 static inline void gru_vstore(void *cb, unsigned long mem_addr,
@@ -362,10 +400,9 @@ static inline void gru_vstore(void *cb, unsigned long mem_addr,
 
 	ins->baddr0 = (long)mem_addr;
 	ins->nelem = nelem;
-	ins->tri0 = tri0;
 	ins->op1_stride = stride;
-	gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
 }
 
 static inline void gru_ivload(void *cb, unsigned long mem_addr,
@@ -376,10 +413,9 @@ static inline void gru_ivload(void *cb, unsigned long mem_addr,
 
 	ins->baddr0 = (long)mem_addr;
 	ins->nelem = nelem;
-	ins->tri0 = tri0;
 	ins->tri1_bufsize = tri1;
-	gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
 }
 
 static inline void gru_ivstore(void *cb, unsigned long mem_addr,
@@ -390,10 +426,9 @@ static inline void gru_ivstore(void *cb, unsigned long mem_addr,
 
 	ins->baddr0 = (long)mem_addr;
 	ins->nelem = nelem;
-	ins->tri0 = tri0;
 	ins->tri1_bufsize = tri1;
-	gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
 }
 
 static inline void gru_vset(void *cb, unsigned long mem_addr,
@@ -406,8 +441,8 @@ static inline void gru_vset(void *cb, unsigned long mem_addr,
 	ins->op2_value_baddr1 = value;
 	ins->nelem = nelem;
 	ins->op1_stride = stride;
-	gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0,
-					 CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0,
+					 0, CB_IMA(hints)));
 }
 
 static inline void gru_ivset(void *cb, unsigned long mem_addr,
@@ -420,8 +455,8 @@ static inline void gru_ivset(void *cb, unsigned long mem_addr,
 	ins->op2_value_baddr1 = value;
 	ins->nelem = nelem;
 	ins->tri1_bufsize = tri1;
-	gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
 }
 
 static inline void gru_vflush(void *cb, unsigned long mem_addr,
@@ -433,15 +468,15 @@ static inline void gru_vflush(void *cb, unsigned long mem_addr,
 	ins->baddr0 = (long)mem_addr;
 	ins->op1_stride = stride;
 	ins->nelem = nelem;
-	gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
 }
 
 static inline void gru_nop(void *cb, int hints)
 {
 	struct gru_instruction *ins = (void *)cb;
 
-	gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints)));
 }
 
 
@@ -455,10 +490,9 @@ static inline void gru_bcopy(void *cb, const unsigned long src,
 	ins->baddr0 = (long)src;
 	ins->op2_value_baddr1 = (long)dest;
 	ins->nelem = nelem;
-	ins->tri0 = tri0;
 	ins->tri1_bufsize = bufsize;
-	gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM,
-					IAA_RAM, CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM,
+					IAA_RAM, tri0, CB_IMA(hints)));
 }
 
 static inline void gru_bstore(void *cb, const unsigned long src,
@@ -470,9 +504,8 @@ static inline void gru_bstore(void *cb, const unsigned long src,
 	ins->baddr0 = (long)src;
 	ins->op2_value_baddr1 = (long)dest;
 	ins->nelem = nelem;
-	ins->tri0 = tri0;
-	gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
+					tri0, CB_IMA(hints)));
 }
 
 static inline void gru_gamir(void *cb, int exopc, unsigned long src,
@@ -481,8 +514,8 @@ static inline void gru_gamir(void *cb, int exopc, unsigned long src,
 	struct gru_instruction *ins = (void *)cb;
 
 	ins->baddr0 = (long)src;
-	gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
 }
 
 static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
@@ -491,8 +524,8 @@ static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
 	struct gru_instruction *ins = (void *)cb;
 
 	ins->baddr0 = (long)src;
-	gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
 }
 
 static inline void gru_gamer(void *cb, int exopc, unsigned long src,
@@ -505,8 +538,8 @@ static inline void gru_gamer(void *cb, int exopc, unsigned long src,
 	ins->baddr0 = (long)src;
 	ins->op1_stride = operand1;
 	ins->op2_value_baddr1 = operand2;
-	gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
 }
 
 static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
@@ -518,8 +551,8 @@ static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
 	ins->baddr0 = (long)src;
 	ins->op1_stride = operand1;
 	ins->op2_value_baddr1 = operand2;
-	gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
 }
 
 static inline void gru_gamxr(void *cb, unsigned long src,
@@ -529,8 +562,8 @@ static inline void gru_gamxr(void *cb, unsigned long src,
 
 	ins->baddr0 = (long)src;
 	ins->nelem = 4;
-	gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
-				 IAA_RAM, 0, CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
+				 IAA_RAM, 0, 0, CB_IMA(hints)));
 }
 
 static inline void gru_mesq(void *cb, unsigned long queue,
@@ -541,9 +574,8 @@ static inline void gru_mesq(void *cb, unsigned long queue,
 
 	ins->baddr0 = (long)queue;
 	ins->nelem = nelem;
-	ins->tri0 = tri0;
-	gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
-					CB_IMA(hints)));
+	gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
 }
 
 static inline unsigned long gru_get_amo_value(void *cb)
@@ -662,6 +694,14 @@ static inline void gru_wait_abort(void *cb)
 	gru_wait_abort_proc(cb);
 }
 
+/*
+ * Get a pointer to the start of a gseg
+ *	p	- Any valid pointer within the gseg
+ */
+static inline void *gru_get_gseg_pointer (void *p)
+{
+	return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1));
+}
 
 /*
  * Get a pointer to a control block
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 679e01778286..38657cdaf54d 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -40,6 +40,12 @@
 #include "gru_instructions.h"
 #include <asm/uv/uv_hub.h>
 
+/* Return codes for vtop functions */
+#define VTOP_SUCCESS	0
+#define VTOP_INVALID	-1
+#define VTOP_RETRY	-2
+
+
 /*
  * Test if a physical address is a valid GRU GSEG address
  */
@@ -90,19 +96,22 @@ static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	struct gru_thread_state *gts = NULL;
+	struct gru_thread_state *gts = ERR_PTR(-EINVAL);
 
 	down_write(&mm->mmap_sem);
 	vma = gru_find_vma(vaddr);
-	if (vma)
-		gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
-	if (gts) {
-		mutex_lock(&gts->ts_ctxlock);
-		downgrade_write(&mm->mmap_sem);
-	} else {
-		up_write(&mm->mmap_sem);
-	}
+	if (!vma)
+		goto err;
 
+	gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
+	if (IS_ERR(gts))
+		goto err;
+	mutex_lock(&gts->ts_ctxlock);
+	downgrade_write(&mm->mmap_sem);
+	return gts;
+
+err:
+	up_write(&mm->mmap_sem);
 	return gts;
 }
 
@@ -122,39 +131,15 @@ static void gru_unlock_gts(struct gru_thread_state *gts)
  * is necessary to prevent the user from seeing a stale cb.istatus that will
  * change as soon as the TFH restart is complete. Races may cause an
  * occasional failure to clear the cb.istatus, but that is ok.
- *
- * If the cb address is not valid (should not happen, but...), nothing
- * bad will happen.. The get_user()/put_user() will fail but there
- * are no bad side-effects.
  */
-static void gru_cb_set_istatus_active(unsigned long __user *cb)
+static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk)
 {
-	union {
-		struct gru_instruction_bits bits;
-		unsigned long dw;
-	} u;
-
-	if (cb) {
-		get_user(u.dw, cb);
-		u.bits.istatus = CBS_ACTIVE;
-		put_user(u.dw, cb);
+	if (cbk) {
+		cbk->istatus = CBS_ACTIVE;
 	}
 }
 
 /*
- * Convert a interrupt IRQ to a pointer to the GRU GTS that caused the
- * interrupt. Interrupts are always sent to a cpu on the blade that contains the
- * GRU (except for headless blades which are not currently supported). A blade
- * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ
- * number uniquely identifies the GRU chiplet on the local blade that caused the
- * interrupt. Always called in interrupt context.
- */
-static inline struct gru_state *irq_to_gru(int irq)
-{
-	return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU];
-}
-
-/*
  * Read & clear a TFM
  *
  * The GRU has an array of fault maps. A map is private to a cpu
@@ -207,10 +192,11 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
 {
 	struct page *page;
 
-	/* ZZZ Need to handle HUGE pages */
-	if (is_vm_hugetlb_page(vma))
-		return -EFAULT;
+#ifdef CONFIG_HUGETLB_PAGE
+	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+#else
 	*pageshift = PAGE_SHIFT;
+#endif
 	if (get_user_pages
 	    (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
 		return -EFAULT;
@@ -268,7 +254,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
 	return 0;
 
 err:
-	local_irq_enable();
 	return 1;
 }
 
@@ -301,14 +286,69 @@ static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
 	paddr = paddr & ~((1UL << ps) - 1);
 	*gpa = uv_soc_phys_ram_to_gpa(paddr);
 	*pageshift = ps;
-	return 0;
+	return VTOP_SUCCESS;
 
 inval:
-	return -1;
+	return VTOP_INVALID;
 upm:
-	return -2;
+	return VTOP_RETRY;
+}
+
+
+/*
+ * Flush a CBE from cache. The CBE is clean in the cache. Dirty the
+ * CBE cacheline so that the line will be written back to home agent.
+ * Otherwise the line may be silently dropped. This has no impact
+ * except on performance.
+ */
+static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
+{
+	if (unlikely(cbe)) {
+		cbe->cbrexecstatus = 0;		/* make CL dirty */
+		gru_flush_cache(cbe);
+	}
 }
 
+/*
+ * Preload the TLB with entries that may be required. Currently, preloading
+ * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to
+ * the end of the bcopy tranfer, whichever is smaller.
+ */
+static void gru_preload_tlb(struct gru_state *gru,
+			struct gru_thread_state *gts, int atomic,
+			unsigned long fault_vaddr, int asid, int write,
+			unsigned char tlb_preload_count,
+			struct gru_tlb_fault_handle *tfh,
+			struct gru_control_block_extended *cbe)
+{
+	unsigned long vaddr = 0, gpa;
+	int ret, pageshift;
+
+	if (cbe->opccpy != OP_BCOPY)
+		return;
+
+	if (fault_vaddr == cbe->cbe_baddr0)
+		vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
+	else if (fault_vaddr == cbe->cbe_baddr1)
+		vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;
+
+	fault_vaddr &= PAGE_MASK;
+	vaddr &= PAGE_MASK;
+	vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);
+
+	while (vaddr > fault_vaddr) {
+		ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+		if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
+					  GRU_PAGESIZE(pageshift)))
+			return;
+		gru_dbg(grudev,
+			"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
+			atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
+			vaddr, asid, write, pageshift, gpa);
+		vaddr -= PAGE_SIZE;
+		STAT(tlb_preload_page);
+	}
+}
 
 /*
  * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
@@ -320,11 +360,14 @@ upm:
  *		< 0 = error code
  *
  */
-static int gru_try_dropin(struct gru_thread_state *gts,
+static int gru_try_dropin(struct gru_state *gru,
+			  struct gru_thread_state *gts,
 			  struct gru_tlb_fault_handle *tfh,
-			  unsigned long __user *cb)
+			  struct gru_instruction_bits *cbk)
 {
-	int pageshift = 0, asid, write, ret, atomic = !cb;
+	struct gru_control_block_extended *cbe = NULL;
+	unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
+	int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
 	unsigned long gpa = 0, vaddr = 0;
 
 	/*
@@ -335,24 +378,34 @@ static int gru_try_dropin(struct gru_thread_state *gts,
 	 */
 
 	/*
+	 * Prefetch the CBE if doing TLB preloading
+	 */
+	if (unlikely(tlb_preload_count)) {
+		cbe = gru_tfh_to_cbe(tfh);
+		prefetchw(cbe);
+	}
+
+	/*
 	 * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
 	 * Might be a hardware race OR a stupid user. Ignore FMM because FMM
 	 * is a transient state.
 	 */
 	if (tfh->status != TFHSTATUS_EXCEPTION) {
 		gru_flush_cache(tfh);
+		sync_core();
 		if (tfh->status != TFHSTATUS_EXCEPTION)
 			goto failnoexception;
 		STAT(tfh_stale_on_fault);
 	}
 	if (tfh->state == TFHSTATE_IDLE)
 		goto failidle;
-	if (tfh->state == TFHSTATE_MISS_FMM && cb)
+	if (tfh->state == TFHSTATE_MISS_FMM && cbk)
 		goto failfmm;
 
 	write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
 	vaddr = tfh->missvaddr;
 	asid = tfh->missasid;
+	indexway = tfh->indexway;
 	if (asid == 0)
 		goto failnoasid;
 
@@ -366,41 +419,51 @@ static int gru_try_dropin(struct gru_thread_state *gts,
 		goto failactive;
 
 	ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
-	if (ret == -1)
+	if (ret == VTOP_INVALID)
 		goto failinval;
-	if (ret == -2)
+	if (ret == VTOP_RETRY)
 		goto failupm;
 
 	if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) {
 		gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift);
-		if (atomic || !gru_update_cch(gts, 0)) {
+		if (atomic || !gru_update_cch(gts)) {
 			gts->ts_force_cch_reload = 1;
 			goto failupm;
 		}
 	}
-	gru_cb_set_istatus_active(cb);
+
+	if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
+		gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
+		gru_flush_cache_cbe(cbe);
+	}
+
+	gru_cb_set_istatus_active(cbk);
+	gts->ustats.tlbdropin++;
 	tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
 			  GRU_PAGESIZE(pageshift));
-	STAT(tlb_dropin);
 	gru_dbg(grudev,
-		"%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n",
-		ret ? "non-atomic" : "atomic", tfh, vaddr, asid,
-		pageshift, gpa);
+		"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x,"
+		" rw %d, ps %d, gpa 0x%lx\n",
+		atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid,
+		indexway, write, pageshift, gpa);
+	STAT(tlb_dropin);
 	return 0;
 
 failnoasid:
 	/* No asid (delayed unload). */
 	STAT(tlb_dropin_fail_no_asid);
 	gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
-	if (!cb)
+	if (!cbk)
 		tfh_user_polling_mode(tfh);
 	else
 		gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
 	return -EAGAIN;
 
 failupm:
 	/* Atomic failure switch CBR to UPM */
 	tfh_user_polling_mode(tfh);
+	gru_flush_cache_cbe(cbe);
 	STAT(tlb_dropin_fail_upm);
 	gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
 	return 1;
@@ -408,6 +471,7 @@ failupm:
 failfmm:
 	/* FMM state on UPM call */
 	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
 	STAT(tlb_dropin_fail_fmm);
 	gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
 	return 0;
@@ -415,17 +479,20 @@ failfmm:
 failnoexception:
 	/* TFH status did not show exception pending */
 	gru_flush_cache(tfh);
-	if (cb)
-		gru_flush_cache(cb);
+	gru_flush_cache_cbe(cbe);
+	if (cbk)
+		gru_flush_cache(cbk);
 	STAT(tlb_dropin_fail_no_exception);
-	gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", tfh, tfh->status, tfh->state);
+	gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n",
+		tfh, tfh->status, tfh->state);
 	return 0;
 
 failidle:
 	/* TFH state was idle - no miss pending */
 	gru_flush_cache(tfh);
-	if (cb)
-		gru_flush_cache(cb);
+	gru_flush_cache_cbe(cbe);
+	if (cbk)
+		gru_flush_cache(cbk);
 	STAT(tlb_dropin_fail_idle);
 	gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
 	return 0;
@@ -433,16 +500,18 @@ failidle:
 failinval:
 	/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
 	tfh_exception(tfh);
+	gru_flush_cache_cbe(cbe);
 	STAT(tlb_dropin_fail_invalid);
 	gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
 	return -EFAULT;
 
 failactive:
 	/* Range invalidate active. Switch to UPM iff atomic */
-	if (!cb)
+	if (!cbk)
 		tfh_user_polling_mode(tfh);
 	else
 		gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
 	STAT(tlb_dropin_fail_range_active);
 	gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
 		tfh, vaddr);
@@ -455,31 +524,41 @@ failactive:
  * Note that this is the interrupt handler that is registered with linux
  * interrupt handlers.
  */
-irqreturn_t gru_intr(int irq, void *dev_id)
+static irqreturn_t gru_intr(int chiplet, int blade)
 {
 	struct gru_state *gru;
 	struct gru_tlb_fault_map imap, dmap;
 	struct gru_thread_state *gts;
 	struct gru_tlb_fault_handle *tfh = NULL;
+	struct completion *cmp;
 	int cbrnum, ctxnum;
 
 	STAT(intr);
 
-	gru = irq_to_gru(irq);
+	gru = &gru_base[blade]->bs_grus[chiplet];
 	if (!gru) {
-		dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n",
-			raw_smp_processor_id(), irq);
+		dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n",
+			raw_smp_processor_id(), chiplet);
 		return IRQ_NONE;
 	}
 	get_clear_fault_map(gru, &imap, &dmap);
+	gru_dbg(grudev,
+		"cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n",
+		smp_processor_id(), chiplet, gru->gs_gid,
+		imap.fault_bits[0], imap.fault_bits[1],
+		dmap.fault_bits[0], dmap.fault_bits[1]);
 
 	for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
-		complete(gru->gs_blade->bs_async_wq);
+		STAT(intr_cbr);
+		cmp = gru->gs_blade->bs_async_wq;
+		if (cmp)
+			complete(cmp);
 		gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
-			gru->gs_gid, cbrnum, gru->gs_blade->bs_async_wq->done);
+			gru->gs_gid, cbrnum, cmp ? cmp->done : -1);
 	}
 
 	for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
+		STAT(intr_tfh);
 		tfh = get_tfh_by_index(gru, cbrnum);
 		prefetchw(tfh);	/* Helps on hdw, required for emulator */
 
@@ -492,14 +571,20 @@ irqreturn_t gru_intr(int irq, void *dev_id)
 		ctxnum = tfh->ctxnum;
 		gts = gru->gs_gts[ctxnum];
 
+		/* Spurious interrupts can cause this. Ignore. */
+		if (!gts) {
+			STAT(intr_spurious);
+			continue;
+		}
+
 		/*
 		 * This is running in interrupt context. Trylock the mmap_sem.
 		 * If it fails, retry the fault in user context.
 		 */
+		gts->ustats.fmm_tlbmiss++;
 		if (!gts->ts_force_cch_reload &&
 		    down_read_trylock(&gts->ts_mm->mmap_sem)) {
-			gts->ustats.fmm_tlbdropin++;
-			gru_try_dropin(gts, tfh, NULL);
+			gru_try_dropin(gru, gts, tfh, NULL);
 			up_read(&gts->ts_mm->mmap_sem);
 		} else {
 			tfh_user_polling_mode(tfh);
@@ -509,20 +594,43 @@ irqreturn_t gru_intr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+irqreturn_t gru0_intr(int irq, void *dev_id)
+{
+	return gru_intr(0, uv_numa_blade_id());
+}
+
+irqreturn_t gru1_intr(int irq, void *dev_id)
+{
+	return gru_intr(1, uv_numa_blade_id());
+}
+
+irqreturn_t gru_intr_mblade(int irq, void *dev_id)
+{
+	int blade;
+
+	for_each_possible_blade(blade) {
+		if (uv_blade_nr_possible_cpus(blade))
+			continue;
+		gru_intr(0, blade);
+		gru_intr(1, blade);
+	}
+	return IRQ_HANDLED;
+}
+
 
 static int gru_user_dropin(struct gru_thread_state *gts,
 			   struct gru_tlb_fault_handle *tfh,
-			   unsigned long __user *cb)
+			   void *cb)
 {
 	struct gru_mm_struct *gms = gts->ts_gms;
 	int ret;
 
-	gts->ustats.upm_tlbdropin++;
+	gts->ustats.upm_tlbmiss++;
 	while (1) {
 		wait_event(gms->ms_wait_queue,
 			   atomic_read(&gms->ms_range_active) == 0);
 		prefetchw(tfh);	/* Helps on hdw, required for emulator */
-		ret = gru_try_dropin(gts, tfh, cb);
+		ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb);
 		if (ret <= 0)
 			return ret;
 		STAT(call_os_wait_queue);
@@ -538,52 +646,41 @@ int gru_handle_user_call_os(unsigned long cb)
 {
 	struct gru_tlb_fault_handle *tfh;
 	struct gru_thread_state *gts;
-	unsigned long __user *cbp;
+	void *cbk;
 	int ucbnum, cbrnum, ret = -EINVAL;
 
 	STAT(call_os);
-	gru_dbg(grudev, "address 0x%lx\n", cb);
 
 	/* sanity check the cb pointer */
 	ucbnum = get_cb_number((void *)cb);
 	if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
 		return -EINVAL;
-	cbp = (unsigned long *)cb;
 
 	gts = gru_find_lock_gts(cb);
 	if (!gts)
 		return -EINVAL;
+	gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
 
 	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
 		goto exit;
 
-	/*
-	 * If force_unload is set, the UPM TLB fault is phony. The task
-	 * has migrated to another node and the GSEG must be moved. Just
-	 * unload the context. The task will page fault and assign a new
-	 * context.
-	 */
-	if (gts->ts_tgid_owner == current->tgid && gts->ts_blade >= 0 &&
-				gts->ts_blade != uv_numa_blade_id()) {
-		STAT(call_os_offnode_reference);
-		gts->ts_force_unload = 1;
-	}
+	gru_check_context_placement(gts);
 
 	/*
 	 * CCH may contain stale data if ts_force_cch_reload is set.
 	 */
 	if (gts->ts_gru && gts->ts_force_cch_reload) {
 		gts->ts_force_cch_reload = 0;
-		gru_update_cch(gts, 0);
+		gru_update_cch(gts);
 	}
 
 	ret = -EAGAIN;
 	cbrnum = thread_cbr_number(gts, ucbnum);
-	if (gts->ts_force_unload) {
-		gru_unload_context(gts, 1);
-	} else if (gts->ts_gru) {
+	if (gts->ts_gru) {
 		tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
-		ret = gru_user_dropin(gts, tfh, cbp);
+		cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr,
+				gts->ts_ctxnum, ucbnum);
+		ret = gru_user_dropin(gts, tfh, cbk);
 	}
 exit:
 	gru_unlock_gts(gts);
@@ -605,11 +702,11 @@ int gru_get_exception_detail(unsigned long arg)
 	if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
 		return -EFAULT;
 
-	gru_dbg(grudev, "address 0x%lx\n", excdet.cb);
 	gts = gru_find_lock_gts(excdet.cb);
 	if (!gts)
 		return -EINVAL;
 
+	gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
 	ucbnum = get_cb_number((void *)excdet.cb);
 	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
 		ret = -EINVAL;
@@ -617,6 +714,7 @@ int gru_get_exception_detail(unsigned long arg)
 		cbrnum = thread_cbr_number(gts, ucbnum);
 		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
 		gru_flush_cache(cbe);	/* CBE not coherent */
+		sync_core();		/* make sure we are have current data */
 		excdet.opc = cbe->opccpy;
 		excdet.exopc = cbe->exopccpy;
 		excdet.ecause = cbe->ecause;
@@ -624,7 +722,7 @@ int gru_get_exception_detail(unsigned long arg)
 		excdet.exceptdet1 = cbe->idef3upd;
 		excdet.cbrstate = cbe->cbrstate;
 		excdet.cbrexecstatus = cbe->cbrexecstatus;
-		gru_flush_cache(cbe);
+		gru_flush_cache_cbe(cbe);
 		ret = 0;
 	} else {
 		ret = -EAGAIN;
@@ -733,6 +831,11 @@ long gru_get_gseg_statistics(unsigned long arg)
 	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
 		return -EFAULT;
 
+	/*
+	 * The library creates arrays of contexts for threaded programs.
+	 * If no gts exists in the array, the context has never been used & all
+	 * statistics are implicitly 0.
+	 */
 	gts = gru_find_lock_gts(req.gseg);
 	if (gts) {
 		memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats));
@@ -762,11 +865,25 @@ int gru_set_context_option(unsigned long arg)
 		return -EFAULT;
 	gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
 
-	gts = gru_alloc_locked_gts(req.gseg);
-	if (!gts)
-		return -EINVAL;
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts) {
+		gts = gru_alloc_locked_gts(req.gseg);
+		if (IS_ERR(gts))
+			return PTR_ERR(gts);
+	}
 
 	switch (req.op) {
+	case sco_blade_chiplet:
+		/* Select blade/chiplet for GRU context */
+		if (req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || !gru_base[req.val1] ||
+		    req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB) {
+			ret = -EINVAL;
+		} else {
+			gts->ts_user_blade_id = req.val1;
+			gts->ts_user_chiplet_id = req.val0;
+			gru_check_context_placement(gts);
+		}
+		break;
 	case sco_gseg_owner:
 		/* Register the current task as the GSEG owner */
 		gts->ts_tgid_owner = current->tgid;
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 41c8fe2a928c..cb3b4d228475 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -35,6 +35,9 @@
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/uaccess.h>
+#ifdef CONFIG_X86_64
+#include <asm/uv/uv_irq.h>
+#endif
 #include <asm/uv/uv.h>
 #include "gru.h"
 #include "grulib.h"
@@ -92,7 +95,7 @@ static void gru_vma_close(struct vm_area_struct *vma)
 /*
  * gru_file_mmap
  *
- * Called when mmaping the device.  Initializes the vma with a fault handler
+ * Called when mmapping the device.  Initializes the vma with a fault handler
  * and private data structure necessary to allocate, track, and free the
  * underlying pages.
  */
@@ -130,7 +133,6 @@ static int gru_create_new_context(unsigned long arg)
 	struct gru_vma_data *vdata;
 	int ret = -EINVAL;
 
-
 	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
 		return -EFAULT;
 
@@ -150,6 +152,7 @@ static int gru_create_new_context(unsigned long arg)
 		vdata->vd_dsr_au_count =
 			GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
 		vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
+		vdata->vd_tlb_preload_count = req.tlb_preload_count;
 		ret = 0;
 	}
 	up_write(&current->mm->mmap_sem);
@@ -190,7 +193,7 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
 {
 	int err = -EBADRQC;
 
-	gru_dbg(grudev, "file %p\n", file);
+	gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg);
 
 	switch (req) {
 	case GRU_CREATE_CONTEXT:
@@ -232,23 +235,24 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
  * system.
  */
 static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
-			     void *vaddr, int nid, int bid, int grunum)
+			     void *vaddr, int blade_id, int chiplet_id)
 {
 	spin_lock_init(&gru->gs_lock);
 	spin_lock_init(&gru->gs_asid_lock);
 	gru->gs_gru_base_paddr = paddr;
 	gru->gs_gru_base_vaddr = vaddr;
-	gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum;
-	gru->gs_blade = gru_base[bid];
-	gru->gs_blade_id = bid;
+	gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id;
+	gru->gs_blade = gru_base[blade_id];
+	gru->gs_blade_id = blade_id;
+	gru->gs_chiplet_id = chiplet_id;
 	gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
 	gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
 	gru->gs_asid_limit = MAX_ASID;
 	gru_tgh_flush_init(gru);
 	if (gru->gs_gid >= gru_max_gids)
 		gru_max_gids = gru->gs_gid + 1;
-	gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n",
-		bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
+	gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n",
+		blade_id, gru->gs_gid, gru->gs_gru_base_vaddr,
 		gru->gs_gru_base_paddr);
 }
 
@@ -264,12 +268,10 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) | |||
264 | 268 | ||
265 | max_user_cbrs = GRU_NUM_CB; | 269 | max_user_cbrs = GRU_NUM_CB; |
266 | max_user_dsr_bytes = GRU_NUM_DSR_BYTES; | 270 | max_user_dsr_bytes = GRU_NUM_DSR_BYTES; |
267 | for_each_online_node(nid) { | 271 | for_each_possible_blade(bid) { |
268 | bid = uv_node_to_blade_id(nid); | 272 | pnode = uv_blade_to_pnode(bid); |
269 | pnode = uv_node_to_pnode(nid); | 273 | nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */ |
270 | if (bid < 0 || gru_base[bid]) | 274 | page = alloc_pages_node(nid, GFP_KERNEL, order); |
271 | continue; | ||
272 | page = alloc_pages_exact_node(nid, GFP_KERNEL, order); | ||
273 | if (!page) | 275 | if (!page) |
274 | goto fail; | 276 | goto fail; |
275 | gru_base[bid] = page_address(page); | 277 | gru_base[bid] = page_address(page); |
@@ -285,7 +287,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) | |||
285 | chip++, gru++) { | 287 | chip++, gru++) { |
286 | paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); | 288 | paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); |
287 | vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); | 289 | vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); |
288 | gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip); | 290 | gru_init_chiplet(gru, paddr, vaddr, bid, chip); |
289 | n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; | 291 | n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; |
290 | cbrs = max(cbrs, n); | 292 | cbrs = max(cbrs, n); |
291 | n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; | 293 | n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; |
@@ -298,39 +300,215 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) | |||
298 | return 0; | 300 | return 0; |
299 | 301 | ||
300 | fail: | 302 | fail: |
301 | for (nid--; nid >= 0; nid--) | 303 | for (bid--; bid >= 0; bid--) |
302 | free_pages((unsigned long)gru_base[nid], order); | 304 | free_pages((unsigned long)gru_base[bid], order); |
303 | return -ENOMEM; | 305 | return -ENOMEM; |
304 | } | 306 | } |
305 | 307 | ||
306 | #ifdef CONFIG_IA64 | 308 | static void gru_free_tables(void) |
309 | { | ||
310 | int bid; | ||
311 | int order = get_order(sizeof(struct gru_state) * | ||
312 | GRU_CHIPLETS_PER_BLADE); | ||
307 | 313 | ||
308 | static int get_base_irq(void) | 314 | for (bid = 0; bid < GRU_MAX_BLADES; bid++) |
315 | free_pages((unsigned long)gru_base[bid], order); | ||
316 | } | ||
317 | |||
318 | static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) | ||
309 | { | 319 | { |
310 | return IRQ_GRU; | 320 | unsigned long mmr = 0; |
321 | int core; | ||
322 | |||
323 | /* | ||
324 | * We target the cores of a blade and not the hyperthreads themselves. | ||
325 | * There is a max of 8 cores per socket and 2 sockets per blade, | ||
326 | * making for a max total of 16 cores (i.e., 16 CPUs without | ||
327 | * hyperthreading and 32 CPUs with hyperthreading). | ||
328 | */ | ||
329 | core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); | ||
330 | if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu)) | ||
331 | return 0; | ||
332 | |||
333 | if (chiplet == 0) { | ||
334 | mmr = UVH_GR0_TLB_INT0_CONFIG + | ||
335 | core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG); | ||
336 | } else if (chiplet == 1) { | ||
337 | mmr = UVH_GR1_TLB_INT0_CONFIG + | ||
338 | core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG); | ||
339 | } else { | ||
340 | BUG(); | ||
341 | } | ||
342 | |||
343 | *corep = core; | ||
344 | return mmr; | ||
311 | } | 345 | } |
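For illustration only: a minimal standalone sketch of the core-to-MMR selection done by gru_chiplet_cpu_to_mmr() above. The register offsets and limits below are hypothetical stand-ins (the real values come from the UV MMR headers); the point is just that each non-hyperthread core gets its own TLB interrupt-config register, found by striding from the chiplet's INT0 register.

#include <stdio.h>

/* Hypothetical stand-ins for the UV constants used by the driver. */
#define UV_MAX_INT_CORES        8
#define GRU_NUM_TFM             16
#define GR_TLB_INT0_CONFIG      0x61b00UL   /* made-up offsets */
#define GR_TLB_INT1_CONFIG      0x61b40UL

/* Select the per-core TLB interrupt MMR for one chiplet, as above. */
static unsigned long core_to_mmr(int socket, int core_in_socket, int ht)
{
        int core = core_in_socket + UV_MAX_INT_CORES * socket;

        if (core >= GRU_NUM_TFM || ht)  /* only one HT sibling per core */
                return 0;
        return GR_TLB_INT0_CONFIG +
               core * (GR_TLB_INT1_CONFIG - GR_TLB_INT0_CONFIG);
}

int main(void)
{
        /* socket 1, core 3 -> core index 11 -> INT0 + 11 * stride */
        printf("mmr = 0x%lx\n", core_to_mmr(1, 3, 0));
        return 0;
}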
312 | 346 | ||
313 | #elif defined CONFIG_X86_64 | 347 | #ifdef CONFIG_IA64 |
314 | 348 | ||
315 | static void noop(unsigned int irq) | 349 | static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; |
350 | |||
351 | static void gru_noop(unsigned int irq) | ||
316 | { | 352 | { |
317 | } | 353 | } |
318 | 354 | ||
319 | static struct irq_chip gru_chip = { | 355 | static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = { |
320 | .name = "gru", | 356 | [0 ... GRU_CHIPLETS_PER_BLADE - 1] { |
321 | .mask = noop, | 357 | .mask = gru_noop, |
322 | .unmask = noop, | 358 | .unmask = gru_noop, |
323 | .ack = noop, | 359 | .ack = gru_noop |
360 | } | ||
324 | }; | 361 | }; |
325 | 362 | ||
326 | static int get_base_irq(void) | 363 | static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, |
364 | irq_handler_t irq_handler, int cpu, int blade) | ||
365 | { | ||
366 | unsigned long mmr; | ||
367 | int irq = IRQ_GRU + chiplet; | ||
368 | int ret, core; | ||
369 | |||
370 | mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); | ||
371 | if (mmr == 0) | ||
372 | return 0; | ||
373 | |||
374 | if (gru_irq_count[chiplet] == 0) { | ||
375 | gru_chip[chiplet].name = irq_name; | ||
376 | ret = set_irq_chip(irq, &gru_chip[chiplet]); | ||
377 | if (ret) { | ||
378 | printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n", | ||
379 | GRU_DRIVER_ID_STR, -ret); | ||
380 | return ret; | ||
381 | } | ||
382 | |||
383 | ret = request_irq(irq, irq_handler, 0, irq_name, NULL); | ||
384 | if (ret) { | ||
385 | printk(KERN_ERR "%s: request_irq failed, errno=%d\n", | ||
386 | GRU_DRIVER_ID_STR, -ret); | ||
387 | return ret; | ||
388 | } | ||
389 | } | ||
390 | gru_irq_count[chiplet]++; | ||
391 | |||
392 | return 0; | ||
393 | } | ||
394 | |||
395 | static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) | ||
396 | { | ||
397 | unsigned long mmr; | ||
398 | int core, irq = IRQ_GRU + chiplet; | ||
399 | |||
400 | if (gru_irq_count[chiplet] == 0) | ||
401 | return; | ||
402 | |||
403 | mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); | ||
404 | if (mmr == 0) | ||
405 | return; | ||
406 | |||
407 | if (--gru_irq_count[chiplet] == 0) | ||
408 | free_irq(irq, NULL); | ||
409 | } | ||
410 | |||
411 | #elif defined CONFIG_X86_64 | ||
412 | |||
413 | static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, | ||
414 | irq_handler_t irq_handler, int cpu, int blade) | ||
415 | { | ||
416 | unsigned long mmr; | ||
417 | int irq, core; | ||
418 | int ret; | ||
419 | |||
420 | mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); | ||
421 | if (mmr == 0) | ||
422 | return 0; | ||
423 | |||
424 | irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU); | ||
425 | if (irq < 0) { | ||
426 | printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n", | ||
427 | GRU_DRIVER_ID_STR, -irq); | ||
428 | return irq; | ||
429 | } | ||
430 | |||
431 | ret = request_irq(irq, irq_handler, 0, irq_name, NULL); | ||
432 | if (ret) { | ||
433 | uv_teardown_irq(irq); | ||
434 | printk(KERN_ERR "%s: request_irq failed, errno=%d\n", | ||
435 | GRU_DRIVER_ID_STR, -ret); | ||
436 | return ret; | ||
437 | } | ||
438 | gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq; | ||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) | ||
327 | { | 443 | { |
328 | set_irq_chip(IRQ_GRU, &gru_chip); | 444 | int irq, core; |
329 | set_irq_chip(IRQ_GRU + 1, &gru_chip); | 445 | unsigned long mmr; |
330 | return IRQ_GRU; | 446 | |
447 | mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); | ||
448 | if (mmr) { | ||
449 | irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core]; | ||
450 | if (irq) { | ||
451 | free_irq(irq, NULL); | ||
452 | uv_teardown_irq(irq); | ||
453 | } | ||
454 | } | ||
331 | } | 455 | } |
456 | |||
332 | #endif | 457 | #endif |
333 | 458 | ||
459 | static void gru_teardown_tlb_irqs(void) | ||
460 | { | ||
461 | int blade; | ||
462 | int cpu; | ||
463 | |||
464 | for_each_online_cpu(cpu) { | ||
465 | blade = uv_cpu_to_blade_id(cpu); | ||
466 | gru_chiplet_teardown_tlb_irq(0, cpu, blade); | ||
467 | gru_chiplet_teardown_tlb_irq(1, cpu, blade); | ||
468 | } | ||
469 | for_each_possible_blade(blade) { | ||
470 | if (uv_blade_nr_possible_cpus(blade)) | ||
471 | continue; | ||
472 | gru_chiplet_teardown_tlb_irq(0, 0, blade); | ||
473 | gru_chiplet_teardown_tlb_irq(1, 0, blade); | ||
474 | } | ||
475 | } | ||
476 | |||
477 | static int gru_setup_tlb_irqs(void) | ||
478 | { | ||
479 | int blade; | ||
480 | int cpu; | ||
481 | int ret; | ||
482 | |||
483 | for_each_online_cpu(cpu) { | ||
484 | blade = uv_cpu_to_blade_id(cpu); | ||
485 | ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade); | ||
486 | if (ret != 0) | ||
487 | goto exit1; | ||
488 | |||
489 | ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade); | ||
490 | if (ret != 0) | ||
491 | goto exit1; | ||
492 | } | ||
493 | for_each_possible_blade(blade) { | ||
494 | if (uv_blade_nr_possible_cpus(blade)) | ||
495 | continue; | ||
496 | ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade); | ||
497 | if (ret != 0) | ||
498 | goto exit1; | ||
499 | |||
500 | ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade); | ||
501 | if (ret != 0) | ||
502 | goto exit1; | ||
503 | } | ||
504 | |||
505 | return 0; | ||
506 | |||
507 | exit1: | ||
508 | gru_teardown_tlb_irqs(); | ||
509 | return ret; | ||
510 | } | ||
511 | |||
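A side note on the IA64 path above: because all CPUs share one IRQ per chiplet (IRQ_GRU + chiplet), the setup and teardown routines reference-count their callers, so request_irq() runs only for the first caller and free_irq() only for the last. A compilable sketch of just that pattern, with the kernel IRQ API replaced by hypothetical print stubs:

#include <stdio.h>

#define GRU_CHIPLETS_PER_BLADE 2

static int gru_irq_count[GRU_CHIPLETS_PER_BLADE];

/* Stubs standing in for request_irq()/free_irq(). */
static void request_irq_stub(int irq) { printf("request irq %d\n", irq); }
static void free_irq_stub(int irq)    { printf("free irq %d\n", irq); }

static void setup_tlb_irq(int chiplet)
{
        if (gru_irq_count[chiplet]++ == 0)      /* first user requests it */
                request_irq_stub(chiplet);
}

static void teardown_tlb_irq(int chiplet)
{
        if (gru_irq_count[chiplet] == 0)
                return;
        if (--gru_irq_count[chiplet] == 0)      /* last user frees it */
                free_irq_stub(chiplet);
}

int main(void)
{
        setup_tlb_irq(0);
        setup_tlb_irq(0);       /* no second request */
        teardown_tlb_irq(0);    /* still in use */
        teardown_tlb_irq(0);    /* now freed */
        return 0;
}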
334 | /* | 512 | /* |
335 | * gru_init | 513 | * gru_init |
336 | * | 514 | * |
@@ -338,8 +516,7 @@ static int get_base_irq(void) | |||
338 | */ | 516 | */ |
339 | static int __init gru_init(void) | 517 | static int __init gru_init(void) |
340 | { | 518 | { |
341 | int ret, irq, chip; | 519 | int ret; |
342 | char id[10]; | ||
343 | 520 | ||
344 | if (!is_uv_system()) | 521 | if (!is_uv_system()) |
345 | return 0; | 522 | return 0; |
@@ -354,41 +531,29 @@ static int __init gru_init(void) | |||
354 | gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; | 531 | gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; |
355 | printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", | 532 | printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", |
356 | gru_start_paddr, gru_end_paddr); | 533 | gru_start_paddr, gru_end_paddr); |
357 | irq = get_base_irq(); | ||
358 | for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) { | ||
359 | ret = request_irq(irq + chip, gru_intr, 0, id, NULL); | ||
360 | /* TODO: fix irq handling on x86. For now ignore failure because | ||
361 | * interrupts are not required & not yet fully supported */ | ||
362 | if (ret) { | ||
363 | printk(KERN_WARNING | ||
364 | "!!!WARNING: GRU ignoring request failure!!!\n"); | ||
365 | ret = 0; | ||
366 | } | ||
367 | if (ret) { | ||
368 | printk(KERN_ERR "%s: request_irq failed\n", | ||
369 | GRU_DRIVER_ID_STR); | ||
370 | goto exit1; | ||
371 | } | ||
372 | } | ||
373 | |||
374 | ret = misc_register(&gru_miscdev); | 534 | ret = misc_register(&gru_miscdev); |
375 | if (ret) { | 535 | if (ret) { |
376 | printk(KERN_ERR "%s: misc_register failed\n", | 536 | printk(KERN_ERR "%s: misc_register failed\n", |
377 | GRU_DRIVER_ID_STR); | 537 | GRU_DRIVER_ID_STR); |
378 | goto exit1; | 538 | goto exit0; |
379 | } | 539 | } |
380 | 540 | ||
381 | ret = gru_proc_init(); | 541 | ret = gru_proc_init(); |
382 | if (ret) { | 542 | if (ret) { |
383 | printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); | 543 | printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); |
384 | goto exit2; | 544 | goto exit1; |
385 | } | 545 | } |
386 | 546 | ||
387 | ret = gru_init_tables(gru_start_paddr, gru_start_vaddr); | 547 | ret = gru_init_tables(gru_start_paddr, gru_start_vaddr); |
388 | if (ret) { | 548 | if (ret) { |
389 | printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); | 549 | printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); |
390 | goto exit3; | 550 | goto exit2; |
391 | } | 551 | } |
552 | |||
553 | ret = gru_setup_tlb_irqs(); | ||
554 | if (ret != 0) | ||
555 | goto exit3; | ||
556 | |||
392 | gru_kservices_init(); | 557 | gru_kservices_init(); |
393 | 558 | ||
394 | printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, | 559 | printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, |
@@ -396,31 +561,24 @@ static int __init gru_init(void) | |||
396 | return 0; | 561 | return 0; |
397 | 562 | ||
398 | exit3: | 563 | exit3: |
399 | gru_proc_exit(); | 564 | gru_free_tables(); |
400 | exit2: | 565 | exit2: |
401 | misc_deregister(&gru_miscdev); | 566 | gru_proc_exit(); |
402 | exit1: | 567 | exit1: |
403 | for (--chip; chip >= 0; chip--) | 568 | misc_deregister(&gru_miscdev); |
404 | free_irq(irq + chip, NULL); | 569 | exit0: |
405 | return ret; | 570 | return ret; |
406 | 571 | ||
407 | } | 572 | } |
408 | 573 | ||
409 | static void __exit gru_exit(void) | 574 | static void __exit gru_exit(void) |
410 | { | 575 | { |
411 | int i, bid; | ||
412 | int order = get_order(sizeof(struct gru_state) * | ||
413 | GRU_CHIPLETS_PER_BLADE); | ||
414 | |||
415 | if (!is_uv_system()) | 576 | if (!is_uv_system()) |
416 | return; | 577 | return; |
417 | 578 | ||
418 | for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) | 579 | gru_teardown_tlb_irqs(); |
419 | free_irq(IRQ_GRU + i, NULL); | ||
420 | gru_kservices_exit(); | 580 | gru_kservices_exit(); |
421 | for (bid = 0; bid < GRU_MAX_BLADES; bid++) | 581 | gru_free_tables(); |
422 | free_pages((unsigned long)gru_base[bid], order); | ||
423 | |||
424 | misc_deregister(&gru_miscdev); | 582 | misc_deregister(&gru_miscdev); |
425 | gru_proc_exit(); | 583 | gru_proc_exit(); |
426 | } | 584 | } |
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c index 37e7cfc53b9c..2f30badc6ffd 100644 --- a/drivers/misc/sgi-gru/gruhandles.c +++ b/drivers/misc/sgi-gru/gruhandles.c | |||
@@ -27,9 +27,11 @@ | |||
27 | #ifdef CONFIG_IA64 | 27 | #ifdef CONFIG_IA64 |
28 | #include <asm/processor.h> | 28 | #include <asm/processor.h> |
29 | #define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) | 29 | #define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) |
30 | #define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) | ||
30 | #else | 31 | #else |
31 | #include <asm/tsc.h> | 32 | #include <asm/tsc.h> |
32 | #define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 33 | #define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
34 | #define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) | ||
33 | #endif | 35 | #endif |
34 | 36 | ||
35 | /* Extract the status field from a kernel handle */ | 37 | /* Extract the status field from a kernel handle */ |
@@ -39,21 +41,39 @@ struct mcs_op_statistic mcs_op_statistics[mcsop_last]; | |||
39 | 41 | ||
40 | static void update_mcs_stats(enum mcs_op op, unsigned long clks) | 42 | static void update_mcs_stats(enum mcs_op op, unsigned long clks) |
41 | { | 43 | { |
44 | unsigned long nsec; | ||
45 | |||
46 | nsec = CLKS2NSEC(clks); | ||
42 | atomic_long_inc(&mcs_op_statistics[op].count); | 47 | atomic_long_inc(&mcs_op_statistics[op].count); |
43 | atomic_long_add(clks, &mcs_op_statistics[op].total); | 48 | atomic_long_add(nsec, &mcs_op_statistics[op].total); |
44 | if (mcs_op_statistics[op].max < clks) | 49 | if (mcs_op_statistics[op].max < nsec) |
45 | mcs_op_statistics[op].max = clks; | 50 | mcs_op_statistics[op].max = nsec; |
46 | } | 51 | } |
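The new CLKS2NSEC() macros convert raw cycle counts to nanoseconds before they are accumulated, so the mcs_op statistics carry the same unit on IA64 and x86_64. A worked example of the x86_64 form, assuming a hypothetical 2.5 GHz TSC (tsc_khz = 2500000):

#include <stdio.h>

/* x86_64 form of the macro above: ns = cycles * 10^6 / tsc_khz */
#define CLKS2NSEC(c, khz)       ((c) * 1000000UL / (khz))

int main(void)
{
        unsigned long tsc_khz = 2500000;        /* hypothetical 2.5 GHz TSC */
        unsigned long clks = 5000;

        /* 5000 * 10^6 / 2.5*10^6 = 2000 ns, i.e. 2 us at 2.5 GHz */
        printf("%lu clks -> %lu ns\n", clks, CLKS2NSEC(clks, tsc_khz));
        return 0;
}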
47 | 52 | ||
48 | static void start_instruction(void *h) | 53 | static void start_instruction(void *h) |
49 | { | 54 | { |
50 | unsigned long *w0 = h; | 55 | unsigned long *w0 = h; |
51 | 56 | ||
52 | wmb(); /* setting CMD bit must be last */ | 57 | wmb(); /* setting CMD/STATUS bits must be last */ |
53 | *w0 = *w0 | 1; | 58 | *w0 = *w0 | 0x20001; |
54 | gru_flush_cache(h); | 59 | gru_flush_cache(h); |
55 | } | 60 | } |
56 | 61 | ||
62 | static void report_instruction_timeout(void *h) | ||
63 | { | ||
64 | unsigned long goff = GSEGPOFF((unsigned long)h); | ||
65 | char *id = "???"; | ||
66 | |||
67 | if (TYPE_IS(CCH, goff)) | ||
68 | id = "CCH"; | ||
69 | else if (TYPE_IS(TGH, goff)) | ||
70 | id = "TGH"; | ||
71 | else if (TYPE_IS(TFH, goff)) | ||
72 | id = "TFH"; | ||
73 | |||
74 | panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id); | ||
75 | } | ||
76 | |||
57 | static int wait_instruction_complete(void *h, enum mcs_op opc) | 77 | static int wait_instruction_complete(void *h, enum mcs_op opc) |
58 | { | 78 | { |
59 | int status; | 79 | int status; |
@@ -64,9 +84,10 @@ static int wait_instruction_complete(void *h, enum mcs_op opc) | |||
64 | status = GET_MSEG_HANDLE_STATUS(h); | 84 | status = GET_MSEG_HANDLE_STATUS(h); |
65 | if (status != CCHSTATUS_ACTIVE) | 85 | if (status != CCHSTATUS_ACTIVE) |
66 | break; | 86 | break; |
67 | if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) | 87 | if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) { |
68 | panic("GRU %p is malfunctioning: start %ld, end %ld\n", | 88 | report_instruction_timeout(h); |
69 | h, start_time, (unsigned long)get_cycles()); | 89 | start_time = get_cycles(); |
90 | } | ||
70 | } | 91 | } |
71 | if (gru_options & OPT_STATS) | 92 | if (gru_options & OPT_STATS) |
72 | update_mcs_stats(opc, get_cycles() - start_time); | 93 | update_mcs_stats(opc, get_cycles() - start_time); |
@@ -75,9 +96,18 @@ static int wait_instruction_complete(void *h, enum mcs_op opc) | |||
75 | 96 | ||
76 | int cch_allocate(struct gru_context_configuration_handle *cch) | 97 | int cch_allocate(struct gru_context_configuration_handle *cch) |
77 | { | 98 | { |
99 | int ret; | ||
100 | |||
78 | cch->opc = CCHOP_ALLOCATE; | 101 | cch->opc = CCHOP_ALLOCATE; |
79 | start_instruction(cch); | 102 | start_instruction(cch); |
80 | return wait_instruction_complete(cch, cchop_allocate); | 103 | ret = wait_instruction_complete(cch, cchop_allocate); |
104 | |||
105 | /* | ||
106 | * Stop speculation into the GSEG being mapped by the previous ALLOCATE. | ||
107 | * The GSEG memory does not exist until the ALLOCATE completes. | ||
108 | */ | ||
109 | sync_core(); | ||
110 | return ret; | ||
81 | } | 111 | } |
82 | 112 | ||
83 | int cch_start(struct gru_context_configuration_handle *cch) | 113 | int cch_start(struct gru_context_configuration_handle *cch) |
@@ -96,9 +126,18 @@ int cch_interrupt(struct gru_context_configuration_handle *cch) | |||
96 | 126 | ||
97 | int cch_deallocate(struct gru_context_configuration_handle *cch) | 127 | int cch_deallocate(struct gru_context_configuration_handle *cch) |
98 | { | 128 | { |
129 | int ret; | ||
130 | |||
99 | cch->opc = CCHOP_DEALLOCATE; | 131 | cch->opc = CCHOP_DEALLOCATE; |
100 | start_instruction(cch); | 132 | start_instruction(cch); |
101 | return wait_instruction_complete(cch, cchop_deallocate); | 133 | ret = wait_instruction_complete(cch, cchop_deallocate); |
134 | |||
135 | /* | ||
136 | * Stop speculation into the GSEG being unmapped by the previous | ||
137 | * DEALLOCATE. | ||
138 | */ | ||
139 | sync_core(); | ||
140 | return ret; | ||
102 | } | 141 | } |
103 | 142 | ||
104 | int cch_interrupt_sync(struct gru_context_configuration_handle | 143 | int cch_interrupt_sync(struct gru_context_configuration_handle |
@@ -126,17 +165,20 @@ int tgh_invalidate(struct gru_tlb_global_handle *tgh, | |||
126 | return wait_instruction_complete(tgh, tghop_invalidate); | 165 | return wait_instruction_complete(tgh, tghop_invalidate); |
127 | } | 166 | } |
128 | 167 | ||
129 | void tfh_write_only(struct gru_tlb_fault_handle *tfh, | 168 | int tfh_write_only(struct gru_tlb_fault_handle *tfh, |
130 | unsigned long pfn, unsigned long vaddr, | 169 | unsigned long paddr, int gaa, |
131 | int asid, int dirty, int pagesize) | 170 | unsigned long vaddr, int asid, int dirty, |
171 | int pagesize) | ||
132 | { | 172 | { |
133 | tfh->fillasid = asid; | 173 | tfh->fillasid = asid; |
134 | tfh->fillvaddr = vaddr; | 174 | tfh->fillvaddr = vaddr; |
135 | tfh->pfn = pfn; | 175 | tfh->pfn = paddr >> GRU_PADDR_SHIFT; |
176 | tfh->gaa = gaa; | ||
136 | tfh->dirty = dirty; | 177 | tfh->dirty = dirty; |
137 | tfh->pagesize = pagesize; | 178 | tfh->pagesize = pagesize; |
138 | tfh->opc = TFHOP_WRITE_ONLY; | 179 | tfh->opc = TFHOP_WRITE_ONLY; |
139 | start_instruction(tfh); | 180 | start_instruction(tfh); |
181 | return wait_instruction_complete(tfh, tfhop_write_only); | ||
140 | } | 182 | } |
141 | 183 | ||
142 | void tfh_write_restart(struct gru_tlb_fault_handle *tfh, | 184 | void tfh_write_restart(struct gru_tlb_fault_handle *tfh, |
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h index f44112242d00..3f998b924d8f 100644 --- a/drivers/misc/sgi-gru/gruhandles.h +++ b/drivers/misc/sgi-gru/gruhandles.h | |||
@@ -91,6 +91,12 @@ | |||
91 | /* Convert an arbitrary handle address to the beginning of the GRU segment */ | 91 | /* Convert an arbitrary handle address to the beginning of the GRU segment */ |
92 | #define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) | 92 | #define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) |
93 | 93 | ||
94 | /* Test a valid handle address to determine the type */ | ||
95 | #define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \ | ||
96 | GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \ | ||
97 | (((h) & (GRU_HANDLE_STRIDE - 1)) == 0)) | ||
98 | |||
99 | |||
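TYPE_IS() classifies an arbitrary offset within the GRU segment by checking that it falls inside that handle type's array and is aligned to the handle stride; report_instruction_timeout() in gruhandles.c uses it to name the failing handle. A standalone sketch with hypothetical base/count values (not the real GRU layout):

#include <stdio.h>

/* Hypothetical layout constants, for illustration only. */
#define GRU_HANDLE_STRIDE       512
#define GRU_TFH_BASE            0x40000UL
#define GRU_NUM_TFH             128

#define TYPE_IS(hn, h)  ((h) >= GRU_##hn##_BASE && (h) < \
        GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \
        (((h) & (GRU_HANDLE_STRIDE - 1)) == 0))

int main(void)
{
        printf("%d\n", TYPE_IS(TFH, 0x40000UL + 3 * 512));  /* 1: 4th TFH */
        printf("%d\n", TYPE_IS(TFH, 0x40000UL + 100));      /* 0: misaligned */
        return 0;
}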
94 | /* General addressing macros. */ | 100 | /* General addressing macros. */ |
95 | static inline void *get_gseg_base_address(void *base, int ctxnum) | 101 | static inline void *get_gseg_base_address(void *base, int ctxnum) |
96 | { | 102 | { |
@@ -158,6 +164,16 @@ static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet) | |||
158 | return vaddr + GRU_SIZE * (2 * pnode + chiplet); | 164 | return vaddr + GRU_SIZE * (2 * pnode + chiplet); |
159 | } | 165 | } |
160 | 166 | ||
167 | static inline struct gru_control_block_extended *gru_tfh_to_cbe( | ||
168 | struct gru_tlb_fault_handle *tfh) | ||
169 | { | ||
170 | unsigned long cbe; | ||
171 | |||
172 | cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE; | ||
173 | return (struct gru_control_block_extended*)cbe; | ||
174 | } | ||
175 | |||
176 | |||
161 | 177 | ||
162 | 178 | ||
163 | /* | 179 | /* |
@@ -236,6 +252,17 @@ enum gru_tgh_state { | |||
236 | TGHSTATE_RESTART_CTX, | 252 | TGHSTATE_RESTART_CTX, |
237 | }; | 253 | }; |
238 | 254 | ||
255 | enum gru_tgh_cause { | ||
256 | TGHCAUSE_RR_ECC, | ||
257 | TGHCAUSE_TLB_ECC, | ||
258 | TGHCAUSE_LRU_ECC, | ||
259 | TGHCAUSE_PS_ECC, | ||
260 | TGHCAUSE_MUL_ERR, | ||
261 | TGHCAUSE_DATA_ERR, | ||
262 | TGHCAUSE_SW_FORCE | ||
263 | }; | ||
264 | |||
265 | |||
239 | /* | 266 | /* |
240 | * TFH - TLB Fault Handle | 267 | * TFH - TLB Fault Handle |
241 | * Used for TLB dropins into the GRU TLB. | 268 | * Used for TLB dropins into the GRU TLB. |
@@ -440,6 +467,12 @@ struct gru_control_block_extended { | |||
440 | unsigned int cbrexecstatus:8; | 467 | unsigned int cbrexecstatus:8; |
441 | }; | 468 | }; |
442 | 469 | ||
470 | /* CBE fields for active BCOPY instructions */ | ||
471 | #define cbe_baddr0 idef1upd | ||
472 | #define cbe_baddr1 idef3upd | ||
473 | #define cbe_src_cl idef6cpy | ||
474 | #define cbe_nelemcur idef5upd | ||
475 | |||
443 | enum gru_cbr_state { | 476 | enum gru_cbr_state { |
444 | CBRSTATE_INACTIVE, | 477 | CBRSTATE_INACTIVE, |
445 | CBRSTATE_IDLE, | 478 | CBRSTATE_IDLE, |
@@ -487,8 +520,8 @@ int cch_interrupt_sync(struct gru_context_configuration_handle *cch); | |||
487 | int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr, | 520 | int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr, |
488 | unsigned long vaddrmask, int asid, int pagesize, int global, int n, | 521 | unsigned long vaddrmask, int asid, int pagesize, int global, int n, |
489 | unsigned short ctxbitmap); | 522 | unsigned short ctxbitmap); |
490 | void tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long pfn, | 523 | int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr, |
491 | unsigned long vaddr, int asid, int dirty, int pagesize); | 524 | int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); |
492 | void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, | 525 | void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, |
493 | int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); | 526 | int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); |
494 | void tfh_restart(struct gru_tlb_fault_handle *tfh); | 527 | void tfh_restart(struct gru_tlb_fault_handle *tfh); |
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c index 55eabfa85585..9b2062d17327 100644 --- a/drivers/misc/sgi-gru/grukdump.c +++ b/drivers/misc/sgi-gru/grukdump.c | |||
@@ -44,7 +44,8 @@ static int gru_user_copy_handle(void __user **dp, void *s) | |||
44 | 44 | ||
45 | static int gru_dump_context_data(void *grubase, | 45 | static int gru_dump_context_data(void *grubase, |
46 | struct gru_context_configuration_handle *cch, | 46 | struct gru_context_configuration_handle *cch, |
47 | void __user *ubuf, int ctxnum, int dsrcnt) | 47 | void __user *ubuf, int ctxnum, int dsrcnt, |
48 | int flush_cbrs) | ||
48 | { | 49 | { |
49 | void *cb, *cbe, *tfh, *gseg; | 50 | void *cb, *cbe, *tfh, *gseg; |
50 | int i, scr; | 51 | int i, scr; |
@@ -55,6 +56,8 @@ static int gru_dump_context_data(void *grubase, | |||
55 | tfh = grubase + GRU_TFH_BASE; | 56 | tfh = grubase + GRU_TFH_BASE; |
56 | 57 | ||
57 | for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) { | 58 | for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) { |
59 | if (flush_cbrs) | ||
60 | gru_flush_cache(cb); | ||
58 | if (gru_user_copy_handle(&ubuf, cb)) | 61 | if (gru_user_copy_handle(&ubuf, cb)) |
59 | goto fail; | 62 | goto fail; |
60 | if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE)) | 63 | if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE)) |
@@ -115,7 +118,7 @@ fail: | |||
115 | 118 | ||
116 | static int gru_dump_context(struct gru_state *gru, int ctxnum, | 119 | static int gru_dump_context(struct gru_state *gru, int ctxnum, |
117 | void __user *ubuf, void __user *ubufend, char data_opt, | 120 | void __user *ubuf, void __user *ubufend, char data_opt, |
118 | char lock_cch) | 121 | char lock_cch, char flush_cbrs) |
119 | { | 122 | { |
120 | struct gru_dump_context_header hdr; | 123 | struct gru_dump_context_header hdr; |
121 | struct gru_dump_context_header __user *uhdr = ubuf; | 124 | struct gru_dump_context_header __user *uhdr = ubuf; |
@@ -159,8 +162,7 @@ static int gru_dump_context(struct gru_state *gru, int ctxnum, | |||
159 | ret = -EFBIG; | 162 | ret = -EFBIG; |
160 | else | 163 | else |
161 | ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum, | 164 | ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum, |
162 | dsrcnt); | 165 | dsrcnt, flush_cbrs); |
163 | |||
164 | } | 166 | } |
165 | if (cch_locked) | 167 | if (cch_locked) |
166 | unlock_cch_handle(cch); | 168 | unlock_cch_handle(cch); |
@@ -215,7 +217,8 @@ int gru_dump_chiplet_request(unsigned long arg) | |||
215 | for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { | 217 | for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { |
216 | if (req.ctxnum == ctxnum || req.ctxnum < 0) { | 218 | if (req.ctxnum == ctxnum || req.ctxnum < 0) { |
217 | ret = gru_dump_context(gru, ctxnum, ubuf, ubufend, | 219 | ret = gru_dump_context(gru, ctxnum, ubuf, ubufend, |
218 | req.data_opt, req.lock_cch); | 220 | req.data_opt, req.lock_cch, |
221 | req.flush_cbrs); | ||
219 | if (ret < 0) | 222 | if (ret < 0) |
220 | goto fail; | 223 | goto fail; |
221 | ubuf += ret; | 224 | ubuf += ret; |
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 766e21e15574..34749ee88dfa 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/interrupt.h> | 31 | #include <linux/interrupt.h> |
32 | #include <linux/uaccess.h> | 32 | #include <linux/uaccess.h> |
33 | #include <linux/delay.h> | 33 | #include <linux/delay.h> |
34 | #include <asm/io_apic.h> | ||
34 | #include "gru.h" | 35 | #include "gru.h" |
35 | #include "grulib.h" | 36 | #include "grulib.h" |
36 | #include "grutables.h" | 37 | #include "grutables.h" |
@@ -97,9 +98,6 @@ | |||
97 | #define ASYNC_HAN_TO_BID(h) ((h) - 1) | 98 | #define ASYNC_HAN_TO_BID(h) ((h) - 1) |
98 | #define ASYNC_BID_TO_HAN(b) ((b) + 1) | 99 | #define ASYNC_BID_TO_HAN(b) ((b) + 1) |
99 | #define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] | 100 | #define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] |
100 | #define KCB_TO_GID(cb) ((cb - gru_start_vaddr) / \ | ||
101 | (GRU_SIZE * GRU_CHIPLETS_PER_BLADE)) | ||
102 | #define KCB_TO_BS(cb) gru_base[KCB_TO_GID(cb)] | ||
103 | 101 | ||
104 | #define GRU_NUM_KERNEL_CBR 1 | 102 | #define GRU_NUM_KERNEL_CBR 1 |
105 | #define GRU_NUM_KERNEL_DSR_BYTES 256 | 103 | #define GRU_NUM_KERNEL_DSR_BYTES 256 |
@@ -160,8 +158,10 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) | |||
160 | up_read(&bs->bs_kgts_sema); | 158 | up_read(&bs->bs_kgts_sema); |
161 | down_write(&bs->bs_kgts_sema); | 159 | down_write(&bs->bs_kgts_sema); |
162 | 160 | ||
163 | if (!bs->bs_kgts) | 161 | if (!bs->bs_kgts) { |
164 | bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0); | 162 | bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); |
163 | bs->bs_kgts->ts_user_blade_id = blade_id; | ||
164 | } | ||
165 | kgts = bs->bs_kgts; | 165 | kgts = bs->bs_kgts; |
166 | 166 | ||
167 | if (!kgts->ts_gru) { | 167 | if (!kgts->ts_gru) { |
@@ -172,9 +172,9 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) | |||
172 | kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( | 172 | kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( |
173 | GRU_NUM_KERNEL_DSR_BYTES * ncpus + | 173 | GRU_NUM_KERNEL_DSR_BYTES * ncpus + |
174 | bs->bs_async_dsr_bytes); | 174 | bs->bs_async_dsr_bytes); |
175 | while (!gru_assign_gru_context(kgts, blade_id)) { | 175 | while (!gru_assign_gru_context(kgts)) { |
176 | msleep(1); | 176 | msleep(1); |
177 | gru_steal_context(kgts, blade_id); | 177 | gru_steal_context(kgts); |
178 | } | 178 | } |
179 | gru_load_context(kgts); | 179 | gru_load_context(kgts); |
180 | gru = bs->bs_kgts->ts_gru; | 180 | gru = bs->bs_kgts->ts_gru; |
@@ -200,13 +200,15 @@ static int gru_free_kernel_contexts(void) | |||
200 | bs = gru_base[bid]; | 200 | bs = gru_base[bid]; |
201 | if (!bs) | 201 | if (!bs) |
202 | continue; | 202 | continue; |
203 | |||
204 | /* Ignore busy contexts. Don't want to block here. */ | ||
203 | if (down_write_trylock(&bs->bs_kgts_sema)) { | 205 | if (down_write_trylock(&bs->bs_kgts_sema)) { |
204 | kgts = bs->bs_kgts; | 206 | kgts = bs->bs_kgts; |
205 | if (kgts && kgts->ts_gru) | 207 | if (kgts && kgts->ts_gru) |
206 | gru_unload_context(kgts, 0); | 208 | gru_unload_context(kgts, 0); |
207 | kfree(kgts); | ||
208 | bs->bs_kgts = NULL; | 209 | bs->bs_kgts = NULL; |
209 | up_write(&bs->bs_kgts_sema); | 210 | up_write(&bs->bs_kgts_sema); |
211 | kfree(kgts); | ||
210 | } else { | 212 | } else { |
211 | ret++; | 213 | ret++; |
212 | } | 214 | } |
@@ -220,13 +222,21 @@ static int gru_free_kernel_contexts(void) | |||
220 | static struct gru_blade_state *gru_lock_kernel_context(int blade_id) | 222 | static struct gru_blade_state *gru_lock_kernel_context(int blade_id) |
221 | { | 223 | { |
222 | struct gru_blade_state *bs; | 224 | struct gru_blade_state *bs; |
225 | int bid; | ||
223 | 226 | ||
224 | STAT(lock_kernel_context); | 227 | STAT(lock_kernel_context); |
225 | bs = gru_base[blade_id]; | 228 | again: |
229 | bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; | ||
230 | bs = gru_base[bid]; | ||
226 | 231 | ||
232 | /* Handle the case where migration occurred while waiting for the sema */ | ||
227 | down_read(&bs->bs_kgts_sema); | 233 | down_read(&bs->bs_kgts_sema); |
234 | if (blade_id < 0 && bid != uv_numa_blade_id()) { | ||
235 | up_read(&bs->bs_kgts_sema); | ||
236 | goto again; | ||
237 | } | ||
228 | if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) | 238 | if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) |
229 | gru_load_kernel_context(bs, blade_id); | 239 | gru_load_kernel_context(bs, bid); |
230 | return bs; | 240 | return bs; |
231 | 241 | ||
232 | } | 242 | } |
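The again: loop above copes with the caller being migrated to a different blade while it sleeps on the kernel-context semaphore: the current blade is re-read after the lock is taken and the sequence restarts if it changed (only when the caller asked for the local blade, blade_id < 0). A toy standalone version of the pattern, with the UV helpers replaced by stubs that simulate one migration:

#include <stdio.h>

/* Stubs: the "current blade" changes once, as if the task migrated
 * while it was waiting for the blade lock. */
static int calls;
static int blade_id_now(void)     { return calls++ ? 1 : 0; }
static void blade_lock(int bid)   { printf("lock blade %d\n", bid); }
static void blade_unlock(int bid) { printf("unlock blade %d\n", bid); }

static int lock_local_blade(void)
{
        int bid;
again:
        bid = blade_id_now();
        blade_lock(bid);
        if (bid != blade_id_now()) {    /* migrated while waiting */
                blade_unlock(bid);
                goto again;
        }
        return bid;
}

int main(void)
{
        printf("locked blade %d\n", lock_local_blade());
        return 0;
}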
@@ -255,7 +265,7 @@ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) | |||
255 | 265 | ||
256 | BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); | 266 | BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); |
257 | preempt_disable(); | 267 | preempt_disable(); |
258 | bs = gru_lock_kernel_context(uv_numa_blade_id()); | 268 | bs = gru_lock_kernel_context(-1); |
259 | lcpu = uv_blade_processor_id(); | 269 | lcpu = uv_blade_processor_id(); |
260 | *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; | 270 | *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; |
261 | *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; | 271 | *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; |
@@ -384,13 +394,31 @@ int gru_get_cb_exception_detail(void *cb, | |||
384 | struct control_block_extended_exc_detail *excdet) | 394 | struct control_block_extended_exc_detail *excdet) |
385 | { | 395 | { |
386 | struct gru_control_block_extended *cbe; | 396 | struct gru_control_block_extended *cbe; |
387 | struct gru_blade_state *bs; | 397 | struct gru_thread_state *kgts = NULL; |
388 | int cbrnum; | 398 | unsigned long off; |
389 | 399 | int cbrnum, bid; | |
390 | bs = KCB_TO_BS(cb); | 400 | |
391 | cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb)); | 401 | /* |
402 | * Locate kgts for cb. This algorithm is SLOW but | ||
403 | * this function is rarely called (i.e., almost never). | ||
404 | * Performance does not matter. | ||
405 | */ | ||
406 | for_each_possible_blade(bid) { | ||
407 | if (!gru_base[bid]) | ||
408 | break; | ||
409 | kgts = gru_base[bid]->bs_kgts; | ||
410 | if (!kgts || !kgts->ts_gru) | ||
411 | continue; | ||
412 | off = cb - kgts->ts_gru->gs_gru_base_vaddr; | ||
413 | if (off < GRU_SIZE) | ||
414 | break; | ||
415 | kgts = NULL; | ||
416 | } | ||
417 | BUG_ON(!kgts); | ||
418 | cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); | ||
392 | cbe = get_cbe(GRUBASE(cb), cbrnum); | 419 | cbe = get_cbe(GRUBASE(cb), cbrnum); |
393 | gru_flush_cache(cbe); /* CBE not coherent */ | 420 | gru_flush_cache(cbe); /* CBE not coherent */ |
421 | sync_core(); | ||
394 | excdet->opc = cbe->opccpy; | 422 | excdet->opc = cbe->opccpy; |
395 | excdet->exopc = cbe->exopccpy; | 423 | excdet->exopc = cbe->exopccpy; |
396 | excdet->ecause = cbe->ecause; | 424 | excdet->ecause = cbe->ecause; |
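The kgts lookup added above just walks every blade and asks whether the CB address lies inside that blade's GRU address range (off < GRU_SIZE); as the comment says, it stays simple because the path is almost never taken. A toy version of the same search with hypothetical addresses:

#include <stdio.h>
#include <stdint.h>

#define NBLADES         4
#define GRU_SIZE        0x400000UL      /* hypothetical per-GRU span */

/* Per-blade GRU base addresses; a CB belongs to the blade whose range
 * covers it. Unsigned subtraction makes "cb below base" fail the test. */
static uintptr_t gru_base_vaddr[NBLADES] = {
        0x10000000, 0x10400000, 0x10800000, 0x10c00000
};

static int cb_to_blade(uintptr_t cb)
{
        int bid;

        for (bid = 0; bid < NBLADES; bid++)
                if (cb - gru_base_vaddr[bid] < GRU_SIZE)
                        return bid;
        return -1;
}

int main(void)
{
        printf("blade %d\n", cb_to_blade(0x10800040));  /* 2 */
        return 0;
}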
@@ -409,8 +437,8 @@ char *gru_get_cb_exception_detail_str(int ret, void *cb, | |||
409 | if (ret > 0 && gen->istatus == CBS_EXCEPTION) { | 437 | if (ret > 0 && gen->istatus == CBS_EXCEPTION) { |
410 | gru_get_cb_exception_detail(cb, &excdet); | 438 | gru_get_cb_exception_detail(cb, &excdet); |
411 | snprintf(buf, size, | 439 | snprintf(buf, size, |
412 | "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x," | 440 | "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," |
413 | "excdet0 0x%lx, excdet1 0x%x", | 441 | "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), |
414 | gen, excdet.opc, excdet.exopc, excdet.ecause, | 442 | gen, excdet.opc, excdet.exopc, excdet.ecause, |
415 | excdet.exceptdet0, excdet.exceptdet1); | 443 | excdet.exceptdet0, excdet.exceptdet1); |
416 | } else { | 444 | } else { |
@@ -457,9 +485,10 @@ int gru_check_status_proc(void *cb) | |||
457 | int ret; | 485 | int ret; |
458 | 486 | ||
459 | ret = gen->istatus; | 487 | ret = gen->istatus; |
460 | if (ret != CBS_EXCEPTION) | 488 | if (ret == CBS_EXCEPTION) |
461 | return ret; | 489 | ret = gru_retry_exception(cb); |
462 | return gru_retry_exception(cb); | 490 | rmb(); |
491 | return ret; | ||
463 | 492 | ||
464 | } | 493 | } |
465 | 494 | ||
@@ -471,7 +500,7 @@ int gru_wait_proc(void *cb) | |||
471 | ret = gru_wait_idle_or_exception(gen); | 500 | ret = gru_wait_idle_or_exception(gen); |
472 | if (ret == CBS_EXCEPTION) | 501 | if (ret == CBS_EXCEPTION) |
473 | ret = gru_retry_exception(cb); | 502 | ret = gru_retry_exception(cb); |
474 | 503 | rmb(); | |
475 | return ret; | 504 | return ret; |
476 | } | 505 | } |
477 | 506 | ||
@@ -538,7 +567,7 @@ int gru_create_message_queue(struct gru_message_queue_desc *mqd, | |||
538 | mqd->mq = mq; | 567 | mqd->mq = mq; |
539 | mqd->mq_gpa = uv_gpa(mq); | 568 | mqd->mq_gpa = uv_gpa(mq); |
540 | mqd->qlines = qlines; | 569 | mqd->qlines = qlines; |
541 | mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid); | 570 | mqd->interrupt_pnode = nasid >> 1; |
542 | mqd->interrupt_vector = vector; | 571 | mqd->interrupt_vector = vector; |
543 | mqd->interrupt_apicid = apicid; | 572 | mqd->interrupt_apicid = apicid; |
544 | return 0; | 573 | return 0; |
@@ -598,6 +627,8 @@ static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, | |||
598 | ret = MQE_UNEXPECTED_CB_ERR; | 627 | ret = MQE_UNEXPECTED_CB_ERR; |
599 | break; | 628 | break; |
600 | case CBSS_PAGE_OVERFLOW: | 629 | case CBSS_PAGE_OVERFLOW: |
630 | STAT(mesq_noop_page_overflow); | ||
631 | /* fallthru */ | ||
601 | default: | 632 | default: |
602 | BUG(); | 633 | BUG(); |
603 | } | 634 | } |
@@ -673,18 +704,6 @@ cberr: | |||
673 | } | 704 | } |
674 | 705 | ||
675 | /* | 706 | /* |
676 | * Send a cross-partition interrupt to the SSI that contains the target | ||
677 | * message queue. Normally, the interrupt is automatically delivered by hardware | ||
678 | * but some error conditions require explicit delivery. | ||
679 | */ | ||
680 | static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd) | ||
681 | { | ||
682 | if (mqd->interrupt_vector) | ||
683 | uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid, | ||
684 | mqd->interrupt_vector); | ||
685 | } | ||
686 | |||
687 | /* | ||
688 | * Handle a PUT failure. Note: if message was a 2-line message, one of the | 707 | * Handle a PUT failure. Note: if message was a 2-line message, one of the |
689 | * lines might have successfully have been written. Before sending the | 708 | * lines might have successfully have been written. Before sending the |
690 | * message, "present" must be cleared in BOTH lines to prevent the receiver | 709 | * message, "present" must be cleared in BOTH lines to prevent the receiver |
@@ -693,7 +712,8 @@ static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd) | |||
693 | static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, | 712 | static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, |
694 | void *mesg, int lines) | 713 | void *mesg, int lines) |
695 | { | 714 | { |
696 | unsigned long m; | 715 | unsigned long m, *val = mesg, gpa, save; |
716 | int ret; | ||
697 | 717 | ||
698 | m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); | 718 | m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); |
699 | if (lines == 2) { | 719 | if (lines == 2) { |
@@ -704,7 +724,26 @@ static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, | |||
704 | gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); | 724 | gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); |
705 | if (gru_wait(cb) != CBS_IDLE) | 725 | if (gru_wait(cb) != CBS_IDLE) |
706 | return MQE_UNEXPECTED_CB_ERR; | 726 | return MQE_UNEXPECTED_CB_ERR; |
707 | send_message_queue_interrupt(mqd); | 727 | |
728 | if (!mqd->interrupt_vector) | ||
729 | return MQE_OK; | ||
730 | |||
731 | /* | ||
732 | * Send a cross-partition interrupt to the SSI that contains the target | ||
733 | * message queue. Normally, the interrupt is automatically delivered by | ||
734 | * hardware but some error conditions require explicit delivery. | ||
735 | * Use the GRU to deliver the interrupt. Otherwise partition failures | ||
736 | * could cause unrecovered errors. | ||
737 | */ | ||
738 | gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT); | ||
739 | save = *val; | ||
740 | *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector, | ||
741 | dest_Fixed); | ||
742 | gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA); | ||
743 | ret = gru_wait(cb); | ||
744 | *val = save; | ||
745 | if (ret != CBS_IDLE) | ||
746 | return MQE_UNEXPECTED_CB_ERR; | ||
708 | return MQE_OK; | 747 | return MQE_OK; |
709 | } | 748 | } |
710 | 749 | ||
@@ -739,6 +778,9 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, | |||
739 | STAT(mesq_send_put_nacked); | 778 | STAT(mesq_send_put_nacked); |
740 | ret = send_message_put_nacked(cb, mqd, mesg, lines); | 779 | ret = send_message_put_nacked(cb, mqd, mesg, lines); |
741 | break; | 780 | break; |
781 | case CBSS_PAGE_OVERFLOW: | ||
782 | STAT(mesq_page_overflow); | ||
783 | /* fallthru */ | ||
742 | default: | 784 | default: |
743 | BUG(); | 785 | BUG(); |
744 | } | 786 | } |
@@ -831,7 +873,6 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd) | |||
831 | int present = mhdr->present; | 873 | int present = mhdr->present; |
832 | 874 | ||
833 | /* skip NOOP messages */ | 875 | /* skip NOOP messages */ |
834 | STAT(mesq_receive); | ||
835 | while (present == MQS_NOOP) { | 876 | while (present == MQS_NOOP) { |
836 | gru_free_message(mqd, mhdr); | 877 | gru_free_message(mqd, mhdr); |
837 | mhdr = mq->next; | 878 | mhdr = mq->next; |
@@ -851,6 +892,7 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd) | |||
851 | if (mhdr->lines == 2) | 892 | if (mhdr->lines == 2) |
852 | restore_present2(mhdr, mhdr->present2); | 893 | restore_present2(mhdr, mhdr->present2); |
853 | 894 | ||
895 | STAT(mesq_receive); | ||
854 | return mhdr; | 896 | return mhdr; |
855 | } | 897 | } |
856 | EXPORT_SYMBOL_GPL(gru_get_next_message); | 898 | EXPORT_SYMBOL_GPL(gru_get_next_message); |
@@ -858,6 +900,29 @@ EXPORT_SYMBOL_GPL(gru_get_next_message); | |||
858 | /* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ | 900 | /* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ |
859 | 901 | ||
860 | /* | 902 | /* |
903 | * Load a DW from a global GPA. The GPA can be a memory or MMR address. | ||
904 | */ | ||
905 | int gru_read_gpa(unsigned long *value, unsigned long gpa) | ||
906 | { | ||
907 | void *cb; | ||
908 | void *dsr; | ||
909 | int ret, iaa; | ||
910 | |||
911 | STAT(read_gpa); | ||
912 | if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) | ||
913 | return MQE_BUG_NO_RESOURCES; | ||
914 | iaa = gpa >> 62; | ||
915 | gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); | ||
916 | ret = gru_wait(cb); | ||
917 | if (ret == CBS_IDLE) | ||
918 | *value = *(unsigned long *)dsr; | ||
919 | gru_free_cpu_resources(cb, dsr); | ||
920 | return ret; | ||
921 | } | ||
922 | EXPORT_SYMBOL_GPL(gru_read_gpa); | ||
923 | |||
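gru_read_gpa() above derives the target's address attribute from the top two bits of the 64-bit global address (iaa = gpa >> 62), so the same call can read ordinary memory or an MMR. A small illustration of that bit extraction only; the example addresses are made up, and which attribute each value selects is defined by the IAA encoding in gru_instructions.h, not reproduced here:

#include <stdio.h>

int main(void)
{
        /* Hypothetical global addresses; only the top two bits matter here. */
        unsigned long mem_gpa = 0x0000123456789000UL;   /* top bits 00 */
        unsigned long mmr_gpa = 0xc000000000070000UL;   /* top bits 11 */

        printf("iaa(mem_gpa) = %lu\n", mem_gpa >> 62);  /* 0 */
        printf("iaa(mmr_gpa) = %lu\n", mmr_gpa >> 62);  /* 3 */
        return 0;
}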
924 | |||
925 | /* | ||
861 | * Copy a block of data using the GRU resources | 926 | * Copy a block of data using the GRU resources |
862 | */ | 927 | */ |
863 | int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, | 928 | int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, |
@@ -898,24 +963,24 @@ static int quicktest0(unsigned long arg) | |||
898 | 963 | ||
899 | gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); | 964 | gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); |
900 | if (gru_wait(cb) != CBS_IDLE) { | 965 | if (gru_wait(cb) != CBS_IDLE) { |
901 | printk(KERN_DEBUG "GRU quicktest0: CBR failure 1\n"); | 966 | printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); |
902 | goto done; | 967 | goto done; |
903 | } | 968 | } |
904 | 969 | ||
905 | if (*p != MAGIC) { | 970 | if (*p != MAGIC) { |
906 | printk(KERN_DEBUG "GRU: quicktest0 bad magic 0x%lx\n", *p); | 971 | printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); |
907 | goto done; | 972 | goto done; |
908 | } | 973 | } |
909 | gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); | 974 | gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); |
910 | if (gru_wait(cb) != CBS_IDLE) { | 975 | if (gru_wait(cb) != CBS_IDLE) { |
911 | printk(KERN_DEBUG "GRU quicktest0: CBR failure 2\n"); | 976 | printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); |
912 | goto done; | 977 | goto done; |
913 | } | 978 | } |
914 | 979 | ||
915 | if (word0 != word1 || word1 != MAGIC) { | 980 | if (word0 != word1 || word1 != MAGIC) { |
916 | printk(KERN_DEBUG | 981 | printk(KERN_DEBUG |
917 | "GRU quicktest0 err: found 0x%lx, expected 0x%lx\n", | 982 | "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", |
918 | word1, MAGIC); | 983 | smp_processor_id(), word1, MAGIC); |
919 | goto done; | 984 | goto done; |
920 | } | 985 | } |
921 | ret = 0; | 986 | ret = 0; |
@@ -952,8 +1017,11 @@ static int quicktest1(unsigned long arg) | |||
952 | if (ret) | 1017 | if (ret) |
953 | break; | 1018 | break; |
954 | } | 1019 | } |
955 | if (ret != MQE_QUEUE_FULL || i != 4) | 1020 | if (ret != MQE_QUEUE_FULL || i != 4) { |
1021 | printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n", | ||
1022 | smp_processor_id(), ret, i); | ||
956 | goto done; | 1023 | goto done; |
1024 | } | ||
957 | 1025 | ||
958 | for (i = 0; i < 6; i++) { | 1026 | for (i = 0; i < 6; i++) { |
959 | m = gru_get_next_message(&mqd); | 1027 | m = gru_get_next_message(&mqd); |
@@ -961,7 +1029,12 @@ static int quicktest1(unsigned long arg) | |||
961 | break; | 1029 | break; |
962 | gru_free_message(&mqd, m); | 1030 | gru_free_message(&mqd, m); |
963 | } | 1031 | } |
964 | ret = (i == 4) ? 0 : -EIO; | 1032 | if (i != 4) { |
1033 | printk(KERN_DEBUG "GRU:%d quicktest1: bad message, i %d, m %p, m8 %d\n", | ||
1034 | smp_processor_id(), i, m, m ? m[8] : -1); | ||
1035 | goto done; | ||
1036 | } | ||
1037 | ret = 0; | ||
965 | 1038 | ||
966 | done: | 1039 | done: |
967 | kfree(p); | 1040 | kfree(p); |
@@ -977,6 +1050,7 @@ static int quicktest2(unsigned long arg) | |||
977 | int ret = 0; | 1050 | int ret = 0; |
978 | unsigned long *buf; | 1051 | unsigned long *buf; |
979 | void *cb0, *cb; | 1052 | void *cb0, *cb; |
1053 | struct gru_control_block_status *gen; | ||
980 | int i, k, istatus, bytes; | 1054 | int i, k, istatus, bytes; |
981 | 1055 | ||
982 | bytes = numcb * 4 * 8; | 1056 | bytes = numcb * 4 * 8; |
@@ -996,20 +1070,30 @@ static int quicktest2(unsigned long arg) | |||
996 | XTYPE_DW, 4, 1, IMA_INTERRUPT); | 1070 | XTYPE_DW, 4, 1, IMA_INTERRUPT); |
997 | 1071 | ||
998 | ret = 0; | 1072 | ret = 0; |
999 | for (k = 0; k < numcb; k++) { | 1073 | k = numcb; |
1074 | do { | ||
1000 | gru_wait_async_cbr(han); | 1075 | gru_wait_async_cbr(han); |
1001 | for (i = 0; i < numcb; i++) { | 1076 | for (i = 0; i < numcb; i++) { |
1002 | cb = cb0 + i * GRU_HANDLE_STRIDE; | 1077 | cb = cb0 + i * GRU_HANDLE_STRIDE; |
1003 | istatus = gru_check_status(cb); | 1078 | istatus = gru_check_status(cb); |
1004 | if (istatus == CBS_ACTIVE) | 1079 | if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) |
1005 | continue; | 1080 | break; |
1006 | if (istatus == CBS_EXCEPTION) | ||
1007 | ret = -EFAULT; | ||
1008 | else if (buf[i] || buf[i + 1] || buf[i + 2] || | ||
1009 | buf[i + 3]) | ||
1010 | ret = -EIO; | ||
1011 | } | 1081 | } |
1012 | } | 1082 | if (i == numcb) |
1083 | continue; | ||
1084 | if (istatus != CBS_IDLE) { | ||
1085 | printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); | ||
1086 | ret = -EFAULT; | ||
1087 | } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || | ||
1088 | buf[4 * i + 3]) { | ||
1089 | printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", | ||
1090 | smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); | ||
1091 | ret = -EIO; | ||
1092 | } | ||
1093 | k--; | ||
1094 | gen = cb; | ||
1095 | gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ | ||
1096 | } while (k); | ||
1013 | BUG_ON(cmp.done); | 1097 | BUG_ON(cmp.done); |
1014 | 1098 | ||
1015 | gru_unlock_async_resource(han); | 1099 | gru_unlock_async_resource(han); |
@@ -1019,6 +1103,22 @@ done: | |||
1019 | return ret; | 1103 | return ret; |
1020 | } | 1104 | } |
1021 | 1105 | ||
1106 | #define BUFSIZE 200 | ||
1107 | static int quicktest3(unsigned long arg) | ||
1108 | { | ||
1109 | char buf1[BUFSIZE], buf2[BUFSIZE]; | ||
1110 | int ret = 0; | ||
1111 | |||
1112 | memset(buf2, 0, sizeof(buf2)); | ||
1113 | memset(buf1, get_cycles() & 255, sizeof(buf1)); | ||
1114 | gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); | ||
1115 | if (memcmp(buf1, buf2, BUFSIZE)) { | ||
1116 | printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); | ||
1117 | ret = -EIO; | ||
1118 | } | ||
1119 | return ret; | ||
1120 | } | ||
1121 | |||
1022 | /* | 1122 | /* |
1023 | * Debugging only. User hook for various kernel tests | 1123 | * Debugging only. User hook for various kernel tests |
1024 | * of driver & gru. | 1124 | * of driver & gru. |
@@ -1037,6 +1137,9 @@ int gru_ktest(unsigned long arg) | |||
1037 | case 2: | 1137 | case 2: |
1038 | ret = quicktest2(arg); | 1138 | ret = quicktest2(arg); |
1039 | break; | 1139 | break; |
1140 | case 3: | ||
1141 | ret = quicktest3(arg); | ||
1142 | break; | ||
1040 | case 99: | 1143 | case 99: |
1041 | ret = gru_free_kernel_contexts(); | 1144 | ret = gru_free_kernel_contexts(); |
1042 | break; | 1145 | break; |
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h index d60d34bca44d..02aa94d8484a 100644 --- a/drivers/misc/sgi-gru/grukservices.h +++ b/drivers/misc/sgi-gru/grukservices.h | |||
@@ -131,6 +131,20 @@ extern void *gru_get_next_message(struct gru_message_queue_desc *mqd); | |||
131 | 131 | ||
132 | 132 | ||
133 | /* | 133 | /* |
134 | * Read a 64-bit word from a global physical address (GPA). The source can be located in a remote partition. | ||
135 | * | ||
136 | * Input: | ||
137 | * value memory address where the value read is returned | ||
138 | * gpa source numalink global physical address (GPA) | ||
139 | * | ||
140 | * Output: | ||
141 | * 0 OK | ||
142 | * >0 error | ||
143 | */ | ||
144 | int gru_read_gpa(unsigned long *value, unsigned long gpa); | ||
145 | |||
146 | |||
147 | /* | ||
134 | * Copy data using the GRU. Source or destination can be located in a remote | 148 | * Copy data using the GRU. Source or destination can be located in a remote |
135 | * partition. | 149 | * partition. |
136 | * | 150 | * |
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h index 889bc442a3e8..e77d1b1f9d05 100644 --- a/drivers/misc/sgi-gru/grulib.h +++ b/drivers/misc/sgi-gru/grulib.h | |||
@@ -63,18 +63,9 @@ | |||
63 | #define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th)) | 63 | #define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th)) |
64 | #define GSEG_START(cb) ((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1))) | 64 | #define GSEG_START(cb) ((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1))) |
65 | 65 | ||
66 | /* | ||
67 | * Statictics kept on a per-GTS basis. | ||
68 | */ | ||
69 | struct gts_statistics { | ||
70 | unsigned long fmm_tlbdropin; | ||
71 | unsigned long upm_tlbdropin; | ||
72 | unsigned long context_stolen; | ||
73 | }; | ||
74 | |||
75 | struct gru_get_gseg_statistics_req { | 66 | struct gru_get_gseg_statistics_req { |
76 | unsigned long gseg; | 67 | unsigned long gseg; |
77 | struct gts_statistics stats; | 68 | struct gru_gseg_statistics stats; |
78 | }; | 69 | }; |
79 | 70 | ||
80 | /* | 71 | /* |
@@ -86,6 +77,7 @@ struct gru_create_context_req { | |||
86 | unsigned int control_blocks; | 77 | unsigned int control_blocks; |
87 | unsigned int maximum_thread_count; | 78 | unsigned int maximum_thread_count; |
88 | unsigned int options; | 79 | unsigned int options; |
80 | unsigned char tlb_preload_count; | ||
89 | }; | 81 | }; |
90 | 82 | ||
91 | /* | 83 | /* |
@@ -98,11 +90,12 @@ struct gru_unload_context_req { | |||
98 | /* | 90 | /* |
99 | * Structure used to set context options | 91 | * Structure used to set context options |
100 | */ | 92 | */ |
101 | enum {sco_gseg_owner, sco_cch_req_slice}; | 93 | enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet}; |
102 | struct gru_set_context_option_req { | 94 | struct gru_set_context_option_req { |
103 | unsigned long gseg; | 95 | unsigned long gseg; |
104 | int op; | 96 | int op; |
105 | unsigned long val1; | 97 | int val0; |
98 | long val1; | ||
106 | }; | 99 | }; |
107 | 100 | ||
108 | /* | 101 | /* |
@@ -124,6 +117,8 @@ struct gru_dump_chiplet_state_req { | |||
124 | int ctxnum; | 117 | int ctxnum; |
125 | char data_opt; | 118 | char data_opt; |
126 | char lock_cch; | 119 | char lock_cch; |
120 | char flush_cbrs; | ||
121 | char fill[10]; | ||
127 | pid_t pid; | 122 | pid_t pid; |
128 | void *buf; | 123 | void *buf; |
129 | size_t buflen; | 124 | size_t buflen; |
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 3bc643dad606..f8538bbd0bfa 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/device.h> | 28 | #include <linux/device.h> |
29 | #include <linux/list.h> | 29 | #include <linux/list.h> |
30 | #include <linux/err.h> | ||
30 | #include <asm/uv/uv_hub.h> | 31 | #include <asm/uv/uv_hub.h> |
31 | #include "gru.h" | 32 | #include "gru.h" |
32 | #include "grutables.h" | 33 | #include "grutables.h" |
@@ -48,12 +49,20 @@ struct device *grudev = &gru_device; | |||
48 | /* | 49 | /* |
49 | * Select a gru fault map to be used by the current cpu. Note that | 50 | * Select a gru fault map to be used by the current cpu. Note that |
50 | * multiple cpus may be using the same map. | 51 | * multiple cpus may be using the same map. |
51 | * ZZZ should "shift" be used?? Depends on HT cpu numbering | ||
52 | * ZZZ should be inline but did not work on emulator | 52 | * ZZZ should be inline but did not work on emulator |
53 | */ | 53 | */ |
54 | int gru_cpu_fault_map_id(void) | 54 | int gru_cpu_fault_map_id(void) |
55 | { | 55 | { |
56 | #ifdef CONFIG_IA64 | ||
56 | return uv_blade_processor_id() % GRU_NUM_TFM; | 57 | return uv_blade_processor_id() % GRU_NUM_TFM; |
58 | #else | ||
59 | int cpu = smp_processor_id(); | ||
60 | int id, core; | ||
61 | |||
62 | core = uv_cpu_core_number(cpu); | ||
63 | id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); | ||
64 | return id; | ||
65 | #endif | ||
57 | } | 66 | } |
58 | 67 | ||
59 | /*--------- ASID Management ------------------------------------------- | 68 | /*--------- ASID Management ------------------------------------------- |
@@ -286,7 +295,8 @@ static void gru_unload_mm_tracker(struct gru_state *gru, | |||
286 | void gts_drop(struct gru_thread_state *gts) | 295 | void gts_drop(struct gru_thread_state *gts) |
287 | { | 296 | { |
288 | if (gts && atomic_dec_return(>s->ts_refcnt) == 0) { | 297 | if (gts && atomic_dec_return(>s->ts_refcnt) == 0) { |
289 | gru_drop_mmu_notifier(gts->ts_gms); | 298 | if (gts->ts_gms) |
299 | gru_drop_mmu_notifier(gts->ts_gms); | ||
290 | kfree(gts); | 300 | kfree(gts); |
291 | STAT(gts_free); | 301 | STAT(gts_free); |
292 | } | 302 | } |
@@ -310,16 +320,18 @@ static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data | |||
310 | * Allocate a thread state structure. | 320 | * Allocate a thread state structure. |
311 | */ | 321 | */ |
312 | struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, | 322 | struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, |
313 | int cbr_au_count, int dsr_au_count, int options, int tsid) | 323 | int cbr_au_count, int dsr_au_count, |
324 | unsigned char tlb_preload_count, int options, int tsid) | ||
314 | { | 325 | { |
315 | struct gru_thread_state *gts; | 326 | struct gru_thread_state *gts; |
327 | struct gru_mm_struct *gms; | ||
316 | int bytes; | 328 | int bytes; |
317 | 329 | ||
318 | bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count); | 330 | bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count); |
319 | bytes += sizeof(struct gru_thread_state); | 331 | bytes += sizeof(struct gru_thread_state); |
320 | gts = kmalloc(bytes, GFP_KERNEL); | 332 | gts = kmalloc(bytes, GFP_KERNEL); |
321 | if (!gts) | 333 | if (!gts) |
322 | return NULL; | 334 | return ERR_PTR(-ENOMEM); |
323 | 335 | ||
324 | STAT(gts_alloc); | 336 | STAT(gts_alloc); |
325 | memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */ | 337 | memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */ |
@@ -327,7 +339,10 @@ struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, | |||
327 | mutex_init(>s->ts_ctxlock); | 339 | mutex_init(>s->ts_ctxlock); |
328 | gts->ts_cbr_au_count = cbr_au_count; | 340 | gts->ts_cbr_au_count = cbr_au_count; |
329 | gts->ts_dsr_au_count = dsr_au_count; | 341 | gts->ts_dsr_au_count = dsr_au_count; |
342 | gts->ts_tlb_preload_count = tlb_preload_count; | ||
330 | gts->ts_user_options = options; | 343 | gts->ts_user_options = options; |
344 | gts->ts_user_blade_id = -1; | ||
345 | gts->ts_user_chiplet_id = -1; | ||
331 | gts->ts_tsid = tsid; | 346 | gts->ts_tsid = tsid; |
332 | gts->ts_ctxnum = NULLCTX; | 347 | gts->ts_ctxnum = NULLCTX; |
333 | gts->ts_tlb_int_select = -1; | 348 | gts->ts_tlb_int_select = -1; |
@@ -336,9 +351,10 @@ struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, | |||
336 | if (vma) { | 351 | if (vma) { |
337 | gts->ts_mm = current->mm; | 352 | gts->ts_mm = current->mm; |
338 | gts->ts_vma = vma; | 353 | gts->ts_vma = vma; |
339 | gts->ts_gms = gru_register_mmu_notifier(); | 354 | gms = gru_register_mmu_notifier(); |
340 | if (!gts->ts_gms) | 355 | if (IS_ERR(gms)) |
341 | goto err; | 356 | goto err; |
357 | gts->ts_gms = gms; | ||
342 | } | 358 | } |
343 | 359 | ||
344 | gru_dbg(grudev, "alloc gts %p\n", gts); | 360 | gru_dbg(grudev, "alloc gts %p\n", gts); |
@@ -346,7 +362,7 @@ struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, | |||
346 | 362 | ||
347 | err: | 363 | err: |
348 | gts_drop(gts); | 364 | gts_drop(gts); |
349 | return NULL; | 365 | return ERR_CAST(gms); |
350 | } | 366 | } |
351 | 367 | ||
352 | /* | 368 | /* |
@@ -360,6 +376,7 @@ struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid) | |||
360 | if (!vdata) | 376 | if (!vdata) |
361 | return NULL; | 377 | return NULL; |
362 | 378 | ||
379 | STAT(vdata_alloc); | ||
363 | INIT_LIST_HEAD(&vdata->vd_head); | 380 | INIT_LIST_HEAD(&vdata->vd_head); |
364 | spin_lock_init(&vdata->vd_lock); | 381 | spin_lock_init(&vdata->vd_lock); |
365 | gru_dbg(grudev, "alloc vdata %p\n", vdata); | 382 | gru_dbg(grudev, "alloc vdata %p\n", vdata); |
@@ -392,10 +409,12 @@ struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, | |||
392 | struct gru_vma_data *vdata = vma->vm_private_data; | 409 | struct gru_vma_data *vdata = vma->vm_private_data; |
393 | struct gru_thread_state *gts, *ngts; | 410 | struct gru_thread_state *gts, *ngts; |
394 | 411 | ||
395 | gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, vdata->vd_dsr_au_count, | 412 | gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, |
413 | vdata->vd_dsr_au_count, | ||
414 | vdata->vd_tlb_preload_count, | ||
396 | vdata->vd_user_options, tsid); | 415 | vdata->vd_user_options, tsid); |
397 | if (!gts) | 416 | if (IS_ERR(gts)) |
398 | return NULL; | 417 | return gts; |
399 | 418 | ||
400 | spin_lock(&vdata->vd_lock); | 419 | spin_lock(&vdata->vd_lock); |
401 | ngts = gru_find_current_gts_nolock(vdata, tsid); | 420 | ngts = gru_find_current_gts_nolock(vdata, tsid); |
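
gru_alloc_gts() and gru_alloc_thread_state() now report failures with ERR_PTR-encoded pointers instead of NULL, so callers can tell -ENOMEM apart from a failed mmu-notifier registration. A minimal userspace sketch of that pattern, with simplified stand-ins for the <linux/err.h> helpers and a hypothetical alloc_thing():

#include <stdio.h>
#include <errno.h>

/* Simplified stand-ins for ERR_PTR()/IS_ERR()/PTR_ERR() from <linux/err.h> */
#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

static int thing;

/* Hypothetical allocator: encode the errno in the returned pointer on failure */
static void *alloc_thing(int fail)
{
	return fail ? ERR_PTR(-ENOMEM) : (void *)&thing;
}

int main(void)
{
	void *p = alloc_thing(1);

	if (IS_ERR(p))
		printf("allocation failed: %ld\n", PTR_ERR(p));
	return 0;
}
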
@@ -493,6 +512,9 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum, | |||
493 | memset(cbe + i * GRU_HANDLE_STRIDE, 0, | 512 | memset(cbe + i * GRU_HANDLE_STRIDE, 0, |
494 | GRU_CACHE_LINE_BYTES); | 513 | GRU_CACHE_LINE_BYTES); |
495 | } | 514 | } |
515 | /* Flush CBE to hide race in context restart */ | ||
516 | mb(); | ||
517 | gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); | ||
496 | cb += GRU_HANDLE_STRIDE; | 518 | cb += GRU_HANDLE_STRIDE; |
497 | } | 519 | } |
498 | 520 | ||
@@ -513,6 +535,12 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum, | |||
513 | cb = gseg + GRU_CB_BASE; | 535 | cb = gseg + GRU_CB_BASE; |
514 | cbe = grubase + GRU_CBE_BASE; | 536 | cbe = grubase + GRU_CBE_BASE; |
515 | length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; | 537 | length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; |
538 | |||
539 | /* CBEs may not be coherent. Flush them from cache */ | ||
540 | for_each_cbr_in_allocation_map(i, &cbrmap, scr) | ||
541 | gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); | ||
542 | mb(); /* Let the CL flush complete */ | ||
543 | |||
516 | gru_prefetch_context(gseg, cb, cbe, cbrmap, length); | 544 | gru_prefetch_context(gseg, cb, cbe, cbrmap, length); |
517 | 545 | ||
518 | for_each_cbr_in_allocation_map(i, &cbrmap, scr) { | 546 | for_each_cbr_in_allocation_map(i, &cbrmap, scr) { |
@@ -533,7 +561,8 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate) | |||
533 | zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); | 561 | zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); |
534 | cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); | 562 | cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); |
535 | 563 | ||
536 | gru_dbg(grudev, "gts %p\n", gts); | 564 | gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n", |
565 | gts, gts->ts_cbr_map, gts->ts_dsr_map); | ||
537 | lock_cch_handle(cch); | 566 | lock_cch_handle(cch); |
538 | if (cch_interrupt_sync(cch)) | 567 | if (cch_interrupt_sync(cch)) |
539 | BUG(); | 568 | BUG(); |
@@ -549,7 +578,6 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate) | |||
549 | 578 | ||
550 | if (cch_deallocate(cch)) | 579 | if (cch_deallocate(cch)) |
551 | BUG(); | 580 | BUG(); |
552 | gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */ | ||
553 | unlock_cch_handle(cch); | 581 | unlock_cch_handle(cch); |
554 | 582 | ||
555 | gru_free_gru_context(gts); | 583 | gru_free_gru_context(gts); |
@@ -565,9 +593,7 @@ void gru_load_context(struct gru_thread_state *gts) | |||
565 | struct gru_context_configuration_handle *cch; | 593 | struct gru_context_configuration_handle *cch; |
566 | int i, err, asid, ctxnum = gts->ts_ctxnum; | 594 | int i, err, asid, ctxnum = gts->ts_ctxnum; |
567 | 595 | ||
568 | gru_dbg(grudev, "gts %p\n", gts); | ||
569 | cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); | 596 | cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); |
570 | |||
571 | lock_cch_handle(cch); | 597 | lock_cch_handle(cch); |
572 | cch->tfm_fault_bit_enable = | 598 | cch->tfm_fault_bit_enable = |
573 | (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL | 599 | (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL |
@@ -591,6 +617,7 @@ void gru_load_context(struct gru_thread_state *gts) | |||
591 | cch->unmap_enable = 1; | 617 | cch->unmap_enable = 1; |
592 | cch->tfm_done_bit_enable = 1; | 618 | cch->tfm_done_bit_enable = 1; |
593 | cch->cb_int_enable = 1; | 619 | cch->cb_int_enable = 1; |
620 | cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */ | ||
594 | } else { | 621 | } else { |
595 | cch->unmap_enable = 0; | 622 | cch->unmap_enable = 0; |
596 | cch->tfm_done_bit_enable = 0; | 623 | cch->tfm_done_bit_enable = 0; |
@@ -616,17 +643,18 @@ void gru_load_context(struct gru_thread_state *gts) | |||
616 | if (cch_start(cch)) | 643 | if (cch_start(cch)) |
617 | BUG(); | 644 | BUG(); |
618 | unlock_cch_handle(cch); | 645 | unlock_cch_handle(cch); |
646 | |||
647 | gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n", | ||
648 | gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map, | ||
649 | (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select); | ||
619 | } | 650 | } |
620 | 651 | ||
621 | /* | 652 | /* |
622 | * Update fields in an active CCH: | 653 | * Update fields in an active CCH: |
623 | * - retarget interrupts on local blade | 654 | * - retarget interrupts on local blade |
624 | * - update sizeavail mask | 655 | * - update sizeavail mask |
625 | * - force a delayed context unload by clearing the CCH asids. This | ||
626 | * forces TLB misses for new GRU instructions. The context is unloaded | ||
627 | * when the next TLB miss occurs. | ||
628 | */ | 656 | */ |
629 | int gru_update_cch(struct gru_thread_state *gts, int force_unload) | 657 | int gru_update_cch(struct gru_thread_state *gts) |
630 | { | 658 | { |
631 | struct gru_context_configuration_handle *cch; | 659 | struct gru_context_configuration_handle *cch; |
632 | struct gru_state *gru = gts->ts_gru; | 660 | struct gru_state *gru = gts->ts_gru; |
@@ -640,21 +668,13 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload) | |||
640 | goto exit; | 668 | goto exit; |
641 | if (cch_interrupt(cch)) | 669 | if (cch_interrupt(cch)) |
642 | BUG(); | 670 | BUG(); |
643 | if (!force_unload) { | 671 | for (i = 0; i < 8; i++) |
644 | for (i = 0; i < 8; i++) | 672 | cch->sizeavail[i] = gts->ts_sizeavail; |
645 | cch->sizeavail[i] = gts->ts_sizeavail; | 673 | gts->ts_tlb_int_select = gru_cpu_fault_map_id(); |
646 | gts->ts_tlb_int_select = gru_cpu_fault_map_id(); | 674 | cch->tlb_int_select = gru_cpu_fault_map_id(); |
647 | cch->tlb_int_select = gru_cpu_fault_map_id(); | 675 | cch->tfm_fault_bit_enable = |
648 | cch->tfm_fault_bit_enable = | 676 | (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL |
649 | (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL | 677 | || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); |
650 | || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); | ||
651 | } else { | ||
652 | for (i = 0; i < 8; i++) | ||
653 | cch->asid[i] = 0; | ||
654 | cch->tfm_fault_bit_enable = 0; | ||
655 | cch->tlb_int_enable = 0; | ||
656 | gts->ts_force_unload = 1; | ||
657 | } | ||
658 | if (cch_start(cch)) | 678 | if (cch_start(cch)) |
659 | BUG(); | 679 | BUG(); |
660 | ret = 1; | 680 | ret = 1; |
@@ -679,7 +699,54 @@ static int gru_retarget_intr(struct gru_thread_state *gts) | |||
679 | 699 | ||
680 | gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, | 700 | gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, |
681 | gru_cpu_fault_map_id()); | 701 | gru_cpu_fault_map_id()); |
682 | return gru_update_cch(gts, 0); | 702 | return gru_update_cch(gts); |
703 | } | ||
704 | |||
705 | /* | ||
706 | * Check if a GRU context is allowed to use a specific chiplet. By default | ||
707 | * a context is assigned to any blade-local chiplet. However, users can | ||
708 | * override this. | ||
709 | * Returns 1 if assignment allowed, 0 otherwise | ||
710 | */ | ||
711 | static int gru_check_chiplet_assignment(struct gru_state *gru, | ||
712 | struct gru_thread_state *gts) | ||
713 | { | ||
714 | int blade_id; | ||
715 | int chiplet_id; | ||
716 | |||
717 | blade_id = gts->ts_user_blade_id; | ||
718 | if (blade_id < 0) | ||
719 | blade_id = uv_numa_blade_id(); | ||
720 | |||
721 | chiplet_id = gts->ts_user_chiplet_id; | ||
722 | return gru->gs_blade_id == blade_id && | ||
723 | (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id); | ||
724 | } | ||
725 | |||
726 | /* | ||
727 | * Unload the gru context if it is not assigned to the correct blade or | ||
728 | * chiplet. Misassignment can occur if the process migrates to a different | ||
729 | * blade or if the user changes the selected blade/chiplet. | ||
730 | */ | ||
731 | void gru_check_context_placement(struct gru_thread_state *gts) | ||
732 | { | ||
733 | struct gru_state *gru; | ||
734 | |||
735 | /* | ||
736 | * If the current task is the context owner, verify that the | ||
737 | * context is correctly placed. This test is skipped for non-owner | ||
738 | * references. Pthread apps use non-owner references to the CBRs. | ||
739 | */ | ||
740 | gru = gts->ts_gru; | ||
741 | if (!gru || gts->ts_tgid_owner != current->tgid) | ||
742 | return; | ||
743 | |||
744 | if (!gru_check_chiplet_assignment(gru, gts)) { | ||
745 | STAT(check_context_unload); | ||
746 | gru_unload_context(gts, 1); | ||
747 | } else if (gru_retarget_intr(gts)) { | ||
748 | STAT(check_context_retarget_intr); | ||
749 | } | ||
683 | } | 750 | } |
684 | 751 | ||
685 | 752 | ||
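
gru_check_chiplet_assignment() reduces to a two-part test: the GRU must sit on the requested blade (or, if none was requested, the blade the task is currently on), and on the requested chiplet unless the user left the chiplet unspecified (-1). A plain-C restatement of that test, with made-up blade and chiplet numbers:

#include <stdio.h>

static int chiplet_ok(int gru_blade, int gru_chiplet,
		      int user_blade, int user_chiplet, int cur_blade)
{
	/* fall back to the blade the task is currently running on */
	int blade = user_blade < 0 ? cur_blade : user_blade;

	return gru_blade == blade &&
	       (user_chiplet < 0 || user_chiplet == gru_chiplet);
}

int main(void)
{
	/* context pinned to blade 2, chiplet unspecified; GRU on blade 2, chiplet 1 */
	printf("%d\n", chiplet_ok(2, 1, 2, -1, 0));
	return 0;
}
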
@@ -712,13 +779,17 @@ static void gts_stolen(struct gru_thread_state *gts, | |||
712 | } | 779 | } |
713 | } | 780 | } |
714 | 781 | ||
715 | void gru_steal_context(struct gru_thread_state *gts, int blade_id) | 782 | void gru_steal_context(struct gru_thread_state *gts) |
716 | { | 783 | { |
717 | struct gru_blade_state *blade; | 784 | struct gru_blade_state *blade; |
718 | struct gru_state *gru, *gru0; | 785 | struct gru_state *gru, *gru0; |
719 | struct gru_thread_state *ngts = NULL; | 786 | struct gru_thread_state *ngts = NULL; |
720 | int ctxnum, ctxnum0, flag = 0, cbr, dsr; | 787 | int ctxnum, ctxnum0, flag = 0, cbr, dsr; |
788 | int blade_id; | ||
721 | 789 | ||
790 | blade_id = gts->ts_user_blade_id; | ||
791 | if (blade_id < 0) | ||
792 | blade_id = uv_numa_blade_id(); | ||
722 | cbr = gts->ts_cbr_au_count; | 793 | cbr = gts->ts_cbr_au_count; |
723 | dsr = gts->ts_dsr_au_count; | 794 | dsr = gts->ts_dsr_au_count; |
724 | 795 | ||
@@ -729,35 +800,39 @@ void gru_steal_context(struct gru_thread_state *gts, int blade_id) | |||
729 | gru = blade->bs_lru_gru; | 800 | gru = blade->bs_lru_gru; |
730 | if (ctxnum == 0) | 801 | if (ctxnum == 0) |
731 | gru = next_gru(blade, gru); | 802 | gru = next_gru(blade, gru); |
803 | blade->bs_lru_gru = gru; | ||
804 | blade->bs_lru_ctxnum = ctxnum; | ||
732 | ctxnum0 = ctxnum; | 805 | ctxnum0 = ctxnum; |
733 | gru0 = gru; | 806 | gru0 = gru; |
734 | while (1) { | 807 | while (1) { |
735 | if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) | 808 | if (gru_check_chiplet_assignment(gru, gts)) { |
736 | break; | 809 | if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) |
737 | spin_lock(&gru->gs_lock); | ||
738 | for (; ctxnum < GRU_NUM_CCH; ctxnum++) { | ||
739 | if (flag && gru == gru0 && ctxnum == ctxnum0) | ||
740 | break; | 810 | break; |
741 | ngts = gru->gs_gts[ctxnum]; | 811 | spin_lock(&gru->gs_lock); |
742 | /* | 812 | for (; ctxnum < GRU_NUM_CCH; ctxnum++) { |
743 | * We are grabbing locks out of order, so trylock is | 813 | if (flag && gru == gru0 && ctxnum == ctxnum0) |
744 | * needed. GTSs are usually not locked, so the odds of | 814 | break; |
745 | * success are high. If trylock fails, try to steal a | 815 | ngts = gru->gs_gts[ctxnum]; |
746 | * different GSEG. | 816 | /* |
747 | */ | 817 | * We are grabbing locks out of order, so trylock is |
748 | if (ngts && is_gts_stealable(ngts, blade)) | 818 | * needed. GTSs are usually not locked, so the odds of |
819 | * success are high. If trylock fails, try to steal a | ||
820 | * different GSEG. | ||
821 | */ | ||
822 | if (ngts && is_gts_stealable(ngts, blade)) | ||
823 | break; | ||
824 | ngts = NULL; | ||
825 | } | ||
826 | spin_unlock(&gru->gs_lock); | ||
827 | if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) | ||
749 | break; | 828 | break; |
750 | ngts = NULL; | ||
751 | flag = 1; | ||
752 | } | 829 | } |
753 | spin_unlock(&gru->gs_lock); | 830 | if (flag && gru == gru0) |
754 | if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) | ||
755 | break; | 831 | break; |
832 | flag = 1; | ||
756 | ctxnum = 0; | 833 | ctxnum = 0; |
757 | gru = next_gru(blade, gru); | 834 | gru = next_gru(blade, gru); |
758 | } | 835 | } |
759 | blade->bs_lru_gru = gru; | ||
760 | blade->bs_lru_ctxnum = ctxnum; | ||
761 | spin_unlock(&blade->bs_lock); | 836 | spin_unlock(&blade->bs_lock); |
762 | 837 | ||
763 | if (ngts) { | 838 | if (ngts) { |
@@ -776,19 +851,34 @@ void gru_steal_context(struct gru_thread_state *gts, int blade_id) | |||
776 | } | 851 | } |
777 | 852 | ||
778 | /* | 853 | /* |
854 | * Assign a gru context. | ||
855 | */ | ||
856 | static int gru_assign_context_number(struct gru_state *gru) | ||
857 | { | ||
858 | int ctxnum; | ||
859 | |||
860 | ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); | ||
861 | __set_bit(ctxnum, &gru->gs_context_map); | ||
862 | return ctxnum; | ||
863 | } | ||
864 | |||
865 | /* | ||
779 | * Scan the GRUs on the local blade & assign a GRU context. | 866 | * Scan the GRUs on the local blade & assign a GRU context. |
780 | */ | 867 | */ |
781 | struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts, | 868 | struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) |
782 | int blade) | ||
783 | { | 869 | { |
784 | struct gru_state *gru, *grux; | 870 | struct gru_state *gru, *grux; |
785 | int i, max_active_contexts; | 871 | int i, max_active_contexts; |
872 | int blade_id = gts->ts_user_blade_id; | ||
786 | 873 | ||
787 | 874 | if (blade_id < 0) | |
875 | blade_id = uv_numa_blade_id(); | ||
788 | again: | 876 | again: |
789 | gru = NULL; | 877 | gru = NULL; |
790 | max_active_contexts = GRU_NUM_CCH; | 878 | max_active_contexts = GRU_NUM_CCH; |
791 | for_each_gru_on_blade(grux, blade, i) { | 879 | for_each_gru_on_blade(grux, blade_id, i) { |
880 | if (!gru_check_chiplet_assignment(grux, gts)) | ||
881 | continue; | ||
792 | if (check_gru_resources(grux, gts->ts_cbr_au_count, | 882 | if (check_gru_resources(grux, gts->ts_cbr_au_count, |
793 | gts->ts_dsr_au_count, | 883 | gts->ts_dsr_au_count, |
794 | max_active_contexts)) { | 884 | max_active_contexts)) { |
@@ -809,12 +899,9 @@ again: | |||
809 | reserve_gru_resources(gru, gts); | 899 | reserve_gru_resources(gru, gts); |
810 | gts->ts_gru = gru; | 900 | gts->ts_gru = gru; |
811 | gts->ts_blade = gru->gs_blade_id; | 901 | gts->ts_blade = gru->gs_blade_id; |
812 | gts->ts_ctxnum = | 902 | gts->ts_ctxnum = gru_assign_context_number(gru); |
813 | find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); | ||
814 | BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH); | ||
815 | atomic_inc(>s->ts_refcnt); | 903 | atomic_inc(>s->ts_refcnt); |
816 | gru->gs_gts[gts->ts_ctxnum] = gts; | 904 | gru->gs_gts[gts->ts_ctxnum] = gts; |
817 | __set_bit(gts->ts_ctxnum, &gru->gs_context_map); | ||
818 | spin_unlock(&gru->gs_lock); | 905 | spin_unlock(&gru->gs_lock); |
819 | 906 | ||
820 | STAT(assign_context); | 907 | STAT(assign_context); |
@@ -842,7 +929,6 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
842 | { | 929 | { |
843 | struct gru_thread_state *gts; | 930 | struct gru_thread_state *gts; |
844 | unsigned long paddr, vaddr; | 931 | unsigned long paddr, vaddr; |
845 | int blade_id; | ||
846 | 932 | ||
847 | vaddr = (unsigned long)vmf->virtual_address; | 933 | vaddr = (unsigned long)vmf->virtual_address; |
848 | gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", | 934 | gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", |
@@ -857,28 +943,18 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
857 | again: | 943 | again: |
858 | mutex_lock(>s->ts_ctxlock); | 944 | mutex_lock(>s->ts_ctxlock); |
859 | preempt_disable(); | 945 | preempt_disable(); |
860 | blade_id = uv_numa_blade_id(); | ||
861 | 946 | ||
862 | if (gts->ts_gru) { | 947 | gru_check_context_placement(gts); |
863 | if (gts->ts_gru->gs_blade_id != blade_id) { | ||
864 | STAT(migrated_nopfn_unload); | ||
865 | gru_unload_context(gts, 1); | ||
866 | } else { | ||
867 | if (gru_retarget_intr(gts)) | ||
868 | STAT(migrated_nopfn_retarget); | ||
869 | } | ||
870 | } | ||
871 | 948 | ||
872 | if (!gts->ts_gru) { | 949 | if (!gts->ts_gru) { |
873 | STAT(load_user_context); | 950 | STAT(load_user_context); |
874 | if (!gru_assign_gru_context(gts, blade_id)) { | 951 | if (!gru_assign_gru_context(gts)) { |
875 | preempt_enable(); | 952 | preempt_enable(); |
876 | mutex_unlock(>s->ts_ctxlock); | 953 | mutex_unlock(>s->ts_ctxlock); |
877 | set_current_state(TASK_INTERRUPTIBLE); | 954 | set_current_state(TASK_INTERRUPTIBLE); |
878 | schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ | 955 | schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ |
879 | blade_id = uv_numa_blade_id(); | ||
880 | if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) | 956 | if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) |
881 | gru_steal_context(gts, blade_id); | 957 | gru_steal_context(gts); |
882 | goto again; | 958 | goto again; |
883 | } | 959 | } |
884 | gru_load_context(gts); | 960 | gru_load_context(gts); |
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 3f2375c5ba5b..7768b87d995b 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c | |||
@@ -36,8 +36,7 @@ static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id) | |||
36 | { | 36 | { |
37 | unsigned long val = atomic_long_read(v); | 37 | unsigned long val = atomic_long_read(v); |
38 | 38 | ||
39 | if (val) | 39 | seq_printf(s, "%16lu %s\n", val, id); |
40 | seq_printf(s, "%16lu %s\n", val, id); | ||
41 | } | 40 | } |
42 | 41 | ||
43 | static int statistics_show(struct seq_file *s, void *p) | 42 | static int statistics_show(struct seq_file *s, void *p) |
@@ -46,7 +45,8 @@ static int statistics_show(struct seq_file *s, void *p) | |||
46 | printstat(s, vdata_free); | 45 | printstat(s, vdata_free); |
47 | printstat(s, gts_alloc); | 46 | printstat(s, gts_alloc); |
48 | printstat(s, gts_free); | 47 | printstat(s, gts_free); |
49 | printstat(s, vdata_double_alloc); | 48 | printstat(s, gms_alloc); |
49 | printstat(s, gms_free); | ||
50 | printstat(s, gts_double_allocate); | 50 | printstat(s, gts_double_allocate); |
51 | printstat(s, assign_context); | 51 | printstat(s, assign_context); |
52 | printstat(s, assign_context_failed); | 52 | printstat(s, assign_context_failed); |
@@ -59,28 +59,25 @@ static int statistics_show(struct seq_file *s, void *p) | |||
59 | printstat(s, steal_kernel_context); | 59 | printstat(s, steal_kernel_context); |
60 | printstat(s, steal_context_failed); | 60 | printstat(s, steal_context_failed); |
61 | printstat(s, nopfn); | 61 | printstat(s, nopfn); |
62 | printstat(s, break_cow); | ||
63 | printstat(s, asid_new); | 62 | printstat(s, asid_new); |
64 | printstat(s, asid_next); | 63 | printstat(s, asid_next); |
65 | printstat(s, asid_wrap); | 64 | printstat(s, asid_wrap); |
66 | printstat(s, asid_reuse); | 65 | printstat(s, asid_reuse); |
67 | printstat(s, intr); | 66 | printstat(s, intr); |
67 | printstat(s, intr_cbr); | ||
68 | printstat(s, intr_tfh); | ||
69 | printstat(s, intr_spurious); | ||
68 | printstat(s, intr_mm_lock_failed); | 70 | printstat(s, intr_mm_lock_failed); |
69 | printstat(s, call_os); | 71 | printstat(s, call_os); |
70 | printstat(s, call_os_offnode_reference); | ||
71 | printstat(s, call_os_check_for_bug); | ||
72 | printstat(s, call_os_wait_queue); | 72 | printstat(s, call_os_wait_queue); |
73 | printstat(s, user_flush_tlb); | 73 | printstat(s, user_flush_tlb); |
74 | printstat(s, user_unload_context); | 74 | printstat(s, user_unload_context); |
75 | printstat(s, user_exception); | 75 | printstat(s, user_exception); |
76 | printstat(s, set_context_option); | 76 | printstat(s, set_context_option); |
77 | printstat(s, migrate_check); | 77 | printstat(s, check_context_retarget_intr); |
78 | printstat(s, migrated_retarget); | 78 | printstat(s, check_context_unload); |
79 | printstat(s, migrated_unload); | ||
80 | printstat(s, migrated_unload_delay); | ||
81 | printstat(s, migrated_nopfn_retarget); | ||
82 | printstat(s, migrated_nopfn_unload); | ||
83 | printstat(s, tlb_dropin); | 79 | printstat(s, tlb_dropin); |
80 | printstat(s, tlb_preload_page); | ||
84 | printstat(s, tlb_dropin_fail_no_asid); | 81 | printstat(s, tlb_dropin_fail_no_asid); |
85 | printstat(s, tlb_dropin_fail_upm); | 82 | printstat(s, tlb_dropin_fail_upm); |
86 | printstat(s, tlb_dropin_fail_invalid); | 83 | printstat(s, tlb_dropin_fail_invalid); |
@@ -88,16 +85,15 @@ static int statistics_show(struct seq_file *s, void *p) | |||
88 | printstat(s, tlb_dropin_fail_idle); | 85 | printstat(s, tlb_dropin_fail_idle); |
89 | printstat(s, tlb_dropin_fail_fmm); | 86 | printstat(s, tlb_dropin_fail_fmm); |
90 | printstat(s, tlb_dropin_fail_no_exception); | 87 | printstat(s, tlb_dropin_fail_no_exception); |
91 | printstat(s, tlb_dropin_fail_no_exception_war); | ||
92 | printstat(s, tfh_stale_on_fault); | 88 | printstat(s, tfh_stale_on_fault); |
93 | printstat(s, mmu_invalidate_range); | 89 | printstat(s, mmu_invalidate_range); |
94 | printstat(s, mmu_invalidate_page); | 90 | printstat(s, mmu_invalidate_page); |
95 | printstat(s, mmu_clear_flush_young); | ||
96 | printstat(s, flush_tlb); | 91 | printstat(s, flush_tlb); |
97 | printstat(s, flush_tlb_gru); | 92 | printstat(s, flush_tlb_gru); |
98 | printstat(s, flush_tlb_gru_tgh); | 93 | printstat(s, flush_tlb_gru_tgh); |
99 | printstat(s, flush_tlb_gru_zero_asid); | 94 | printstat(s, flush_tlb_gru_zero_asid); |
100 | printstat(s, copy_gpa); | 95 | printstat(s, copy_gpa); |
96 | printstat(s, read_gpa); | ||
101 | printstat(s, mesq_receive); | 97 | printstat(s, mesq_receive); |
102 | printstat(s, mesq_receive_none); | 98 | printstat(s, mesq_receive_none); |
103 | printstat(s, mesq_send); | 99 | printstat(s, mesq_send); |
@@ -108,7 +104,6 @@ static int statistics_show(struct seq_file *s, void *p) | |||
108 | printstat(s, mesq_send_qlimit_reached); | 104 | printstat(s, mesq_send_qlimit_reached); |
109 | printstat(s, mesq_send_amo_nacked); | 105 | printstat(s, mesq_send_amo_nacked); |
110 | printstat(s, mesq_send_put_nacked); | 106 | printstat(s, mesq_send_put_nacked); |
111 | printstat(s, mesq_qf_not_full); | ||
112 | printstat(s, mesq_qf_locked); | 107 | printstat(s, mesq_qf_locked); |
113 | printstat(s, mesq_qf_noop_not_full); | 108 | printstat(s, mesq_qf_noop_not_full); |
114 | printstat(s, mesq_qf_switch_head_failed); | 109 | printstat(s, mesq_qf_switch_head_failed); |
@@ -118,6 +113,7 @@ static int statistics_show(struct seq_file *s, void *p) | |||
118 | printstat(s, mesq_noop_qlimit_reached); | 113 | printstat(s, mesq_noop_qlimit_reached); |
119 | printstat(s, mesq_noop_amo_nacked); | 114 | printstat(s, mesq_noop_amo_nacked); |
120 | printstat(s, mesq_noop_put_nacked); | 115 | printstat(s, mesq_noop_put_nacked); |
116 | printstat(s, mesq_noop_page_overflow); | ||
121 | return 0; | 117 | return 0; |
122 | } | 118 | } |
123 | 119 | ||
@@ -133,8 +129,10 @@ static int mcs_statistics_show(struct seq_file *s, void *p) | |||
133 | int op; | 129 | int op; |
134 | unsigned long total, count, max; | 130 | unsigned long total, count, max; |
135 | static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt", | 131 | static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt", |
136 | "cch_interrupt_sync", "cch_deallocate", "tgh_invalidate"}; | 132 | "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", |
133 | "tfh_write_restart", "tgh_invalidate"}; | ||
137 | 134 | ||
135 | seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks"); | ||
138 | for (op = 0; op < mcsop_last; op++) { | 136 | for (op = 0; op < mcsop_last; op++) { |
139 | count = atomic_long_read(&mcs_op_statistics[op].count); | 137 | count = atomic_long_read(&mcs_op_statistics[op].count); |
140 | total = atomic_long_read(&mcs_op_statistics[op].total); | 138 | total = atomic_long_read(&mcs_op_statistics[op].total); |
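
The new header row labels the columns count / aver-clks / max-clks; the average presumably comes from dividing the total clocks by the sample count. A hedged sketch of one per-op output line under that assumption, with a guard for ops that never ran (struct and values are hypothetical, not the driver's):

#include <stdio.h>

/* Hypothetical per-op record; the real driver reads these fields from
 * mcs_op_statistics[] with atomic_long_read(). */
struct op_stat {
	const char *id;
	unsigned long count, total, max;
};

static void print_op(const struct op_stat *s)
{
	unsigned long avg = s->count ? s->total / s->count : 0;

	printf("%-20s%12lu%12lu%12lu\n", s->id, s->count, avg, s->max);
}

int main(void)
{
	struct op_stat s = { "cch_allocate", 3, 900, 400 };	/* made-up numbers */

	printf("%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks");
	print_op(&s);
	return 0;
}
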
@@ -154,6 +152,7 @@ static ssize_t mcs_statistics_write(struct file *file, | |||
154 | 152 | ||
155 | static int options_show(struct seq_file *s, void *p) | 153 | static int options_show(struct seq_file *s, void *p) |
156 | { | 154 | { |
155 | seq_printf(s, "#bitmask: 1=trace, 2=statistics\n"); | ||
157 | seq_printf(s, "0x%lx\n", gru_options); | 156 | seq_printf(s, "0x%lx\n", gru_options); |
158 | return 0; | 157 | return 0; |
159 | } | 158 | } |
@@ -183,16 +182,17 @@ static int cch_seq_show(struct seq_file *file, void *data) | |||
183 | const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; | 182 | const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; |
184 | 183 | ||
185 | if (gid == 0) | 184 | if (gid == 0) |
186 | seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid", | 185 | seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid", |
187 | "ctx#", "pid", "cbrs", "dsbytes", "mode"); | 186 | "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode"); |
188 | if (gru) | 187 | if (gru) |
189 | for (i = 0; i < GRU_NUM_CCH; i++) { | 188 | for (i = 0; i < GRU_NUM_CCH; i++) { |
190 | ts = gru->gs_gts[i]; | 189 | ts = gru->gs_gts[i]; |
191 | if (!ts) | 190 | if (!ts) |
192 | continue; | 191 | continue; |
193 | seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n", | 192 | seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n", |
194 | gru->gs_gid, gru->gs_blade_id, i, | 193 | gru->gs_gid, gru->gs_blade_id, i, |
195 | ts->ts_tgid_owner, | 194 | is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid, |
195 | is_kernel_context(ts) ? 0 : ts->ts_tgid_owner, | ||
196 | ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, | 196 | ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, |
197 | ts->ts_cbr_au_count * GRU_DSR_AU_BYTES, | 197 | ts->ts_cbr_au_count * GRU_DSR_AU_BYTES, |
198 | mode[ts->ts_user_options & | 198 | mode[ts->ts_user_options & |
@@ -355,7 +355,7 @@ static void delete_proc_files(void) | |||
355 | for (p = proc_files; p->name; p++) | 355 | for (p = proc_files; p->name; p++) |
356 | if (p->entry) | 356 | if (p->entry) |
357 | remove_proc_entry(p->name, proc_gru); | 357 | remove_proc_entry(p->name, proc_gru); |
358 | remove_proc_entry("gru", NULL); | 358 | remove_proc_entry("gru", proc_gru->parent); |
359 | } | 359 | } |
360 | } | 360 | } |
361 | 361 | ||
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 46990bcfa536..7a8b9068ea03 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h | |||
@@ -161,7 +161,7 @@ extern unsigned int gru_max_gids; | |||
161 | #define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) | 161 | #define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) |
162 | 162 | ||
163 | #define GRU_DRIVER_ID_STR "SGI GRU Device Driver" | 163 | #define GRU_DRIVER_ID_STR "SGI GRU Device Driver" |
164 | #define GRU_DRIVER_VERSION_STR "0.80" | 164 | #define GRU_DRIVER_VERSION_STR "0.85" |
165 | 165 | ||
166 | /* | 166 | /* |
167 | * GRU statistics. | 167 | * GRU statistics. |
@@ -171,7 +171,8 @@ struct gru_stats_s { | |||
171 | atomic_long_t vdata_free; | 171 | atomic_long_t vdata_free; |
172 | atomic_long_t gts_alloc; | 172 | atomic_long_t gts_alloc; |
173 | atomic_long_t gts_free; | 173 | atomic_long_t gts_free; |
174 | atomic_long_t vdata_double_alloc; | 174 | atomic_long_t gms_alloc; |
175 | atomic_long_t gms_free; | ||
175 | atomic_long_t gts_double_allocate; | 176 | atomic_long_t gts_double_allocate; |
176 | atomic_long_t assign_context; | 177 | atomic_long_t assign_context; |
177 | atomic_long_t assign_context_failed; | 178 | atomic_long_t assign_context_failed; |
@@ -184,28 +185,25 @@ struct gru_stats_s { | |||
184 | atomic_long_t steal_kernel_context; | 185 | atomic_long_t steal_kernel_context; |
185 | atomic_long_t steal_context_failed; | 186 | atomic_long_t steal_context_failed; |
186 | atomic_long_t nopfn; | 187 | atomic_long_t nopfn; |
187 | atomic_long_t break_cow; | ||
188 | atomic_long_t asid_new; | 188 | atomic_long_t asid_new; |
189 | atomic_long_t asid_next; | 189 | atomic_long_t asid_next; |
190 | atomic_long_t asid_wrap; | 190 | atomic_long_t asid_wrap; |
191 | atomic_long_t asid_reuse; | 191 | atomic_long_t asid_reuse; |
192 | atomic_long_t intr; | 192 | atomic_long_t intr; |
193 | atomic_long_t intr_cbr; | ||
194 | atomic_long_t intr_tfh; | ||
195 | atomic_long_t intr_spurious; | ||
193 | atomic_long_t intr_mm_lock_failed; | 196 | atomic_long_t intr_mm_lock_failed; |
194 | atomic_long_t call_os; | 197 | atomic_long_t call_os; |
195 | atomic_long_t call_os_offnode_reference; | ||
196 | atomic_long_t call_os_check_for_bug; | ||
197 | atomic_long_t call_os_wait_queue; | 198 | atomic_long_t call_os_wait_queue; |
198 | atomic_long_t user_flush_tlb; | 199 | atomic_long_t user_flush_tlb; |
199 | atomic_long_t user_unload_context; | 200 | atomic_long_t user_unload_context; |
200 | atomic_long_t user_exception; | 201 | atomic_long_t user_exception; |
201 | atomic_long_t set_context_option; | 202 | atomic_long_t set_context_option; |
202 | atomic_long_t migrate_check; | 203 | atomic_long_t check_context_retarget_intr; |
203 | atomic_long_t migrated_retarget; | 204 | atomic_long_t check_context_unload; |
204 | atomic_long_t migrated_unload; | ||
205 | atomic_long_t migrated_unload_delay; | ||
206 | atomic_long_t migrated_nopfn_retarget; | ||
207 | atomic_long_t migrated_nopfn_unload; | ||
208 | atomic_long_t tlb_dropin; | 205 | atomic_long_t tlb_dropin; |
206 | atomic_long_t tlb_preload_page; | ||
209 | atomic_long_t tlb_dropin_fail_no_asid; | 207 | atomic_long_t tlb_dropin_fail_no_asid; |
210 | atomic_long_t tlb_dropin_fail_upm; | 208 | atomic_long_t tlb_dropin_fail_upm; |
211 | atomic_long_t tlb_dropin_fail_invalid; | 209 | atomic_long_t tlb_dropin_fail_invalid; |
@@ -213,17 +211,16 @@ struct gru_stats_s { | |||
213 | atomic_long_t tlb_dropin_fail_idle; | 211 | atomic_long_t tlb_dropin_fail_idle; |
214 | atomic_long_t tlb_dropin_fail_fmm; | 212 | atomic_long_t tlb_dropin_fail_fmm; |
215 | atomic_long_t tlb_dropin_fail_no_exception; | 213 | atomic_long_t tlb_dropin_fail_no_exception; |
216 | atomic_long_t tlb_dropin_fail_no_exception_war; | ||
217 | atomic_long_t tfh_stale_on_fault; | 214 | atomic_long_t tfh_stale_on_fault; |
218 | atomic_long_t mmu_invalidate_range; | 215 | atomic_long_t mmu_invalidate_range; |
219 | atomic_long_t mmu_invalidate_page; | 216 | atomic_long_t mmu_invalidate_page; |
220 | atomic_long_t mmu_clear_flush_young; | ||
221 | atomic_long_t flush_tlb; | 217 | atomic_long_t flush_tlb; |
222 | atomic_long_t flush_tlb_gru; | 218 | atomic_long_t flush_tlb_gru; |
223 | atomic_long_t flush_tlb_gru_tgh; | 219 | atomic_long_t flush_tlb_gru_tgh; |
224 | atomic_long_t flush_tlb_gru_zero_asid; | 220 | atomic_long_t flush_tlb_gru_zero_asid; |
225 | 221 | ||
226 | atomic_long_t copy_gpa; | 222 | atomic_long_t copy_gpa; |
223 | atomic_long_t read_gpa; | ||
227 | 224 | ||
228 | atomic_long_t mesq_receive; | 225 | atomic_long_t mesq_receive; |
229 | atomic_long_t mesq_receive_none; | 226 | atomic_long_t mesq_receive_none; |
@@ -235,7 +232,7 @@ struct gru_stats_s { | |||
235 | atomic_long_t mesq_send_qlimit_reached; | 232 | atomic_long_t mesq_send_qlimit_reached; |
236 | atomic_long_t mesq_send_amo_nacked; | 233 | atomic_long_t mesq_send_amo_nacked; |
237 | atomic_long_t mesq_send_put_nacked; | 234 | atomic_long_t mesq_send_put_nacked; |
238 | atomic_long_t mesq_qf_not_full; | 235 | atomic_long_t mesq_page_overflow; |
239 | atomic_long_t mesq_qf_locked; | 236 | atomic_long_t mesq_qf_locked; |
240 | atomic_long_t mesq_qf_noop_not_full; | 237 | atomic_long_t mesq_qf_noop_not_full; |
241 | atomic_long_t mesq_qf_switch_head_failed; | 238 | atomic_long_t mesq_qf_switch_head_failed; |
@@ -245,11 +242,13 @@ struct gru_stats_s { | |||
245 | atomic_long_t mesq_noop_qlimit_reached; | 242 | atomic_long_t mesq_noop_qlimit_reached; |
246 | atomic_long_t mesq_noop_amo_nacked; | 243 | atomic_long_t mesq_noop_amo_nacked; |
247 | atomic_long_t mesq_noop_put_nacked; | 244 | atomic_long_t mesq_noop_put_nacked; |
245 | atomic_long_t mesq_noop_page_overflow; | ||
248 | 246 | ||
249 | }; | 247 | }; |
250 | 248 | ||
251 | enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, | 249 | enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, |
252 | cchop_deallocate, tghop_invalidate, mcsop_last}; | 250 | cchop_deallocate, tfhop_write_only, tfhop_write_restart, |
251 | tghop_invalidate, mcsop_last}; | ||
253 | 252 | ||
254 | struct mcs_op_statistic { | 253 | struct mcs_op_statistic { |
255 | atomic_long_t count; | 254 | atomic_long_t count; |
@@ -259,8 +258,8 @@ struct mcs_op_statistic { | |||
259 | 258 | ||
260 | extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; | 259 | extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; |
261 | 260 | ||
262 | #define OPT_DPRINT 1 | 261 | #define OPT_DPRINT 1 |
263 | #define OPT_STATS 2 | 262 | #define OPT_STATS 2 |
264 | 263 | ||
265 | 264 | ||
266 | #define IRQ_GRU 110 /* Starting IRQ number for interrupts */ | 265 | #define IRQ_GRU 110 /* Starting IRQ number for interrupts */ |
@@ -283,7 +282,7 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; | |||
283 | #define gru_dbg(dev, fmt, x...) \ | 282 | #define gru_dbg(dev, fmt, x...) \ |
284 | do { \ | 283 | do { \ |
285 | if (gru_options & OPT_DPRINT) \ | 284 | if (gru_options & OPT_DPRINT) \ |
286 | dev_dbg(dev, "%s: " fmt, __func__, x); \ | 285 | printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\ |
287 | } while (0) | 286 | } while (0) |
288 | #else | 287 | #else |
289 | #define gru_dbg(x...) | 288 | #define gru_dbg(x...) |
@@ -297,13 +296,7 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; | |||
297 | #define ASID_INC 8 /* number of regions */ | 296 | #define ASID_INC 8 /* number of regions */ |
298 | 297 | ||
299 | /* Generate a GRU asid value from a GRU base asid & a virtual address. */ | 298 | /* Generate a GRU asid value from a GRU base asid & a virtual address. */ |
300 | #if defined CONFIG_IA64 | ||
301 | #define VADDR_HI_BIT 64 | 299 | #define VADDR_HI_BIT 64 |
302 | #elif defined CONFIG_X86_64 | ||
303 | #define VADDR_HI_BIT 48 | ||
304 | #else | ||
305 | #error "Unsupported architecture" | ||
306 | #endif | ||
307 | #define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) | 300 | #define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) |
308 | #define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) | 301 | #define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) |
309 | 302 | ||
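
With VADDR_HI_BIT now fixed at 64 for both architectures, GRUREGION() simply keeps bits 61..62 of the virtual address (shift by VADDR_HI_BIT - 3 = 61, then mask with 3). A small worked example with an arbitrary address:

#include <stdio.h>

#define VADDR_HI_BIT	64
#define GRUREGION(addr)	((addr) >> (VADDR_HI_BIT - 3) & 3)
#define GRUASID(asid, addr)	((asid) + GRUREGION(addr))

int main(void)
{
	unsigned long vaddr = 0x6000000000001000UL;	/* example address only */

	printf("region %lu, asid 0x%lx\n",
	       GRUREGION(vaddr), GRUASID(0x100UL, vaddr));
	return 0;
}
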
@@ -345,6 +338,7 @@ struct gru_vma_data { | |||
345 | long vd_user_options;/* misc user option flags */ | 338 | long vd_user_options;/* misc user option flags */ |
346 | int vd_cbr_au_count; | 339 | int vd_cbr_au_count; |
347 | int vd_dsr_au_count; | 340 | int vd_dsr_au_count; |
341 | unsigned char vd_tlb_preload_count; | ||
348 | }; | 342 | }; |
349 | 343 | ||
350 | /* | 344 | /* |
@@ -360,6 +354,7 @@ struct gru_thread_state { | |||
360 | struct gru_state *ts_gru; /* GRU where the context is | 354 | struct gru_state *ts_gru; /* GRU where the context is |
361 | loaded */ | 355 | loaded */ |
362 | struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ | 356 | struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ |
357 | unsigned char ts_tlb_preload_count; /* TLB preload pages */ | ||
363 | unsigned long ts_cbr_map; /* map of allocated CBRs */ | 358 | unsigned long ts_cbr_map; /* map of allocated CBRs */ |
364 | unsigned long ts_dsr_map; /* map of allocated DATA | 359 | unsigned long ts_dsr_map; /* map of allocated DATA |
365 | resources */ | 360 | resources */ |
@@ -368,6 +363,8 @@ struct gru_thread_state { | |||
368 | long ts_user_options;/* misc user option flags */ | 363 | long ts_user_options;/* misc user option flags */ |
369 | pid_t ts_tgid_owner; /* task that is using the | 364 | pid_t ts_tgid_owner; /* task that is using the |
370 | context - for migration */ | 365 | context - for migration */ |
366 | short ts_user_blade_id;/* user selected blade */ | ||
367 | char ts_user_chiplet_id;/* user selected chiplet */ | ||
371 | unsigned short ts_sizeavail; /* Pagesizes in use */ | 368 | unsigned short ts_sizeavail; /* Pagesizes in use */ |
372 | int ts_tsid; /* thread that owns the | 369 | int ts_tsid; /* thread that owns the |
373 | structure */ | 370 | structure */ |
@@ -384,13 +381,11 @@ struct gru_thread_state { | |||
384 | char ts_blade; /* If >= 0, migrate context if | 381 | char ts_blade; /* If >= 0, migrate context if |
385 | ref from different blade */ | 382 | ref from different blade */ |
386 | char ts_force_cch_reload; | 383 | char ts_force_cch_reload; |
387 | char ts_force_unload;/* force context to be unloaded | ||
388 | after migration */ | ||
389 | char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each | 384 | char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each |
390 | allocated CB */ | 385 | allocated CB */ |
391 | int ts_data_valid; /* Indicates if ts_gdata has | 386 | int ts_data_valid; /* Indicates if ts_gdata has |
392 | valid data */ | 387 | valid data */ |
393 | struct gts_statistics ustats; /* User statistics */ | 388 | struct gru_gseg_statistics ustats; /* User statistics */ |
394 | unsigned long ts_gdata[0]; /* save area for GRU data (CB, | 389 | unsigned long ts_gdata[0]; /* save area for GRU data (CB, |
395 | DS, CBE) */ | 390 | DS, CBE) */ |
396 | }; | 391 | }; |
@@ -422,6 +417,7 @@ struct gru_state { | |||
422 | gru segments (64) */ | 417 | gru segments (64) */ |
423 | unsigned short gs_gid; /* unique GRU number */ | 418 | unsigned short gs_gid; /* unique GRU number */ |
424 | unsigned short gs_blade_id; /* blade of GRU */ | 419 | unsigned short gs_blade_id; /* blade of GRU */ |
420 | unsigned char gs_chiplet_id; /* blade chiplet of GRU */ | ||
425 | unsigned char gs_tgh_local_shift; /* used to pick TGH for | 421 | unsigned char gs_tgh_local_shift; /* used to pick TGH for |
426 | local flush */ | 422 | local flush */ |
427 | unsigned char gs_tgh_first_remote; /* starting TGH# for | 423 | unsigned char gs_tgh_first_remote; /* starting TGH# for |
@@ -453,6 +449,7 @@ struct gru_state { | |||
453 | in use */ | 449 | in use */ |
454 | struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using | 450 | struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using |
455 | the context */ | 451 | the context */ |
452 | int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */ | ||
456 | }; | 453 | }; |
457 | 454 | ||
458 | /* | 455 | /* |
@@ -519,8 +516,7 @@ struct gru_blade_state { | |||
519 | 516 | ||
520 | /* Scan all active GRUs in a GRU bitmap */ | 517 | /* Scan all active GRUs in a GRU bitmap */ |
521 | #define for_each_gru_in_bitmap(gid, map) \ | 518 | #define for_each_gru_in_bitmap(gid, map) \ |
522 | for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\ | 519 | for_each_set_bit((gid), (map), GRU_MAX_GRUS) |
523 | (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid))) | ||
524 | 520 | ||
525 | /* Scan all active GRUs on a specific blade */ | 521 | /* Scan all active GRUs on a specific blade */ |
526 | #define for_each_gru_on_blade(gru, nid, i) \ | 522 | #define for_each_gru_on_blade(gru, nid, i) \ |
@@ -539,23 +535,17 @@ struct gru_blade_state { | |||
539 | 535 | ||
540 | /* Scan each CBR whose bit is set in a TFM (or copy of) */ | 536 | /* Scan each CBR whose bit is set in a TFM (or copy of) */ |
541 | #define for_each_cbr_in_tfm(i, map) \ | 537 | #define for_each_cbr_in_tfm(i, map) \ |
542 | for ((i) = find_first_bit(map, GRU_NUM_CBE); \ | 538 | for_each_set_bit((i), (map), GRU_NUM_CBE) |
543 | (i) < GRU_NUM_CBE; \ | ||
544 | (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i)) | ||
545 | 539 | ||
546 | /* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ | 540 | /* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ |
547 | #define for_each_cbr_in_allocation_map(i, map, k) \ | 541 | #define for_each_cbr_in_allocation_map(i, map, k) \ |
548 | for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \ | 542 | for_each_set_bit((k), (map), GRU_CBR_AU) \ |
549 | (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \ | ||
550 | for ((i) = (k)*GRU_CBR_AU_SIZE; \ | 543 | for ((i) = (k)*GRU_CBR_AU_SIZE; \ |
551 | (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) | 544 | (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) |
552 | 545 | ||
553 | /* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */ | 546 | /* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */ |
554 | #define for_each_dsr_in_allocation_map(i, map, k) \ | 547 | #define for_each_dsr_in_allocation_map(i, map, k) \ |
555 | for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\ | 548 | for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \ |
556 | (k) < GRU_DSR_AU; \ | ||
557 | (k) = find_next_bit((const unsigned long *)map, \ | ||
558 | GRU_DSR_AU, (k) + 1)) \ | ||
559 | for ((i) = (k) * GRU_DSR_AU_CL; \ | 549 | for ((i) = (k) * GRU_DSR_AU_CL; \ |
560 | (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++) | 550 | (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++) |
561 | 551 | ||
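
The macro rewrites above drop the open-coded find_first_bit()/find_next_bit() loops in favor of for_each_set_bit(), which visits only the set bits of the map before the inner per-unit loop runs. A plain-C equivalent of the nested allocation-map scan, with illustrative sizes rather than the driver's actual constants:

#include <stdio.h>

#define CBR_AU		4	/* illustrative sizes only */
#define CBR_AU_SIZE	2

int main(void)
{
	unsigned long cbrmap = 0x5;	/* allocation units 0 and 2 in use */
	int i, k;

	/* plain-C equivalent of for_each_cbr_in_allocation_map(i, &cbrmap, k) */
	for (k = 0; k < CBR_AU; k++) {
		if (!(cbrmap & (1UL << k)))
			continue;
		for (i = k * CBR_AU_SIZE; i < (k + 1) * CBR_AU_SIZE; i++)
			printf("CBR %d (allocation unit %d)\n", i, k);
	}
	return 0;
}
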
@@ -619,6 +609,15 @@ static inline int is_kernel_context(struct gru_thread_state *gts) | |||
619 | return !gts->ts_mm; | 609 | return !gts->ts_mm; |
620 | } | 610 | } |
621 | 611 | ||
612 | /* | ||
613 | * The following are for Nehalem-EX. A more general scheme is needed for | ||
614 | * future processors. | ||
615 | */ | ||
616 | #define UV_MAX_INT_CORES 8 | ||
617 | #define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1) | ||
618 | #define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1) | ||
619 | #define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \ | ||
620 | ((cpu_physical_id(p) >> 1) & 3)) | ||
622 | /*----------------------------------------------------------------------------- | 621 | /*----------------------------------------------------------------------------- |
623 | * Function prototypes & externs | 622 | * Function prototypes & externs |
624 | */ | 623 | */ |
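
The three macros above carve a Nehalem-EX physical APIC id into a socket number (bit 5), an HT thread number (bit 0), and a 3-bit core number assembled from bits 1, 2 and 4. A quick decode of a made-up id, mirroring those bit fields:

#include <stdio.h>

/* Same bit fields as uv_cpu_socket_number()/uv_cpu_ht_number()/
 * uv_cpu_core_number(); the physical id here is a made-up example. */
int main(void)
{
	unsigned int p = 0x2b;

	int socket = (p >> 5) & 1;
	int ht     = p & 1;
	int core   = ((p >> 2) & 4) | ((p >> 1) & 3);

	printf("socket %d, core %d, ht %d\n", socket, core, ht);
	return 0;
}
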
@@ -633,24 +632,26 @@ extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct | |||
633 | *vma, int tsid); | 632 | *vma, int tsid); |
634 | extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct | 633 | extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct |
635 | *vma, int tsid); | 634 | *vma, int tsid); |
636 | extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts, | 635 | extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts); |
637 | int blade); | ||
638 | extern void gru_load_context(struct gru_thread_state *gts); | 636 | extern void gru_load_context(struct gru_thread_state *gts); |
639 | extern void gru_steal_context(struct gru_thread_state *gts, int blade_id); | 637 | extern void gru_steal_context(struct gru_thread_state *gts); |
640 | extern void gru_unload_context(struct gru_thread_state *gts, int savestate); | 638 | extern void gru_unload_context(struct gru_thread_state *gts, int savestate); |
641 | extern int gru_update_cch(struct gru_thread_state *gts, int force_unload); | 639 | extern int gru_update_cch(struct gru_thread_state *gts); |
642 | extern void gts_drop(struct gru_thread_state *gts); | 640 | extern void gts_drop(struct gru_thread_state *gts); |
643 | extern void gru_tgh_flush_init(struct gru_state *gru); | 641 | extern void gru_tgh_flush_init(struct gru_state *gru); |
644 | extern int gru_kservices_init(void); | 642 | extern int gru_kservices_init(void); |
645 | extern void gru_kservices_exit(void); | 643 | extern void gru_kservices_exit(void); |
644 | extern irqreturn_t gru0_intr(int irq, void *dev_id); | ||
645 | extern irqreturn_t gru1_intr(int irq, void *dev_id); | ||
646 | extern irqreturn_t gru_intr_mblade(int irq, void *dev_id); | ||
646 | extern int gru_dump_chiplet_request(unsigned long arg); | 647 | extern int gru_dump_chiplet_request(unsigned long arg); |
647 | extern long gru_get_gseg_statistics(unsigned long arg); | 648 | extern long gru_get_gseg_statistics(unsigned long arg); |
648 | extern irqreturn_t gru_intr(int irq, void *dev_id); | ||
649 | extern int gru_handle_user_call_os(unsigned long address); | 649 | extern int gru_handle_user_call_os(unsigned long address); |
650 | extern int gru_user_flush_tlb(unsigned long arg); | 650 | extern int gru_user_flush_tlb(unsigned long arg); |
651 | extern int gru_user_unload_context(unsigned long arg); | 651 | extern int gru_user_unload_context(unsigned long arg); |
652 | extern int gru_get_exception_detail(unsigned long arg); | 652 | extern int gru_get_exception_detail(unsigned long arg); |
653 | extern int gru_set_context_option(unsigned long address); | 653 | extern int gru_set_context_option(unsigned long address); |
654 | extern void gru_check_context_placement(struct gru_thread_state *gts); | ||
654 | extern int gru_cpu_fault_map_id(void); | 655 | extern int gru_cpu_fault_map_id(void); |
655 | extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); | 656 | extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); |
656 | extern void gru_flush_all_tlb(struct gru_state *gru); | 657 | extern void gru_flush_all_tlb(struct gru_state *gru); |
@@ -658,7 +659,8 @@ extern int gru_proc_init(void); | |||
658 | extern void gru_proc_exit(void); | 659 | extern void gru_proc_exit(void); |
659 | 660 | ||
660 | extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, | 661 | extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, |
661 | int cbr_au_count, int dsr_au_count, int options, int tsid); | 662 | int cbr_au_count, int dsr_au_count, |
663 | unsigned char tlb_preload_count, int options, int tsid); | ||
662 | extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, | 664 | extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, |
663 | int cbr_au_count, char *cbmap); | 665 | int cbr_au_count, char *cbmap); |
664 | extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, | 666 | extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, |
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 1d125091f5e7..240a6d361665 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c | |||
@@ -184,8 +184,8 @@ void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, | |||
184 | STAT(flush_tlb_gru_tgh); | 184 | STAT(flush_tlb_gru_tgh); |
185 | asid = GRUASID(asid, start); | 185 | asid = GRUASID(asid, start); |
186 | gru_dbg(grudev, | 186 | gru_dbg(grudev, |
187 | " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n", | 187 | " FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n", |
188 | gid, asid, num, asids->mt_ctxbitmap); | 188 | gid, asid, start, grupagesize, num, asids->mt_ctxbitmap); |
189 | tgh = get_lock_tgh_handle(gru); | 189 | tgh = get_lock_tgh_handle(gru); |
190 | tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0, | 190 | tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0, |
191 | num - 1, asids->mt_ctxbitmap); | 191 | num - 1, asids->mt_ctxbitmap); |
@@ -299,6 +299,7 @@ struct gru_mm_struct *gru_register_mmu_notifier(void) | |||
299 | { | 299 | { |
300 | struct gru_mm_struct *gms; | 300 | struct gru_mm_struct *gms; |
301 | struct mmu_notifier *mn; | 301 | struct mmu_notifier *mn; |
302 | int err; | ||
302 | 303 | ||
303 | mn = mmu_find_ops(current->mm, &gru_mmuops); | 304 | mn = mmu_find_ops(current->mm, &gru_mmuops); |
304 | if (mn) { | 305 | if (mn) { |
@@ -307,16 +308,22 @@ struct gru_mm_struct *gru_register_mmu_notifier(void) | |||
307 | } else { | 308 | } else { |
308 | gms = kzalloc(sizeof(*gms), GFP_KERNEL); | 309 | gms = kzalloc(sizeof(*gms), GFP_KERNEL); |
309 | if (gms) { | 310 | if (gms) { |
311 | STAT(gms_alloc); | ||
310 | spin_lock_init(&gms->ms_asid_lock); | 312 | spin_lock_init(&gms->ms_asid_lock); |
311 | gms->ms_notifier.ops = &gru_mmuops; | 313 | gms->ms_notifier.ops = &gru_mmuops; |
312 | atomic_set(&gms->ms_refcnt, 1); | 314 | atomic_set(&gms->ms_refcnt, 1); |
313 | init_waitqueue_head(&gms->ms_wait_queue); | 315 | init_waitqueue_head(&gms->ms_wait_queue); |
314 | __mmu_notifier_register(&gms->ms_notifier, current->mm); | 316 | err = __mmu_notifier_register(&gms->ms_notifier, current->mm); |
317 | if (err) | ||
318 | goto error; | ||
315 | } | 319 | } |
316 | } | 320 | } |
317 | gru_dbg(grudev, "gms %p, refcnt %d\n", gms, | 321 | gru_dbg(grudev, "gms %p, refcnt %d\n", gms, |
318 | atomic_read(&gms->ms_refcnt)); | 322 | atomic_read(&gms->ms_refcnt)); |
319 | return gms; | 323 | return gms; |
324 | error: | ||
325 | kfree(gms); | ||
326 | return ERR_PTR(err); | ||
320 | } | 327 | } |
321 | 328 | ||
322 | void gru_drop_mmu_notifier(struct gru_mm_struct *gms) | 329 | void gru_drop_mmu_notifier(struct gru_mm_struct *gms) |
@@ -327,6 +334,7 @@ void gru_drop_mmu_notifier(struct gru_mm_struct *gms) | |||
327 | if (!gms->ms_released) | 334 | if (!gms->ms_released) |
328 | mmu_notifier_unregister(&gms->ms_notifier, current->mm); | 335 | mmu_notifier_unregister(&gms->ms_notifier, current->mm); |
329 | kfree(gms); | 336 | kfree(gms); |
337 | STAT(gms_free); | ||
330 | } | 338 | } |
331 | } | 339 | } |
332 | 340 | ||