diff options
Diffstat (limited to 'drivers/misc/sgi-gru/grufault.c')
-rw-r--r-- | drivers/misc/sgi-gru/grufault.c | 311 |
1 files changed, 214 insertions, 97 deletions
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 679e01778286..38657cdaf54d 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c | |||
@@ -40,6 +40,12 @@ | |||
40 | #include "gru_instructions.h" | 40 | #include "gru_instructions.h" |
41 | #include <asm/uv/uv_hub.h> | 41 | #include <asm/uv/uv_hub.h> |
42 | 42 | ||
43 | /* Return codes for vtop functions */ | ||
44 | #define VTOP_SUCCESS 0 | ||
45 | #define VTOP_INVALID -1 | ||
46 | #define VTOP_RETRY -2 | ||
47 | |||
48 | |||
43 | /* | 49 | /* |
44 | * Test if a physical address is a valid GRU GSEG address | 50 | * Test if a physical address is a valid GRU GSEG address |
45 | */ | 51 | */ |
@@ -90,19 +96,22 @@ static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) | |||
90 | { | 96 | { |
91 | struct mm_struct *mm = current->mm; | 97 | struct mm_struct *mm = current->mm; |
92 | struct vm_area_struct *vma; | 98 | struct vm_area_struct *vma; |
93 | struct gru_thread_state *gts = NULL; | 99 | struct gru_thread_state *gts = ERR_PTR(-EINVAL); |
94 | 100 | ||
95 | down_write(&mm->mmap_sem); | 101 | down_write(&mm->mmap_sem); |
96 | vma = gru_find_vma(vaddr); | 102 | vma = gru_find_vma(vaddr); |
97 | if (vma) | 103 | if (!vma) |
98 | gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); | 104 | goto err; |
99 | if (gts) { | ||
100 | mutex_lock(&gts->ts_ctxlock); | ||
101 | downgrade_write(&mm->mmap_sem); | ||
102 | } else { | ||
103 | up_write(&mm->mmap_sem); | ||
104 | } | ||
105 | 105 | ||
106 | gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); | ||
107 | if (IS_ERR(gts)) | ||
108 | goto err; | ||
109 | mutex_lock(&gts->ts_ctxlock); | ||
110 | downgrade_write(&mm->mmap_sem); | ||
111 | return gts; | ||
112 | |||
113 | err: | ||
114 | up_write(&mm->mmap_sem); | ||
106 | return gts; | 115 | return gts; |
107 | } | 116 | } |
108 | 117 | ||
@@ -122,39 +131,15 @@ static void gru_unlock_gts(struct gru_thread_state *gts) | |||
122 | * is necessary to prevent the user from seeing a stale cb.istatus that will | 131 | * is necessary to prevent the user from seeing a stale cb.istatus that will |
123 | * change as soon as the TFH restart is complete. Races may cause an | 132 | * change as soon as the TFH restart is complete. Races may cause an |
124 | * occasional failure to clear the cb.istatus, but that is ok. | 133 | * occasional failure to clear the cb.istatus, but that is ok. |
125 | * | ||
126 | * If the cb address is not valid (should not happen, but...), nothing | ||
127 | * bad will happen.. The get_user()/put_user() will fail but there | ||
128 | * are no bad side-effects. | ||
129 | */ | 134 | */ |
130 | static void gru_cb_set_istatus_active(unsigned long __user *cb) | 135 | static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk) |
131 | { | 136 | { |
132 | union { | 137 | if (cbk) { |
133 | struct gru_instruction_bits bits; | 138 | cbk->istatus = CBS_ACTIVE; |
134 | unsigned long dw; | ||
135 | } u; | ||
136 | |||
137 | if (cb) { | ||
138 | get_user(u.dw, cb); | ||
139 | u.bits.istatus = CBS_ACTIVE; | ||
140 | put_user(u.dw, cb); | ||
141 | } | 139 | } |
142 | } | 140 | } |
143 | 141 | ||
144 | /* | 142 | /* |
145 | * Convert a interrupt IRQ to a pointer to the GRU GTS that caused the | ||
146 | * interrupt. Interrupts are always sent to a cpu on the blade that contains the | ||
147 | * GRU (except for headless blades which are not currently supported). A blade | ||
148 | * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ | ||
149 | * number uniquely identifies the GRU chiplet on the local blade that caused the | ||
150 | * interrupt. Always called in interrupt context. | ||
151 | */ | ||
152 | static inline struct gru_state *irq_to_gru(int irq) | ||
153 | { | ||
154 | return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU]; | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * Read & clear a TFM | 143 | * Read & clear a TFM |
159 | * | 144 | * |
160 | * The GRU has an array of fault maps. A map is private to a cpu | 145 | * The GRU has an array of fault maps. A map is private to a cpu |
@@ -207,10 +192,11 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma, | |||
207 | { | 192 | { |
208 | struct page *page; | 193 | struct page *page; |
209 | 194 | ||
210 | /* ZZZ Need to handle HUGE pages */ | 195 | #ifdef CONFIG_HUGETLB_PAGE |
211 | if (is_vm_hugetlb_page(vma)) | 196 | *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; |
212 | return -EFAULT; | 197 | #else |
213 | *pageshift = PAGE_SHIFT; | 198 | *pageshift = PAGE_SHIFT; |
199 | #endif | ||
214 | if (get_user_pages | 200 | if (get_user_pages |
215 | (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) | 201 | (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) |
216 | return -EFAULT; | 202 | return -EFAULT; |
@@ -268,7 +254,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, | |||
268 | return 0; | 254 | return 0; |
269 | 255 | ||
270 | err: | 256 | err: |
271 | local_irq_enable(); | ||
272 | return 1; | 257 | return 1; |
273 | } | 258 | } |
274 | 259 | ||
@@ -301,14 +286,69 @@ static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr, | |||
301 | paddr = paddr & ~((1UL << ps) - 1); | 286 | paddr = paddr & ~((1UL << ps) - 1); |
302 | *gpa = uv_soc_phys_ram_to_gpa(paddr); | 287 | *gpa = uv_soc_phys_ram_to_gpa(paddr); |
303 | *pageshift = ps; | 288 | *pageshift = ps; |
304 | return 0; | 289 | return VTOP_SUCCESS; |
305 | 290 | ||
306 | inval: | 291 | inval: |
307 | return -1; | 292 | return VTOP_INVALID; |
308 | upm: | 293 | upm: |
309 | return -2; | 294 | return VTOP_RETRY; |
295 | } | ||
296 | |||
297 | |||
298 | /* | ||
299 | * Flush a CBE from cache. The CBE is clean in the cache. Dirty the | ||
300 | * CBE cacheline so that the line will be written back to home agent. | ||
301 | * Otherwise the line may be silently dropped. This has no impact | ||
302 | * except on performance. | ||
303 | */ | ||
304 | static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe) | ||
305 | { | ||
306 | if (unlikely(cbe)) { | ||
307 | cbe->cbrexecstatus = 0; /* make CL dirty */ | ||
308 | gru_flush_cache(cbe); | ||
309 | } | ||
310 | } | 310 | } |
311 | 311 | ||
312 | /* | ||
313 | * Preload the TLB with entries that may be required. Currently, preloading | ||
314 | * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to | ||
315 | * the end of the bcopy tranfer, whichever is smaller. | ||
316 | */ | ||
317 | static void gru_preload_tlb(struct gru_state *gru, | ||
318 | struct gru_thread_state *gts, int atomic, | ||
319 | unsigned long fault_vaddr, int asid, int write, | ||
320 | unsigned char tlb_preload_count, | ||
321 | struct gru_tlb_fault_handle *tfh, | ||
322 | struct gru_control_block_extended *cbe) | ||
323 | { | ||
324 | unsigned long vaddr = 0, gpa; | ||
325 | int ret, pageshift; | ||
326 | |||
327 | if (cbe->opccpy != OP_BCOPY) | ||
328 | return; | ||
329 | |||
330 | if (fault_vaddr == cbe->cbe_baddr0) | ||
331 | vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1; | ||
332 | else if (fault_vaddr == cbe->cbe_baddr1) | ||
333 | vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1; | ||
334 | |||
335 | fault_vaddr &= PAGE_MASK; | ||
336 | vaddr &= PAGE_MASK; | ||
337 | vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE); | ||
338 | |||
339 | while (vaddr > fault_vaddr) { | ||
340 | ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); | ||
341 | if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write, | ||
342 | GRU_PAGESIZE(pageshift))) | ||
343 | return; | ||
344 | gru_dbg(grudev, | ||
345 | "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n", | ||
346 | atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, | ||
347 | vaddr, asid, write, pageshift, gpa); | ||
348 | vaddr -= PAGE_SIZE; | ||
349 | STAT(tlb_preload_page); | ||
350 | } | ||
351 | } | ||
312 | 352 | ||
313 | /* | 353 | /* |
314 | * Drop a TLB entry into the GRU. The fault is described by info in an TFH. | 354 | * Drop a TLB entry into the GRU. The fault is described by info in an TFH. |
@@ -320,11 +360,14 @@ upm: | |||
320 | * < 0 = error code | 360 | * < 0 = error code |
321 | * | 361 | * |
322 | */ | 362 | */ |
323 | static int gru_try_dropin(struct gru_thread_state *gts, | 363 | static int gru_try_dropin(struct gru_state *gru, |
364 | struct gru_thread_state *gts, | ||
324 | struct gru_tlb_fault_handle *tfh, | 365 | struct gru_tlb_fault_handle *tfh, |
325 | unsigned long __user *cb) | 366 | struct gru_instruction_bits *cbk) |
326 | { | 367 | { |
327 | int pageshift = 0, asid, write, ret, atomic = !cb; | 368 | struct gru_control_block_extended *cbe = NULL; |
369 | unsigned char tlb_preload_count = gts->ts_tlb_preload_count; | ||
370 | int pageshift = 0, asid, write, ret, atomic = !cbk, indexway; | ||
328 | unsigned long gpa = 0, vaddr = 0; | 371 | unsigned long gpa = 0, vaddr = 0; |
329 | 372 | ||
330 | /* | 373 | /* |
@@ -335,24 +378,34 @@ static int gru_try_dropin(struct gru_thread_state *gts, | |||
335 | */ | 378 | */ |
336 | 379 | ||
337 | /* | 380 | /* |
381 | * Prefetch the CBE if doing TLB preloading | ||
382 | */ | ||
383 | if (unlikely(tlb_preload_count)) { | ||
384 | cbe = gru_tfh_to_cbe(tfh); | ||
385 | prefetchw(cbe); | ||
386 | } | ||
387 | |||
388 | /* | ||
338 | * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. | 389 | * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. |
339 | * Might be a hardware race OR a stupid user. Ignore FMM because FMM | 390 | * Might be a hardware race OR a stupid user. Ignore FMM because FMM |
340 | * is a transient state. | 391 | * is a transient state. |
341 | */ | 392 | */ |
342 | if (tfh->status != TFHSTATUS_EXCEPTION) { | 393 | if (tfh->status != TFHSTATUS_EXCEPTION) { |
343 | gru_flush_cache(tfh); | 394 | gru_flush_cache(tfh); |
395 | sync_core(); | ||
344 | if (tfh->status != TFHSTATUS_EXCEPTION) | 396 | if (tfh->status != TFHSTATUS_EXCEPTION) |
345 | goto failnoexception; | 397 | goto failnoexception; |
346 | STAT(tfh_stale_on_fault); | 398 | STAT(tfh_stale_on_fault); |
347 | } | 399 | } |
348 | if (tfh->state == TFHSTATE_IDLE) | 400 | if (tfh->state == TFHSTATE_IDLE) |
349 | goto failidle; | 401 | goto failidle; |
350 | if (tfh->state == TFHSTATE_MISS_FMM && cb) | 402 | if (tfh->state == TFHSTATE_MISS_FMM && cbk) |
351 | goto failfmm; | 403 | goto failfmm; |
352 | 404 | ||
353 | write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; | 405 | write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; |
354 | vaddr = tfh->missvaddr; | 406 | vaddr = tfh->missvaddr; |
355 | asid = tfh->missasid; | 407 | asid = tfh->missasid; |
408 | indexway = tfh->indexway; | ||
356 | if (asid == 0) | 409 | if (asid == 0) |
357 | goto failnoasid; | 410 | goto failnoasid; |
358 | 411 | ||
@@ -366,41 +419,51 @@ static int gru_try_dropin(struct gru_thread_state *gts, | |||
366 | goto failactive; | 419 | goto failactive; |
367 | 420 | ||
368 | ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); | 421 | ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); |
369 | if (ret == -1) | 422 | if (ret == VTOP_INVALID) |
370 | goto failinval; | 423 | goto failinval; |
371 | if (ret == -2) | 424 | if (ret == VTOP_RETRY) |
372 | goto failupm; | 425 | goto failupm; |
373 | 426 | ||
374 | if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { | 427 | if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { |
375 | gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); | 428 | gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); |
376 | if (atomic || !gru_update_cch(gts, 0)) { | 429 | if (atomic || !gru_update_cch(gts)) { |
377 | gts->ts_force_cch_reload = 1; | 430 | gts->ts_force_cch_reload = 1; |
378 | goto failupm; | 431 | goto failupm; |
379 | } | 432 | } |
380 | } | 433 | } |
381 | gru_cb_set_istatus_active(cb); | 434 | |
435 | if (unlikely(cbe) && pageshift == PAGE_SHIFT) { | ||
436 | gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe); | ||
437 | gru_flush_cache_cbe(cbe); | ||
438 | } | ||
439 | |||
440 | gru_cb_set_istatus_active(cbk); | ||
441 | gts->ustats.tlbdropin++; | ||
382 | tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, | 442 | tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, |
383 | GRU_PAGESIZE(pageshift)); | 443 | GRU_PAGESIZE(pageshift)); |
384 | STAT(tlb_dropin); | ||
385 | gru_dbg(grudev, | 444 | gru_dbg(grudev, |
386 | "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n", | 445 | "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x," |
387 | ret ? "non-atomic" : "atomic", tfh, vaddr, asid, | 446 | " rw %d, ps %d, gpa 0x%lx\n", |
388 | pageshift, gpa); | 447 | atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid, |
448 | indexway, write, pageshift, gpa); | ||
449 | STAT(tlb_dropin); | ||
389 | return 0; | 450 | return 0; |
390 | 451 | ||
391 | failnoasid: | 452 | failnoasid: |
392 | /* No asid (delayed unload). */ | 453 | /* No asid (delayed unload). */ |
393 | STAT(tlb_dropin_fail_no_asid); | 454 | STAT(tlb_dropin_fail_no_asid); |
394 | gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); | 455 | gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); |
395 | if (!cb) | 456 | if (!cbk) |
396 | tfh_user_polling_mode(tfh); | 457 | tfh_user_polling_mode(tfh); |
397 | else | 458 | else |
398 | gru_flush_cache(tfh); | 459 | gru_flush_cache(tfh); |
460 | gru_flush_cache_cbe(cbe); | ||
399 | return -EAGAIN; | 461 | return -EAGAIN; |
400 | 462 | ||
401 | failupm: | 463 | failupm: |
402 | /* Atomic failure switch CBR to UPM */ | 464 | /* Atomic failure switch CBR to UPM */ |
403 | tfh_user_polling_mode(tfh); | 465 | tfh_user_polling_mode(tfh); |
466 | gru_flush_cache_cbe(cbe); | ||
404 | STAT(tlb_dropin_fail_upm); | 467 | STAT(tlb_dropin_fail_upm); |
405 | gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); | 468 | gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); |
406 | return 1; | 469 | return 1; |
@@ -408,6 +471,7 @@ failupm: | |||
408 | failfmm: | 471 | failfmm: |
409 | /* FMM state on UPM call */ | 472 | /* FMM state on UPM call */ |
410 | gru_flush_cache(tfh); | 473 | gru_flush_cache(tfh); |
474 | gru_flush_cache_cbe(cbe); | ||
411 | STAT(tlb_dropin_fail_fmm); | 475 | STAT(tlb_dropin_fail_fmm); |
412 | gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); | 476 | gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); |
413 | return 0; | 477 | return 0; |
@@ -415,17 +479,20 @@ failfmm: | |||
415 | failnoexception: | 479 | failnoexception: |
416 | /* TFH status did not show exception pending */ | 480 | /* TFH status did not show exception pending */ |
417 | gru_flush_cache(tfh); | 481 | gru_flush_cache(tfh); |
418 | if (cb) | 482 | gru_flush_cache_cbe(cbe); |
419 | gru_flush_cache(cb); | 483 | if (cbk) |
484 | gru_flush_cache(cbk); | ||
420 | STAT(tlb_dropin_fail_no_exception); | 485 | STAT(tlb_dropin_fail_no_exception); |
421 | gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", tfh, tfh->status, tfh->state); | 486 | gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", |
487 | tfh, tfh->status, tfh->state); | ||
422 | return 0; | 488 | return 0; |
423 | 489 | ||
424 | failidle: | 490 | failidle: |
425 | /* TFH state was idle - no miss pending */ | 491 | /* TFH state was idle - no miss pending */ |
426 | gru_flush_cache(tfh); | 492 | gru_flush_cache(tfh); |
427 | if (cb) | 493 | gru_flush_cache_cbe(cbe); |
428 | gru_flush_cache(cb); | 494 | if (cbk) |
495 | gru_flush_cache(cbk); | ||
429 | STAT(tlb_dropin_fail_idle); | 496 | STAT(tlb_dropin_fail_idle); |
430 | gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); | 497 | gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); |
431 | return 0; | 498 | return 0; |
@@ -433,16 +500,18 @@ failidle: | |||
433 | failinval: | 500 | failinval: |
434 | /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ | 501 | /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ |
435 | tfh_exception(tfh); | 502 | tfh_exception(tfh); |
503 | gru_flush_cache_cbe(cbe); | ||
436 | STAT(tlb_dropin_fail_invalid); | 504 | STAT(tlb_dropin_fail_invalid); |
437 | gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); | 505 | gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); |
438 | return -EFAULT; | 506 | return -EFAULT; |
439 | 507 | ||
440 | failactive: | 508 | failactive: |
441 | /* Range invalidate active. Switch to UPM iff atomic */ | 509 | /* Range invalidate active. Switch to UPM iff atomic */ |
442 | if (!cb) | 510 | if (!cbk) |
443 | tfh_user_polling_mode(tfh); | 511 | tfh_user_polling_mode(tfh); |
444 | else | 512 | else |
445 | gru_flush_cache(tfh); | 513 | gru_flush_cache(tfh); |
514 | gru_flush_cache_cbe(cbe); | ||
446 | STAT(tlb_dropin_fail_range_active); | 515 | STAT(tlb_dropin_fail_range_active); |
447 | gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", | 516 | gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", |
448 | tfh, vaddr); | 517 | tfh, vaddr); |
@@ -455,31 +524,41 @@ failactive: | |||
455 | * Note that this is the interrupt handler that is registered with linux | 524 | * Note that this is the interrupt handler that is registered with linux |
456 | * interrupt handlers. | 525 | * interrupt handlers. |
457 | */ | 526 | */ |
458 | irqreturn_t gru_intr(int irq, void *dev_id) | 527 | static irqreturn_t gru_intr(int chiplet, int blade) |
459 | { | 528 | { |
460 | struct gru_state *gru; | 529 | struct gru_state *gru; |
461 | struct gru_tlb_fault_map imap, dmap; | 530 | struct gru_tlb_fault_map imap, dmap; |
462 | struct gru_thread_state *gts; | 531 | struct gru_thread_state *gts; |
463 | struct gru_tlb_fault_handle *tfh = NULL; | 532 | struct gru_tlb_fault_handle *tfh = NULL; |
533 | struct completion *cmp; | ||
464 | int cbrnum, ctxnum; | 534 | int cbrnum, ctxnum; |
465 | 535 | ||
466 | STAT(intr); | 536 | STAT(intr); |
467 | 537 | ||
468 | gru = irq_to_gru(irq); | 538 | gru = &gru_base[blade]->bs_grus[chiplet]; |
469 | if (!gru) { | 539 | if (!gru) { |
470 | dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n", | 540 | dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n", |
471 | raw_smp_processor_id(), irq); | 541 | raw_smp_processor_id(), chiplet); |
472 | return IRQ_NONE; | 542 | return IRQ_NONE; |
473 | } | 543 | } |
474 | get_clear_fault_map(gru, &imap, &dmap); | 544 | get_clear_fault_map(gru, &imap, &dmap); |
545 | gru_dbg(grudev, | ||
546 | "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n", | ||
547 | smp_processor_id(), chiplet, gru->gs_gid, | ||
548 | imap.fault_bits[0], imap.fault_bits[1], | ||
549 | dmap.fault_bits[0], dmap.fault_bits[1]); | ||
475 | 550 | ||
476 | for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { | 551 | for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { |
477 | complete(gru->gs_blade->bs_async_wq); | 552 | STAT(intr_cbr); |
553 | cmp = gru->gs_blade->bs_async_wq; | ||
554 | if (cmp) | ||
555 | complete(cmp); | ||
478 | gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", | 556 | gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", |
479 | gru->gs_gid, cbrnum, gru->gs_blade->bs_async_wq->done); | 557 | gru->gs_gid, cbrnum, cmp ? cmp->done : -1); |
480 | } | 558 | } |
481 | 559 | ||
482 | for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { | 560 | for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { |
561 | STAT(intr_tfh); | ||
483 | tfh = get_tfh_by_index(gru, cbrnum); | 562 | tfh = get_tfh_by_index(gru, cbrnum); |
484 | prefetchw(tfh); /* Helps on hdw, required for emulator */ | 563 | prefetchw(tfh); /* Helps on hdw, required for emulator */ |
485 | 564 | ||
@@ -492,14 +571,20 @@ irqreturn_t gru_intr(int irq, void *dev_id) | |||
492 | ctxnum = tfh->ctxnum; | 571 | ctxnum = tfh->ctxnum; |
493 | gts = gru->gs_gts[ctxnum]; | 572 | gts = gru->gs_gts[ctxnum]; |
494 | 573 | ||
574 | /* Spurious interrupts can cause this. Ignore. */ | ||
575 | if (!gts) { | ||
576 | STAT(intr_spurious); | ||
577 | continue; | ||
578 | } | ||
579 | |||
495 | /* | 580 | /* |
496 | * This is running in interrupt context. Trylock the mmap_sem. | 581 | * This is running in interrupt context. Trylock the mmap_sem. |
497 | * If it fails, retry the fault in user context. | 582 | * If it fails, retry the fault in user context. |
498 | */ | 583 | */ |
584 | gts->ustats.fmm_tlbmiss++; | ||
499 | if (!gts->ts_force_cch_reload && | 585 | if (!gts->ts_force_cch_reload && |
500 | down_read_trylock(&gts->ts_mm->mmap_sem)) { | 586 | down_read_trylock(&gts->ts_mm->mmap_sem)) { |
501 | gts->ustats.fmm_tlbdropin++; | 587 | gru_try_dropin(gru, gts, tfh, NULL); |
502 | gru_try_dropin(gts, tfh, NULL); | ||
503 | up_read(&gts->ts_mm->mmap_sem); | 588 | up_read(&gts->ts_mm->mmap_sem); |
504 | } else { | 589 | } else { |
505 | tfh_user_polling_mode(tfh); | 590 | tfh_user_polling_mode(tfh); |
@@ -509,20 +594,43 @@ irqreturn_t gru_intr(int irq, void *dev_id) | |||
509 | return IRQ_HANDLED; | 594 | return IRQ_HANDLED; |
510 | } | 595 | } |
511 | 596 | ||
597 | irqreturn_t gru0_intr(int irq, void *dev_id) | ||
598 | { | ||
599 | return gru_intr(0, uv_numa_blade_id()); | ||
600 | } | ||
601 | |||
602 | irqreturn_t gru1_intr(int irq, void *dev_id) | ||
603 | { | ||
604 | return gru_intr(1, uv_numa_blade_id()); | ||
605 | } | ||
606 | |||
607 | irqreturn_t gru_intr_mblade(int irq, void *dev_id) | ||
608 | { | ||
609 | int blade; | ||
610 | |||
611 | for_each_possible_blade(blade) { | ||
612 | if (uv_blade_nr_possible_cpus(blade)) | ||
613 | continue; | ||
614 | gru_intr(0, blade); | ||
615 | gru_intr(1, blade); | ||
616 | } | ||
617 | return IRQ_HANDLED; | ||
618 | } | ||
619 | |||
512 | 620 | ||
513 | static int gru_user_dropin(struct gru_thread_state *gts, | 621 | static int gru_user_dropin(struct gru_thread_state *gts, |
514 | struct gru_tlb_fault_handle *tfh, | 622 | struct gru_tlb_fault_handle *tfh, |
515 | unsigned long __user *cb) | 623 | void *cb) |
516 | { | 624 | { |
517 | struct gru_mm_struct *gms = gts->ts_gms; | 625 | struct gru_mm_struct *gms = gts->ts_gms; |
518 | int ret; | 626 | int ret; |
519 | 627 | ||
520 | gts->ustats.upm_tlbdropin++; | 628 | gts->ustats.upm_tlbmiss++; |
521 | while (1) { | 629 | while (1) { |
522 | wait_event(gms->ms_wait_queue, | 630 | wait_event(gms->ms_wait_queue, |
523 | atomic_read(&gms->ms_range_active) == 0); | 631 | atomic_read(&gms->ms_range_active) == 0); |
524 | prefetchw(tfh); /* Helps on hdw, required for emulator */ | 632 | prefetchw(tfh); /* Helps on hdw, required for emulator */ |
525 | ret = gru_try_dropin(gts, tfh, cb); | 633 | ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb); |
526 | if (ret <= 0) | 634 | if (ret <= 0) |
527 | return ret; | 635 | return ret; |
528 | STAT(call_os_wait_queue); | 636 | STAT(call_os_wait_queue); |
@@ -538,52 +646,41 @@ int gru_handle_user_call_os(unsigned long cb) | |||
538 | { | 646 | { |
539 | struct gru_tlb_fault_handle *tfh; | 647 | struct gru_tlb_fault_handle *tfh; |
540 | struct gru_thread_state *gts; | 648 | struct gru_thread_state *gts; |
541 | unsigned long __user *cbp; | 649 | void *cbk; |
542 | int ucbnum, cbrnum, ret = -EINVAL; | 650 | int ucbnum, cbrnum, ret = -EINVAL; |
543 | 651 | ||
544 | STAT(call_os); | 652 | STAT(call_os); |
545 | gru_dbg(grudev, "address 0x%lx\n", cb); | ||
546 | 653 | ||
547 | /* sanity check the cb pointer */ | 654 | /* sanity check the cb pointer */ |
548 | ucbnum = get_cb_number((void *)cb); | 655 | ucbnum = get_cb_number((void *)cb); |
549 | if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) | 656 | if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) |
550 | return -EINVAL; | 657 | return -EINVAL; |
551 | cbp = (unsigned long *)cb; | ||
552 | 658 | ||
553 | gts = gru_find_lock_gts(cb); | 659 | gts = gru_find_lock_gts(cb); |
554 | if (!gts) | 660 | if (!gts) |
555 | return -EINVAL; | 661 | return -EINVAL; |
662 | gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); | ||
556 | 663 | ||
557 | if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) | 664 | if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) |
558 | goto exit; | 665 | goto exit; |
559 | 666 | ||
560 | /* | 667 | gru_check_context_placement(gts); |
561 | * If force_unload is set, the UPM TLB fault is phony. The task | ||
562 | * has migrated to another node and the GSEG must be moved. Just | ||
563 | * unload the context. The task will page fault and assign a new | ||
564 | * context. | ||
565 | */ | ||
566 | if (gts->ts_tgid_owner == current->tgid && gts->ts_blade >= 0 && | ||
567 | gts->ts_blade != uv_numa_blade_id()) { | ||
568 | STAT(call_os_offnode_reference); | ||
569 | gts->ts_force_unload = 1; | ||
570 | } | ||
571 | 668 | ||
572 | /* | 669 | /* |
573 | * CCH may contain stale data if ts_force_cch_reload is set. | 670 | * CCH may contain stale data if ts_force_cch_reload is set. |
574 | */ | 671 | */ |
575 | if (gts->ts_gru && gts->ts_force_cch_reload) { | 672 | if (gts->ts_gru && gts->ts_force_cch_reload) { |
576 | gts->ts_force_cch_reload = 0; | 673 | gts->ts_force_cch_reload = 0; |
577 | gru_update_cch(gts, 0); | 674 | gru_update_cch(gts); |
578 | } | 675 | } |
579 | 676 | ||
580 | ret = -EAGAIN; | 677 | ret = -EAGAIN; |
581 | cbrnum = thread_cbr_number(gts, ucbnum); | 678 | cbrnum = thread_cbr_number(gts, ucbnum); |
582 | if (gts->ts_force_unload) { | 679 | if (gts->ts_gru) { |
583 | gru_unload_context(gts, 1); | ||
584 | } else if (gts->ts_gru) { | ||
585 | tfh = get_tfh_by_index(gts->ts_gru, cbrnum); | 680 | tfh = get_tfh_by_index(gts->ts_gru, cbrnum); |
586 | ret = gru_user_dropin(gts, tfh, cbp); | 681 | cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr, |
682 | gts->ts_ctxnum, ucbnum); | ||
683 | ret = gru_user_dropin(gts, tfh, cbk); | ||
587 | } | 684 | } |
588 | exit: | 685 | exit: |
589 | gru_unlock_gts(gts); | 686 | gru_unlock_gts(gts); |
@@ -605,11 +702,11 @@ int gru_get_exception_detail(unsigned long arg) | |||
605 | if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) | 702 | if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) |
606 | return -EFAULT; | 703 | return -EFAULT; |
607 | 704 | ||
608 | gru_dbg(grudev, "address 0x%lx\n", excdet.cb); | ||
609 | gts = gru_find_lock_gts(excdet.cb); | 705 | gts = gru_find_lock_gts(excdet.cb); |
610 | if (!gts) | 706 | if (!gts) |
611 | return -EINVAL; | 707 | return -EINVAL; |
612 | 708 | ||
709 | gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); | ||
613 | ucbnum = get_cb_number((void *)excdet.cb); | 710 | ucbnum = get_cb_number((void *)excdet.cb); |
614 | if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { | 711 | if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { |
615 | ret = -EINVAL; | 712 | ret = -EINVAL; |
@@ -617,6 +714,7 @@ int gru_get_exception_detail(unsigned long arg) | |||
617 | cbrnum = thread_cbr_number(gts, ucbnum); | 714 | cbrnum = thread_cbr_number(gts, ucbnum); |
618 | cbe = get_cbe_by_index(gts->ts_gru, cbrnum); | 715 | cbe = get_cbe_by_index(gts->ts_gru, cbrnum); |
619 | gru_flush_cache(cbe); /* CBE not coherent */ | 716 | gru_flush_cache(cbe); /* CBE not coherent */ |
717 | sync_core(); /* make sure we are have current data */ | ||
620 | excdet.opc = cbe->opccpy; | 718 | excdet.opc = cbe->opccpy; |
621 | excdet.exopc = cbe->exopccpy; | 719 | excdet.exopc = cbe->exopccpy; |
622 | excdet.ecause = cbe->ecause; | 720 | excdet.ecause = cbe->ecause; |
@@ -624,7 +722,7 @@ int gru_get_exception_detail(unsigned long arg) | |||
624 | excdet.exceptdet1 = cbe->idef3upd; | 722 | excdet.exceptdet1 = cbe->idef3upd; |
625 | excdet.cbrstate = cbe->cbrstate; | 723 | excdet.cbrstate = cbe->cbrstate; |
626 | excdet.cbrexecstatus = cbe->cbrexecstatus; | 724 | excdet.cbrexecstatus = cbe->cbrexecstatus; |
627 | gru_flush_cache(cbe); | 725 | gru_flush_cache_cbe(cbe); |
628 | ret = 0; | 726 | ret = 0; |
629 | } else { | 727 | } else { |
630 | ret = -EAGAIN; | 728 | ret = -EAGAIN; |
@@ -733,6 +831,11 @@ long gru_get_gseg_statistics(unsigned long arg) | |||
733 | if (copy_from_user(&req, (void __user *)arg, sizeof(req))) | 831 | if (copy_from_user(&req, (void __user *)arg, sizeof(req))) |
734 | return -EFAULT; | 832 | return -EFAULT; |
735 | 833 | ||
834 | /* | ||
835 | * The library creates arrays of contexts for threaded programs. | ||
836 | * If no gts exists in the array, the context has never been used & all | ||
837 | * statistics are implicitly 0. | ||
838 | */ | ||
736 | gts = gru_find_lock_gts(req.gseg); | 839 | gts = gru_find_lock_gts(req.gseg); |
737 | if (gts) { | 840 | if (gts) { |
738 | memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats)); | 841 | memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats)); |
@@ -762,11 +865,25 @@ int gru_set_context_option(unsigned long arg) | |||
762 | return -EFAULT; | 865 | return -EFAULT; |
763 | gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); | 866 | gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); |
764 | 867 | ||
765 | gts = gru_alloc_locked_gts(req.gseg); | 868 | gts = gru_find_lock_gts(req.gseg); |
766 | if (!gts) | 869 | if (!gts) { |
767 | return -EINVAL; | 870 | gts = gru_alloc_locked_gts(req.gseg); |
871 | if (IS_ERR(gts)) | ||
872 | return PTR_ERR(gts); | ||
873 | } | ||
768 | 874 | ||
769 | switch (req.op) { | 875 | switch (req.op) { |
876 | case sco_blade_chiplet: | ||
877 | /* Select blade/chiplet for GRU context */ | ||
878 | if (req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || !gru_base[req.val1] || | ||
879 | req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB) { | ||
880 | ret = -EINVAL; | ||
881 | } else { | ||
882 | gts->ts_user_blade_id = req.val1; | ||
883 | gts->ts_user_chiplet_id = req.val0; | ||
884 | gru_check_context_placement(gts); | ||
885 | } | ||
886 | break; | ||
770 | case sco_gseg_owner: | 887 | case sco_gseg_owner: |
771 | /* Register the current task as the GSEG owner */ | 888 | /* Register the current task as the GSEG owner */ |
772 | gts->ts_tgid_owner = current->tgid; | 889 | gts->ts_tgid_owner = current->tgid; |