Diffstat (limited to 'drivers/misc/sgi-gru/grufault.c')
| -rw-r--r-- | drivers/misc/sgi-gru/grufault.c | 311 |
1 files changed, 214 insertions, 97 deletions
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 679e01778286..38657cdaf54d 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c | |||
| @@ -40,6 +40,12 @@ | |||
| 40 | #include "gru_instructions.h" | 40 | #include "gru_instructions.h" |
| 41 | #include <asm/uv/uv_hub.h> | 41 | #include <asm/uv/uv_hub.h> |
| 42 | 42 | ||
| 43 | /* Return codes for vtop functions */ | ||
| 44 | #define VTOP_SUCCESS 0 | ||
| 45 | #define VTOP_INVALID -1 | ||
| 46 | #define VTOP_RETRY -2 | ||
| 47 | |||
| 48 | |||
| 43 | /* | 49 | /* |
| 44 | * Test if a physical address is a valid GRU GSEG address | 50 | * Test if a physical address is a valid GRU GSEG address |
| 45 | */ | 51 | */ |
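The VTOP_* definitions added above give names to the bare -1/-2 return values that gru_vtop() previously used. Callers dispatch on the named codes, as the gru_try_dropin() hunk later in this patch does; a minimal sketch of that calling pattern, condensed from the patch for readability only:

	ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
	if (ret == VTOP_INVALID)
		goto failinval;		/* permanent failure: raise an exception on the CB */
	if (ret == VTOP_RETRY)
		goto failupm;		/* transient failure: switch the CBR to user polling mode */
	/* VTOP_SUCCESS: gpa and pageshift are valid and can be dropped into the TLB */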
| @@ -90,19 +96,22 @@ static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) | |||
| 90 | { | 96 | { |
| 91 | struct mm_struct *mm = current->mm; | 97 | struct mm_struct *mm = current->mm; |
| 92 | struct vm_area_struct *vma; | 98 | struct vm_area_struct *vma; |
| 93 | struct gru_thread_state *gts = NULL; | 99 | struct gru_thread_state *gts = ERR_PTR(-EINVAL); |
| 94 | 100 | ||
| 95 | down_write(&mm->mmap_sem); | 101 | down_write(&mm->mmap_sem); |
| 96 | vma = gru_find_vma(vaddr); | 102 | vma = gru_find_vma(vaddr); |
| 97 | if (vma) | 103 | if (!vma) |
| 98 | gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); | 104 | goto err; |
| 99 | if (gts) { | ||
| 100 | mutex_lock(&gts->ts_ctxlock); | ||
| 101 | downgrade_write(&mm->mmap_sem); | ||
| 102 | } else { | ||
| 103 | up_write(&mm->mmap_sem); | ||
| 104 | } | ||
| 105 | 105 | ||
| 106 | gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); | ||
| 107 | if (IS_ERR(gts)) | ||
| 108 | goto err; | ||
| 109 | mutex_lock(&gts->ts_ctxlock); | ||
| 110 | downgrade_write(&mm->mmap_sem); | ||
| 111 | return gts; | ||
| 112 | |||
| 113 | err: | ||
| 114 | up_write(&mm->mmap_sem); | ||
| 106 | return gts; | 115 | return gts; |
| 107 | } | 116 | } |
| 108 | 117 | ||
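Note that gru_alloc_locked_gts() now reports failure with ERR_PTR(-EINVAL) instead of NULL, so callers must move from a NULL check to IS_ERR()/PTR_ERR(). The gru_set_context_option() hunk near the end of this patch makes exactly that change; the expected pattern, sketched here for clarity (not additional patch content):

	gts = gru_alloc_locked_gts(req.gseg);
	if (IS_ERR(gts))
		return PTR_ERR(gts);	/* e.g. -EINVAL when no VMA covers the GSEG address */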
| @@ -122,39 +131,15 @@ static void gru_unlock_gts(struct gru_thread_state *gts) | |||
| 122 | * is necessary to prevent the user from seeing a stale cb.istatus that will | 131 | * is necessary to prevent the user from seeing a stale cb.istatus that will |
| 123 | * change as soon as the TFH restart is complete. Races may cause an | 132 | * change as soon as the TFH restart is complete. Races may cause an |
| 124 | * occasional failure to clear the cb.istatus, but that is ok. | 133 | * occasional failure to clear the cb.istatus, but that is ok. |
| 125 | * | ||
| 126 | * If the cb address is not valid (should not happen, but...), nothing | ||
| 127 | * bad will happen.. The get_user()/put_user() will fail but there | ||
| 128 | * are no bad side-effects. | ||
| 129 | */ | 134 | */ |
| 130 | static void gru_cb_set_istatus_active(unsigned long __user *cb) | 135 | static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk) |
| 131 | { | 136 | { |
| 132 | union { | 137 | if (cbk) { |
| 133 | struct gru_instruction_bits bits; | 138 | cbk->istatus = CBS_ACTIVE; |
| 134 | unsigned long dw; | ||
| 135 | } u; | ||
| 136 | |||
| 137 | if (cb) { | ||
| 138 | get_user(u.dw, cb); | ||
| 139 | u.bits.istatus = CBS_ACTIVE; | ||
| 140 | put_user(u.dw, cb); | ||
| 141 | } | 139 | } |
| 142 | } | 140 | } |
| 143 | 141 | ||
| 144 | /* | 142 | /* |
| 145 | * Convert a interrupt IRQ to a pointer to the GRU GTS that caused the | ||
| 146 | * interrupt. Interrupts are always sent to a cpu on the blade that contains the | ||
| 147 | * GRU (except for headless blades which are not currently supported). A blade | ||
| 148 | * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ | ||
| 149 | * number uniquely identifies the GRU chiplet on the local blade that caused the | ||
| 150 | * interrupt. Always called in interrupt context. | ||
| 151 | */ | ||
| 152 | static inline struct gru_state *irq_to_gru(int irq) | ||
| 153 | { | ||
| 154 | return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU]; | ||
| 155 | } | ||
| 156 | |||
| 157 | /* | ||
| 158 | * Read & clear a TFM | 143 | * Read & clear a TFM |
| 159 | * | 144 | * |
| 160 | * The GRU has an array of fault maps. A map is private to a cpu | 145 | * The GRU has an array of fault maps. A map is private to a cpu |
| @@ -207,10 +192,11 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma, | |||
| 207 | { | 192 | { |
| 208 | struct page *page; | 193 | struct page *page; |
| 209 | 194 | ||
| 210 | /* ZZZ Need to handle HUGE pages */ | 195 | #ifdef CONFIG_HUGETLB_PAGE |
| 211 | if (is_vm_hugetlb_page(vma)) | 196 | *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; |
| 212 | return -EFAULT; | 197 | #else |
| 213 | *pageshift = PAGE_SHIFT; | 198 | *pageshift = PAGE_SHIFT; |
| 199 | #endif | ||
| 214 | if (get_user_pages | 200 | if (get_user_pages |
| 215 | (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) | 201 | (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) |
| 216 | return -EFAULT; | 202 | return -EFAULT; |
| @@ -268,7 +254,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, | |||
| 268 | return 0; | 254 | return 0; |
| 269 | 255 | ||
| 270 | err: | 256 | err: |
| 271 | local_irq_enable(); | ||
| 272 | return 1; | 257 | return 1; |
| 273 | } | 258 | } |
| 274 | 259 | ||
| @@ -301,14 +286,69 @@ static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr, | |||
| 301 | paddr = paddr & ~((1UL << ps) - 1); | 286 | paddr = paddr & ~((1UL << ps) - 1); |
| 302 | *gpa = uv_soc_phys_ram_to_gpa(paddr); | 287 | *gpa = uv_soc_phys_ram_to_gpa(paddr); |
| 303 | *pageshift = ps; | 288 | *pageshift = ps; |
| 304 | return 0; | 289 | return VTOP_SUCCESS; |
| 305 | 290 | ||
| 306 | inval: | 291 | inval: |
| 307 | return -1; | 292 | return VTOP_INVALID; |
| 308 | upm: | 293 | upm: |
| 309 | return -2; | 294 | return VTOP_RETRY; |
| 295 | } | ||
| 296 | |||
| 297 | |||
| 298 | /* | ||
| 299 | * Flush a CBE from cache. The CBE is clean in the cache. Dirty the | ||
| 300 | * CBE cacheline so that the line will be written back to home agent. | ||
| 301 | * Otherwise the line may be silently dropped. This has no impact | ||
| 302 | * except on performance. | ||
| 303 | */ | ||
| 304 | static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe) | ||
| 305 | { | ||
| 306 | if (unlikely(cbe)) { | ||
| 307 | cbe->cbrexecstatus = 0; /* make CL dirty */ | ||
| 308 | gru_flush_cache(cbe); | ||
| 309 | } | ||
| 310 | } | 310 | } |
| 311 | 311 | ||
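Because of the unlikely(cbe) guard, gru_flush_cache_cbe() is a no-op when called with a NULL pointer. That is what lets every failure label in gru_try_dropin() below call it unconditionally, whether or not TLB preloading was armed; for example (condensed from the failupm path in this patch):

	failupm:
		tfh_user_polling_mode(tfh);
		gru_flush_cache_cbe(cbe);	/* safe: cbe is NULL unless preloading prefetched it */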
| 312 | /* | ||
| 313 | * Preload the TLB with entries that may be required. Currently, preloading | ||
| 314 | * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to | ||
| 315 | * the end of the bcopy transfer, whichever is smaller. | ||
| 316 | */ | ||
| 317 | static void gru_preload_tlb(struct gru_state *gru, | ||
| 318 | struct gru_thread_state *gts, int atomic, | ||
| 319 | unsigned long fault_vaddr, int asid, int write, | ||
| 320 | unsigned char tlb_preload_count, | ||
| 321 | struct gru_tlb_fault_handle *tfh, | ||
| 322 | struct gru_control_block_extended *cbe) | ||
| 323 | { | ||
| 324 | unsigned long vaddr = 0, gpa; | ||
| 325 | int ret, pageshift; | ||
| 326 | |||
| 327 | if (cbe->opccpy != OP_BCOPY) | ||
| 328 | return; | ||
| 329 | |||
| 330 | if (fault_vaddr == cbe->cbe_baddr0) | ||
| 331 | vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1; | ||
| 332 | else if (fault_vaddr == cbe->cbe_baddr1) | ||
| 333 | vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1; | ||
| 334 | |||
| 335 | fault_vaddr &= PAGE_MASK; | ||
| 336 | vaddr &= PAGE_MASK; | ||
| 337 | vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE); | ||
| 338 | |||
| 339 | while (vaddr > fault_vaddr) { | ||
| 340 | ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); | ||
| 341 | if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write, | ||
| 342 | GRU_PAGESIZE(pageshift))) | ||
| 343 | return; | ||
| 344 | gru_dbg(grudev, | ||
| 345 | "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n", | ||
| 346 | atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, | ||
| 347 | vaddr, asid, write, pageshift, gpa); | ||
| 348 | vaddr -= PAGE_SIZE; | ||
| 349 | STAT(tlb_preload_page); | ||
| 350 | } | ||
| 351 | } | ||
| 312 | 352 | ||
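gru_preload_tlb() is driven from gru_try_dropin() further down in this patch, and only when the faulting page uses the base page size; the call site, condensed here for readability (not additional patch content):

	if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
		gru_preload_tlb(gru, gts, atomic, vaddr, asid, write,
				tlb_preload_count, tfh, cbe);
		gru_flush_cache_cbe(cbe);
	}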
| 313 | /* | 353 | /* |
| 314 | * Drop a TLB entry into the GRU. The fault is described by info in an TFH. | 354 | * Drop a TLB entry into the GRU. The fault is described by info in an TFH. |
| @@ -320,11 +360,14 @@ upm: | |||
| 320 | * < 0 = error code | 360 | * < 0 = error code |
| 321 | * | 361 | * |
| 322 | */ | 362 | */ |
| 323 | static int gru_try_dropin(struct gru_thread_state *gts, | 363 | static int gru_try_dropin(struct gru_state *gru, |
| 364 | struct gru_thread_state *gts, | ||
| 324 | struct gru_tlb_fault_handle *tfh, | 365 | struct gru_tlb_fault_handle *tfh, |
| 325 | unsigned long __user *cb) | 366 | struct gru_instruction_bits *cbk) |
| 326 | { | 367 | { |
| 327 | int pageshift = 0, asid, write, ret, atomic = !cb; | 368 | struct gru_control_block_extended *cbe = NULL; |
| 369 | unsigned char tlb_preload_count = gts->ts_tlb_preload_count; | ||
| 370 | int pageshift = 0, asid, write, ret, atomic = !cbk, indexway; | ||
| 328 | unsigned long gpa = 0, vaddr = 0; | 371 | unsigned long gpa = 0, vaddr = 0; |
| 329 | 372 | ||
| 330 | /* | 373 | /* |
| @@ -335,24 +378,34 @@ static int gru_try_dropin(struct gru_thread_state *gts, | |||
| 335 | */ | 378 | */ |
| 336 | 379 | ||
| 337 | /* | 380 | /* |
| 381 | * Prefetch the CBE if doing TLB preloading | ||
| 382 | */ | ||
| 383 | if (unlikely(tlb_preload_count)) { | ||
| 384 | cbe = gru_tfh_to_cbe(tfh); | ||
| 385 | prefetchw(cbe); | ||
| 386 | } | ||
| 387 | |||
| 388 | /* | ||
| 338 | * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. | 389 | * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. |
| 339 | * Might be a hardware race OR a stupid user. Ignore FMM because FMM | 390 | * Might be a hardware race OR a stupid user. Ignore FMM because FMM |
| 340 | * is a transient state. | 391 | * is a transient state. |
| 341 | */ | 392 | */ |
| 342 | if (tfh->status != TFHSTATUS_EXCEPTION) { | 393 | if (tfh->status != TFHSTATUS_EXCEPTION) { |
| 343 | gru_flush_cache(tfh); | 394 | gru_flush_cache(tfh); |
| 395 | sync_core(); | ||
| 344 | if (tfh->status != TFHSTATUS_EXCEPTION) | 396 | if (tfh->status != TFHSTATUS_EXCEPTION) |
| 345 | goto failnoexception; | 397 | goto failnoexception; |
| 346 | STAT(tfh_stale_on_fault); | 398 | STAT(tfh_stale_on_fault); |
| 347 | } | 399 | } |
| 348 | if (tfh->state == TFHSTATE_IDLE) | 400 | if (tfh->state == TFHSTATE_IDLE) |
| 349 | goto failidle; | 401 | goto failidle; |
| 350 | if (tfh->state == TFHSTATE_MISS_FMM && cb) | 402 | if (tfh->state == TFHSTATE_MISS_FMM && cbk) |
| 351 | goto failfmm; | 403 | goto failfmm; |
| 352 | 404 | ||
| 353 | write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; | 405 | write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; |
| 354 | vaddr = tfh->missvaddr; | 406 | vaddr = tfh->missvaddr; |
| 355 | asid = tfh->missasid; | 407 | asid = tfh->missasid; |
| 408 | indexway = tfh->indexway; | ||
| 356 | if (asid == 0) | 409 | if (asid == 0) |
| 357 | goto failnoasid; | 410 | goto failnoasid; |
| 358 | 411 | ||
| @@ -366,41 +419,51 @@ static int gru_try_dropin(struct gru_thread_state *gts, | |||
| 366 | goto failactive; | 419 | goto failactive; |
| 367 | 420 | ||
| 368 | ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); | 421 | ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); |
| 369 | if (ret == -1) | 422 | if (ret == VTOP_INVALID) |
| 370 | goto failinval; | 423 | goto failinval; |
| 371 | if (ret == -2) | 424 | if (ret == VTOP_RETRY) |
| 372 | goto failupm; | 425 | goto failupm; |
| 373 | 426 | ||
| 374 | if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { | 427 | if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { |
| 375 | gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); | 428 | gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); |
| 376 | if (atomic || !gru_update_cch(gts, 0)) { | 429 | if (atomic || !gru_update_cch(gts)) { |
| 377 | gts->ts_force_cch_reload = 1; | 430 | gts->ts_force_cch_reload = 1; |
| 378 | goto failupm; | 431 | goto failupm; |
| 379 | } | 432 | } |
| 380 | } | 433 | } |
| 381 | gru_cb_set_istatus_active(cb); | 434 | |
| 435 | if (unlikely(cbe) && pageshift == PAGE_SHIFT) { | ||
| 436 | gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe); | ||
| 437 | gru_flush_cache_cbe(cbe); | ||
| 438 | } | ||
| 439 | |||
| 440 | gru_cb_set_istatus_active(cbk); | ||
| 441 | gts->ustats.tlbdropin++; | ||
| 382 | tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, | 442 | tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, |
| 383 | GRU_PAGESIZE(pageshift)); | 443 | GRU_PAGESIZE(pageshift)); |
| 384 | STAT(tlb_dropin); | ||
| 385 | gru_dbg(grudev, | 444 | gru_dbg(grudev, |
| 386 | "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n", | 445 | "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x," |
| 387 | ret ? "non-atomic" : "atomic", tfh, vaddr, asid, | 446 | " rw %d, ps %d, gpa 0x%lx\n", |
| 388 | pageshift, gpa); | 447 | atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid, |
| 448 | indexway, write, pageshift, gpa); | ||
| 449 | STAT(tlb_dropin); | ||
| 389 | return 0; | 450 | return 0; |
| 390 | 451 | ||
| 391 | failnoasid: | 452 | failnoasid: |
| 392 | /* No asid (delayed unload). */ | 453 | /* No asid (delayed unload). */ |
| 393 | STAT(tlb_dropin_fail_no_asid); | 454 | STAT(tlb_dropin_fail_no_asid); |
| 394 | gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); | 455 | gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); |
| 395 | if (!cb) | 456 | if (!cbk) |
| 396 | tfh_user_polling_mode(tfh); | 457 | tfh_user_polling_mode(tfh); |
| 397 | else | 458 | else |
| 398 | gru_flush_cache(tfh); | 459 | gru_flush_cache(tfh); |
| 460 | gru_flush_cache_cbe(cbe); | ||
| 399 | return -EAGAIN; | 461 | return -EAGAIN; |
| 400 | 462 | ||
| 401 | failupm: | 463 | failupm: |
| 402 | /* Atomic failure switch CBR to UPM */ | 464 | /* Atomic failure switch CBR to UPM */ |
| 403 | tfh_user_polling_mode(tfh); | 465 | tfh_user_polling_mode(tfh); |
| 466 | gru_flush_cache_cbe(cbe); | ||
| 404 | STAT(tlb_dropin_fail_upm); | 467 | STAT(tlb_dropin_fail_upm); |
| 405 | gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); | 468 | gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); |
| 406 | return 1; | 469 | return 1; |
| @@ -408,6 +471,7 @@ failupm: | |||
| 408 | failfmm: | 471 | failfmm: |
| 409 | /* FMM state on UPM call */ | 472 | /* FMM state on UPM call */ |
| 410 | gru_flush_cache(tfh); | 473 | gru_flush_cache(tfh); |
| 474 | gru_flush_cache_cbe(cbe); | ||
| 411 | STAT(tlb_dropin_fail_fmm); | 475 | STAT(tlb_dropin_fail_fmm); |
| 412 | gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); | 476 | gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); |
| 413 | return 0; | 477 | return 0; |
| @@ -415,17 +479,20 @@ failfmm: | |||
| 415 | failnoexception: | 479 | failnoexception: |
| 416 | /* TFH status did not show exception pending */ | 480 | /* TFH status did not show exception pending */ |
| 417 | gru_flush_cache(tfh); | 481 | gru_flush_cache(tfh); |
| 418 | if (cb) | 482 | gru_flush_cache_cbe(cbe); |
| 419 | gru_flush_cache(cb); | 483 | if (cbk) |
| 484 | gru_flush_cache(cbk); | ||
| 420 | STAT(tlb_dropin_fail_no_exception); | 485 | STAT(tlb_dropin_fail_no_exception); |
| 421 | gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", tfh, tfh->status, tfh->state); | 486 | gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", |
| 487 | tfh, tfh->status, tfh->state); | ||
| 422 | return 0; | 488 | return 0; |
| 423 | 489 | ||
| 424 | failidle: | 490 | failidle: |
| 425 | /* TFH state was idle - no miss pending */ | 491 | /* TFH state was idle - no miss pending */ |
| 426 | gru_flush_cache(tfh); | 492 | gru_flush_cache(tfh); |
| 427 | if (cb) | 493 | gru_flush_cache_cbe(cbe); |
| 428 | gru_flush_cache(cb); | 494 | if (cbk) |
| 495 | gru_flush_cache(cbk); | ||
| 429 | STAT(tlb_dropin_fail_idle); | 496 | STAT(tlb_dropin_fail_idle); |
| 430 | gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); | 497 | gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); |
| 431 | return 0; | 498 | return 0; |
| @@ -433,16 +500,18 @@ failidle: | |||
| 433 | failinval: | 500 | failinval: |
| 434 | /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ | 501 | /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ |
| 435 | tfh_exception(tfh); | 502 | tfh_exception(tfh); |
| 503 | gru_flush_cache_cbe(cbe); | ||
| 436 | STAT(tlb_dropin_fail_invalid); | 504 | STAT(tlb_dropin_fail_invalid); |
| 437 | gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); | 505 | gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); |
| 438 | return -EFAULT; | 506 | return -EFAULT; |
| 439 | 507 | ||
| 440 | failactive: | 508 | failactive: |
| 441 | /* Range invalidate active. Switch to UPM iff atomic */ | 509 | /* Range invalidate active. Switch to UPM iff atomic */ |
| 442 | if (!cb) | 510 | if (!cbk) |
| 443 | tfh_user_polling_mode(tfh); | 511 | tfh_user_polling_mode(tfh); |
| 444 | else | 512 | else |
| 445 | gru_flush_cache(tfh); | 513 | gru_flush_cache(tfh); |
| 514 | gru_flush_cache_cbe(cbe); | ||
| 446 | STAT(tlb_dropin_fail_range_active); | 515 | STAT(tlb_dropin_fail_range_active); |
| 447 | gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", | 516 | gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", |
| 448 | tfh, vaddr); | 517 | tfh, vaddr); |
| @@ -455,31 +524,41 @@ failactive: | |||
| 455 | * Note that this is the interrupt handler that is registered with linux | 524 | * Note that this is the interrupt handler that is registered with linux |
| 456 | * interrupt handlers. | 525 | * interrupt handlers. |
| 457 | */ | 526 | */ |
| 458 | irqreturn_t gru_intr(int irq, void *dev_id) | 527 | static irqreturn_t gru_intr(int chiplet, int blade) |
| 459 | { | 528 | { |
| 460 | struct gru_state *gru; | 529 | struct gru_state *gru; |
| 461 | struct gru_tlb_fault_map imap, dmap; | 530 | struct gru_tlb_fault_map imap, dmap; |
| 462 | struct gru_thread_state *gts; | 531 | struct gru_thread_state *gts; |
| 463 | struct gru_tlb_fault_handle *tfh = NULL; | 532 | struct gru_tlb_fault_handle *tfh = NULL; |
| 533 | struct completion *cmp; | ||
| 464 | int cbrnum, ctxnum; | 534 | int cbrnum, ctxnum; |
| 465 | 535 | ||
| 466 | STAT(intr); | 536 | STAT(intr); |
| 467 | 537 | ||
| 468 | gru = irq_to_gru(irq); | 538 | gru = &gru_base[blade]->bs_grus[chiplet]; |
| 469 | if (!gru) { | 539 | if (!gru) { |
| 470 | dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n", | 540 | dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n", |
| 471 | raw_smp_processor_id(), irq); | 541 | raw_smp_processor_id(), chiplet); |
| 472 | return IRQ_NONE; | 542 | return IRQ_NONE; |
| 473 | } | 543 | } |
| 474 | get_clear_fault_map(gru, &imap, &dmap); | 544 | get_clear_fault_map(gru, &imap, &dmap); |
| 545 | gru_dbg(grudev, | ||
| 546 | "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n", | ||
| 547 | smp_processor_id(), chiplet, gru->gs_gid, | ||
| 548 | imap.fault_bits[0], imap.fault_bits[1], | ||
| 549 | dmap.fault_bits[0], dmap.fault_bits[1]); | ||
| 475 | 550 | ||
| 476 | for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { | 551 | for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { |
| 477 | complete(gru->gs_blade->bs_async_wq); | 552 | STAT(intr_cbr); |
| 553 | cmp = gru->gs_blade->bs_async_wq; | ||
| 554 | if (cmp) | ||
| 555 | complete(cmp); | ||
| 478 | gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", | 556 | gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", |
| 479 | gru->gs_gid, cbrnum, gru->gs_blade->bs_async_wq->done); | 557 | gru->gs_gid, cbrnum, cmp ? cmp->done : -1); |
| 480 | } | 558 | } |
| 481 | 559 | ||
| 482 | for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { | 560 | for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { |
| 561 | STAT(intr_tfh); | ||
| 483 | tfh = get_tfh_by_index(gru, cbrnum); | 562 | tfh = get_tfh_by_index(gru, cbrnum); |
| 484 | prefetchw(tfh); /* Helps on hdw, required for emulator */ | 563 | prefetchw(tfh); /* Helps on hdw, required for emulator */ |
| 485 | 564 | ||
| @@ -492,14 +571,20 @@ irqreturn_t gru_intr(int irq, void *dev_id) | |||
| 492 | ctxnum = tfh->ctxnum; | 571 | ctxnum = tfh->ctxnum; |
| 493 | gts = gru->gs_gts[ctxnum]; | 572 | gts = gru->gs_gts[ctxnum]; |
| 494 | 573 | ||
| 574 | /* Spurious interrupts can cause this. Ignore. */ | ||
| 575 | if (!gts) { | ||
| 576 | STAT(intr_spurious); | ||
| 577 | continue; | ||
| 578 | } | ||
| 579 | |||
| 495 | /* | 580 | /* |
| 496 | * This is running in interrupt context. Trylock the mmap_sem. | 581 | * This is running in interrupt context. Trylock the mmap_sem. |
| 497 | * If it fails, retry the fault in user context. | 582 | * If it fails, retry the fault in user context. |
| 498 | */ | 583 | */ |
| 584 | gts->ustats.fmm_tlbmiss++; | ||
| 499 | if (!gts->ts_force_cch_reload && | 585 | if (!gts->ts_force_cch_reload && |
| 500 | down_read_trylock(&gts->ts_mm->mmap_sem)) { | 586 | down_read_trylock(&gts->ts_mm->mmap_sem)) {
| 501 | gts->ustats.fmm_tlbdropin++; | 587 | gru_try_dropin(gru, gts, tfh, NULL); |
| 502 | gru_try_dropin(gts, tfh, NULL); | ||
| 503 | up_read(&gts->ts_mm->mmap_sem); | 588 | up_read(&gts->ts_mm->mmap_sem);
| 504 | } else { | 589 | } else { |
| 505 | tfh_user_polling_mode(tfh); | 590 | tfh_user_polling_mode(tfh); |
| @@ -509,20 +594,43 @@ irqreturn_t gru_intr(int irq, void *dev_id) | |||
| 509 | return IRQ_HANDLED; | 594 | return IRQ_HANDLED; |
| 510 | } | 595 | } |
| 511 | 596 | ||
| 597 | irqreturn_t gru0_intr(int irq, void *dev_id) | ||
| 598 | { | ||
| 599 | return gru_intr(0, uv_numa_blade_id()); | ||
| 600 | } | ||
| 601 | |||
| 602 | irqreturn_t gru1_intr(int irq, void *dev_id) | ||
| 603 | { | ||
| 604 | return gru_intr(1, uv_numa_blade_id()); | ||
| 605 | } | ||
| 606 | |||
| 607 | irqreturn_t gru_intr_mblade(int irq, void *dev_id) | ||
| 608 | { | ||
| 609 | int blade; | ||
| 610 | |||
| 611 | for_each_possible_blade(blade) { | ||
| 612 | if (uv_blade_nr_possible_cpus(blade)) | ||
| 613 | continue; | ||
| 614 | gru_intr(0, blade); | ||
| 615 | gru_intr(1, blade); | ||
| 616 | } | ||
| 617 | return IRQ_HANDLED; | ||
| 618 | } | ||
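Splitting the old gru_intr() into gru0_intr(), gru1_intr() and gru_intr_mblade() implies that each GRU chiplet now gets its own registered handler, plus one handler that walks blades without CPUs. The registration itself is not part of this file; a hedged sketch of what the companion setup code might look like (IRQ numbers and names here are placeholders, not taken from the patch):

	/* hypothetical wiring in the driver init path */
	ret = request_irq(IRQ_GRU, gru0_intr, 0, "gru0_intr", NULL);
	if (!ret)
		ret = request_irq(IRQ_GRU + 1, gru1_intr, 0, "gru1_intr", NULL);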
| 619 | |||
| 512 | 620 | ||
| 513 | static int gru_user_dropin(struct gru_thread_state *gts, | 621 | static int gru_user_dropin(struct gru_thread_state *gts, |
| 514 | struct gru_tlb_fault_handle *tfh, | 622 | struct gru_tlb_fault_handle *tfh, |
| 515 | unsigned long __user *cb) | 623 | void *cb) |
| 516 | { | 624 | { |
| 517 | struct gru_mm_struct *gms = gts->ts_gms; | 625 | struct gru_mm_struct *gms = gts->ts_gms; |
| 518 | int ret; | 626 | int ret; |
| 519 | 627 | ||
| 520 | gts->ustats.upm_tlbdropin++; | 628 | gts->ustats.upm_tlbmiss++; |
| 521 | while (1) { | 629 | while (1) { |
| 522 | wait_event(gms->ms_wait_queue, | 630 | wait_event(gms->ms_wait_queue, |
| 523 | atomic_read(&gms->ms_range_active) == 0); | 631 | atomic_read(&gms->ms_range_active) == 0); |
| 524 | prefetchw(tfh); /* Helps on hdw, required for emulator */ | 632 | prefetchw(tfh); /* Helps on hdw, required for emulator */ |
| 525 | ret = gru_try_dropin(gts, tfh, cb); | 633 | ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb); |
| 526 | if (ret <= 0) | 634 | if (ret <= 0) |
| 527 | return ret; | 635 | return ret; |
| 528 | STAT(call_os_wait_queue); | 636 | STAT(call_os_wait_queue); |
| @@ -538,52 +646,41 @@ int gru_handle_user_call_os(unsigned long cb) | |||
| 538 | { | 646 | { |
| 539 | struct gru_tlb_fault_handle *tfh; | 647 | struct gru_tlb_fault_handle *tfh; |
| 540 | struct gru_thread_state *gts; | 648 | struct gru_thread_state *gts; |
| 541 | unsigned long __user *cbp; | 649 | void *cbk; |
| 542 | int ucbnum, cbrnum, ret = -EINVAL; | 650 | int ucbnum, cbrnum, ret = -EINVAL; |
| 543 | 651 | ||
| 544 | STAT(call_os); | 652 | STAT(call_os); |
| 545 | gru_dbg(grudev, "address 0x%lx\n", cb); | ||
| 546 | 653 | ||
| 547 | /* sanity check the cb pointer */ | 654 | /* sanity check the cb pointer */ |
| 548 | ucbnum = get_cb_number((void *)cb); | 655 | ucbnum = get_cb_number((void *)cb); |
| 549 | if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) | 656 | if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) |
| 550 | return -EINVAL; | 657 | return -EINVAL; |
| 551 | cbp = (unsigned long *)cb; | ||
| 552 | 658 | ||
| 553 | gts = gru_find_lock_gts(cb); | 659 | gts = gru_find_lock_gts(cb); |
| 554 | if (!gts) | 660 | if (!gts) |
| 555 | return -EINVAL; | 661 | return -EINVAL; |
| 662 | gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); | ||
| 556 | 663 | ||
| 557 | if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) | 664 | if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) |
| 558 | goto exit; | 665 | goto exit; |
| 559 | 666 | ||
| 560 | /* | 667 | gru_check_context_placement(gts); |
| 561 | * If force_unload is set, the UPM TLB fault is phony. The task | ||
| 562 | * has migrated to another node and the GSEG must be moved. Just | ||
| 563 | * unload the context. The task will page fault and assign a new | ||
| 564 | * context. | ||
| 565 | */ | ||
| 566 | if (gts->ts_tgid_owner == current->tgid && gts->ts_blade >= 0 && | ||
| 567 | gts->ts_blade != uv_numa_blade_id()) { | ||
| 568 | STAT(call_os_offnode_reference); | ||
| 569 | gts->ts_force_unload = 1; | ||
| 570 | } | ||
| 571 | 668 | ||
| 572 | /* | 669 | /* |
| 573 | * CCH may contain stale data if ts_force_cch_reload is set. | 670 | * CCH may contain stale data if ts_force_cch_reload is set. |
| 574 | */ | 671 | */ |
| 575 | if (gts->ts_gru && gts->ts_force_cch_reload) { | 672 | if (gts->ts_gru && gts->ts_force_cch_reload) { |
| 576 | gts->ts_force_cch_reload = 0; | 673 | gts->ts_force_cch_reload = 0; |
| 577 | gru_update_cch(gts, 0); | 674 | gru_update_cch(gts); |
| 578 | } | 675 | } |
| 579 | 676 | ||
| 580 | ret = -EAGAIN; | 677 | ret = -EAGAIN; |
| 581 | cbrnum = thread_cbr_number(gts, ucbnum); | 678 | cbrnum = thread_cbr_number(gts, ucbnum); |
| 582 | if (gts->ts_force_unload) { | 679 | if (gts->ts_gru) { |
| 583 | gru_unload_context(gts, 1); | ||
| 584 | } else if (gts->ts_gru) { | ||
| 585 | tfh = get_tfh_by_index(gts->ts_gru, cbrnum); | 680 | tfh = get_tfh_by_index(gts->ts_gru, cbrnum); |
| 586 | ret = gru_user_dropin(gts, tfh, cbp); | 681 | cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr, |
| 682 | gts->ts_ctxnum, ucbnum); | ||
| 683 | ret = gru_user_dropin(gts, tfh, cbk); | ||
| 587 | } | 684 | } |
| 588 | exit: | 685 | exit: |
| 589 | gru_unlock_gts(gts); | 686 | gru_unlock_gts(gts); |
| @@ -605,11 +702,11 @@ int gru_get_exception_detail(unsigned long arg) | |||
| 605 | if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) | 702 | if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) |
| 606 | return -EFAULT; | 703 | return -EFAULT; |
| 607 | 704 | ||
| 608 | gru_dbg(grudev, "address 0x%lx\n", excdet.cb); | ||
| 609 | gts = gru_find_lock_gts(excdet.cb); | 705 | gts = gru_find_lock_gts(excdet.cb); |
| 610 | if (!gts) | 706 | if (!gts) |
| 611 | return -EINVAL; | 707 | return -EINVAL; |
| 612 | 708 | ||
| 709 | gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); | ||
| 613 | ucbnum = get_cb_number((void *)excdet.cb); | 710 | ucbnum = get_cb_number((void *)excdet.cb); |
| 614 | if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { | 711 | if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { |
| 615 | ret = -EINVAL; | 712 | ret = -EINVAL; |
| @@ -617,6 +714,7 @@ int gru_get_exception_detail(unsigned long arg) | |||
| 617 | cbrnum = thread_cbr_number(gts, ucbnum); | 714 | cbrnum = thread_cbr_number(gts, ucbnum); |
| 618 | cbe = get_cbe_by_index(gts->ts_gru, cbrnum); | 715 | cbe = get_cbe_by_index(gts->ts_gru, cbrnum); |
| 619 | gru_flush_cache(cbe); /* CBE not coherent */ | 716 | gru_flush_cache(cbe); /* CBE not coherent */ |
| 717 | sync_core(); /* make sure we have current data */ | ||
| 620 | excdet.opc = cbe->opccpy; | 718 | excdet.opc = cbe->opccpy; |
| 621 | excdet.exopc = cbe->exopccpy; | 719 | excdet.exopc = cbe->exopccpy; |
| 622 | excdet.ecause = cbe->ecause; | 720 | excdet.ecause = cbe->ecause; |
| @@ -624,7 +722,7 @@ int gru_get_exception_detail(unsigned long arg) | |||
| 624 | excdet.exceptdet1 = cbe->idef3upd; | 722 | excdet.exceptdet1 = cbe->idef3upd; |
| 625 | excdet.cbrstate = cbe->cbrstate; | 723 | excdet.cbrstate = cbe->cbrstate; |
| 626 | excdet.cbrexecstatus = cbe->cbrexecstatus; | 724 | excdet.cbrexecstatus = cbe->cbrexecstatus; |
| 627 | gru_flush_cache(cbe); | 725 | gru_flush_cache_cbe(cbe); |
| 628 | ret = 0; | 726 | ret = 0; |
| 629 | } else { | 727 | } else { |
| 630 | ret = -EAGAIN; | 728 | ret = -EAGAIN; |
| @@ -733,6 +831,11 @@ long gru_get_gseg_statistics(unsigned long arg) | |||
| 733 | if (copy_from_user(&req, (void __user *)arg, sizeof(req))) | 831 | if (copy_from_user(&req, (void __user *)arg, sizeof(req))) |
| 734 | return -EFAULT; | 832 | return -EFAULT; |
| 735 | 833 | ||
| 834 | /* | ||
| 835 | * The library creates arrays of contexts for threaded programs. | ||
| 836 | * If no gts exists in the array, the context has never been used & all | ||
| 837 | * statistics are implicitly 0. | ||
| 838 | */ | ||
| 736 | gts = gru_find_lock_gts(req.gseg); | 839 | gts = gru_find_lock_gts(req.gseg); |
| 737 | if (gts) { | 840 | if (gts) { |
| 738 | memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats)); | 841 |
| @@ -762,11 +865,25 @@ int gru_set_context_option(unsigned long arg) | |||
| 762 | return -EFAULT; | 865 | return -EFAULT; |
| 763 | gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); | 866 | gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); |
| 764 | 867 | ||
| 765 | gts = gru_alloc_locked_gts(req.gseg); | 868 | gts = gru_find_lock_gts(req.gseg); |
| 766 | if (!gts) | 869 | if (!gts) { |
| 767 | return -EINVAL; | 870 | gts = gru_alloc_locked_gts(req.gseg); |
| 871 | if (IS_ERR(gts)) | ||
| 872 | return PTR_ERR(gts); | ||
| 873 | } | ||
| 768 | 874 | ||
| 769 | switch (req.op) { | 875 | switch (req.op) { |
| 876 | case sco_blade_chiplet: | ||
| 877 | /* Select blade/chiplet for GRU context */ | ||
| 878 | if (req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || !gru_base[req.val1] || | ||
| 879 | req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB) { | ||
| 880 | ret = -EINVAL; | ||
| 881 | } else { | ||
| 882 | gts->ts_user_blade_id = req.val1; | ||
| 883 | gts->ts_user_chiplet_id = req.val0; | ||
| 884 | gru_check_context_placement(gts); | ||
| 885 | } | ||
| 886 | break; | ||
| 770 | case sco_gseg_owner: | 887 | case sco_gseg_owner: |
| 771 | /* Register the current task as the GSEG owner */ | 888 | /* Register the current task as the GSEG owner */ |
| 772 | gts->ts_tgid_owner = current->tgid; | 889 | gts->ts_tgid_owner = current->tgid; |
