diff options
author | Michael Neuling <mikey@neuling.org> | 2010-05-10 16:28:26 -0400 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2010-05-21 03:31:11 -0400 |
commit | d504bed676caad29a3dba3d3727298c560628f5c (patch) | |
tree | ea7077d57b3e3018dc6be5b8687889283ba3f6e6 /arch | |
parent | f90ece28c1f5b3ec13fe481406857fe92f4bc7d1 (diff) |
powerpc/kexec: Speedup kexec hash PTE tear down
Currently for kexec the PTE tear down on 1TB segment systems normally
requires 3 hcalls for each PTE removal. On a machine with 32GB of
memory it can take around a minute to remove all the PTEs.
This optimises the path so that we only remove PTEs that are valid.
It also uses the HCALL that reads 4 PTEs at once. For the common case where
a PTE is invalid in a 1TB segment, this turns the 3 HCALLs per PTE
down to 1 HCALL per 4 PTEs.
This gives a > 10x speedup in kexec times on PHYP, taking a 32GB
machine from around 1 minute down to a few seconds.
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 33 |
1 files changed, 20 insertions, 13 deletions
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0707653612ba..cf79b46d8f88 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c | |||
@@ -367,21 +367,28 @@ static void pSeries_lpar_hptab_clear(void) | |||
367 | { | 367 | { |
368 | unsigned long size_bytes = 1UL << ppc64_pft_size; | 368 | unsigned long size_bytes = 1UL << ppc64_pft_size; |
369 | unsigned long hpte_count = size_bytes >> 4; | 369 | unsigned long hpte_count = size_bytes >> 4; |
370 | unsigned long dummy1, dummy2, dword0; | 370 | struct { |
371 | unsigned long pteh; | ||
372 | unsigned long ptel; | ||
373 | } ptes[4]; | ||
371 | long lpar_rc; | 374 | long lpar_rc; |
372 | int i; | 375 | int i, j; |
373 | 376 | ||
374 | /* TODO: Use bulk call */ | 377 | /* Read in batches of 4, |
375 | for (i = 0; i < hpte_count; i++) { | 378 | * invalidate only valid entries not in the VRMA |
376 | /* dont remove HPTEs with VRMA mappings */ | 379 | * hpte_count will be a multiple of 4 |
377 | lpar_rc = plpar_pte_remove_raw(H_ANDCOND, i, HPTE_V_1TB_SEG, | 380 | */ |
378 | &dummy1, &dummy2); | 381 | for (i = 0; i < hpte_count; i += 4) { |
379 | if (lpar_rc == H_NOT_FOUND) { | 382 | lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); |
380 | lpar_rc = plpar_pte_read_raw(0, i, &dword0, &dummy1); | 383 | if (lpar_rc != H_SUCCESS) |
381 | if (!lpar_rc && ((dword0 & HPTE_V_VRMA_MASK) | 384 | continue; |
382 | != HPTE_V_VRMA_MASK)) | 385 | for (j = 0; j < 4; j++){ |
383 | /* Can be hpte for 1TB Seg. So remove it */ | 386 | if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == |
384 | plpar_pte_remove_raw(0, i, 0, &dummy1, &dummy2); | 387 | HPTE_V_VRMA_MASK) |
388 | continue; | ||
389 | if (ptes[j].pteh & HPTE_V_VALID) | ||
390 | plpar_pte_remove_raw(0, i + j, 0, | ||
391 | &(ptes[j].pteh), &(ptes[j].ptel)); | ||
385 | } | 392 | } |
386 | } | 393 | } |
387 | } | 394 | } |