aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michael Neuling <mikey@neuling.org> 2010-05-10 16:28:26 -0400
committer Benjamin Herrenschmidt <benh@kernel.crashing.org> 2010-05-21 03:31:11 -0400
commit d504bed676caad29a3dba3d3727298c560628f5c (patch)
tree ea7077d57b3e3018dc6be5b8687889283ba3f6e6
parent f90ece28c1f5b3ec13fe481406857fe92f4bc7d1 (diff)
powerpc/kexec: Speedup kexec hash PTE tear down
Currently for kexec the PTE tear down on 1TB segment systems normally requires 3 hcalls for each PTE removal. On a machine with 32GB of memory it can take around a minute to remove all the PTEs. This optimises the path so that we only remove PTEs that are valid. It also uses the read-4-PTEs-at-once HCALL. For the common case where a PTE is invalid in a 1TB segment, this turns the 3 HCALLs per PTE down to 1 HCALL per 4 PTEs. This gives a >10x speedup in kexec times on PHYP, taking a 32GB machine from around 1 minute down to a few seconds.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- arch/powerpc/platforms/pseries/lpar.c 33
1 files changed, 20 insertions, 13 deletions
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 0707653612b..cf79b46d8f8 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -367,21 +367,28 @@ static void pSeries_lpar_hptab_clear(void)
367{ 367{
368 unsigned long size_bytes = 1UL << ppc64_pft_size; 368 unsigned long size_bytes = 1UL << ppc64_pft_size;
369 unsigned long hpte_count = size_bytes >> 4; 369 unsigned long hpte_count = size_bytes >> 4;
370 unsigned long dummy1, dummy2, dword0; 370 struct {
371 unsigned long pteh;
372 unsigned long ptel;
373 } ptes[4];
371 long lpar_rc; 374 long lpar_rc;
372 int i; 375 int i, j;
373 376
374 /* TODO: Use bulk call */ 377 /* Read in batches of 4,
375 for (i = 0; i < hpte_count; i++) { 378 * invalidate only valid entries not in the VRMA
376 /* dont remove HPTEs with VRMA mappings */ 379 * hpte_count will be a multiple of 4
377 lpar_rc = plpar_pte_remove_raw(H_ANDCOND, i, HPTE_V_1TB_SEG, 380 */
378 &dummy1, &dummy2); 381 for (i = 0; i < hpte_count; i += 4) {
379 if (lpar_rc == H_NOT_FOUND) { 382 lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
380 lpar_rc = plpar_pte_read_raw(0, i, &dword0, &dummy1); 383 if (lpar_rc != H_SUCCESS)
381 if (!lpar_rc && ((dword0 & HPTE_V_VRMA_MASK) 384 continue;
382 != HPTE_V_VRMA_MASK)) 385 for (j = 0; j < 4; j++){
383 /* Can be hpte for 1TB Seg. So remove it */ 386 if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
384 plpar_pte_remove_raw(0, i, 0, &dummy1, &dummy2); 387 HPTE_V_VRMA_MASK)
388 continue;
389 if (ptes[j].pteh & HPTE_V_VALID)
390 plpar_pte_remove_raw(0, i + j, 0,
391 &(ptes[j].pteh), &(ptes[j].ptel));
385 } 392 }
386 } 393 }
387} 394}