aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian King <brking@linux.vnet.ibm.com>2009-02-17 01:49:50 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-02-22 23:53:04 -0500
commitf52862f4070fb930e407d466aa82d8efcc98c9ed (patch)
tree750b42530408632015bd77e29879c994fb854429
parent448e2ca0e32a5c437650d634b6032ab732662338 (diff)
powerpc/pseries: Fix partition migration hang under load
While testing partition migration with heavy CPU load using shared processors, it was observed that sometimes the migration would never complete and would appear to hang. Currently, the migration code assumes that if H_SUCCESS is returned from the H_JOIN then the migration is complete and the processor is waking up on the target system. If there was an outstanding PROD to the processor when the H_JOIN is called, however, it will return H_SUCCESS on the source system, causing the migration to hang, or in some scenarios cause the kernel to crash on the complete call waking the caller of rtas_percpu_suspend_me. Fix this by calling H_JOIN multiple times if necessary during the migration. Signed-off-by: Brian King <brking@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/kernel/rtas.c10
1 files changed, 8 insertions, 2 deletions
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index fdfe14c4bdef..ee4c7609b649 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -46,6 +46,7 @@ EXPORT_SYMBOL(rtas);
46 46
47struct rtas_suspend_me_data { 47struct rtas_suspend_me_data {
48 atomic_t working; /* number of cpus accessing this struct */ 48 atomic_t working; /* number of cpus accessing this struct */
49 atomic_t done;
49 int token; /* ibm,suspend-me */ 50 int token; /* ibm,suspend-me */
50 int error; 51 int error;
51 struct completion *complete; /* wait on this until working == 0 */ 52 struct completion *complete; /* wait on this until working == 0 */
@@ -689,7 +690,7 @@ static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
689#ifdef CONFIG_PPC_PSERIES 690#ifdef CONFIG_PPC_PSERIES
690static void rtas_percpu_suspend_me(void *info) 691static void rtas_percpu_suspend_me(void *info)
691{ 692{
692 long rc; 693 long rc = H_SUCCESS;
693 unsigned long msr_save; 694 unsigned long msr_save;
694 int cpu; 695 int cpu;
695 struct rtas_suspend_me_data *data = 696 struct rtas_suspend_me_data *data =
@@ -701,7 +702,8 @@ static void rtas_percpu_suspend_me(void *info)
701 msr_save = mfmsr(); 702 msr_save = mfmsr();
702 mtmsr(msr_save & ~(MSR_EE)); 703 mtmsr(msr_save & ~(MSR_EE));
703 704
704 rc = plpar_hcall_norets(H_JOIN); 705 while (rc == H_SUCCESS && !atomic_read(&data->done))
706 rc = plpar_hcall_norets(H_JOIN);
705 707
706 mtmsr(msr_save); 708 mtmsr(msr_save);
707 709
@@ -724,6 +726,9 @@ static void rtas_percpu_suspend_me(void *info)
724 smp_processor_id(), rc); 726 smp_processor_id(), rc);
725 data->error = rc; 727 data->error = rc;
726 } 728 }
729
730 atomic_set(&data->done, 1);
731
727 /* This cpu did the suspend or got an error; in either case, 732 /* This cpu did the suspend or got an error; in either case,
728 * we need to prod all other other cpus out of join state. 733 * we need to prod all other other cpus out of join state.
729 * Extra prods are harmless. 734 * Extra prods are harmless.
@@ -766,6 +771,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)
766 } 771 }
767 772
768 atomic_set(&data.working, 0); 773 atomic_set(&data.working, 0);
774 atomic_set(&data.done, 0);
769 data.token = rtas_token("ibm,suspend-me"); 775 data.token = rtas_token("ibm,suspend-me");
770 data.error = 0; 776 data.error = 0;
771 data.complete = &done; 777 data.complete = &done;