aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorNathan Lynch <ntl@pobox.com>2007-11-13 11:15:13 -0500
committerPaul Mackerras <paulus@samba.org>2007-11-18 23:11:30 -0500
commit8f5c7579caba587a72ed91c7d76028efb8adb168 (patch)
tree365c896a35fd3ec977e7faf63e7093d1f882924b /arch
parent2ffbb8377c7a0713baf6644e285adc27a5654582 (diff)
[POWERPC] Fix multiple bugs in rtas_ibm_suspend_me code
There are several issues with the rtas_ibm_suspend_me code, which enables platform-assisted suspension of an LPAR as covered in PAPR 2.2. 1.) rtas_ibm_suspend_me uses on_each_cpu() to invoke rtas_percpu_suspend_me on all cpus via IPI: if (on_each_cpu(rtas_percpu_suspend_me, &data, 1, 0)) ... 'data' is on the calling task's stack, but rtas_ibm_suspend_me takes no measures to ensure that all instances of rtas_percpu_suspend_me are finished accessing 'data' before returning. This can result in the IPI'd cpus accessing random stack data and getting stuck in H_JOIN. This is addressed by using an atomic count of workers and a completion on the stack. 2.) rtas_percpu_suspend_me is needlessly calling H_JOIN in a loop. The only event that can cause a cpu to return from H_JOIN is an H_PROD from another cpu or a NMI/system reset. Each cpu need call H_JOIN only once per suspend operation. Remove the loop and the now unnecessary 'waiting' state variable. 3.) H_JOIN must be called with MSR[EE] off, but lazy interrupt disabling may cause the caller of rtas_ibm_suspend_me to call H_JOIN with it on; the local_irq_disable() in on_each_cpu() is not sufficient. Fix this by explicitly saving the MSR and clearing the EE bit before calling H_JOIN. 4.) H_PROD is being called with the Linux logical cpu number as the parameter, not the platform interrupt server value. (It's also being called for all possible cpus, which is harmless, but unnecessary.) This is fixed by calling H_PROD for each online cpu using get_hard_smp_processor_id(cpu) for the argument. Signed-off-by: Nathan Lynch <ntl@pobox.com> Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/kernel/rtas.c99
1 files changed, 58 insertions, 41 deletions
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 214780798289..52e95c2158c0 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -19,6 +19,9 @@
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/capability.h> 20#include <linux/capability.h>
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/smp.h>
23#include <linux/completion.h>
24#include <linux/cpumask.h>
22 25
23#include <asm/prom.h> 26#include <asm/prom.h>
24#include <asm/rtas.h> 27#include <asm/rtas.h>
@@ -34,6 +37,8 @@
34#include <asm/lmb.h> 37#include <asm/lmb.h>
35#include <asm/udbg.h> 38#include <asm/udbg.h>
36#include <asm/syscalls.h> 39#include <asm/syscalls.h>
40#include <asm/smp.h>
41#include <asm/atomic.h>
37 42
38struct rtas_t rtas = { 43struct rtas_t rtas = {
39 .lock = SPIN_LOCK_UNLOCKED 44 .lock = SPIN_LOCK_UNLOCKED
@@ -41,8 +46,10 @@ struct rtas_t rtas = {
41EXPORT_SYMBOL(rtas); 46EXPORT_SYMBOL(rtas);
42 47
43struct rtas_suspend_me_data { 48struct rtas_suspend_me_data {
44 long waiting; 49 atomic_t working; /* number of cpus accessing this struct */
45 struct rtas_args *args; 50 int token; /* ibm,suspend-me */
51 int error;
52 struct completion *complete; /* wait on this until working == 0 */
46}; 53};
47 54
48DEFINE_SPINLOCK(rtas_data_buf_lock); 55DEFINE_SPINLOCK(rtas_data_buf_lock);
@@ -657,50 +664,62 @@ static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
657#ifdef CONFIG_PPC_PSERIES 664#ifdef CONFIG_PPC_PSERIES
658static void rtas_percpu_suspend_me(void *info) 665static void rtas_percpu_suspend_me(void *info)
659{ 666{
660 int i;
661 long rc; 667 long rc;
662 long flags; 668 unsigned long msr_save;
669 int cpu;
663 struct rtas_suspend_me_data *data = 670 struct rtas_suspend_me_data *data =
664 (struct rtas_suspend_me_data *)info; 671 (struct rtas_suspend_me_data *)info;
665 672
666 /* 673 atomic_inc(&data->working);
667 * We use "waiting" to indicate our state. As long 674
668 * as it is >0, we are still trying to all join up. 675 /* really need to ensure MSR.EE is off for H_JOIN */
669 * If it goes to 0, we have successfully joined up and 676 msr_save = mfmsr();
670 * one thread got H_CONTINUE. If any error happens, 677 mtmsr(msr_save & ~(MSR_EE));
671 * we set it to <0. 678
672 */ 679 rc = plpar_hcall_norets(H_JOIN);
673 local_irq_save(flags); 680
674 do { 681 mtmsr(msr_save);
675 rc = plpar_hcall_norets(H_JOIN);
676 smp_rmb();
677 } while (rc == H_SUCCESS && data->waiting > 0);
678 if (rc == H_SUCCESS)
679 goto out;
680 682
681 if (rc == H_CONTINUE) { 683 if (rc == H_SUCCESS) {
682 data->waiting = 0; 684 /* This cpu was prodded and the suspend is complete. */
683 data->args->args[data->args->nargs] = 685 goto out;
684 rtas_call(ibm_suspend_me_token, 0, 1, NULL); 686 } else if (rc == H_CONTINUE) {
685 for_each_possible_cpu(i) 687 /* All other cpus are in H_JOIN, this cpu does
686 plpar_hcall_norets(H_PROD,i); 688 * the suspend.
689 */
690 printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n",
691 smp_processor_id());
692 data->error = rtas_call(data->token, 0, 1, NULL);
693
694 if (data->error)
695 printk(KERN_DEBUG "ibm,suspend-me returned %d\n",
696 data->error);
687 } else { 697 } else {
688 data->waiting = -EBUSY; 698 printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n",
689 printk(KERN_ERR "Error on H_JOIN hypervisor call\n"); 699 smp_processor_id(), rc);
700 data->error = rc;
690 } 701 }
691 702 /* This cpu did the suspend or got an error; in either case,
703 * we need to prod all other other cpus out of join state.
704 * Extra prods are harmless.
705 */
706 for_each_online_cpu(cpu)
707 plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
692out: 708out:
693 local_irq_restore(flags); 709 if (atomic_dec_return(&data->working) == 0)
694 return; 710 complete(data->complete);
695} 711}
696 712
697static int rtas_ibm_suspend_me(struct rtas_args *args) 713static int rtas_ibm_suspend_me(struct rtas_args *args)
698{ 714{
699 int i;
700 long state; 715 long state;
701 long rc; 716 long rc;
702 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 717 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
703 struct rtas_suspend_me_data data; 718 struct rtas_suspend_me_data data;
719 DECLARE_COMPLETION_ONSTACK(done);
720
721 if (!rtas_service_present("ibm,suspend-me"))
722 return -ENOSYS;
704 723
705 /* Make sure the state is valid */ 724 /* Make sure the state is valid */
706 rc = plpar_hcall(H_VASI_STATE, retbuf, 725 rc = plpar_hcall(H_VASI_STATE, retbuf,
@@ -721,25 +740,23 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)
721 return 0; 740 return 0;
722 } 741 }
723 742
724 data.waiting = 1; 743 atomic_set(&data.working, 0);
725 data.args = args; 744 data.token = rtas_token("ibm,suspend-me");
745 data.error = 0;
746 data.complete = &done;
726 747
727 /* Call function on all CPUs. One of us will make the 748 /* Call function on all CPUs. One of us will make the
728 * rtas call 749 * rtas call
729 */ 750 */
730 if (on_each_cpu(rtas_percpu_suspend_me, &data, 1, 0)) 751 if (on_each_cpu(rtas_percpu_suspend_me, &data, 1, 0))
731 data.waiting = -EINVAL; 752 data.error = -EINVAL;
732 753
733 if (data.waiting != 0) 754 wait_for_completion(&done);
734 printk(KERN_ERR "Error doing global join\n");
735 755
736 /* Prod each CPU. This won't hurt, and will wake 756 if (data.error != 0)
737 * anyone we successfully put to sleep with H_JOIN. 757 printk(KERN_ERR "Error doing global join\n");
738 */
739 for_each_possible_cpu(i)
740 plpar_hcall_norets(H_PROD, i);
741 758
742 return data.waiting; 759 return data.error;
743} 760}
744#else /* CONFIG_PPC_PSERIES */ 761#else /* CONFIG_PPC_PSERIES */
745static int rtas_ibm_suspend_me(struct rtas_args *args) 762static int rtas_ibm_suspend_me(struct rtas_args *args)