aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Campbell <ian.campbell@citrix.com>2009-12-17 08:57:09 -0500
committerIan Campbell <ian.campbell@citrix.com>2010-01-13 05:01:35 -0500
commitc5cae661d6cf808b6984762f763261adf35f3eb7 (patch)
tree0f19bd47b97b13421da7c0777ae5b1a87478e25c
parent7284ce6c9f6153d1777df5f310c959724d1bd446 (diff)
xen: fix hang on suspend.
In 65f63384 "xen: improve error handling in do_suspend" I said: - xs_suspend()/xs_resume() and dpm_suspend_noirq()/dpm_resume_noirq() were not nested in the obvious way. and changed the ordering of the calls as so: BEFORE AFTER xs_suspend dpm_suspend_noirq dpm_suspend_noirq xs_suspend *SUSPEND* *SUSPEND* dpm_resume_noirq dpm_resume_noirq xs_resume xs_resume Clearly this is not an improvement and I was talking rubbish. In particular the new ordering is susceptible to a hang if a xenstore write is in progress at the point at which the suspend kicks in. When the suspend process calls xs_suspend it tries to take the request_mutex but if a write is in progress it could be looping in xenbus_xs.c:read_reply() waiting for something to arrive on &xs_state.reply_list while holding the request_mutex (taken in the caller of read_reply). However if we have done dpm_suspend_noirq before xs_suspend then we won't get any more xenstore interrupts and process_msg() will never be woken up to add anything to the reply_list. Fix this by calling xs_suspend before dpm_suspend_noirq. If dpm_suspend_noirq fails then make sure we go through the xs_suspend_cancel() code path. Signed-off-by: Ian Campbell <ian.campbell@citrix.com> Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: Stable Kernel <stable@kernel.org>
-rw-r--r--drivers/xen/manage.c8
1 files changed, 4 insertions, 4 deletions
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index c4997930afc7..5d42d55e299b 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -102,15 +102,15 @@ static void do_suspend(void)
102 goto out_thaw; 102 goto out_thaw;
103 } 103 }
104 104
105 printk(KERN_DEBUG "suspending xenstore...\n");
106 xs_suspend();
107
105 err = dpm_suspend_noirq(PMSG_SUSPEND); 108 err = dpm_suspend_noirq(PMSG_SUSPEND);
106 if (err) { 109 if (err) {
107 printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err); 110 printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
108 goto out_resume; 111 goto out_resume;
109 } 112 }
110 113
111 printk(KERN_DEBUG "suspending xenstore...\n");
112 xs_suspend();
113
114 err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); 114 err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
115 115
116 dpm_resume_noirq(PMSG_RESUME); 116 dpm_resume_noirq(PMSG_RESUME);
@@ -120,13 +120,13 @@ static void do_suspend(void)
120 cancelled = 1; 120 cancelled = 1;
121 } 121 }
122 122
123out_resume:
123 if (!cancelled) { 124 if (!cancelled) {
124 xen_arch_resume(); 125 xen_arch_resume();
125 xs_resume(); 126 xs_resume();
126 } else 127 } else
127 xs_suspend_cancel(); 128 xs_suspend_cancel();
128 129
129out_resume:
130 dpm_resume_end(PMSG_RESUME); 130 dpm_resume_end(PMSG_RESUME);
131 131
132 /* Make sure timer events get retriggered on all CPUs */ 132 /* Make sure timer events get retriggered on all CPUs */