diff options
| author | Ian Campbell <ian.campbell@citrix.com> | 2009-12-17 08:57:09 -0500 |
|---|---|---|
| committer | Ian Campbell <ian.campbell@citrix.com> | 2010-01-13 05:01:35 -0500 |
| commit | c5cae661d6cf808b6984762f763261adf35f3eb7 (patch) | |
| tree | 0f19bd47b97b13421da7c0777ae5b1a87478e25c | |
| parent | 7284ce6c9f6153d1777df5f310c959724d1bd446 (diff) | |
xen: fix hang on suspend.
In 65f63384 "xen: improve error handling in do_suspend" I said:
- xs_suspend()/xs_resume() and dpm_suspend_noirq()/dpm_resume_noirq() were not
nested in the obvious way.
and changed the ordering of the calls like so:
BEFORE                  AFTER
xs_suspend              dpm_suspend_noirq
dpm_suspend_noirq       xs_suspend
*SUSPEND*               *SUSPEND*
dpm_resume_noirq        dpm_resume_noirq
xs_resume               xs_resume
Clearly this is not an improvement and I was talking rubbish.
In particular the new ordering is susceptible to a hang if a xenstore write is
in progress at the point at which the suspend kicks in. When the suspend
process calls xs_suspend it tries to take the request_mutex, but if a write is
in progress the writer could be looping in xenbus_xs.c:read_reply() waiting for
something to arrive on &xs_state.reply_list while holding the request_mutex
(taken in the caller of read_reply).
However if we have done dpm_suspend_noirq before xs_suspend then we won't get
any more xenstore interrupts and process_msg() will never be woken up to add
anything to the reply_list.
Fix this by calling xs_suspend before dpm_suspend_noirq. If dpm_suspend_noirq
fails then make sure we go through the xs_suspend_cancel() code path.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stable Kernel <stable@kernel.org>
| -rw-r--r-- | drivers/xen/manage.c | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index c4997930afc7..5d42d55e299b 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c | |||
| @@ -102,15 +102,15 @@ static void do_suspend(void) | |||
| 102 | goto out_thaw; | 102 | goto out_thaw; |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | printk(KERN_DEBUG "suspending xenstore...\n"); | ||
| 106 | xs_suspend(); | ||
| 107 | |||
| 105 | err = dpm_suspend_noirq(PMSG_SUSPEND); | 108 | err = dpm_suspend_noirq(PMSG_SUSPEND); |
| 106 | if (err) { | 109 | if (err) { |
| 107 | printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err); | 110 | printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err); |
| 108 | goto out_resume; | 111 | goto out_resume; |
| 109 | } | 112 | } |
| 110 | 113 | ||
| 111 | printk(KERN_DEBUG "suspending xenstore...\n"); | ||
| 112 | xs_suspend(); | ||
| 113 | |||
| 114 | err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); | 114 | err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); |
| 115 | 115 | ||
| 116 | dpm_resume_noirq(PMSG_RESUME); | 116 | dpm_resume_noirq(PMSG_RESUME); |
| @@ -120,13 +120,13 @@ static void do_suspend(void) | |||
| 120 | cancelled = 1; | 120 | cancelled = 1; |
| 121 | } | 121 | } |
| 122 | 122 | ||
| 123 | out_resume: | ||
| 123 | if (!cancelled) { | 124 | if (!cancelled) { |
| 124 | xen_arch_resume(); | 125 | xen_arch_resume(); |
| 125 | xs_resume(); | 126 | xs_resume(); |
| 126 | } else | 127 | } else |
| 127 | xs_suspend_cancel(); | 128 | xs_suspend_cancel(); |
| 128 | 129 | ||
| 129 | out_resume: | ||
| 130 | dpm_resume_end(PMSG_RESUME); | 130 | dpm_resume_end(PMSG_RESUME); |
| 131 | 131 | ||
| 132 | /* Make sure timer events get retriggered on all CPUs */ | 132 | /* Make sure timer events get retriggered on all CPUs */ |
