aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Vorontsov <avorontsov@ru.mvista.com>2009-12-30 03:23:28 -0500
committerDavid S. Miller <davem@davemloft.net>2009-12-31 01:03:41 -0500
commit541cd3ee00a4fe975b22fac6a3bc846bacef37f7 (patch)
tree238becb3dab2920caea760356c13351a2e964872
parent7f9d3577e2603ca279c3176b696eba392f21cbe2 (diff)
phylib: Fix deadlock on resume
Sometimes the kernel hangs on resume with the following trace:

ucc_geth e0102000.ucc: resume
INFO: task bash:1764 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
bash D 0fecf43c 0 1764 1763 0x00000000
Call Trace:
[cf9a7c10] [c0012868] ret_from_except+0x0/0x14 (unreliable)
--- Exception: cf9a7ce0 at __switch_to+0x4c/0x6c
    LR = 0xcf9a7cc0
[cf9a7cd0] [c0008c14] __switch_to+0x4c/0x6c (unreliable)
[cf9a7ce0] [c028bcfc] schedule+0x158/0x260
[cf9a7d10] [c028c720] __mutex_lock_slowpath+0x80/0xd8
[cf9a7d40] [c01cf388] phy_stop+0x20/0x70
[cf9a7d50] [c01d514c] ugeth_resume+0x6c/0x13c
[...]

Here is why.

On suspend:
- The PM core starts suspending devices, and ucc_geth_suspend gets called;
- ucc_geth calls phy_stop() on suspend. Note that phy_stop() is mostly asynchronous, so it doesn't block ucc_geth's suspend routine; it just sets the PHY_HALTED state and disables the PHY's interrupts;
- Suddenly the state machine gets scheduled. It grabs the phydev->lock mutex and tries to process the PHY_HALTED state, so it calls phydev->adjust_link(phydev->attached_dev). In the ucc_geth case, adjust_link() calls msleep(), which reschedules the code flow back to the PM core, which now finishes the suspend, and so we end up sleeping with the phydev->lock mutex held.

On resume:
- The PM core starts resuming devices (notice that nobody has rescheduled the state machine yet, so the mutex is still held), and the core calls ucc_geth's resume routine;
- ucc_geth_resume restarts the PHY with a phy_stop()/phy_start() sequence, and the phy_*() calls try to grab the phydev->lock mutex. Here comes the deadlock.

This patch fixes the issue by stopping the state machine on suspend and starting it again on resume.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/phy/mdio_bus.c24
1 files changed, 22 insertions, 2 deletions
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index bd4e8d72dc08..49252d390903 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -303,8 +303,18 @@ static int mdio_bus_suspend(struct device * dev, pm_message_t state)
303 struct phy_driver *phydrv = to_phy_driver(dev->driver); 303 struct phy_driver *phydrv = to_phy_driver(dev->driver);
304 struct phy_device *phydev = to_phy_device(dev); 304 struct phy_device *phydev = to_phy_device(dev);
305 305
306 /*
307 * We must stop the state machine manually, otherwise it stops out of
308 * control, possibly with the phydev->lock held. Upon resume, netdev
309 * may call phy routines that try to grab the same lock, and that may
310 * lead to a deadlock.
311 */
312 if (phydev->attached_dev)
313 phy_stop_machine(phydev);
314
306 if (!mdio_bus_phy_may_suspend(phydev)) 315 if (!mdio_bus_phy_may_suspend(phydev))
307 return 0; 316 return 0;
317
308 return phydrv->suspend(phydev); 318 return phydrv->suspend(phydev);
309} 319}
310 320
@@ -312,10 +322,20 @@ static int mdio_bus_resume(struct device * dev)
312{ 322{
313 struct phy_driver *phydrv = to_phy_driver(dev->driver); 323 struct phy_driver *phydrv = to_phy_driver(dev->driver);
314 struct phy_device *phydev = to_phy_device(dev); 324 struct phy_device *phydev = to_phy_device(dev);
325 int ret;
315 326
316 if (!mdio_bus_phy_may_suspend(phydev)) 327 if (!mdio_bus_phy_may_suspend(phydev))
317 return 0; 328 goto no_resume;
318 return phydrv->resume(phydev); 329
330 ret = phydrv->resume(phydev);
331 if (ret < 0)
332 return ret;
333
334no_resume:
335 if (phydev->attached_dev)
336 phy_start_machine(phydev, NULL);
337
338 return 0;
319} 339}
320 340
321struct bus_type mdio_bus_type = { 341struct bus_type mdio_bus_type = {