diff options
author | Eric Dumazet <edumazet@google.com> | 2012-09-04 21:02:56 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-09-05 17:49:27 -0400 |
commit | 23d3b8bfb8eb20e7d96afa09991e6a5ed1c83164 (patch) | |
tree | 6b5b42f6c925e77a4fa0bd8dbd29683a23edc3e9 | |
parent | 7933aa5c75b880aad6fc26f2df1e3ee6c14f166d (diff) |
net: qdisc busylock needs lockdep annotations
It seems we need to provide ability for stacked devices
to use specific lock_class_key for sch->busylock
We could instead default l2tpeth tx_queue_len to 0 (no qdisc), but
a user might use a qdisc anyway.
(So same fixes are probably needed on non LLTX stacked drivers)
Noticed while stressing L2TPV3 setup :
======================================================
[ INFO: possible circular locking dependency detected ]
3.6.0-rc3+ #788 Not tainted
-------------------------------------------------------
netperf/4660 is trying to acquire lock:
(l2tpsock){+.-...}, at: [<ffffffffa0208db2>] l2tp_xmit_skb+0x172/0xa50 [l2tp_core]
but task is already holding lock:
(&(&sch->busylock)->rlock){+.-...}, at: [<ffffffff81596595>] dev_queue_xmit+0xd75/0xe00
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&(&sch->busylock)->rlock){+.-...}:
[<ffffffff810a5df0>] lock_acquire+0x90/0x200
[<ffffffff817499fc>] _raw_spin_lock_irqsave+0x4c/0x60
[<ffffffff81074872>] __wake_up+0x32/0x70
[<ffffffff8136d39e>] tty_wakeup+0x3e/0x80
[<ffffffff81378fb3>] pty_write+0x73/0x80
[<ffffffff8136cb4c>] tty_put_char+0x3c/0x40
[<ffffffff813722b2>] process_echoes+0x142/0x330
[<ffffffff813742ab>] n_tty_receive_buf+0x8fb/0x1230
[<ffffffff813777b2>] flush_to_ldisc+0x142/0x1c0
[<ffffffff81062818>] process_one_work+0x198/0x760
[<ffffffff81063236>] worker_thread+0x186/0x4b0
[<ffffffff810694d3>] kthread+0x93/0xa0
[<ffffffff81753e24>] kernel_thread_helper+0x4/0x10
-> #0 (l2tpsock){+.-...}:
[<ffffffff810a5288>] __lock_acquire+0x1628/0x1b10
[<ffffffff810a5df0>] lock_acquire+0x90/0x200
[<ffffffff817498c1>] _raw_spin_lock+0x41/0x50
[<ffffffffa0208db2>] l2tp_xmit_skb+0x172/0xa50 [l2tp_core]
[<ffffffffa021a802>] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth]
[<ffffffff815952b2>] dev_hard_start_xmit+0x502/0xa70
[<ffffffff815b63ce>] sch_direct_xmit+0xfe/0x290
[<ffffffff81595a05>] dev_queue_xmit+0x1e5/0xe00
[<ffffffff815d9d60>] ip_finish_output+0x3d0/0x890
[<ffffffff815db019>] ip_output+0x59/0xf0
[<ffffffff815da36d>] ip_local_out+0x2d/0xa0
[<ffffffff815da5a3>] ip_queue_xmit+0x1c3/0x680
[<ffffffff815f4192>] tcp_transmit_skb+0x402/0xa60
[<ffffffff815f4a94>] tcp_write_xmit+0x1f4/0xa30
[<ffffffff815f5300>] tcp_push_one+0x30/0x40
[<ffffffff815e6672>] tcp_sendmsg+0xe82/0x1040
[<ffffffff81614495>] inet_sendmsg+0x125/0x230
[<ffffffff81576cdc>] sock_sendmsg+0xdc/0xf0
[<ffffffff81579ece>] sys_sendto+0xfe/0x130
[<ffffffff81752c92>] system_call_fastpath+0x16/0x1b
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&(&sch->busylock)->rlock);
lock(l2tpsock);
lock(&(&sch->busylock)->rlock);
lock(l2tpsock);
*** DEADLOCK ***
5 locks held by netperf/4660:
#0: (sk_lock-AF_INET){+.+.+.}, at: [<ffffffff815e581c>] tcp_sendmsg+0x2c/0x1040
#1: (rcu_read_lock){.+.+..}, at: [<ffffffff815da3e0>] ip_queue_xmit+0x0/0x680
#2: (rcu_read_lock_bh){.+....}, at: [<ffffffff815d9ac5>] ip_finish_output+0x135/0x890
#3: (rcu_read_lock_bh){.+....}, at: [<ffffffff81595820>] dev_queue_xmit+0x0/0xe00
#4: (&(&sch->busylock)->rlock){+.-...}, at: [<ffffffff81596595>] dev_queue_xmit+0xd75/0xe00
stack backtrace:
Pid: 4660, comm: netperf Not tainted 3.6.0-rc3+ #788
Call Trace:
[<ffffffff8173dbf8>] print_circular_bug+0x1fb/0x20c
[<ffffffff810a5288>] __lock_acquire+0x1628/0x1b10
[<ffffffff810a334b>] ? check_usage+0x9b/0x4d0
[<ffffffff810a3f44>] ? __lock_acquire+0x2e4/0x1b10
[<ffffffff810a5df0>] lock_acquire+0x90/0x200
[<ffffffffa0208db2>] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core]
[<ffffffff817498c1>] _raw_spin_lock+0x41/0x50
[<ffffffffa0208db2>] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core]
[<ffffffffa0208db2>] l2tp_xmit_skb+0x172/0xa50 [l2tp_core]
[<ffffffffa021a802>] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth]
[<ffffffff815952b2>] dev_hard_start_xmit+0x502/0xa70
[<ffffffff81594e0e>] ? dev_hard_start_xmit+0x5e/0xa70
[<ffffffff81595961>] ? dev_queue_xmit+0x141/0xe00
[<ffffffff815b63ce>] sch_direct_xmit+0xfe/0x290
[<ffffffff81595a05>] dev_queue_xmit+0x1e5/0xe00
[<ffffffff81595820>] ? dev_hard_start_xmit+0xa70/0xa70
[<ffffffff815d9d60>] ip_finish_output+0x3d0/0x890
[<ffffffff815d9ac5>] ? ip_finish_output+0x135/0x890
[<ffffffff815db019>] ip_output+0x59/0xf0
[<ffffffff815da36d>] ip_local_out+0x2d/0xa0
[<ffffffff815da5a3>] ip_queue_xmit+0x1c3/0x680
[<ffffffff815da3e0>] ? ip_local_out+0xa0/0xa0
[<ffffffff815f4192>] tcp_transmit_skb+0x402/0xa60
[<ffffffff815fa25e>] ? tcp_md5_do_lookup+0x18e/0x1a0
[<ffffffff815f4a94>] tcp_write_xmit+0x1f4/0xa30
[<ffffffff815f5300>] tcp_push_one+0x30/0x40
[<ffffffff815e6672>] tcp_sendmsg+0xe82/0x1040
[<ffffffff81614495>] inet_sendmsg+0x125/0x230
[<ffffffff81614370>] ? inet_create+0x6b0/0x6b0
[<ffffffff8157e6e2>] ? sock_update_classid+0xc2/0x3b0
[<ffffffff8157e750>] ? sock_update_classid+0x130/0x3b0
[<ffffffff81576cdc>] sock_sendmsg+0xdc/0xf0
[<ffffffff81162579>] ? fget_light+0x3f9/0x4f0
[<ffffffff81579ece>] sys_sendto+0xfe/0x130
[<ffffffff810a69ad>] ? trace_hardirqs_on+0xd/0x10
[<ffffffff8174a0b0>] ? _raw_spin_unlock_irq+0x30/0x50
[<ffffffff810757e3>] ? finish_task_switch+0x83/0xf0
[<ffffffff810757a6>] ? finish_task_switch+0x46/0xf0
[<ffffffff81752cb7>] ? sysret_check+0x1b/0x56
[<ffffffff81752c92>] system_call_fastpath+0x16/0x1b
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netdevice.h | 2 | ||||
-rw-r--r-- | net/l2tp/l2tp_eth.c | 3 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 9 |
3 files changed, 12 insertions, 2 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ccac82e61604..ae3153c0db0a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -1322,6 +1322,8 @@ struct net_device { | |||
1322 | /* phy device may attach itself for hardware timestamping */ | 1322 | /* phy device may attach itself for hardware timestamping */ |
1323 | struct phy_device *phydev; | 1323 | struct phy_device *phydev; |
1324 | 1324 | ||
1325 | struct lock_class_key *qdisc_tx_busylock; | ||
1326 | |||
1325 | /* group the device belongs to */ | 1327 | /* group the device belongs to */ |
1326 | int group; | 1328 | int group; |
1327 | 1329 | ||
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index f9ee74deeac2..ba89997bcd2e 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c | |||
@@ -67,6 +67,7 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net) | |||
67 | return net_generic(net, l2tp_eth_net_id); | 67 | return net_generic(net, l2tp_eth_net_id); |
68 | } | 68 | } |
69 | 69 | ||
70 | static struct lock_class_key l2tp_eth_tx_busylock; | ||
70 | static int l2tp_eth_dev_init(struct net_device *dev) | 71 | static int l2tp_eth_dev_init(struct net_device *dev) |
71 | { | 72 | { |
72 | struct l2tp_eth *priv = netdev_priv(dev); | 73 | struct l2tp_eth *priv = netdev_priv(dev); |
@@ -74,7 +75,7 @@ static int l2tp_eth_dev_init(struct net_device *dev) | |||
74 | priv->dev = dev; | 75 | priv->dev = dev; |
75 | eth_hw_addr_random(dev); | 76 | eth_hw_addr_random(dev); |
76 | memset(&dev->broadcast[0], 0xff, 6); | 77 | memset(&dev->broadcast[0], 0xff, 6); |
77 | 78 | dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock; | |
78 | return 0; | 79 | return 0; |
79 | } | 80 | } |
80 | 81 | ||
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6c4d5fe53ce8..aefc1504dc88 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -527,6 +527,8 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { | |||
527 | }; | 527 | }; |
528 | EXPORT_SYMBOL(pfifo_fast_ops); | 528 | EXPORT_SYMBOL(pfifo_fast_ops); |
529 | 529 | ||
530 | static struct lock_class_key qdisc_tx_busylock; | ||
531 | |||
530 | struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | 532 | struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
531 | struct Qdisc_ops *ops) | 533 | struct Qdisc_ops *ops) |
532 | { | 534 | { |
@@ -534,6 +536,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |||
534 | struct Qdisc *sch; | 536 | struct Qdisc *sch; |
535 | unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; | 537 | unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; |
536 | int err = -ENOBUFS; | 538 | int err = -ENOBUFS; |
539 | struct net_device *dev = dev_queue->dev; | ||
537 | 540 | ||
538 | p = kzalloc_node(size, GFP_KERNEL, | 541 | p = kzalloc_node(size, GFP_KERNEL, |
539 | netdev_queue_numa_node_read(dev_queue)); | 542 | netdev_queue_numa_node_read(dev_queue)); |
@@ -553,12 +556,16 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |||
553 | } | 556 | } |
554 | INIT_LIST_HEAD(&sch->list); | 557 | INIT_LIST_HEAD(&sch->list); |
555 | skb_queue_head_init(&sch->q); | 558 | skb_queue_head_init(&sch->q); |
559 | |||
556 | spin_lock_init(&sch->busylock); | 560 | spin_lock_init(&sch->busylock); |
561 | lockdep_set_class(&sch->busylock, | ||
562 | dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); | ||
563 | |||
557 | sch->ops = ops; | 564 | sch->ops = ops; |
558 | sch->enqueue = ops->enqueue; | 565 | sch->enqueue = ops->enqueue; |
559 | sch->dequeue = ops->dequeue; | 566 | sch->dequeue = ops->dequeue; |
560 | sch->dev_queue = dev_queue; | 567 | sch->dev_queue = dev_queue; |
561 | dev_hold(qdisc_dev(sch)); | 568 | dev_hold(dev); |
562 | atomic_set(&sch->refcnt, 1); | 569 | atomic_set(&sch->refcnt, 1); |
563 | 570 | ||
564 | return sch; | 571 | return sch; |