diff options
author | Johan Hedberg <johan.hedberg@intel.com> | 2015-10-21 08:21:31 -0400 |
---|---|---|
committer | Marcel Holtmann <marcel@holtmann.org> | 2015-10-21 08:25:34 -0400 |
commit | 8ce783dc5ea3af3a213ac9b4d9d2ccfeeb9c9058 (patch) | |
tree | e09ec934362e059ce20e607b367ce3c8c44e5de9 | |
parent | 213445b2b40e87e819c7d949ae7d97c30dcd0853 (diff) |
Bluetooth: Fix missing hdev locking for LE scan cleanup
The hci_conn objects don't have a dedicated lock themselves but rely
on the caller to hold the hci_dev lock for most types of access. The
hci_conn_timeout() function has so far sent certain HCI commands based
on the hci_conn state which has been possible without holding the
hci_dev lock.
The recent changes to do LE scanning before connect attempts added
even more operations to hci_conn and hci_dev from hci_conn_timeout,
thereby exposing potential race conditions with the hci_dev and
hci_conn states.
As an example of such a race, in the trace below a timeout fires but an
l2cap_sock_connect() call manages to race with the cleanup routine:
[Oct21 08:14] l2cap_chan_timeout: chan ee4b12c0 state BT_CONNECT
[ +0.000004] l2cap_chan_close: chan ee4b12c0 state BT_CONNECT
[ +0.000002] l2cap_chan_del: chan ee4b12c0, conn f3141580, err 111, state BT_CONNECT
[ +0.000002] l2cap_sock_teardown_cb: chan ee4b12c0 state BT_CONNECT
[ +0.000005] l2cap_chan_put: chan ee4b12c0 orig refcnt 4
[ +0.000010] hci_conn_drop: hcon f53d56e0 orig refcnt 1
[ +0.000013] l2cap_chan_put: chan ee4b12c0 orig refcnt 3
[ +0.000063] hci_conn_timeout: hcon f53d56e0 state BT_CONNECT
[ +0.000049] hci_conn_params_del: addr ee:0d:30:09:53:1f (type 1)
[ +0.000002] hci_chan_list_flush: hcon f53d56e0
[ +0.000001] hci_chan_del: hci0 hcon f53d56e0 chan f4e7ccc0
[ +0.004528] l2cap_sock_create: sock e708fc00
[ +0.000023] l2cap_chan_create: chan ee4b1770
[ +0.000001] l2cap_chan_hold: chan ee4b1770 orig refcnt 1
[ +0.000002] l2cap_sock_init: sk ee4b3390
[ +0.000029] l2cap_sock_bind: sk ee4b3390
[ +0.000010] l2cap_sock_setsockopt: sk ee4b3390
[ +0.000037] l2cap_sock_connect: sk ee4b3390
[ +0.000002] l2cap_chan_connect: 00:02:72:d9:e5:8b -> ee:0d:30:09:53:1f (type 2) psm 0x00
[ +0.000002] hci_get_route: 00:02:72:d9:e5:8b -> ee:0d:30:09:53:1f
[ +0.000001] hci_dev_hold: hci0 orig refcnt 8
[ +0.000003] hci_conn_hold: hcon f53d56e0 orig refcnt 0
In the trace above, l2cap_chan_connect() should no longer have been able
to reach the hci_conn f53d56e0, but because hci_conn_timeout() didn't do
proper locking it still could. The end result is a reference to an
hci_conn that's not in the conn_hash list, resulting in list corruption
when trying to remove it later:
[Oct21 08:15] l2cap_chan_timeout: chan ee4b1770 state BT_CONNECT
[ +0.000004] l2cap_chan_close: chan ee4b1770 state BT_CONNECT
[ +0.000003] l2cap_chan_del: chan ee4b1770, conn f3141580, err 111, state BT_CONNECT
[ +0.000001] l2cap_sock_teardown_cb: chan ee4b1770 state BT_CONNECT
[ +0.000005] l2cap_chan_put: chan ee4b1770 orig refcnt 4
[ +0.000002] hci_conn_drop: hcon f53d56e0 orig refcnt 1
[ +0.000015] l2cap_chan_put: chan ee4b1770 orig refcnt 3
[ +0.000038] hci_conn_timeout: hcon f53d56e0 state BT_CONNECT
[ +0.000003] hci_chan_list_flush: hcon f53d56e0
[ +0.000002] hci_conn_hash_del: hci0 hcon f53d56e0
[ +0.000001] ------------[ cut here ]------------
[ +0.000461] WARNING: CPU: 0 PID: 1782 at lib/list_debug.c:56 __list_del_entry+0x3f/0x71()
[ +0.000839] list_del corruption, f53d56e0->prev is LIST_POISON2 (00000200)
The necessary fix is unfortunately more complicated than just adding
hci_dev_lock/unlock calls to the hci_conn_timeout() call path.
In particular, the hci_conn_del() API, which expects the hci_dev lock to
be held, performs a cancel_delayed_work_sync(&hcon->disc_work), which
would lead to a deadlock if the hci_conn_timeout() call path tried to
acquire the same lock.
This patch solves the problem by deferring the cleanup work to a
separate work callback. To protect against the hci_dev or hci_conn
going away in the meantime, temporary references are taken with the help
of hci_dev_hold() and hci_conn_get().
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Cc: stable@vger.kernel.org # 4.3
-rw-r--r-- | include/net/bluetooth/hci_core.h | 1 | ||||
-rw-r--r-- | net/bluetooth/hci_conn.c | 52 |
2 files changed, 44 insertions, 9 deletions
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 44fb95685611..0015d087d8b1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h | |||
@@ -471,6 +471,7 @@ struct hci_conn { | |||
471 | struct delayed_work auto_accept_work; | 471 | struct delayed_work auto_accept_work; |
472 | struct delayed_work idle_work; | 472 | struct delayed_work idle_work; |
473 | struct delayed_work le_conn_timeout; | 473 | struct delayed_work le_conn_timeout; |
474 | struct work_struct le_scan_cleanup; | ||
474 | 475 | ||
475 | struct device dev; | 476 | struct device dev; |
476 | struct dentry *debugfs; | 477 | struct dentry *debugfs; |
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2dda439c8cb8..ec4836f243bc 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c | |||
@@ -137,18 +137,51 @@ static void hci_conn_cleanup(struct hci_conn *conn) | |||
137 | hci_conn_put(conn); | 137 | hci_conn_put(conn); |
138 | } | 138 | } |
139 | 139 | ||
140 | /* This function requires the caller holds hdev->lock */ | 140 | static void le_scan_cleanup(struct work_struct *work) |
141 | static void hci_connect_le_scan_remove(struct hci_conn *conn) | ||
142 | { | 141 | { |
143 | hci_connect_le_scan_cleanup(conn); | 142 | struct hci_conn *conn = container_of(work, struct hci_conn, |
143 | le_scan_cleanup); | ||
144 | struct hci_dev *hdev = conn->hdev; | ||
145 | struct hci_conn *c = NULL; | ||
144 | 146 | ||
145 | /* We can't call hci_conn_del here since that would deadlock | 147 | BT_DBG("%s hcon %p", hdev->name, conn); |
146 | * with trying to call cancel_delayed_work_sync(&conn->disc_work). | 148 | |
147 | * Instead, call just hci_conn_cleanup() which contains the bare | 149 | hci_dev_lock(hdev); |
148 | * minimum cleanup operations needed for a connection in this | 150 | |
149 | * state. | 151 | /* Check that the hci_conn is still around */ |
152 | rcu_read_lock(); | ||
153 | list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) { | ||
154 | if (c == conn) | ||
155 | break; | ||
156 | } | ||
157 | rcu_read_unlock(); | ||
158 | |||
159 | if (c == conn) { | ||
160 | hci_connect_le_scan_cleanup(conn); | ||
161 | hci_conn_cleanup(conn); | ||
162 | } | ||
163 | |||
164 | hci_dev_unlock(hdev); | ||
165 | hci_dev_put(hdev); | ||
166 | hci_conn_put(conn); | ||
167 | } | ||
168 | |||
169 | static void hci_connect_le_scan_remove(struct hci_conn *conn) | ||
170 | { | ||
171 | BT_DBG("%s hcon %p", conn->hdev->name, conn); | ||
172 | |||
173 | /* We can't call hci_conn_del/hci_conn_cleanup here since that | ||
174 | * could deadlock with another hci_conn_del() call that's holding | ||
175 | * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work). | ||
176 | * Instead, grab temporary extra references to the hci_dev and | ||
177 | * hci_conn and perform the necessary cleanup in a separate work | ||
178 | * callback. | ||
150 | */ | 179 | */ |
151 | hci_conn_cleanup(conn); | 180 | |
181 | hci_dev_hold(conn->hdev); | ||
182 | hci_conn_get(conn); | ||
183 | |||
184 | schedule_work(&conn->le_scan_cleanup); | ||
152 | } | 185 | } |
153 | 186 | ||
154 | static void hci_acl_create_connection(struct hci_conn *conn) | 187 | static void hci_acl_create_connection(struct hci_conn *conn) |
@@ -580,6 +613,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, | |||
580 | INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); | 613 | INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); |
581 | INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); | 614 | INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); |
582 | INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); | 615 | INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); |
616 | INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup); | ||
583 | 617 | ||
584 | atomic_set(&conn->refcnt, 0); | 618 | atomic_set(&conn->refcnt, 0); |
585 | 619 | ||