author	Miklos Szeredi <mszeredi@suse.cz>	2008-11-09 09:23:57 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-11-09 14:17:33 -0500
commit	6209344f5a3795d34b7f2c0061f49802283b6bdd (patch)
tree	5c037ddbb8caac17b0c6101c9ab86387df106d41
parent	058e3739f6b0753696db1952378de9e8d2a11735 (diff)
net: unix: fix inflight counting bug in garbage collector
Previously I assumed that the receive queues of candidates don't
change during the GC. This is only half true: nothing can be received
from the queues (see comment in unix_gc()), but buffers could be added
through the other half of the socket pair, which may still have file
descriptors referring to it.
This can result in inc_inflight_move_tail() erroneously increasing the
"inflight" counter for a unix socket for which dec_inflight() wasn't
previously called. This in turn can trigger the "BUG_ON(total_refs <
inflight_refs)" in a later garbage collection run.
Fix this by only manipulating the "inflight" counter for sockets which
are candidates themselves. Duplicating the file references in
unix_attach_fds() is also needed to prevent a socket from becoming a
candidate for GC while the skb that contains it is not yet queued.
Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
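
For background (not part of the patch): an AF_UNIX socket goes "in flight"
when its file descriptor is queued inside an SCM_RIGHTS message, and a socket
whose every file reference is in flight becomes a GC candidate. The userspace
sketch below shows the simplest cycle the collector exists to reclaim;
send_fd() is an illustrative helper name for the standard sendmsg()/cmsg
sequence.

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

/* Illustrative helper: pass "fd" over the unix socket "sock" via SCM_RIGHTS. */
static int send_fd(int sock, int fd)
{
	char data = 'x';
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	union {				/* aligned cmsg buffer */
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int))];
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	return sendmsg(sock, &msg, 0) == 1 ? 0 : -1;
}

int main(void)
{
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) == -1)
		return 1;

	/* The message sent on sv[0] lands in sv[1]'s receive queue and
	 * carries a reference to sv[1]'s own file.  After both close()
	 * calls the only reference to sv[1] is the in-flight one in its
	 * own queue: unreachable garbage that only unix_gc() can free. */
	send_fd(sv[0], sv[1]);
	close(sv[0]);
	close(sv[1]);
	return 0;
}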
 include/net/af_unix.h |  1 +
 net/unix/af_unix.c    | 31 +++++++++++++++++----------
 net/unix/garbage.c    | 49 ++++++++++++++++++++++++-------------
 3 files changed, 62 insertions(+), 19 deletions(-)
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 7dd29b7e461d..c29ff1da8a18 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -54,6 +54,7 @@ struct unix_sock {
 	atomic_long_t		inflight;
 	spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
+	unsigned int		gc_maybe_cycle : 1;
 	wait_queue_head_t	peer_wait;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4d3c6071b9a4..eb90f77bb0e2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
+
+	/*
+	 * Need to duplicate file references for the sake of garbage
+	 * collection. Otherwise a socket in the fps might become a
+	 * candidate for GC while the skb is not yet queued.
+	 */
+	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!UNIXCB(skb).fp)
+		return -ENOMEM;
+
 	for (i=scm->fp->count-1; i>=0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	UNIXCB(skb).fp = scm->fp;
 	skb->destructor = unix_destruct_fds;
-	scm->fp = NULL;
+	return 0;
 }
 
 /*
@@ -1368,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out;
 
 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp)
-		unix_attach_fds(siocb->scm, skb);
+	if (siocb->scm->fp) {
+		err = unix_attach_fds(siocb->scm, skb);
+		if (err)
+			goto out_free;
+	}
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1538,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		size = min_t(int, size, skb_tailroom(skb));
 
 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-		if (siocb->scm->fp)
-			unix_attach_fds(siocb->scm, skb);
+		if (siocb->scm->fp) {
+			err = unix_attach_fds(siocb->scm, skb);
+			if (err) {
+				kfree_skb(skb);
+				goto out_err;
+			}
+		}
 
 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
 			kfree_skb(skb);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2a27b84f740b..6d4a9a8de5ef 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 			 */
 			struct sock *sk = unix_get_socket(*fp++);
 			if (sk) {
-				hit = true;
-				func(unix_sk(sk));
+				struct unix_sock *u = unix_sk(sk);
+
+				/*
+				 * Ignore non-candidates, they could
+				 * have been added to the queues after
+				 * starting the garbage collection
+				 */
+				if (u->gc_candidate) {
+					hit = true;
+					func(u);
+				}
 			}
 		}
 		if (hit && hitlist != NULL) {
@@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 {
 	atomic_long_inc(&u->inflight);
 	/*
-	 * If this is still a candidate, move it to the end of the
-	 * list, so that it's checked even if it was already passed
-	 * over
+	 * If this still might be part of a cycle, move it to the end
+	 * of the list, so that it's checked even if it was already
+	 * passed over
 	 */
-	if (u->gc_candidate)
+	if (u->gc_maybe_cycle)
 		list_move_tail(&u->link, &gc_candidates);
 }
 
@@ -267,6 +276,7 @@ void unix_gc(void)
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
 	struct list_head cursor;
+	LIST_HEAD(not_cycle_list);
 
 	spin_lock(&unix_gc_lock);
 
@@ -282,10 +292,14 @@ void unix_gc(void)
 	 *
 	 * Holding unix_gc_lock will protect these candidates from
 	 * being detached, and hence from gaining an external
-	 * reference.  This also means, that since there are no
-	 * possible receivers, the receive queues of these sockets are
-	 * static during the GC, even though the dequeue is done
-	 * before the detach without atomicity guarantees.
+	 * reference.  Since there are no possible receivers, all
+	 * buffers currently on the candidates' queues stay there
+	 * during the garbage collection.
+	 *
+	 * We also know that no new candidate can be added onto the
+	 * receive queues.  Other, non candidate sockets _can_ be
+	 * added to queue, so we must make sure only to touch
+	 * candidates.
 	 */
 	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 		long total_refs;
@@ -299,6 +313,7 @@ void unix_gc(void)
 		if (total_refs == inflight_refs) {
 			list_move_tail(&u->link, &gc_candidates);
 			u->gc_candidate = 1;
+			u->gc_maybe_cycle = 1;
 		}
 	}
 
@@ -325,14 +340,24 @@ void unix_gc(void)
 		list_move(&cursor, &u->link);
 
 		if (atomic_long_read(&u->inflight) > 0) {
-			list_move_tail(&u->link, &gc_inflight_list);
-			u->gc_candidate = 0;
+			list_move_tail(&u->link, &not_cycle_list);
+			u->gc_maybe_cycle = 0;
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
 	}
 	list_del(&cursor);
 
 	/*
+	 * not_cycle_list contains those sockets which do not make up a
+	 * cycle.  Restore these to the inflight list.
+	 */
+	while (!list_empty(&not_cycle_list)) {
+		u = list_entry(not_cycle_list.next, struct unix_sock, link);
+		u->gc_candidate = 0;
+		list_move_tail(&u->link, &gc_inflight_list);
+	}
+
+	/*
 	 * Now gc_candidates contains only garbage.  Restore original
 	 * inflight counters for these as well, and remove the skbuffs
 	 * which are creating the cycle(s).
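
As a companion sketch (illustrative only, not a reliable reproducer of the
BUG_ON): the scenario the commit message describes needs a candidate whose
peer is still user-held. Below, sv[0]'s only reference goes in flight, making
it a candidate, while the still-open sv[1] keeps queueing buffers carrying
fresh descriptor references onto sv[0]'s receive queue, possibly while
unix_gc() is walking the candidate list. The old collector applied
inc_inflight_move_tail() to sockets found in such late-queued buffers without
a matching dec_inflight(); the gc_candidate check added in scan_inflight()
now skips them.

#include <sys/socket.h>
#include <unistd.h>

/* send_fd() as defined in the previous sketch (drop its "static" there,
 * or paste the definition here, to build this file). */
extern int send_fd(int sock, int fd);

int main(void)
{
	int sv[2], carrier[2], i;

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) == -1 ||
	    socketpair(AF_UNIX, SOCK_DGRAM, 0, carrier) == -1)
		return 1;

	/* Put sv[0]'s only file reference in flight: after close(sv[0])
	 * every reference to it lives in carrier[1]'s receive queue, so
	 * sv[0] is a GC candidate. */
	send_fd(carrier[0], sv[0]);
	close(sv[0]);

	/* sv[1], "the other half of the socket pair", is still open, so
	 * new buffers with descriptor references keep landing on the
	 * candidate sv[0]'s receive queue while the GC may be running. */
	for (i = 0; i < 100; i++)
		send_fd(sv[1], carrier[0]);

	close(sv[1]);
	close(carrier[0]);
	close(carrier[1]);
	return 0;
}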